(define-charset): New args :min-code and :max-code.
[bpt/emacs.git] / src / coding.c
CommitLineData
4ed46869 1/* Coding system handler (conversion, detection, and etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
70ad9fc4 4 Copyright (C) 2001 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcharset_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9
KH
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
df7492f9 111 A coding system for a text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9
KH
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9
KH
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
4ed46869 162{
df7492f9
KH
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
4ed46869
KH
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
df7492f9
KH
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
d46c5b12 197
df7492f9
KH
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
d46c5b12 202
df7492f9 203 Below is the template of these functions. */
d46c5b12 204
4ed46869 205#if 0
b73bfc1c 206static void
df7492f9 207decode_coding_XXXX (coding)
4ed46869 208 struct coding_system *coding;
4ed46869 209{
df7492f9
KH
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
df7492f9
KH
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
d46c5b12 252
df7492f9
KH
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 257
df7492f9
KH
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
d46c5b12 261
df7492f9 262 Below is a template of these functions. */
4ed46869 263#if 0
b73bfc1c 264static void
df7492f9 265encode_coding_XXX (coding)
4ed46869 266 struct coding_system *coding;
4ed46869 267{
df7492f9
KH
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
4ed46869
KH
285}
286#endif
287
4ed46869
KH
288\f
289/*** 1. Preamble ***/
290
68c45bf0 291#include <config.h>
4ed46869
KH
292#include <stdio.h>
293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
302
df7492f9 303Lisp_Object Vcoding_system_hash_table;
4ed46869 304
df7492f9 305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
306Lisp_Object Qunix, Qdos;
307extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
308Lisp_Object Qbuffer_file_coding_system;
309Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 310Lisp_Object Qdefault_char;
27901516 311Lisp_Object Qno_conversion, Qundecided;
df7492f9
KH
312Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
313Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
314Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 315Lisp_Object Qcoding_system_history;
1397dc18 316Lisp_Object Qvalid_codes;
4ed46869
KH
317
318extern Lisp_Object Qinsert_file_contents, Qwrite_region;
319Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
320Lisp_Object Qstart_process, Qopen_network_stream;
321Lisp_Object Qtarget_idx;
322
d46c5b12
KH
323Lisp_Object Vselect_safe_coding_system_function;
324
7722baf9
EZ
325/* Mnemonic string for each format of end-of-line. */
326Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
327/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 328 decided. */
7722baf9 329Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
330
331#ifdef emacs
332
4608c386
KH
333Lisp_Object Vcoding_system_list, Vcoding_system_alist;
334
335Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 336
d46c5b12
KH
337/* Coding system emacs-mule and raw-text are for converting only
338 end-of-line format. */
339Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 340
4ed46869
KH
341/* Coding-systems are handed between Emacs Lisp programs and C internal
342 routines by the following three variables. */
343/* Coding-system for reading files and receiving data from process. */
344Lisp_Object Vcoding_system_for_read;
345/* Coding-system for writing files and sending data to process. */
346Lisp_Object Vcoding_system_for_write;
347/* Coding-system actually used in the latest I/O. */
348Lisp_Object Vlast_coding_system_used;
349
c4825358 350/* A vector of length 256 which contains information about special
94487c4e 351 Latin codes (especially for dealing with Microsoft codes). */
3f003981 352Lisp_Object Vlatin_extra_code_table;
c4825358 353
9ce27fde
KH
354/* Flag to inhibit code conversion of end-of-line format. */
355int inhibit_eol_conversion;
356
74383408
KH
357/* Flag to inhibit ISO2022 escape sequence detection. */
358int inhibit_iso_escape_detection;
359
ed29121d
EZ
360/* Flag to make buffer-file-coding-system inherit from process-coding. */
361int inherit_process_coding_system;
362
c4825358 363/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
364struct coding_system terminal_coding;
365
c4825358
KH
366/* Coding system to be used to encode text for terminal display when
367 terminal coding system is nil. */
368struct coding_system safe_terminal_coding;
369
370/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
371struct coding_system keyboard_coding;
372
02ba4723
KH
373Lisp_Object Vfile_coding_system_alist;
374Lisp_Object Vprocess_coding_system_alist;
375Lisp_Object Vnetwork_coding_system_alist;
4ed46869 376
68c45bf0
PE
377Lisp_Object Vlocale_coding_system;
378
4ed46869
KH
379#endif /* emacs */
380
f967223b
KH
381/* Flag to tell if we look up translation table on character code
382 conversion. */
84fbb8a0 383Lisp_Object Venable_character_translation;
f967223b
KH
384/* Standard translation table to look up on decoding (reading). */
385Lisp_Object Vstandard_translation_table_for_decode;
386/* Standard translation table to look up on encoding (writing). */
387Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 388
f967223b
KH
389Lisp_Object Qtranslation_table;
390Lisp_Object Qtranslation_table_id;
391Lisp_Object Qtranslation_table_for_decode;
392Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
393
394/* Alist of charsets vs revision number. */
df7492f9 395static Lisp_Object Vcharset_revision_table;
4ed46869 396
02ba4723
KH
397/* Default coding systems used for process I/O. */
398Lisp_Object Vdefault_process_coding_system;
399
b843d1ae
KH
400/* Global flag to tell that we can't call post-read-conversion and
401 pre-write-conversion functions. Usually the value is zero, but it
402 is set to 1 temporarily while such functions are running. This is
403 to avoid infinite recursive call. */
404static int inhibit_pre_post_conversion;
405
df7492f9
KH
406/* Two special coding systems. */
407Lisp_Object Vsjis_coding_system;
408Lisp_Object Vbig5_coding_system;
409
410
411static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
412static void decode_coding_utf_8 P_ ((struct coding_system *));
413static int encode_coding_utf_8 P_ ((struct coding_system *));
414
415static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
419static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
423static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
424static void decode_coding_emacs_mule P_ ((struct coding_system *));
425static int encode_coding_emacs_mule P_ ((struct coding_system *));
426
427static int detect_coding_sjis P_ ((struct coding_system *, int *));
428static void decode_coding_sjis P_ ((struct coding_system *));
429static int encode_coding_sjis P_ ((struct coding_system *));
430
431static int detect_coding_big5 P_ ((struct coding_system *, int *));
432static void decode_coding_big5 P_ ((struct coding_system *));
433static int encode_coding_big5 P_ ((struct coding_system *));
434
435static int detect_coding_ccl P_ ((struct coding_system *, int *));
436static void decode_coding_ccl P_ ((struct coding_system *));
437static int encode_coding_ccl P_ ((struct coding_system *));
438
439static void decode_coding_raw_text P_ ((struct coding_system *));
440static int encode_coding_raw_text P_ ((struct coding_system *));
441
442
443/* ISO2022 section */
444
445#define CODING_ISO_INITIAL(coding, reg) \
446 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
447 coding_attr_iso_initial), \
448 reg)))
449
450
451#define CODING_ISO_REQUEST(coding, charset_id) \
452 ((charset_id <= (coding)->max_charset_id \
453 ? (coding)->safe_charsets[charset_id] \
454 : -1))
455
456
457#define CODING_ISO_FLAGS(coding) \
458 ((coding)->spec.iso_2022.flags)
459#define CODING_ISO_DESIGNATION(coding, reg) \
460 ((coding)->spec.iso_2022.current_designation[reg])
461#define CODING_ISO_INVOCATION(coding, plane) \
462 ((coding)->spec.iso_2022.current_invocation[plane])
463#define CODING_ISO_SINGLE_SHIFTING(coding) \
464 ((coding)->spec.iso_2022.single_shifting)
465#define CODING_ISO_BOL(coding) \
466 ((coding)->spec.iso_2022.bol)
467#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
468 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
469
470/* Control characters of ISO2022. */
471 /* code */ /* function */
472#define ISO_CODE_LF 0x0A /* line-feed */
473#define ISO_CODE_CR 0x0D /* carriage-return */
474#define ISO_CODE_SO 0x0E /* shift-out */
475#define ISO_CODE_SI 0x0F /* shift-in */
476#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
477#define ISO_CODE_ESC 0x1B /* escape */
478#define ISO_CODE_SS2 0x8E /* single-shift-2 */
479#define ISO_CODE_SS3 0x8F /* single-shift-3 */
480#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
481
482/* All code (1-byte) of ISO2022 is classified into one of the
483 followings. */
484enum iso_code_class_type
485 {
486 ISO_control_0, /* Control codes in the range
487 0x00..0x1F and 0x7F, except for the
488 following 5 codes. */
489 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
490 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
491 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
492 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
493 ISO_escape, /* ISO_CODE_SO (0x1B) */
494 ISO_control_1, /* Control codes in the range
495 0x80..0x9F, except for the
496 following 3 codes. */
497 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
498 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
499 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
500 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
501 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
502 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
503 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
504 };
05e6f5dc 505
df7492f9
KH
506/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
507 `iso-flags' attribute of an iso2022 coding system. */
93dec019 508
df7492f9
KH
509/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
510 instead of the correct short-form sequence (e.g. ESC $ A). */
511#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 512
df7492f9
KH
513/* If set, reset graphic planes and registers at end-of-line to the
514 initial state. */
515#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 516
df7492f9
KH
517/* If set, reset graphic planes and registers before any control
518 characters to the initial state. */
519#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 520
df7492f9
KH
521/* If set, encode by 7-bit environment. */
522#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 523
df7492f9
KH
524/* If set, use locking-shift function. */
525#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 526
df7492f9
KH
527/* If set, use single-shift function. Overwrite
528 CODING_ISO_FLAG_LOCKING_SHIFT. */
529#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 530
df7492f9
KH
531/* If set, use designation escape sequence. */
532#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 533
df7492f9
KH
534/* If set, produce revision number sequence. */
535#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 536
df7492f9
KH
537/* If set, produce ISO6429's direction specifying sequence. */
538#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 539
df7492f9
KH
540/* If set, assume designation states are reset at beginning of line on
541 output. */
542#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 543
df7492f9
KH
544/* If set, designation sequence should be placed at beginning of line
545 on output. */
546#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 547
df7492f9
KH
548/* If set, do not encode unsafe charactes on output. */
549#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 550
df7492f9
KH
551/* If set, extra latin codes (128..159) are accepted as a valid code
552 on input. */
553#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 554
df7492f9 555#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 556
df7492f9 557#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 558
df7492f9 559#define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
aa72b389 560
df7492f9
KH
561/* A character to be produced on output if encoding of the original
562 character is prohibited by CODING_ISO_FLAG_SAFE. */
563#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 564
aa72b389 565
df7492f9
KH
566/* UTF-16 section */
567#define CODING_UTF_16_BOM(coding) \
568 ((coding)->spec.utf_16.bom)
4ed46869 569
df7492f9
KH
570#define CODING_UTF_16_ENDIAN(coding) \
571 ((coding)->spec.utf_16.endian)
4ed46869 572
df7492f9
KH
573#define CODING_UTF_16_SURROGATE(coding) \
574 ((coding)->spec.utf_16.surrogate)
4ed46869 575
4ed46869 576
df7492f9
KH
577/* CCL section */
578#define CODING_CCL_DECODER(coding) \
579 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
580#define CODING_CCL_ENCODER(coding) \
581 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
582#define CODING_CCL_VALIDS(coding) \
583 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
584 ->data)
4ed46869 585
df7492f9 586/* Index for each coding category in `coding_category_table' */
4ed46869 587
df7492f9
KH
588enum coding_category
589 {
590 coding_category_iso_7,
591 coding_category_iso_7_tight,
592 coding_category_iso_8_1,
593 coding_category_iso_8_2,
594 coding_category_iso_7_else,
595 coding_category_iso_8_else,
596 coding_category_utf_8,
597 coding_category_utf_16_auto,
598 coding_category_utf_16_be,
599 coding_category_utf_16_le,
600 coding_category_utf_16_be_nosig,
601 coding_category_utf_16_le_nosig,
602 coding_category_charset,
603 coding_category_sjis,
604 coding_category_big5,
605 coding_category_ccl,
606 coding_category_emacs_mule,
607 /* All above are targets of code detection. */
608 coding_category_raw_text,
609 coding_category_undecided,
610 coding_category_max
611 };
612
613/* Definitions of flag bits used in detect_coding_XXXX. */
614#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
615#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
616#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
617#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
618#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
619#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
620#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
621#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
622#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
623#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
624#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
625#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
626#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
627#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
628#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
629#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
630
631/* This value is returned if detect_coding_mask () find nothing other
632 than ASCII characters. */
633#define CATEGORY_MASK_ANY \
634 (CATEGORY_MASK_ISO_7 \
635 | CATEGORY_MASK_ISO_7_TIGHT \
636 | CATEGORY_MASK_ISO_8_1 \
637 | CATEGORY_MASK_ISO_8_2 \
638 | CATEGORY_MASK_ISO_7_ELSE \
639 | CATEGORY_MASK_ISO_8_ELSE \
640 | CATEGORY_MASK_UTF_8 \
641 | CATEGORY_MASK_UTF_16_BE \
642 | CATEGORY_MASK_UTF_16_LE \
643 | CATEGORY_MASK_UTF_16_BE_NOSIG \
644 | CATEGORY_MASK_UTF_16_LE_NOSIG \
645 | CATEGORY_MASK_CHARSET \
646 | CATEGORY_MASK_SJIS \
647 | CATEGORY_MASK_BIG5 \
648 | CATEGORY_MASK_CCL \
649 | CATEGORY_MASK_EMACS_MULE)
650
651
652#define CATEGORY_MASK_ISO_7BIT \
653 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
654
655#define CATEGORY_MASK_ISO_8BIT \
656 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
657
658#define CATEGORY_MASK_ISO_ELSE \
659 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
660
661#define CATEGORY_MASK_ISO_ESCAPE \
662 (CATEGORY_MASK_ISO_7 \
663 | CATEGORY_MASK_ISO_7_TIGHT \
664 | CATEGORY_MASK_ISO_7_ELSE \
665 | CATEGORY_MASK_ISO_8_ELSE)
666
667#define CATEGORY_MASK_ISO \
668 ( CATEGORY_MASK_ISO_7BIT \
669 | CATEGORY_MASK_ISO_8BIT \
670 | CATEGORY_MASK_ISO_ELSE)
671
672#define CATEGORY_MASK_UTF_16 \
673 (CATEGORY_MASK_UTF_16_BE \
674 | CATEGORY_MASK_UTF_16_LE \
675 | CATEGORY_MASK_UTF_16_BE_NOSIG \
676 | CATEGORY_MASK_UTF_16_LE_NOSIG)
677
678
679/* List of symbols `coding-category-xxx' ordered by priority. This
680 variable is exposed to Emacs Lisp. */
681static Lisp_Object Vcoding_category_list;
682
683/* Table of coding categories (Lisp symbols). This variable is for
684 internal use oly. */
685static Lisp_Object Vcoding_category_table;
686
687/* Table of coding-categories ordered by priority. */
688static enum coding_category coding_priorities[coding_category_max];
689
690/* Nth element is a coding context for the coding system bound to the
691 Nth coding category. */
692static struct coding_system coding_categories[coding_category_max];
693
694static int detected_mask[coding_category_raw_text] =
695 { CATEGORY_MASK_ISO,
696 CATEGORY_MASK_ISO,
697 CATEGORY_MASK_ISO,
698 CATEGORY_MASK_ISO,
699 CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_UTF_8,
702 CATEGORY_MASK_UTF_16,
703 CATEGORY_MASK_UTF_16,
704 CATEGORY_MASK_UTF_16,
705 CATEGORY_MASK_UTF_16,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_CHARSET,
708 CATEGORY_MASK_SJIS,
709 CATEGORY_MASK_BIG5,
710 CATEGORY_MASK_CCL,
711 CATEGORY_MASK_EMACS_MULE
712 };
713
714/*** Commonly used macros and functions ***/
715
716#ifndef min
717#define min(a, b) ((a) < (b) ? (a) : (b))
718#endif
719#ifndef max
720#define max(a, b) ((a) > (b) ? (a) : (b))
721#endif
4ed46869 722
df7492f9
KH
723#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
724 do { \
725 attrs = CODING_ID_ATTRS (coding->id); \
726 eol_type = CODING_ID_EOL_TYPE (coding->id); \
727 if (VECTORP (eol_type)) \
728 eol_type = Qunix; \
729 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
730 } while (0)
4ed46869 731
4ed46869 732
df7492f9
KH
733/* Safely get one byte from the source text pointed by SRC which ends
734 at SRC_END, and set C to that byte. If there are not enough bytes
735 in the source, it jumps to `no_more_source'. The caller
736 should declare and set these variables appropriately in advance:
737 src, src_end, multibytep
738*/
aa72b389 739
df7492f9 740#define ONE_MORE_BYTE(c) \
aa72b389 741 do { \
df7492f9
KH
742 if (src == src_end) \
743 { \
744 if (src_base < src) \
745 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
746 goto no_more_source; \
747 } \
748 c = *src++; \
749 if (multibytep && (c & 0x80)) \
750 { \
751 if ((c & 0xFE) != 0xC0) \
752 error ("Undecodable char found"); \
753 c = ((c & 1) << 6) | *src++; \
754 } \
755 consumed_chars++; \
aa72b389
KH
756 } while (0)
757
aa72b389 758
df7492f9
KH
759#define ONE_MORE_BYTE_NO_CHECK(c) \
760 do { \
761 c = *src++; \
762 if (multibytep && (c & 0x80)) \
763 { \
764 if ((c & 0xFE) != 0xC0) \
765 error ("Undecodable char found"); \
766 c = ((c & 1) << 6) | *src++; \
767 } \
781d7a48 768 consumed_chars++; \
aa72b389
KH
769 } while (0)
770
aa72b389 771
df7492f9
KH
772/* Store a byte C in the place pointed by DST and increment DST to the
773 next free point, and increment PRODUCED_CHARS. The caller should
774 assure that C is 0..127, and declare and set the variable `dst'
775 appropriately in advance.
776*/
aa72b389
KH
777
778
df7492f9
KH
779#define EMIT_ONE_ASCII_BYTE(c) \
780 do { \
781 produced_chars++; \
782 *dst++ = (c); \
783 } while (0)
aa72b389 784
aa72b389 785
df7492f9 786/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
aa72b389 787
df7492f9
KH
788#define EMIT_TWO_ASCII_BYTES(c1, c2) \
789 do { \
790 produced_chars += 2; \
791 *dst++ = (c1), *dst++ = (c2); \
792 } while (0)
aa72b389 793
df7492f9
KH
794
795/* Store a byte C in the place pointed by DST and increment DST to the
796 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
797 nonzero, store in an appropriate multibyte from. The caller should
798 declare and set the variables `dst' and `multibytep' appropriately
799 in advance. */
800
801#define EMIT_ONE_BYTE(c) \
802 do { \
803 produced_chars++; \
804 if (multibytep) \
805 { \
806 int ch = (c); \
807 if (ch >= 0x80) \
808 ch = BYTE8_TO_CHAR (ch); \
809 CHAR_STRING_ADVANCE (ch, dst); \
810 } \
811 else \
812 *dst++ = (c); \
aa72b389
KH
813 } while (0)
814
815
df7492f9 816/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 817
e19c3639
KH
818#define EMIT_TWO_BYTES(c1, c2) \
819 do { \
820 produced_chars += 2; \
821 if (multibytep) \
822 { \
823 int ch; \
824 \
825 ch = (c1); \
826 if (ch >= 0x80) \
827 ch = BYTE8_TO_CHAR (ch); \
828 CHAR_STRING_ADVANCE (ch, dst); \
829 ch = (c2); \
830 if (ch >= 0x80) \
831 ch = BYTE8_TO_CHAR (ch); \
832 CHAR_STRING_ADVANCE (ch, dst); \
833 } \
834 else \
835 { \
836 *dst++ = (c1); \
837 *dst++ = (c2); \
838 } \
aa72b389
KH
839 } while (0)
840
841
df7492f9
KH
842#define EMIT_THREE_BYTES(c1, c2, c3) \
843 do { \
844 EMIT_ONE_BYTE (c1); \
845 EMIT_TWO_BYTES (c2, c3); \
846 } while (0)
aa72b389 847
aa72b389 848
df7492f9
KH
849#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
850 do { \
851 EMIT_TWO_BYTES (c1, c2); \
852 EMIT_TWO_BYTES (c3, c4); \
853 } while (0)
aa72b389 854
aa72b389 855
df7492f9
KH
856#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
857 do { \
858 charset_map_loaded = 0; \
859 c = DECODE_CHAR (charset, code); \
860 if (charset_map_loaded) \
861 { \
862 unsigned char *orig = coding->source; \
863 EMACS_INT offset; \
864 \
865 coding_set_source (coding); \
866 offset = coding->source - orig; \
867 src += offset; \
868 src_base += offset; \
869 src_end += offset; \
870 } \
871 } while (0)
aa72b389 872
aa72b389 873
df7492f9
KH
874#define ASSURE_DESTINATION(bytes) \
875 do { \
876 if (dst + (bytes) >= dst_end) \
877 { \
878 int more_bytes = charbuf_end - charbuf + (bytes); \
879 \
880 dst = alloc_destination (coding, more_bytes, dst); \
881 dst_end = coding->destination + coding->dst_bytes; \
882 } \
883 } while (0)
b1887814 884
df7492f9
KH
885
886
887static void
888coding_set_source (coding)
889 struct coding_system *coding;
890{
891 if (BUFFERP (coding->src_object))
892 {
893 if (coding->src_pos < 0)
894 coding->source = GAP_END_ADDR + coding->src_pos_byte;
895 else
896 {
e19c3639 897 struct buffer *buf = XBUFFER (coding->src_object);
e19c3639
KH
898 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
899 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
900
901 coding->source = beg_addr + coding->src_pos_byte - 1;
902 if (coding->src_pos_byte >= gpt_byte)
903 coding->source += BUF_GAP_SIZE (buf);
aa72b389
KH
904 }
905 }
df7492f9 906 else if (STRINGP (coding->src_object))
aa72b389 907 {
df7492f9
KH
908 coding->source = (XSTRING (coding->src_object)->data
909 + coding->src_pos_byte);
910 }
911 else
912 /* Otherwise, the source is C string and is never relocated
913 automatically. Thus we don't have to update anything. */
914 ;
915}
916
917static void
918coding_set_destination (coding)
919 struct coding_system *coding;
920{
921 if (BUFFERP (coding->dst_object))
922 {
923 /* We are sure that coding->dst_pos_byte is before the gap of the
924 buffer. */
925 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
926 + coding->dst_pos_byte - 1);
927 if (coding->src_pos < 0)
df7492f9
KH
928 coding->dst_bytes = (GAP_END_ADDR
929 - (coding->src_bytes - coding->consumed)
930 - coding->destination);
931 else
932 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
933 - coding->destination);
934 }
935 else
936 /* Otherwise, the destination is C string and is never relocated
937 automatically. Thus we don't have to update anything. */
938 ;
939}
940
941
942static void
943coding_alloc_by_realloc (coding, bytes)
944 struct coding_system *coding;
945 EMACS_INT bytes;
946{
947 coding->destination = (unsigned char *) xrealloc (coding->destination,
948 coding->dst_bytes + bytes);
949 coding->dst_bytes += bytes;
950}
951
952static void
953coding_alloc_by_making_gap (coding, bytes)
954 struct coding_system *coding;
955 EMACS_INT bytes;
956{
2c78b7e1
KH
957 if (BUFFERP (coding->dst_object)
958 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
959 {
960 EMACS_INT add = coding->src_bytes - coding->consumed;
961
962 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
963 make_gap (bytes);
964 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
965 }
966 else
967 {
2c78b7e1
KH
968 Lisp_Object this_buffer;
969
970 this_buffer = Fcurrent_buffer ();
df7492f9
KH
971 set_buffer_internal (XBUFFER (coding->dst_object));
972 make_gap (bytes);
973 set_buffer_internal (XBUFFER (this_buffer));
974 }
975}
976
977
978static unsigned char *
979alloc_destination (coding, nbytes, dst)
980 struct coding_system *coding;
981 int nbytes;
982 unsigned char *dst;
983{
984 EMACS_INT offset = dst - coding->destination;
985
986 if (BUFFERP (coding->dst_object))
987 coding_alloc_by_making_gap (coding, nbytes);
988 else
989 coding_alloc_by_realloc (coding, nbytes);
990 coding->result = CODING_RESULT_SUCCESS;
991 coding_set_destination (coding);
992 dst = coding->destination + offset;
993 return dst;
994}
aa72b389 995
df7492f9
KH
996\f
997/*** 2. Emacs' internal format (emacs-utf-8) ***/
998
999
1000
1001\f
1002/*** 3. UTF-8 ***/
1003
1004/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1005 Check if a text is encoded in UTF-8. If it is, return
1006 CATEGORY_MASK_UTF_8, else return 0. */
1007
1008#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1009#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1010#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1011#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1012#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1013#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1014
1015static int
1016detect_coding_utf_8 (coding, mask)
1017 struct coding_system *coding;
1018 int *mask;
1019{
1020 unsigned char *src = coding->source, *src_base = src;
1021 unsigned char *src_end = coding->source + coding->src_bytes;
1022 int multibytep = coding->src_multibyte;
1023 int consumed_chars = 0;
1024 int found = 0;
1025
1026 /* A coding system of this category is always ASCII compatible. */
1027 src += coding->head_ascii;
1028
1029 while (1)
1030 {
1031 int c, c1, c2, c3, c4;
1032
1033 ONE_MORE_BYTE (c);
1034 if (UTF_8_1_OCTET_P (c))
1035 continue;
1036 ONE_MORE_BYTE (c1);
1037 if (! UTF_8_EXTRA_OCTET_P (c1))
1038 break;
1039 if (UTF_8_2_OCTET_LEADING_P (c))
1040 {
1041 found++;
1042 continue;
1043 }
1044 ONE_MORE_BYTE (c2);
1045 if (! UTF_8_EXTRA_OCTET_P (c2))
1046 break;
1047 if (UTF_8_3_OCTET_LEADING_P (c))
1048 {
1049 found++;
1050 continue;
1051 }
1052 ONE_MORE_BYTE (c3);
1053 if (! UTF_8_EXTRA_OCTET_P (c3))
1054 break;
1055 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1056 {
df7492f9
KH
1057 found++;
1058 continue;
1059 }
1060 ONE_MORE_BYTE (c4);
1061 if (! UTF_8_EXTRA_OCTET_P (c4))
1062 break;
1063 if (UTF_8_5_OCTET_LEADING_P (c))
1064 {
1065 found++;
1066 continue;
1067 }
1068 break;
1069 }
1070 *mask &= ~CATEGORY_MASK_UTF_8;
1071 return 0;
1072
1073 no_more_source:
1074 if (! found)
1075 return 0;
1076 *mask &= CATEGORY_MASK_UTF_8;
1077 return 1;
1078}
1079
1080
b0edb2c5 1081/* Fixme: deal with surrogates? */
df7492f9
KH
1082static void
1083decode_coding_utf_8 (coding)
1084 struct coding_system *coding;
1085{
1086 unsigned char *src = coding->source + coding->consumed;
1087 unsigned char *src_end = coding->source + coding->src_bytes;
1088 unsigned char *src_base;
1089 int *charbuf = coding->charbuf;
1090 int *charbuf_end = charbuf + coding->charbuf_size;
1091 int consumed_chars = 0, consumed_chars_base;
1092 int multibytep = coding->src_multibyte;
1093 Lisp_Object attr, eol_type, charset_list;
1094
1095 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1096
1097 while (1)
1098 {
1099 int c, c1, c2, c3, c4, c5;
1100
1101 src_base = src;
1102 consumed_chars_base = consumed_chars;
1103
1104 if (charbuf >= charbuf_end)
1105 break;
1106
1107 ONE_MORE_BYTE (c1);
1108 if (UTF_8_1_OCTET_P(c1))
1109 {
1110 c = c1;
1111 if (c == '\r')
aa72b389 1112 {
df7492f9
KH
1113 if (EQ (eol_type, Qdos))
1114 {
1115 if (src == src_end)
1116 goto no_more_source;
1117 if (*src == '\n')
1118 ONE_MORE_BYTE (c);
1119 }
1120 else if (EQ (eol_type, Qmac))
1121 c = '\n';
aa72b389 1122 }
aa72b389 1123 }
df7492f9 1124 else
aa72b389 1125 {
df7492f9
KH
1126 ONE_MORE_BYTE (c2);
1127 if (! UTF_8_EXTRA_OCTET_P (c2))
1128 goto invalid_code;
1129 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1130 {
1131 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1132 /* Reject overlong sequences here and below. Encoders
1133 producing them are incorrect, they can be misleading,
1134 and they mess up read/write invariance. */
1135 if (c < 128)
1136 goto invalid_code;
1137 }
df7492f9 1138 else
aa72b389 1139 {
df7492f9
KH
1140 ONE_MORE_BYTE (c3);
1141 if (! UTF_8_EXTRA_OCTET_P (c3))
1142 goto invalid_code;
1143 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1144 {
1145 c = (((c1 & 0xF) << 12)
1146 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1147 if (c < 0x800)
1148 goto invalid_code;
1149 }
df7492f9
KH
1150 else
1151 {
1152 ONE_MORE_BYTE (c4);
1153 if (! UTF_8_EXTRA_OCTET_P (c4))
1154 goto invalid_code;
1155 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1156 {
df7492f9
KH
1157 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1158 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1159 if (c < 0x10000)
1160 goto invalid_code;
1161 }
df7492f9
KH
1162 else
1163 {
1164 ONE_MORE_BYTE (c5);
1165 if (! UTF_8_EXTRA_OCTET_P (c5))
1166 goto invalid_code;
1167 if (UTF_8_5_OCTET_LEADING_P (c1))
1168 {
1169 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1170 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1171 | (c5 & 0x3F));
b0edb2c5 1172 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1173 goto invalid_code;
1174 }
1175 else
1176 goto invalid_code;
1177 }
1178 }
aa72b389 1179 }
aa72b389 1180 }
df7492f9
KH
1181
1182 *charbuf++ = c;
1183 continue;
1184
1185 invalid_code:
1186 src = src_base;
1187 consumed_chars = consumed_chars_base;
1188 ONE_MORE_BYTE (c);
1189 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1190 coding->errors++;
aa72b389
KH
1191 }
1192
df7492f9
KH
1193 no_more_source:
1194 coding->consumed_char += consumed_chars_base;
1195 coding->consumed = src_base - coding->source;
1196 coding->charbuf_used = charbuf - coding->charbuf;
1197}
1198
1199
1200static int
1201encode_coding_utf_8 (coding)
1202 struct coding_system *coding;
1203{
1204 int multibytep = coding->dst_multibyte;
1205 int *charbuf = coding->charbuf;
1206 int *charbuf_end = charbuf + coding->charbuf_used;
1207 unsigned char *dst = coding->destination + coding->produced;
1208 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1209 int produced_chars = 0;
df7492f9
KH
1210 int c;
1211
1212 if (multibytep)
aa72b389 1213 {
df7492f9
KH
1214 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1215
1216 while (charbuf < charbuf_end)
aa72b389 1217 {
df7492f9
KH
1218 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1219
1220 ASSURE_DESTINATION (safe_room);
1221 c = *charbuf++;
1222 CHAR_STRING_ADVANCE (c, pend);
1223 for (p = str; p < pend; p++)
1224 EMIT_ONE_BYTE (*p);
aa72b389 1225 }
aa72b389 1226 }
df7492f9
KH
1227 else
1228 {
1229 int safe_room = MAX_MULTIBYTE_LENGTH;
1230
1231 while (charbuf < charbuf_end)
1232 {
1233 ASSURE_DESTINATION (safe_room);
1234 c = *charbuf++;
1235 dst += CHAR_STRING (c, dst);
1236 produced_chars++;
1237 }
1238 }
1239 coding->result = CODING_RESULT_SUCCESS;
1240 coding->produced_char += produced_chars;
1241 coding->produced = dst - coding->destination;
1242 return 0;
aa72b389
KH
1243}
1244
4ed46869 1245
df7492f9
KH
1246/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1247 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1248 Little Endian (otherwise). If it is, return
1249 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1250 else return 0. */
1251
1252#define UTF_16_HIGH_SURROGATE_P(val) \
1253 (((val) & 0xFC00) == 0xD800)
1254
1255#define UTF_16_LOW_SURROGATE_P(val) \
1256 (((val) & 0xFC00) == 0xDC00)
1257
1258#define UTF_16_INVALID_P(val) \
1259 (((val) == 0xFFFE) \
1260 || ((val) == 0xFFFF) \
1261 || UTF_16_LOW_SURROGATE_P (val))
1262
1263
1264static int
1265detect_coding_utf_16 (coding, mask)
b73bfc1c 1266 struct coding_system *coding;
df7492f9 1267 int *mask;
b73bfc1c 1268{
df7492f9
KH
1269 unsigned char *src = coding->source, *src_base = src;
1270 unsigned char *src_end = coding->source + coding->src_bytes;
1271 int multibytep = coding->src_multibyte;
1272 int consumed_chars = 0;
1273 int c1, c2;
1274
1275 ONE_MORE_BYTE (c1);
1276 ONE_MORE_BYTE (c2);
4ed46869 1277
df7492f9 1278 if ((c1 == 0xFF) && (c2 == 0xFE))
b73bfc1c 1279 {
df7492f9
KH
1280 *mask &= CATEGORY_MASK_UTF_16_LE;
1281 return 1;
1282 }
1283 else if ((c1 == 0xFE) && (c2 == 0xFF))
1284 {
1285 *mask &= CATEGORY_MASK_UTF_16_BE;
1286 return 1;
1287 }
1288 no_more_source:
1289 return 0;
1290}
ec6d2bb8 1291
df7492f9
KH
1292static void
1293decode_coding_utf_16 (coding)
1294 struct coding_system *coding;
1295{
1296 unsigned char *src = coding->source + coding->consumed;
1297 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1298 unsigned char *src_base;
df7492f9
KH
1299 int *charbuf = coding->charbuf;
1300 int *charbuf_end = charbuf + coding->charbuf_size;
1301 int consumed_chars = 0, consumed_chars_base;
1302 int multibytep = coding->src_multibyte;
1303 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1304 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1305 int surrogate = CODING_UTF_16_SURROGATE (coding);
1306 Lisp_Object attr, eol_type, charset_list;
1307
1308 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1309
1310 if (bom != utf_16_without_bom)
1311 {
1312 int c, c1, c2;
4af310db 1313
df7492f9
KH
1314 src_base = src;
1315 ONE_MORE_BYTE (c1);
1316 ONE_MORE_BYTE (c2);
e19c3639 1317 c = (c1 << 8) | c2;
df7492f9
KH
1318 if (bom == utf_16_with_bom)
1319 {
1320 if (endian == utf_16_big_endian
1321 ? c != 0xFFFE : c != 0xFEFF)
4af310db 1322 {
df7492f9
KH
1323 /* We are sure that there's enouph room at CHARBUF. */
1324 *charbuf++ = c1;
1325 *charbuf++ = c2;
1326 coding->errors++;
4af310db 1327 }
4af310db 1328 }
df7492f9 1329 else
4af310db 1330 {
df7492f9
KH
1331 if (c == 0xFFFE)
1332 CODING_UTF_16_ENDIAN (coding)
1333 = endian = utf_16_big_endian;
1334 else if (c == 0xFEFF)
1335 CODING_UTF_16_ENDIAN (coding)
1336 = endian = utf_16_little_endian;
1337 else
4af310db 1338 {
df7492f9
KH
1339 CODING_UTF_16_ENDIAN (coding)
1340 = endian = utf_16_big_endian;
1341 src = src_base;
4af310db 1342 }
4af310db 1343 }
df7492f9
KH
1344 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1345 }
1346
1347 while (1)
1348 {
1349 int c, c1, c2;
1350
1351 src_base = src;
1352 consumed_chars_base = consumed_chars;
1353
1354 if (charbuf + 2 >= charbuf_end)
1355 break;
1356
1357 ONE_MORE_BYTE (c1);
1358 ONE_MORE_BYTE (c2);
1359 c = (endian == utf_16_big_endian
e19c3639 1360 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1361 if (surrogate)
aa72b389 1362 {
df7492f9 1363 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1364 {
df7492f9
KH
1365 if (endian == utf_16_big_endian)
1366 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1367 else
1368 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1369 *charbuf++ = c1;
1370 *charbuf++ = c2;
1371 coding->errors++;
1372 if (UTF_16_HIGH_SURROGATE_P (c))
1373 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1374 else
1375 *charbuf++ = c;
aa72b389 1376 }
df7492f9
KH
1377 else
1378 {
1379 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1380 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1381 *charbuf++ = c;
1382 }
1383 }
1384 else
1385 {
1386 if (UTF_16_HIGH_SURROGATE_P (c))
1387 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1388 else
1389 *charbuf++ = c;
1390 }
1391 }
1392
1393 no_more_source:
1394 coding->consumed_char += consumed_chars_base;
1395 coding->consumed = src_base - coding->source;
1396 coding->charbuf_used = charbuf - coding->charbuf;
1397}
1398
1399static int
1400encode_coding_utf_16 (coding)
1401 struct coding_system *coding;
1402{
1403 int multibytep = coding->dst_multibyte;
1404 int *charbuf = coding->charbuf;
1405 int *charbuf_end = charbuf + coding->charbuf_used;
1406 unsigned char *dst = coding->destination + coding->produced;
1407 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1408 int safe_room = 8;
1409 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1410 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1411 int produced_chars = 0;
1412 Lisp_Object attrs, eol_type, charset_list;
1413 int c;
1414
1415 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1416
1417 if (bom == utf_16_with_bom)
1418 {
1419 ASSURE_DESTINATION (safe_room);
1420 if (big_endian)
1421 EMIT_TWO_BYTES (0xFF, 0xFE);
1422 else
1423 EMIT_TWO_BYTES (0xFE, 0xFF);
1424 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1425 }
1426
1427 while (charbuf < charbuf_end)
1428 {
1429 ASSURE_DESTINATION (safe_room);
1430 c = *charbuf++;
e19c3639
KH
1431 if (c >= MAX_UNICODE_CHAR)
1432 c = coding->default_char;
df7492f9
KH
1433
1434 if (c < 0x10000)
1435 {
1436 if (big_endian)
1437 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1438 else
1439 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1440 }
1441 else
1442 {
1443 int c1, c2;
1444
1445 c -= 0x10000;
1446 c1 = (c >> 10) + 0xD800;
1447 c2 = (c & 0x3FF) + 0xDC00;
1448 if (big_endian)
1449 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1450 else
1451 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1452 }
1453 }
1454 coding->result = CODING_RESULT_SUCCESS;
1455 coding->produced = dst - coding->destination;
1456 coding->produced_char += produced_chars;
1457 return 0;
1458}
1459
1460\f
1461/*** 6. Old Emacs' internal format (emacs-mule) ***/
1462
1463/* Emacs' internal format for representation of multiple character
1464 sets is a kind of multi-byte encoding, i.e. characters are
1465 represented by variable-length sequences of one-byte codes.
1466
1467 ASCII characters and control characters (e.g. `tab', `newline') are
1468 represented by one-byte sequences which are their ASCII codes, in
1469 the range 0x00 through 0x7F.
1470
1471 8-bit characters of the range 0x80..0x9F are represented by
1472 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1473 code + 0x20).
1474
1475 8-bit characters of the range 0xA0..0xFF are represented by
1476 one-byte sequences which are their 8-bit code.
1477
1478 The other characters are represented by a sequence of `base
1479 leading-code', optional `extended leading-code', and one or two
1480 `position-code's. The length of the sequence is determined by the
1481 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1482 whereas extended leading-code and position-code take the range 0xA0
1483 through 0xFF. See `charset.h' for more details about leading-code
1484 and position-code.
1485
1486 --- CODE RANGE of Emacs' internal format ---
1487 character set range
1488 ------------- -----
1489 ascii 0x00..0x7F
1490 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1491 eight-bit-graphic 0xA0..0xBF
1492 ELSE 0x81..0x9D + [0xA0..0xFF]+
1493 ---------------------------------------------
1494
1495 As this is the internal character representation, the format is
1496 usually not used externally (i.e. in a file or in a data sent to a
1497 process). But, it is possible to have a text externally in this
1498 format (i.e. by encoding by the coding system `emacs-mule').
1499
1500 In that case, a sequence of one-byte codes has a slightly different
1501 form.
1502
1503 At first, all characters in eight-bit-control are represented by
1504 one-byte sequences which are their 8-bit code.
1505
1506 Next, character composition data are represented by the byte
1507 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1508 where,
1509 METHOD is 0xF0 plus one of composition method (enum
1510 composition_method),
1511
1512 BYTES is 0xA0 plus a byte length of this composition data,
1513
1514 CHARS is 0x20 plus a number of characters composed by this
1515 data,
1516
1517 COMPONENTs are characters of multibye form or composition
1518 rules encoded by two-byte of ASCII codes.
1519
1520 In addition, for backward compatibility, the following formats are
1521 also recognized as composition data on decoding.
1522
1523 0x80 MSEQ ...
1524 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1525
1526 Here,
1527 MSEQ is a multibyte form but in these special format:
1528 ASCII: 0xA0 ASCII_CODE+0x80,
1529 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1530 RULE is a one byte code of the range 0xA0..0xF0 that
1531 represents a composition rule.
1532 */
1533
1534char emacs_mule_bytes[256];
1535
1536/* Leading-code followed by extended leading-code. */
1537#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1538#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1539#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1540#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1541
1542
1543int
781d7a48 1544emacs_mule_char (coding, src, nbytes, nchars)
df7492f9 1545 struct coding_system *coding;
781d7a48 1546 unsigned char *src;
df7492f9
KH
1547 int *nbytes, *nchars;
1548{
df7492f9
KH
1549 unsigned char *src_end = coding->source + coding->src_bytes;
1550 int multibytep = coding->src_multibyte;
1551 unsigned char *src_base = src;
1552 struct charset *charset;
1553 unsigned code;
1554 int c;
1555 int consumed_chars = 0;
1556
1557 ONE_MORE_BYTE (c);
df7492f9
KH
1558 switch (emacs_mule_bytes[c])
1559 {
1560 case 2:
1561 if (! (charset = emacs_mule_charset[c]))
1562 goto invalid_code;
1563 ONE_MORE_BYTE (c);
1564 code = c & 0x7F;
1565 break;
1566
1567 case 3:
1568 if (c == LEADING_CODE_PRIVATE_11
1569 || c == LEADING_CODE_PRIVATE_12)
b73bfc1c 1570 {
df7492f9
KH
1571 ONE_MORE_BYTE (c);
1572 if (! (charset = emacs_mule_charset[c]))
1573 goto invalid_code;
1574 ONE_MORE_BYTE (c);
1575 code = c & 0x7F;
b73bfc1c
KH
1576 }
1577 else
1578 {
df7492f9
KH
1579 if (! (charset = emacs_mule_charset[c]))
1580 goto invalid_code;
1581 ONE_MORE_BYTE (c);
781d7a48 1582 code = (c & 0x7F) << 8;
df7492f9
KH
1583 ONE_MORE_BYTE (c);
1584 code |= c & 0x7F;
1585 }
1586 break;
1587
1588 case 4:
781d7a48 1589 ONE_MORE_BYTE (c);
df7492f9
KH
1590 if (! (charset = emacs_mule_charset[c]))
1591 goto invalid_code;
1592 ONE_MORE_BYTE (c);
781d7a48 1593 code = (c & 0x7F) << 8;
df7492f9
KH
1594 ONE_MORE_BYTE (c);
1595 code |= c & 0x7F;
1596 break;
1597
1598 case 1:
1599 code = c;
1600 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1601 : code < 0xA0 ? charset_8_bit_control
1602 : charset_8_bit_graphic);
1603 break;
1604
1605 default:
1606 abort ();
1607 }
1608 c = DECODE_CHAR (charset, code);
1609 if (c < 0)
1610 goto invalid_code;
1611 *nbytes = src - src_base;
1612 *nchars = consumed_chars;
1613 return c;
1614
1615 no_more_source:
1616 return -2;
1617
1618 invalid_code:
1619 return -1;
1620}
1621
1622
1623/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1624 Check if a text is encoded in `emacs-mule'. */
1625
1626static int
1627detect_coding_emacs_mule (coding, mask)
1628 struct coding_system *coding;
1629 int *mask;
1630{
1631 unsigned char *src = coding->source, *src_base = src;
1632 unsigned char *src_end = coding->source + coding->src_bytes;
1633 int multibytep = coding->src_multibyte;
1634 int consumed_chars = 0;
1635 int c;
1636 int found = 0;
1637
1638 /* A coding system of this category is always ASCII compatible. */
1639 src += coding->head_ascii;
1640
1641 while (1)
1642 {
1643 ONE_MORE_BYTE (c);
1644
1645 if (c == 0x80)
1646 {
1647 /* Perhaps the start of composite character. We simple skip
1648 it because analyzing it is too heavy for detecting. But,
1649 at least, we check that the composite character
1650 constitues of more than 4 bytes. */
1651 unsigned char *src_base;
1652
1653 repeat:
1654 src_base = src;
1655 do
1656 {
1657 ONE_MORE_BYTE (c);
1658 }
1659 while (c >= 0xA0);
1660
1661 if (src - src_base <= 4)
1662 break;
1663 found = 1;
1664 if (c == 0x80)
1665 goto repeat;
b73bfc1c 1666 }
df7492f9
KH
1667
1668 if (c < 0x80)
b73bfc1c 1669 {
df7492f9
KH
1670 if (c < 0x20
1671 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1672 break;
1673 }
1674 else
1675 {
1676 unsigned char *src_base = src - 1;
1677
1678 do
1679 {
1680 ONE_MORE_BYTE (c);
1681 }
1682 while (c >= 0xA0);
1683 if (src - src_base != emacs_mule_bytes[*src_base])
1684 break;
1685 found = 1;
4ed46869
KH
1686 }
1687 }
df7492f9
KH
1688 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1689 return 0;
1690
1691 no_more_source:
1692 if (!found)
1693 return 0;
1694 *mask &= CATEGORY_MASK_EMACS_MULE;
1695 return 1;
4ed46869
KH
1696}
1697
b73bfc1c 1698
df7492f9
KH
1699/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1700
1701/* Decode a character represented as a component of composition
1702 sequence of Emacs 20/21 style at SRC. Set C to that character and
1703 update SRC to the head of next character (or an encoded composition
1704 rule). If SRC doesn't points a composition component, set C to -1.
1705 If SRC points an invalid byte sequence, global exit by a return
1706 value 0. */
1707
1708#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1709 if (1) \
1710 { \
1711 int c; \
1712 int nbytes, nchars; \
1713 \
1714 if (src == src_end) \
1715 break; \
781d7a48 1716 c = emacs_mule_char (coding, src, &nbytes, &nchars); \
df7492f9
KH
1717 if (c < 0) \
1718 { \
1719 if (c == -2) \
1720 break; \
1721 goto invalid_code; \
1722 } \
1723 *buf++ = c; \
1724 src += nbytes; \
1725 consumed_chars += nchars; \
1726 } \
1727 else
1728
1729
1730/* Decode a composition rule represented as a component of composition
781d7a48
KH
1731 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1732 and increment BUF. If SRC points an invalid byte sequence, set C
1733 to -1. */
df7492f9 1734
781d7a48 1735#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
1736 do { \
1737 int c, gref, nref; \
1738 \
781d7a48 1739 if (src >= src_end) \
df7492f9
KH
1740 goto invalid_code; \
1741 ONE_MORE_BYTE_NO_CHECK (c); \
781d7a48 1742 c -= 0x20; \
df7492f9
KH
1743 if (c < 0 || c >= 81) \
1744 goto invalid_code; \
1745 \
1746 gref = c / 9, nref = c % 9; \
1747 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1748 } while (0)
1749
1750
781d7a48
KH
1751/* Decode a composition rule represented as a component of composition
1752 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1753 and increment BUF. If SRC points an invalid byte sequence, set C
1754 to -1. */
1755
1756#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1757 do { \
1758 int gref, nref; \
1759 \
1760 if (src + 1>= src_end) \
1761 goto invalid_code; \
1762 ONE_MORE_BYTE_NO_CHECK (gref); \
1763 gref -= 0x20; \
1764 ONE_MORE_BYTE_NO_CHECK (nref); \
1765 nref -= 0x20; \
1766 if (gref < 0 || gref >= 81 \
1767 || nref < 0 || nref >= 81) \
1768 goto invalid_code; \
1769 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1770 } while (0)
1771
1772
df7492f9
KH
1773#define ADD_COMPOSITION_DATA(buf, method, nchars) \
1774 do { \
1775 *buf++ = -5; \
1776 *buf++ = coding->produced_char + char_offset; \
1777 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1778 *buf++ = method; \
1779 *buf++ = nchars; \
1780 } while (0)
aa72b389 1781
df7492f9
KH
1782
1783#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 1784 do { \
df7492f9 1785 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 1786 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
1787 the byte length of this composition information, CHARS is the \
1788 number of characters composed by this composition. */ \
781d7a48
KH
1789 enum composition_method method = c - 0xF2; \
1790 int *charbuf_base = charbuf; \
df7492f9
KH
1791 int consumed_chars_limit; \
1792 int nbytes, nchars; \
1793 \
1794 ONE_MORE_BYTE (c); \
1795 nbytes = c - 0xA0; \
1796 if (nbytes < 3) \
1797 goto invalid_code; \
1798 ONE_MORE_BYTE (c); \
1799 nchars = c - 0xA0; \
1800 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1801 consumed_chars_limit = consumed_chars_base + nbytes; \
1802 if (method != COMPOSITION_RELATIVE) \
aa72b389 1803 { \
df7492f9
KH
1804 int i = 0; \
1805 while (consumed_chars < consumed_chars_limit) \
aa72b389 1806 { \
df7492f9 1807 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 1808 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
1809 else \
1810 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 1811 i++; \
aa72b389 1812 } \
df7492f9
KH
1813 if (consumed_chars < consumed_chars_limit) \
1814 goto invalid_code; \
781d7a48 1815 charbuf_base[0] -= i; \
aa72b389
KH
1816 } \
1817 } while (0)
93dec019 1818
aa72b389 1819
df7492f9
KH
1820#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1821 do { \
1822 /* Emacs 20 style format for relative composition. */ \
1823 /* Store multibyte form of characters to be composed. */ \
1824 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1825 int *buf = components; \
1826 int i, j; \
1827 \
1828 src = src_base; \
1829 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1830 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1831 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1832 if (i < 2) \
1833 goto invalid_code; \
1834 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1835 for (j = 0; j < i; j++) \
1836 *charbuf++ = components[j]; \
1837 } while (0)
1838
1839
1840#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1841 do { \
1842 /* Emacs 20 style format for rule-base composition. */ \
1843 /* Store multibyte form of characters to be composed. */ \
1844 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1845 int *buf = components; \
1846 int i, j; \
1847 \
1848 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1849 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1850 { \
781d7a48 1851 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
df7492f9
KH
1852 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1853 } \
1854 if (i < 1 || (buf - components) % 2 == 0) \
1855 goto invalid_code; \
1856 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1857 goto no_more_source; \
1858 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1859 for (j = 0; j < i; j++) \
1860 *charbuf++ = components[j]; \
1861 for (j = 0; j < i; j += 2) \
1862 *charbuf++ = components[j]; \
1863 } while (0)
1864
aa72b389
KH
1865
1866static void
df7492f9 1867decode_coding_emacs_mule (coding)
aa72b389 1868 struct coding_system *coding;
aa72b389 1869{
df7492f9
KH
1870 unsigned char *src = coding->source + coding->consumed;
1871 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1872 unsigned char *src_base;
df7492f9
KH
1873 int *charbuf = coding->charbuf;
1874 int *charbuf_end = charbuf + coding->charbuf_size;
1875 int consumed_chars = 0, consumed_chars_base;
1876 int char_offset = 0;
1877 int multibytep = coding->src_multibyte;
1878 Lisp_Object attrs, eol_type, charset_list;
aa72b389 1879
df7492f9 1880 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1881
aa72b389
KH
1882 while (1)
1883 {
df7492f9
KH
1884 int c;
1885
aa72b389 1886 src_base = src;
df7492f9
KH
1887 consumed_chars_base = consumed_chars;
1888
1889 if (charbuf >= charbuf_end)
1890 break;
aa72b389 1891
df7492f9
KH
1892 ONE_MORE_BYTE (c);
1893
1894 if (c < 0x80)
aa72b389 1895 {
df7492f9
KH
1896 if (c == '\r')
1897 {
1898 if (EQ (eol_type, Qdos))
1899 {
1900 if (src == src_end)
1901 goto no_more_source;
1902 if (*src == '\n')
1903 ONE_MORE_BYTE (c);
1904 }
1905 else if (EQ (eol_type, Qmac))
1906 c = '\n';
1907 }
1908 *charbuf++ = c;
1909 char_offset++;
aa72b389 1910 }
df7492f9
KH
1911 else if (c == 0x80)
1912 {
1913 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1914 break;
1915 ONE_MORE_BYTE (c);
781d7a48
KH
1916 if (c - 0xF2 >= COMPOSITION_RELATIVE
1917 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1918 DECODE_EMACS_MULE_21_COMPOSITION (c);
1919 else if (c < 0xC0)
1920 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1921 else if (c == 0xFF)
1922 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1923 else
1924 goto invalid_code;
781d7a48 1925 coding->annotated = 1;
df7492f9
KH
1926 }
1927 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1928 {
1929 int nbytes, nchars;
781d7a48
KH
1930 src = src_base;
1931 consumed_chars = consumed_chars_base;
1932 c = emacs_mule_char (coding, src, &nbytes, &nchars);
df7492f9
KH
1933 if (c < 0)
1934 {
1935 if (c == -2)
1936 break;
1937 goto invalid_code;
1938 }
1939 *charbuf++ = c;
781d7a48
KH
1940 src += nbytes;
1941 consumed_chars += nchars;
df7492f9
KH
1942 char_offset++;
1943 }
1944 continue;
1945
1946 invalid_code:
1947 src = src_base;
1948 consumed_chars = consumed_chars_base;
1949 ONE_MORE_BYTE (c);
1950 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1951 coding->errors++;
1952 }
1953
1954 no_more_source:
1955 coding->consumed_char += consumed_chars_base;
1956 coding->consumed = src_base - coding->source;
1957 coding->charbuf_used = charbuf - coding->charbuf;
1958}
1959
1960
1961#define EMACS_MULE_LEADING_CODES(id, codes) \
1962 do { \
1963 if (id < 0xA0) \
1964 codes[0] = id, codes[1] = 0; \
1965 else if (id < 0xE0) \
1966 codes[0] = 0x9A, codes[1] = id; \
1967 else if (id < 0xF0) \
1968 codes[0] = 0x9B, codes[1] = id; \
1969 else if (id < 0xF5) \
1970 codes[0] = 0x9C, codes[1] = id; \
1971 else \
1972 codes[0] = 0x9D, codes[1] = id; \
1973 } while (0);
1974
aa72b389 1975
df7492f9
KH
1976static int
1977encode_coding_emacs_mule (coding)
1978 struct coding_system *coding;
1979{
1980 int multibytep = coding->dst_multibyte;
1981 int *charbuf = coding->charbuf;
1982 int *charbuf_end = charbuf + coding->charbuf_used;
1983 unsigned char *dst = coding->destination + coding->produced;
1984 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1985 int safe_room = 8;
df7492f9
KH
1986 int produced_chars = 0;
1987 Lisp_Object attrs, eol_type, charset_list;
1988 int c;
1989
1990 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1991
1992 while (charbuf < charbuf_end)
1993 {
1994 ASSURE_DESTINATION (safe_room);
1995 c = *charbuf++;
1996 if (ASCII_CHAR_P (c))
1997 EMIT_ONE_ASCII_BYTE (c);
1998 else
aa72b389 1999 {
df7492f9
KH
2000 struct charset *charset;
2001 unsigned code;
2002 int dimension;
2003 int emacs_mule_id;
2004 unsigned char leading_codes[2];
2005
2006 charset = char_charset (c, charset_list, &code);
2007 if (! charset)
2008 {
2009 c = coding->default_char;
2010 if (ASCII_CHAR_P (c))
2011 {
2012 EMIT_ONE_ASCII_BYTE (c);
2013 continue;
2014 }
2015 charset = char_charset (c, charset_list, &code);
2016 }
2017 dimension = CHARSET_DIMENSION (charset);
2018 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2019 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2020 EMIT_ONE_BYTE (leading_codes[0]);
2021 if (leading_codes[1])
2022 EMIT_ONE_BYTE (leading_codes[1]);
2023 if (dimension == 1)
2024 EMIT_ONE_BYTE (code);
aa72b389 2025 else
df7492f9
KH
2026 {
2027 EMIT_ONE_BYTE (code >> 8);
2028 EMIT_ONE_BYTE (code & 0xFF);
2029 }
aa72b389 2030 }
aa72b389 2031 }
df7492f9
KH
2032 coding->result = CODING_RESULT_SUCCESS;
2033 coding->produced_char += produced_chars;
2034 coding->produced = dst - coding->destination;
2035 return 0;
aa72b389 2036}
b73bfc1c 2037
4ed46869 2038\f
df7492f9 2039/*** 7. ISO2022 handlers ***/
4ed46869
KH
2040
2041/* The following note describes the coding system ISO2022 briefly.
39787efd 2042 Since the intention of this note is to help understand the
df7492f9 2043 functions in this file, some parts are NOT ACCURATE or OVERLY
39787efd 2044 SIMPLIFIED. For thorough understanding, please refer to the
df7492f9 2045 original document of ISO2022.
4ed46869
KH
2046
2047 ISO2022 provides many mechanisms to encode several character sets
df7492f9 2048 in 7-bit and 8-bit environments. For 7-bite environments, all text
39787efd
KH
2049 is encoded using bytes less than 128. This may make the encoded
2050 text a little bit longer, but the text passes more easily through
df7492f9 2051 several gateways, some of which strip off MSB (Most Signigant Bit).
b73bfc1c 2052
df7492f9
KH
2053 There are two kinds of character sets: control character set and
2054 graphic character set. The former contains control characters such
4ed46869 2055 as `newline' and `escape' to provide control functions (control
39787efd 2056 functions are also provided by escape sequences). The latter
df7492f9 2057 contains graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2058 two control character sets and many graphic character sets.
2059
2060 Graphic character sets are classified into one of the following
39787efd
KH
2061 four classes, according to the number of bytes (DIMENSION) and
2062 number of characters in one dimension (CHARS) of the set:
2063 - DIMENSION1_CHARS94
2064 - DIMENSION1_CHARS96
2065 - DIMENSION2_CHARS94
2066 - DIMENSION2_CHARS96
2067
2068 In addition, each character set is assigned an identification tag,
df7492f9 2069 unique for each set, called "final character" (denoted as <F>
39787efd
KH
2070 hereafter). The <F> of each character set is decided by ECMA(*)
2071 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2072 (0x30..0x3F are for private use only).
4ed46869
KH
2073
2074 Note (*): ECMA = European Computer Manufacturers Association
2075
df7492f9 2076 Here are examples of graphic character set [NAME(<F>)]:
4ed46869
KH
2077 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2078 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2079 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2080 o DIMENSION2_CHARS96 -- none for the moment
2081
39787efd 2082 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2083 C0 [0x00..0x1F] -- control character plane 0
2084 GL [0x20..0x7F] -- graphic character plane 0
2085 C1 [0x80..0x9F] -- control character plane 1
2086 GR [0xA0..0xFF] -- graphic character plane 1
2087
2088 A control character set is directly designated and invoked to C0 or
39787efd
KH
2089 C1 by an escape sequence. The most common case is that:
2090 - ISO646's control character set is designated/invoked to C0, and
2091 - ISO6429's control character set is designated/invoked to C1,
2092 and usually these designations/invocations are omitted in encoded
2093 text. In a 7-bit environment, only C0 can be used, and a control
2094 character for C1 is encoded by an appropriate escape sequence to
2095 fit into the environment. All control characters for C1 are
2096 defined to have corresponding escape sequences.
4ed46869
KH
2097
2098 A graphic character set is at first designated to one of four
2099 graphic registers (G0 through G3), then these graphic registers are
2100 invoked to GL or GR. These designations and invocations can be
2101 done independently. The most common case is that G0 is invoked to
39787efd
KH
2102 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2103 these invocations and designations are omitted in encoded text.
2104 In a 7-bit environment, only GL can be used.
4ed46869 2105
39787efd
KH
2106 When a graphic character set of CHARS94 is invoked to GL, codes
2107 0x20 and 0x7F of the GL area work as control characters SPACE and
2108 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2109 be used.
4ed46869
KH
2110
2111 There are two ways of invocation: locking-shift and single-shift.
2112 With locking-shift, the invocation lasts until the next different
39787efd
KH
2113 invocation, whereas with single-shift, the invocation affects the
2114 following character only and doesn't affect the locking-shift
2115 state. Invocations are done by the following control characters or
2116 escape sequences:
4ed46869
KH
2117
2118 ----------------------------------------------------------------------
39787efd 2119 abbrev function cntrl escape seq description
4ed46869 2120 ----------------------------------------------------------------------
39787efd
KH
2121 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2122 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2123 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2124 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2125 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2126 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2127 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2128 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2129 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2130 ----------------------------------------------------------------------
39787efd
KH
2131 (*) These are not used by any known coding system.
2132
2133 Control characters for these functions are defined by macros
2134 ISO_CODE_XXX in `coding.h'.
4ed46869 2135
39787efd 2136 Designations are done by the following escape sequences:
4ed46869
KH
2137 ----------------------------------------------------------------------
2138 escape sequence description
2139 ----------------------------------------------------------------------
2140 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2141 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2142 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2143 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2144 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2145 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2146 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2147 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2148 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2149 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2150 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2151 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2152 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2153 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2154 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2155 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2156 ----------------------------------------------------------------------
2157
2158 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2159 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2160
2161 Note (*): Although these designations are not allowed in ISO2022,
2162 Emacs accepts them on decoding, and produces them on encoding
39787efd 2163 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2164 7-bit environment, non-locking-shift, and non-single-shift.
2165
2166 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2167 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2168
df7492f9 2169 Now you may notice that there are a lot of ways for encoding the
39787efd
KH
2170 same multilingual text in ISO2022. Actually, there exist many
2171 coding systems such as Compound Text (used in X11's inter client
df7492f9
KH
2172 communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2173 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2174 localized platforms), and all of these are variants of ISO2022.
2175
2176 In addition to the above, Emacs handles two more kinds of escape
2177 sequences: ISO6429's direction specification and Emacs' private
2178 sequence for specifying character composition.
2179
39787efd 2180 ISO6429's direction specification takes the following form:
4ed46869
KH
2181 o CSI ']' -- end of the current direction
2182 o CSI '0' ']' -- end of the current direction
2183 o CSI '1' ']' -- start of left-to-right text
2184 o CSI '2' ']' -- start of right-to-left text
2185 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2186 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2187
2188 Character composition specification takes the following form:
ec6d2bb8
KH
2189 o ESC '0' -- start relative composition
2190 o ESC '1' -- end composition
2191 o ESC '2' -- start rule-base composition (*)
2192 o ESC '3' -- start relative composition with alternate chars (**)
2193 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2194 Since these are not standard escape sequences of any ISO standard,
df7492f9 2195 the use of them for these meaning is restricted to Emacs only.
ec6d2bb8 2196
df7492f9 2197 (*) This form is used only in Emacs 20.5 and the older versions,
b73bfc1c 2198 but the newer versions can safely decode it.
df7492f9 2199 (**) This form is used only in Emacs 21.1 and the newer versions,
b73bfc1c 2200 and the older versions can't decode it.
ec6d2bb8 2201
df7492f9 2202 Here's a list of examples usages of these composition escape
b73bfc1c 2203 sequences (categorized by `enum composition_method').
ec6d2bb8 2204
b73bfc1c 2205 COMPOSITION_RELATIVE:
ec6d2bb8 2206 ESC 0 CHAR [ CHAR ] ESC 1
df7492f9 2207 COMPOSITOIN_WITH_RULE:
ec6d2bb8 2208 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2209 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2210 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2211 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2212 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2213
2214enum iso_code_class_type iso_code_class[256];
2215
df7492f9
KH
2216#define SAFE_CHARSET_P(coding, id) \
2217 ((id) <= (coding)->max_charset_id \
2218 && (coding)->safe_charsets[id] >= 0)
2219
2220
2221#define SHIFT_OUT_OK(category) \
2222 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2223
2224static void
f0064e1f
DL
2225setup_iso_safe_charsets (attrs)
2226 Lisp_Object attrs;
df7492f9
KH
2227{
2228 Lisp_Object charset_list, safe_charsets;
2229 Lisp_Object request;
2230 Lisp_Object reg_usage;
2231 Lisp_Object tail;
2232 int reg94, reg96;
2233 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2234 int max_charset_id;
2235
2236 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2237 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2238 && ! EQ (charset_list, Viso_2022_charset_list))
2239 {
2240 CODING_ATTR_CHARSET_LIST (attrs)
2241 = charset_list = Viso_2022_charset_list;
2242 ASET (attrs, coding_attr_safe_charsets, Qnil);
2243 }
2244
2245 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2246 return;
2247
2248 max_charset_id = 0;
2249 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2250 {
2251 int id = XINT (XCAR (tail));
2252 if (max_charset_id < id)
2253 max_charset_id = id;
2254 }
2255
2256 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2257 make_number (255));
2258 request = AREF (attrs, coding_attr_iso_request);
2259 reg_usage = AREF (attrs, coding_attr_iso_usage);
2260 reg94 = XINT (XCAR (reg_usage));
2261 reg96 = XINT (XCDR (reg_usage));
2262
2263 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2264 {
2265 Lisp_Object id;
2266 Lisp_Object reg;
2267 struct charset *charset;
2268
2269 id = XCAR (tail);
2270 charset = CHARSET_FROM_ID (XINT (id));
2271 reg = Fcdr (Fassq (request, id));
2272 if (! NILP (reg))
2273 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2274 else if (charset->iso_chars_96)
2275 {
2276 if (reg96 < 4)
2277 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2278 }
2279 else
2280 {
2281 if (reg94 < 4)
2282 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2283 }
2284 }
2285 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2286}
d46c5b12 2287
d46c5b12 2288
4ed46869 2289/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
df7492f9 2290 Check if a text is encoded in ISO2022. If it is, returns an
4ed46869 2291 integer in which appropriate flag bits any of:
df7492f9
KH
2292 CATEGORY_MASK_ISO_7
2293 CATEGORY_MASK_ISO_7_TIGHT
2294 CATEGORY_MASK_ISO_8_1
2295 CATEGORY_MASK_ISO_8_2
2296 CATEGORY_MASK_ISO_7_ELSE
2297 CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
2298 are set. If a code which should never appear in ISO2022 is found,
2299 returns 0. */
2300
0a28aafb 2301static int
df7492f9
KH
2302detect_coding_iso_2022 (coding, mask)
2303 struct coding_system *coding;
2304 int *mask;
4ed46869 2305{
df7492f9
KH
2306 unsigned char *src = coding->source, *src_base = src;
2307 unsigned char *src_end = coding->source + coding->src_bytes;
2308 int multibytep = coding->src_multibyte;
2309 int mask_iso = CATEGORY_MASK_ISO;
2310 int mask_found = 0, mask_8bit_found = 0;
f46869e4 2311 int reg[4], shift_out = 0, single_shifting = 0;
df7492f9
KH
2312 int id;
2313 int c, c1;
2314 int consumed_chars = 0;
2315 int i;
2316
2317 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2318 {
2319 struct coding_system *this = &(coding_categories[i]);
2320 Lisp_Object attrs, val;
2321
2322 attrs = CODING_ID_ATTRS (this->id);
2323 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2324 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2325 setup_iso_safe_charsets (attrs);
2326 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2327 this->max_charset_id = XSTRING (val)->size - 1;
2328 this->safe_charsets = (char *) XSTRING (val)->data;
2329 }
2330
2331 /* A coding system of this category is always ASCII compatible. */
2332 src += coding->head_ascii;
3f003981 2333
df7492f9
KH
2334 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2335 while (mask_iso && src < src_end)
4ed46869 2336 {
df7492f9 2337 ONE_MORE_BYTE (c);
4ed46869
KH
2338 switch (c)
2339 {
2340 case ISO_CODE_ESC:
74383408
KH
2341 if (inhibit_iso_escape_detection)
2342 break;
f46869e4 2343 single_shifting = 0;
df7492f9 2344 ONE_MORE_BYTE (c);
d46c5b12 2345 if (c >= '(' && c <= '/')
4ed46869 2346 {
bf9cdd4e 2347 /* Designation sequence for a charset of dimension 1. */
df7492f9 2348 ONE_MORE_BYTE (c1);
d46c5b12 2349 if (c1 < ' ' || c1 >= 0x80
df7492f9 2350 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2351 /* Invalid designation sequence. Just ignore. */
2352 break;
df7492f9 2353 reg[(c - '(') % 4] = id;
bf9cdd4e
KH
2354 }
2355 else if (c == '$')
2356 {
2357 /* Designation sequence for a charset of dimension 2. */
df7492f9 2358 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2359 if (c >= '@' && c <= 'B')
2360 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
df7492f9 2361 reg[0] = id = iso_charset_table[1][0][c];
bf9cdd4e 2362 else if (c >= '(' && c <= '/')
bcf26d6a 2363 {
df7492f9 2364 ONE_MORE_BYTE (c1);
d46c5b12 2365 if (c1 < ' ' || c1 >= 0x80
df7492f9 2366 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2367 /* Invalid designation sequence. Just ignore. */
2368 break;
df7492f9 2369 reg[(c - '(') % 4] = id;
bcf26d6a 2370 }
bf9cdd4e 2371 else
d46c5b12
KH
2372 /* Invalid designation sequence. Just ignore. */
2373 break;
2374 }
ae9ff118 2375 else if (c == 'N' || c == 'O')
d46c5b12 2376 {
ae9ff118 2377 /* ESC <Fe> for SS2 or SS3. */
df7492f9 2378 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 2379 break;
4ed46869 2380 }
ec6d2bb8
KH
2381 else if (c >= '0' && c <= '4')
2382 {
2383 /* ESC <Fp> for start/end composition. */
df7492f9 2384 mask_found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2385 break;
2386 }
bf9cdd4e 2387 else
df7492f9
KH
2388 {
2389 /* Invalid escape sequence. */
2390 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2391 break;
2392 }
d46c5b12
KH
2393
2394 /* We found a valid designation sequence for CHARSET. */
df7492f9
KH
2395 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2396 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2397 id))
2398 mask_found |= CATEGORY_MASK_ISO_7;
d46c5b12 2399 else
df7492f9
KH
2400 mask_iso &= ~CATEGORY_MASK_ISO_7;
2401 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2402 id))
2403 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2404 else
df7492f9
KH
2405 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2406 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2407 id))
2408 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2409 else
df7492f9
KH
2410 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2411 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2412 id))
2413 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2414 else
df7492f9 2415 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2416 break;
2417
4ed46869 2418 case ISO_CODE_SO:
74383408
KH
2419 if (inhibit_iso_escape_detection)
2420 break;
f46869e4 2421 single_shifting = 0;
d46c5b12
KH
2422 if (shift_out == 0
2423 && (reg[1] >= 0
df7492f9
KH
2424 || SHIFT_OUT_OK (coding_category_iso_7_else)
2425 || SHIFT_OUT_OK (coding_category_iso_8_else)))
d46c5b12
KH
2426 {
2427 /* Locking shift out. */
df7492f9
KH
2428 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2429 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2430 }
e0e989f6 2431 break;
df7492f9 2432
d46c5b12 2433 case ISO_CODE_SI:
74383408
KH
2434 if (inhibit_iso_escape_detection)
2435 break;
f46869e4 2436 single_shifting = 0;
d46c5b12
KH
2437 if (shift_out == 1)
2438 {
2439 /* Locking shift in. */
df7492f9
KH
2440 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2441 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2442 }
2443 break;
2444
4ed46869 2445 case ISO_CODE_CSI:
f46869e4 2446 single_shifting = 0;
4ed46869
KH
2447 case ISO_CODE_SS2:
2448 case ISO_CODE_SS3:
3f003981 2449 {
df7492f9 2450 int newmask = CATEGORY_MASK_ISO_8_ELSE;
3f003981 2451
74383408
KH
2452 if (inhibit_iso_escape_detection)
2453 break;
70c22245
KH
2454 if (c != ISO_CODE_CSI)
2455 {
df7492f9
KH
2456 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2457 & CODING_ISO_FLAG_SINGLE_SHIFT)
2458 newmask |= CATEGORY_MASK_ISO_8_1;
2459 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2460 & CODING_ISO_FLAG_SINGLE_SHIFT)
2461 newmask |= CATEGORY_MASK_ISO_8_2;
f46869e4 2462 single_shifting = 1;
70c22245 2463 }
3f003981
KH
2464 if (VECTORP (Vlatin_extra_code_table)
2465 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2466 {
df7492f9
KH
2467 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2468 & CODING_ISO_FLAG_LATIN_EXTRA)
2469 newmask |= CATEGORY_MASK_ISO_8_1;
2470 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2471 & CODING_ISO_FLAG_LATIN_EXTRA)
2472 newmask |= CATEGORY_MASK_ISO_8_2;
3f003981 2473 }
df7492f9 2474 mask_iso &= newmask;
d46c5b12 2475 mask_found |= newmask;
3f003981
KH
2476 }
2477 break;
4ed46869
KH
2478
2479 default:
2480 if (c < 0x80)
f46869e4
KH
2481 {
2482 single_shifting = 0;
2483 break;
2484 }
4ed46869 2485 else if (c < 0xA0)
c4825358 2486 {
f46869e4 2487 single_shifting = 0;
df7492f9 2488 mask_8bit_found = 1;
3f003981
KH
2489 if (VECTORP (Vlatin_extra_code_table)
2490 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 2491 {
3f003981
KH
2492 int newmask = 0;
2493
df7492f9
KH
2494 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2495 & CODING_ISO_FLAG_LATIN_EXTRA)
2496 newmask |= CATEGORY_MASK_ISO_8_1;
2497 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2498 & CODING_ISO_FLAG_LATIN_EXTRA)
2499 newmask |= CATEGORY_MASK_ISO_8_2;
2500 mask_iso &= newmask;
d46c5b12 2501 mask_found |= newmask;
c4825358 2502 }
3f003981
KH
2503 else
2504 return 0;
c4825358 2505 }
4ed46869
KH
2506 else
2507 {
df7492f9
KH
2508 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2509 | CATEGORY_MASK_ISO_7_ELSE);
2510 mask_found |= CATEGORY_MASK_ISO_8_1;
2511 mask_8bit_found = 1;
f46869e4
KH
2512 /* Check the length of succeeding codes of the range
2513 0xA0..0FF. If the byte length is odd, we exclude
df7492f9 2514 CATEGORY_MASK_ISO_8_2. We can check this only
f46869e4 2515 when we are not single shifting. */
b73bfc1c 2516 if (!single_shifting
df7492f9 2517 && mask_iso & CATEGORY_MASK_ISO_8_2)
f46869e4 2518 {
e17de821 2519 int i = 1;
b73bfc1c
KH
2520 while (src < src_end)
2521 {
df7492f9 2522 ONE_MORE_BYTE (c);
b73bfc1c
KH
2523 if (c < 0xA0)
2524 break;
2525 i++;
2526 }
2527
2528 if (i & 1 && src < src_end)
df7492f9 2529 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
f46869e4 2530 else
df7492f9 2531 mask_found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2532 }
4ed46869
KH
2533 }
2534 break;
2535 }
2536 }
df7492f9
KH
2537 no_more_source:
2538 if (!mask_iso)
2539 {
2540 *mask &= ~CATEGORY_MASK_ISO;
2541 return 0;
2542 }
2543 if (!mask_found)
2544 return 0;
2545 *mask &= mask_iso & mask_found;
2546 if (! mask_8bit_found)
2547 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2548 return 1;
4ed46869
KH
2549}
2550
4ed46869
KH
2551
2552/* Set designation state into CODING. */
df7492f9
KH
2553#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2554 do { \
2555 int id, prev; \
2556 \
2557 if (final < '0' || final >= 128 \
2558 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2559 || !SAFE_CHARSET_P (coding, id)) \
2560 { \
2561 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2562 goto invalid_code; \
2563 } \
2564 prev = CODING_ISO_DESIGNATION (coding, reg); \
2565 CODING_ISO_DESIGNATION (coding, reg) = id; \
2566 /* If there was an invalid designation to REG previously, and this \
2567 designation is ASCII to REG, we should keep this designation \
2568 sequence. */ \
2569 if (prev == -2 && id == charset_ascii) \
2570 goto invalid_code; \
4ed46869
KH
2571 } while (0)
2572
d46c5b12 2573
df7492f9
KH
2574#define MAYBE_FINISH_COMPOSITION() \
2575 do { \
2576 int i; \
2577 if (composition_state == COMPOSING_NO) \
2578 break; \
2579 /* It is assured that we have enough room for producing \
2580 characters stored in the table `components'. */ \
2581 if (charbuf + component_idx > charbuf_end) \
2582 goto no_more_source; \
2583 composition_state = COMPOSING_NO; \
2584 if (method == COMPOSITION_RELATIVE \
2585 || method == COMPOSITION_WITH_ALTCHARS) \
2586 { \
2587 for (i = 0; i < component_idx; i++) \
2588 *charbuf++ = components[i]; \
2589 char_offset += component_idx; \
2590 } \
2591 else \
2592 { \
2593 for (i = 0; i < component_idx; i += 2) \
2594 *charbuf++ = components[i]; \
2595 char_offset += (component_idx / 2) + 1; \
2596 } \
2597 } while (0)
2598
d46c5b12 2599
aa72b389
KH
2600/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2601 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2602 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2603 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2604 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2605 */
ec6d2bb8 2606
df7492f9
KH
2607#define DECODE_COMPOSITION_START(c1) \
2608 do { \
2609 if (c1 == '0' \
781d7a48 2610 && composition_state == COMPOSING_COMPONENT_RULE) \
df7492f9
KH
2611 { \
2612 component_len = component_idx; \
2613 composition_state = COMPOSING_CHAR; \
2614 } \
2615 else \
2616 { \
2617 unsigned char *p; \
2618 \
2619 MAYBE_FINISH_COMPOSITION (); \
2620 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2621 goto no_more_source; \
2622 for (p = src; p < src_end - 1; p++) \
2623 if (*p == ISO_CODE_ESC && p[1] == '1') \
2624 break; \
2625 if (p == src_end - 1) \
2626 { \
2627 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2628 goto invalid_code; \
2629 goto no_more_source; \
2630 } \
2631 \
2632 /* This is surely the start of a composition. */ \
2633 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2634 : c1 == '2' ? COMPOSITION_WITH_RULE \
2635 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2636 : COMPOSITION_WITH_RULE_ALTCHARS); \
2637 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2638 : COMPOSING_COMPONENT_CHAR); \
2639 component_idx = component_len = 0; \
2640 } \
ec6d2bb8
KH
2641 } while (0)
2642
ec6d2bb8 2643
df7492f9
KH
2644/* Handle compositoin end sequence ESC 1. */
2645
2646#define DECODE_COMPOSITION_END() \
ec6d2bb8 2647 do { \
df7492f9
KH
2648 int nchars = (component_len > 0 ? component_idx - component_len \
2649 : method == COMPOSITION_RELATIVE ? component_idx \
2650 : (component_idx + 1) / 2); \
2651 int i; \
2652 int *saved_charbuf = charbuf; \
2653 \
2654 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2655 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 2656 { \
df7492f9
KH
2657 if (component_len == 0) \
2658 for (i = 0; i < component_idx; i++) \
2659 *charbuf++ = components[i]; \
2660 else \
2661 for (i = 0; i < component_len; i++) \
2662 *charbuf++ = components[i]; \
2663 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 2664 } \
df7492f9
KH
2665 if (method == COMPOSITION_WITH_RULE) \
2666 for (i = 0; i < component_idx; i += 2, char_offset++) \
2667 *charbuf++ = components[i]; \
ec6d2bb8 2668 else \
df7492f9
KH
2669 for (i = component_len; i < component_idx; i++, char_offset++) \
2670 *charbuf++ = components[i]; \
2671 coding->annotated = 1; \
2672 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
2673 } while (0)
2674
df7492f9 2675
ec6d2bb8
KH
2676/* Decode a composition rule from the byte C1 (and maybe one more byte
2677 from SRC) and store one encoded composition rule in
2678 coding->cmp_data. */
2679
2680#define DECODE_COMPOSITION_RULE(c1) \
2681 do { \
ec6d2bb8
KH
2682 (c1) -= 32; \
2683 if (c1 < 81) /* old format (before ver.21) */ \
2684 { \
2685 int gref = (c1) / 9; \
2686 int nref = (c1) % 9; \
2687 if (gref == 4) gref = 10; \
2688 if (nref == 4) nref = 10; \
df7492f9 2689 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
ec6d2bb8 2690 } \
b73bfc1c 2691 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
2692 { \
2693 ONE_MORE_BYTE (c2); \
df7492f9 2694 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
ec6d2bb8 2695 } \
df7492f9
KH
2696 else \
2697 c1 = 0; \
ec6d2bb8 2698 } while (0)
88993dfd 2699
d46c5b12 2700
4ed46869
KH
2701/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2702
b73bfc1c 2703static void
df7492f9 2704decode_coding_iso_2022 (coding)
4ed46869 2705 struct coding_system *coding;
4ed46869 2706{
df7492f9
KH
2707 unsigned char *src = coding->source + coding->consumed;
2708 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2709 unsigned char *src_base;
df7492f9
KH
2710 int *charbuf = coding->charbuf;
2711 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2712 int consumed_chars = 0, consumed_chars_base;
2713 int char_offset = 0;
2714 int multibytep = coding->src_multibyte;
2715 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2716 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2717 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2718 struct charset *charset;
2719 int c;
2720 /* For handling composition sequence. */
2721#define COMPOSING_NO 0
2722#define COMPOSING_CHAR 1
2723#define COMPOSING_RULE 2
2724#define COMPOSING_COMPONENT_CHAR 3
2725#define COMPOSING_COMPONENT_RULE 4
2726
2727 int composition_state = COMPOSING_NO;
2728 enum composition_method method;
2729 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2730 int component_idx;
2731 int component_len;
2732 Lisp_Object attrs, eol_type, charset_list;
2733
2734 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2735 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2736
2737 while (1)
4ed46869 2738 {
b73bfc1c
KH
2739 int c1, c2;
2740
2741 src_base = src;
df7492f9
KH
2742 consumed_chars_base = consumed_chars;
2743
2744 if (charbuf >= charbuf_end)
2745 break;
2746
b73bfc1c 2747 ONE_MORE_BYTE (c1);
4ed46869 2748
ec6d2bb8 2749 /* We produce no character or one character. */
4ed46869
KH
2750 switch (iso_code_class [c1])
2751 {
2752 case ISO_0x20_or_0x7F:
df7492f9 2753 if (composition_state != COMPOSING_NO)
ec6d2bb8 2754 {
df7492f9
KH
2755 if (composition_state == COMPOSING_RULE
2756 || composition_state == COMPOSING_COMPONENT_RULE)
2757 {
2758 DECODE_COMPOSITION_RULE (c1);
2759 components[component_idx++] = c1;
2760 composition_state--;
2761 continue;
2762 }
ec6d2bb8 2763 }
df7492f9
KH
2764 if (charset_id_0 < 0
2765 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2766 /* This is SPACE or DEL. */
2767 charset = CHARSET_FROM_ID (charset_ascii);
2768 else
2769 charset = CHARSET_FROM_ID (charset_id_0);
2770 break;
4ed46869
KH
2771
2772 case ISO_graphic_plane_0:
781d7a48 2773 if (composition_state != COMPOSING_NO)
b73bfc1c 2774 {
781d7a48
KH
2775 if (composition_state == COMPOSING_RULE
2776 || composition_state == COMPOSING_COMPONENT_RULE)
2777 {
2778 DECODE_COMPOSITION_RULE (c1);
2779 components[component_idx++] = c1;
2780 composition_state--;
2781 continue;
2782 }
b73bfc1c 2783 }
df7492f9 2784 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2785 break;
2786
2787 case ISO_0xA0_or_0xFF:
df7492f9
KH
2788 if (charset_id_1 < 0
2789 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2790 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2791 goto invalid_code;
4ed46869
KH
2792 /* This is a graphic character, we fall down ... */
2793
2794 case ISO_graphic_plane_1:
df7492f9
KH
2795 if (charset_id_1 < 0)
2796 goto invalid_code;
2797 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2798 break;
2799
2800 case ISO_carriage_return:
df7492f9 2801 if (c1 == '\r')
4ed46869 2802 {
df7492f9 2803 if (EQ (eol_type, Qdos))
4ed46869 2804 {
df7492f9
KH
2805 if (src == src_end)
2806 goto no_more_source;
2807 if (*src == '\n')
2808 ONE_MORE_BYTE (c1);
4ed46869 2809 }
df7492f9
KH
2810 else if (EQ (eol_type, Qmac))
2811 c1 = '\n';
4ed46869 2812 }
df7492f9
KH
2813 /* fall through */
2814
2815 case ISO_control_0:
2816 MAYBE_FINISH_COMPOSITION ();
2817 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2818 break;
2819
df7492f9
KH
2820 case ISO_control_1:
2821 MAYBE_FINISH_COMPOSITION ();
2822 goto invalid_code;
2823
4ed46869 2824 case ISO_shift_out:
df7492f9
KH
2825 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2826 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2827 goto invalid_code;
2828 CODING_ISO_INVOCATION (coding, 0) = 1;
2829 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2830 continue;
4ed46869
KH
2831
2832 case ISO_shift_in:
df7492f9
KH
2833 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2834 goto invalid_code;
2835 CODING_ISO_INVOCATION (coding, 0) = 0;
2836 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2837 continue;
4ed46869
KH
2838
2839 case ISO_single_shift_2_7:
2840 case ISO_single_shift_2:
df7492f9
KH
2841 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2842 goto invalid_code;
4ed46869
KH
2843 /* SS2 is handled as an escape sequence of ESC 'N' */
2844 c1 = 'N';
2845 goto label_escape_sequence;
2846
2847 case ISO_single_shift_3:
df7492f9
KH
2848 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2849 goto invalid_code;
4ed46869
KH
2850 /* SS2 is handled as an escape sequence of ESC 'O' */
2851 c1 = 'O';
2852 goto label_escape_sequence;
2853
2854 case ISO_control_sequence_introducer:
2855 /* CSI is handled as an escape sequence of ESC '[' ... */
2856 c1 = '[';
2857 goto label_escape_sequence;
2858
2859 case ISO_escape:
2860 ONE_MORE_BYTE (c1);
2861 label_escape_sequence:
df7492f9 2862 /* Escape sequences handled here are invocation,
4ed46869
KH
2863 designation, direction specification, and character
2864 composition specification. */
2865 switch (c1)
2866 {
2867 case '&': /* revision of following character set */
2868 ONE_MORE_BYTE (c1);
2869 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2870 goto invalid_code;
4ed46869
KH
2871 ONE_MORE_BYTE (c1);
2872 if (c1 != ISO_CODE_ESC)
df7492f9 2873 goto invalid_code;
4ed46869
KH
2874 ONE_MORE_BYTE (c1);
2875 goto label_escape_sequence;
2876
2877 case '$': /* designation of 2-byte character set */
df7492f9
KH
2878 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2879 goto invalid_code;
4ed46869
KH
2880 ONE_MORE_BYTE (c1);
2881 if (c1 >= '@' && c1 <= 'B')
2882 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2883 or JISX0208.1980 */
df7492f9 2884 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2885 }
2886 else if (c1 >= 0x28 && c1 <= 0x2B)
2887 { /* designation of DIMENSION2_CHARS94 character set */
2888 ONE_MORE_BYTE (c2);
df7492f9 2889 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2890 }
2891 else if (c1 >= 0x2C && c1 <= 0x2F)
2892 { /* designation of DIMENSION2_CHARS96 character set */
2893 ONE_MORE_BYTE (c2);
df7492f9 2894 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2895 }
2896 else
df7492f9 2897 goto invalid_code;
b73bfc1c 2898 /* We must update these variables now. */
df7492f9
KH
2899 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2900 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2901 continue;
4ed46869
KH
2902
2903 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2904 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2905 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2906 goto invalid_code;
2907 CODING_ISO_INVOCATION (coding, 0) = 2;
2908 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2909 continue;
4ed46869
KH
2910
2911 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2912 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2913 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2914 goto invalid_code;
2915 CODING_ISO_INVOCATION (coding, 0) = 3;
2916 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2917 continue;
4ed46869
KH
2918
2919 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2920 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2921 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2922 goto invalid_code;
2923 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 2924 ONE_MORE_BYTE (c1);
e7046a18 2925 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2926 goto invalid_code;
4ed46869
KH
2927 break;
2928
2929 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
2930 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2931 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2932 goto invalid_code;
2933 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 2934 ONE_MORE_BYTE (c1);
e7046a18 2935 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2936 goto invalid_code;
4ed46869
KH
2937 break;
2938
ec6d2bb8 2939 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
2940 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2941 goto invalid_code;
ec6d2bb8 2942 DECODE_COMPOSITION_START (c1);
b73bfc1c 2943 continue;
4ed46869 2944
ec6d2bb8 2945 case '1': /* end composition */
df7492f9
KH
2946 if (composition_state == COMPOSING_NO)
2947 goto invalid_code;
2948 DECODE_COMPOSITION_END ();
b73bfc1c 2949 continue;
4ed46869
KH
2950
2951 case '[': /* specification of direction */
df7492f9
KH
2952 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2953 goto invalid_code;
4ed46869 2954 /* For the moment, nested direction is not supported.
d46c5b12 2955 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 2956 left-to-right, and nozero means right-to-left. */
4ed46869
KH
2957 ONE_MORE_BYTE (c1);
2958 switch (c1)
2959 {
2960 case ']': /* end of the current direction */
d46c5b12 2961 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
2962
2963 case '0': /* end of the current direction */
2964 case '1': /* start of left-to-right direction */
2965 ONE_MORE_BYTE (c1);
2966 if (c1 == ']')
d46c5b12 2967 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 2968 else
df7492f9 2969 goto invalid_code;
4ed46869
KH
2970 break;
2971
2972 case '2': /* start of right-to-left direction */
2973 ONE_MORE_BYTE (c1);
2974 if (c1 == ']')
d46c5b12 2975 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 2976 else
df7492f9 2977 goto invalid_code;
4ed46869
KH
2978 break;
2979
2980 default:
df7492f9 2981 goto invalid_code;
4ed46869 2982 }
b73bfc1c 2983 continue;
4ed46869
KH
2984
2985 default:
df7492f9
KH
2986 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2987 goto invalid_code;
4ed46869
KH
2988 if (c1 >= 0x28 && c1 <= 0x2B)
2989 { /* designation of DIMENSION1_CHARS94 character set */
2990 ONE_MORE_BYTE (c2);
df7492f9 2991 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
2992 }
2993 else if (c1 >= 0x2C && c1 <= 0x2F)
2994 { /* designation of DIMENSION1_CHARS96 character set */
2995 ONE_MORE_BYTE (c2);
df7492f9 2996 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
2997 }
2998 else
df7492f9 2999 goto invalid_code;
b73bfc1c 3000 /* We must update these variables now. */
df7492f9
KH
3001 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3002 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3003 continue;
4ed46869 3004 }
b73bfc1c 3005 }
4ed46869 3006
b73bfc1c 3007 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3008 Produce a decoded character while getting 2nd position code
3009 C2 if necessary. */
3010 c1 &= 0x7F;
3011 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3012 {
3013 ONE_MORE_BYTE (c2);
df7492f9 3014 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3015 /* C2 is not in a valid range. */
df7492f9
KH
3016 goto invalid_code;
3017 c1 = (c1 << 8) | (c2 & 0x7F);
3018 if (CHARSET_DIMENSION (charset) > 2)
3019 {
3020 ONE_MORE_BYTE (c2);
3021 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3022 /* C2 is not in a valid range. */
3023 goto invalid_code;
3024 c1 = (c1 << 8) | (c2 & 0x7F);
3025 }
3026 }
3027
3028 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3029 if (c < 0)
3030 {
3031 MAYBE_FINISH_COMPOSITION ();
3032 for (; src_base < src; src_base++, char_offset++)
3033 {
3034 if (ASCII_BYTE_P (*src_base))
3035 *charbuf++ = *src_base;
3036 else
3037 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3038 }
3039 }
3040 else if (composition_state == COMPOSING_NO)
3041 {
3042 *charbuf++ = c;
3043 char_offset++;
4ed46869 3044 }
df7492f9 3045 else
781d7a48
KH
3046 {
3047 components[component_idx++] = c;
3048 if (method == COMPOSITION_WITH_RULE
3049 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3050 && composition_state == COMPOSING_COMPONENT_CHAR))
3051 composition_state++;
3052 }
4ed46869
KH
3053 continue;
3054
df7492f9
KH
3055 invalid_code:
3056 MAYBE_FINISH_COMPOSITION ();
4ed46869 3057 src = src_base;
df7492f9
KH
3058 consumed_chars = consumed_chars_base;
3059 ONE_MORE_BYTE (c);
3060 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3061 coding->errors++;
4ed46869 3062 }
fb88bf2d 3063
df7492f9
KH
3064 no_more_source:
3065 coding->consumed_char += consumed_chars_base;
3066 coding->consumed = src_base - coding->source;
3067 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3068}
3069
b73bfc1c 3070
f4dee582 3071/* ISO2022 encoding stuff. */
4ed46869
KH
3072
3073/*
f4dee582 3074 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3075 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3076 variant has the following specifications:
df7492f9 3077 1. Initial designation to G0 thru G3.
4ed46869
KH
3078 2. Allows short-form designation?
3079 3. ASCII should be designated to G0 before control characters?
3080 4. ASCII should be designated to G0 at end of line?
3081 5. 7-bit environment or 8-bit environment?
3082 6. Use locking-shift?
3083 7. Use Single-shift?
3084 And the following two are only for Japanese:
3085 8. Use ASCII in place of JIS0201-1976-Roman?
3086 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3087 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3088 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3089 details.
4ed46869
KH
3090*/
3091
3092/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
3093 register REG at DST, and increment DST. If <final-char> of CHARSET is
3094 '@', 'A', or 'B' and the coding system CODING allows, produce
3095 designation sequence of short-form. */
4ed46869
KH
3096
3097#define ENCODE_DESIGNATION(charset, reg, coding) \
3098 do { \
df7492f9 3099 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
4ed46869
KH
3100 char *intermediate_char_94 = "()*+"; \
3101 char *intermediate_char_96 = ",-./"; \
df7492f9
KH
3102 int revision = -1; \
3103 int c; \
3104 \
3105 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3106 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3107 \
3108 if (revision >= 0) \
70c22245 3109 { \
df7492f9
KH
3110 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3111 EMIT_ONE_BYTE ('@' + revision); \
4ed46869 3112 } \
df7492f9 3113 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4ed46869
KH
3114 if (CHARSET_DIMENSION (charset) == 1) \
3115 { \
df7492f9
KH
3116 if (! CHARSET_ISO_CHARS_96 (charset)) \
3117 c = intermediate_char_94[reg]; \
4ed46869 3118 else \
df7492f9
KH
3119 c = intermediate_char_96[reg]; \
3120 EMIT_ONE_ASCII_BYTE (c); \
4ed46869
KH
3121 } \
3122 else \
3123 { \
df7492f9
KH
3124 EMIT_ONE_ASCII_BYTE ('$'); \
3125 if (! CHARSET_ISO_CHARS_96 (charset)) \
4ed46869 3126 { \
df7492f9 3127 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
b73bfc1c
KH
3128 || reg != 0 \
3129 || final_char < '@' || final_char > 'B') \
df7492f9 3130 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
4ed46869
KH
3131 } \
3132 else \
df7492f9 3133 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
4ed46869 3134 } \
df7492f9
KH
3135 EMIT_ONE_ASCII_BYTE (final_char); \
3136 \
3137 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
4ed46869
KH
3138 } while (0)
3139
df7492f9 3140
4ed46869
KH
3141/* The following two macros produce codes (control character or escape
3142 sequence) for ISO2022 single-shift functions (single-shift-2 and
3143 single-shift-3). */
3144
df7492f9
KH
3145#define ENCODE_SINGLE_SHIFT_2 \
3146 do { \
3147 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3148 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3149 else \
3150 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3151 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3152 } while (0)
3153
df7492f9
KH
3154
3155#define ENCODE_SINGLE_SHIFT_3 \
3156 do { \
3157 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3158 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3159 else \
3160 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3161 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3162 } while (0)
3163
df7492f9 3164
4ed46869
KH
3165/* The following four macros produce codes (control character or
3166 escape sequence) for ISO2022 locking-shift functions (shift-in,
3167 shift-out, locking-shift-2, and locking-shift-3). */
3168
df7492f9
KH
3169#define ENCODE_SHIFT_IN \
3170 do { \
3171 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3172 CODING_ISO_INVOCATION (coding, 0) = 0; \
4ed46869
KH
3173 } while (0)
3174
df7492f9
KH
3175
3176#define ENCODE_SHIFT_OUT \
3177 do { \
3178 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3179 CODING_ISO_INVOCATION (coding, 0) = 1; \
4ed46869
KH
3180 } while (0)
3181
df7492f9
KH
3182
3183#define ENCODE_LOCKING_SHIFT_2 \
3184 do { \
3185 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3186 CODING_ISO_INVOCATION (coding, 0) = 2; \
4ed46869
KH
3187 } while (0)
3188
df7492f9
KH
3189
3190#define ENCODE_LOCKING_SHIFT_3 \
3191 do { \
3192 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3193 CODING_ISO_INVOCATION (coding, 0) = 3; \
4ed46869
KH
3194 } while (0)
3195
df7492f9 3196
f4dee582
RS
3197/* Produce codes for a DIMENSION1 character whose character set is
3198 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
3199 sequences are also produced in advance if necessary. */
3200
6e85d753
KH
3201#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3202 do { \
df7492f9
KH
3203 int id = CHARSET_ID (charset); \
3204 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3205 { \
df7492f9
KH
3206 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3207 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753 3208 else \
df7492f9
KH
3209 EMIT_ONE_BYTE (c1 | 0x80); \
3210 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3211 break; \
3212 } \
df7492f9 3213 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3214 { \
df7492f9 3215 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753
KH
3216 break; \
3217 } \
df7492f9 3218 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3219 { \
df7492f9 3220 EMIT_ONE_BYTE (c1 | 0x80); \
6e85d753
KH
3221 break; \
3222 } \
6e85d753
KH
3223 else \
3224 /* Since CHARSET is not yet invoked to any graphic planes, we \
3225 must invoke it, or, at first, designate it to some graphic \
3226 register. Then repeat the loop to actually produce the \
3227 character. */ \
df7492f9
KH
3228 dst = encode_invocation_designation (charset, coding, dst, \
3229 &produced_chars); \
4ed46869
KH
3230 } while (1)
3231
df7492f9 3232
f4dee582
RS
3233/* Produce codes for a DIMENSION2 character whose character set is
3234 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3235 invocation codes are also produced in advance if necessary. */
3236
6e85d753
KH
3237#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3238 do { \
df7492f9
KH
3239 int id = CHARSET_ID (charset); \
3240 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3241 { \
df7492f9
KH
3242 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3243 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753 3244 else \
df7492f9
KH
3245 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3246 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3247 break; \
3248 } \
df7492f9 3249 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3250 { \
df7492f9 3251 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753
KH
3252 break; \
3253 } \
df7492f9 3254 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3255 { \
df7492f9 3256 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
6e85d753
KH
3257 break; \
3258 } \
6e85d753
KH
3259 else \
3260 /* Since CHARSET is not yet invoked to any graphic planes, we \
3261 must invoke it, or, at first, designate it to some graphic \
3262 register. Then repeat the loop to actually produce the \
3263 character. */ \
df7492f9
KH
3264 dst = encode_invocation_designation (charset, coding, dst, \
3265 &produced_chars); \
4ed46869
KH
3266 } while (1)
3267
05e6f5dc 3268
df7492f9
KH
3269#define ENCODE_ISO_CHARACTER(charset, c) \
3270 do { \
3271 int code = ENCODE_CHAR ((charset),(c)); \
3272 \
3273 if (CHARSET_DIMENSION (charset) == 1) \
3274 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3275 else \
3276 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
84fbb8a0 3277 } while (0)
bdd9fb48 3278
05e6f5dc 3279
4ed46869 3280/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3281 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3282 Return new DST. */
3283
3284unsigned char *
df7492f9
KH
3285encode_invocation_designation (charset, coding, dst, p_nchars)
3286 struct charset *charset;
4ed46869
KH
3287 struct coding_system *coding;
3288 unsigned char *dst;
df7492f9 3289 int *p_nchars;
4ed46869 3290{
df7492f9
KH
3291 int multibytep = coding->dst_multibyte;
3292 int produced_chars = *p_nchars;
4ed46869 3293 int reg; /* graphic register number */
df7492f9 3294 int id = CHARSET_ID (charset);
4ed46869
KH
3295
3296 /* At first, check designations. */
3297 for (reg = 0; reg < 4; reg++)
df7492f9 3298 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3299 break;
3300
3301 if (reg >= 4)
3302 {
3303 /* CHARSET is not yet designated to any graphic registers. */
3304 /* At first check the requested designation. */
df7492f9
KH
3305 reg = CODING_ISO_REQUEST (coding, id);
3306 if (reg < 0)
1ba9e4ab
KH
3307 /* Since CHARSET requests no special designation, designate it
3308 to graphic register 0. */
4ed46869
KH
3309 reg = 0;
3310
3311 ENCODE_DESIGNATION (charset, reg, coding);
3312 }
3313
df7492f9
KH
3314 if (CODING_ISO_INVOCATION (coding, 0) != reg
3315 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3316 {
3317 /* Since the graphic register REG is not invoked to any graphic
3318 planes, invoke it to graphic plane 0. */
3319 switch (reg)
3320 {
3321 case 0: /* graphic register 0 */
3322 ENCODE_SHIFT_IN;
3323 break;
3324
3325 case 1: /* graphic register 1 */
3326 ENCODE_SHIFT_OUT;
3327 break;
3328
3329 case 2: /* graphic register 2 */
df7492f9 3330 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3331 ENCODE_SINGLE_SHIFT_2;
3332 else
3333 ENCODE_LOCKING_SHIFT_2;
3334 break;
3335
3336 case 3: /* graphic register 3 */
df7492f9 3337 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3338 ENCODE_SINGLE_SHIFT_3;
3339 else
3340 ENCODE_LOCKING_SHIFT_3;
3341 break;
3342 }
3343 }
b73bfc1c 3344
df7492f9 3345 *p_nchars = produced_chars;
4ed46869
KH
3346 return dst;
3347}
3348
df7492f9
KH
3349/* The following three macros produce codes for indicating direction
3350 of text. */
3351#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
ec6d2bb8 3352 do { \
df7492f9
KH
3353 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3354 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
ec6d2bb8 3355 else \
df7492f9 3356 EMIT_ONE_BYTE (ISO_CODE_CSI); \
ec6d2bb8
KH
3357 } while (0)
3358
ec6d2bb8 3359
df7492f9
KH
3360#define ENCODE_DIRECTION_R2L() \
3361 do { \
3362 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3363 EMIT_TWO_ASCII_BYTES ('2', ']'); \
ec6d2bb8
KH
3364 } while (0)
3365
ec6d2bb8 3366
df7492f9 3367#define ENCODE_DIRECTION_L2R() \
ec6d2bb8 3368 do { \
df7492f9
KH
3369 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3370 EMIT_TWO_ASCII_BYTES ('0', ']'); \
4ed46869
KH
3371 } while (0)
3372
4ed46869
KH
3373
3374/* Produce codes for designation and invocation to reset the graphic
3375 planes and registers to initial state. */
df7492f9
KH
3376#define ENCODE_RESET_PLANE_AND_REGISTER() \
3377 do { \
3378 int reg; \
3379 struct charset *charset; \
3380 \
3381 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3382 ENCODE_SHIFT_IN; \
3383 for (reg = 0; reg < 4; reg++) \
3384 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3385 && (CODING_ISO_DESIGNATION (coding, reg) \
3386 != CODING_ISO_INITIAL (coding, reg))) \
3387 { \
3388 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3389 ENCODE_DESIGNATION (charset, reg, coding); \
3390 } \
4ed46869
KH
3391 } while (0)
3392
df7492f9 3393
bdd9fb48 3394/* Produce designation sequences of charsets in the line started from
b73bfc1c 3395 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3396
3397 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3398 find all the necessary designations. */
3399
b73bfc1c 3400static unsigned char *
df7492f9 3401encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3402 struct coding_system *coding;
df7492f9
KH
3403 int *charbuf, *charbuf_end;
3404 unsigned char *dst;
e0e989f6 3405{
df7492f9 3406 struct charset *charset;
bdd9fb48
KH
3407 /* Table of charsets to be designated to each graphic register. */
3408 int r[4];
df7492f9
KH
3409 int c, found = 0, reg;
3410 int produced_chars = 0;
3411 int multibytep = coding->dst_multibyte;
3412 Lisp_Object attrs;
3413 Lisp_Object charset_list;
3414
3415 attrs = CODING_ID_ATTRS (coding->id);
3416 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3417 if (EQ (charset_list, Qiso_2022))
3418 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3419
3420 for (reg = 0; reg < 4; reg++)
3421 r[reg] = -1;
3422
b73bfc1c 3423 while (found < 4)
e0e989f6 3424 {
df7492f9
KH
3425 int id;
3426
3427 c = *charbuf++;
b73bfc1c
KH
3428 if (c == '\n')
3429 break;
df7492f9
KH
3430 charset = char_charset (c, charset_list, NULL);
3431 id = CHARSET_ID (charset);
3432 reg = CODING_ISO_REQUEST (coding, id);
3433 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3434 {
3435 found++;
df7492f9 3436 r[reg] = id;
bdd9fb48 3437 }
bdd9fb48
KH
3438 }
3439
3440 if (found)
3441 {
3442 for (reg = 0; reg < 4; reg++)
3443 if (r[reg] >= 0
df7492f9
KH
3444 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3445 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3446 }
b73bfc1c
KH
3447
3448 return dst;
e0e989f6
KH
3449}
3450
4ed46869
KH
3451/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3452
df7492f9
KH
3453static int
3454encode_coding_iso_2022 (coding)
4ed46869 3455 struct coding_system *coding;
4ed46869 3456{
df7492f9
KH
3457 int multibytep = coding->dst_multibyte;
3458 int *charbuf = coding->charbuf;
3459 int *charbuf_end = charbuf + coding->charbuf_used;
3460 unsigned char *dst = coding->destination + coding->produced;
3461 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3462 int safe_room = 16;
3463 int bol_designation
3464 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3465 && CODING_ISO_BOL (coding));
3466 int produced_chars = 0;
3467 Lisp_Object attrs, eol_type, charset_list;
3468 int ascii_compatible;
b73bfc1c 3469 int c;
05e6f5dc 3470
df7492f9 3471 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4
KH
3472 setup_iso_safe_charsets (attrs);
3473 coding->safe_charsets
3474 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3475
df7492f9 3476 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3477
df7492f9 3478 while (charbuf < charbuf_end)
4ed46869 3479 {
df7492f9 3480 ASSURE_DESTINATION (safe_room);
b73bfc1c 3481
df7492f9 3482 if (bol_designation)
b73bfc1c 3483 {
df7492f9 3484 unsigned char *dst_prev = dst;
4ed46869 3485
bdd9fb48 3486 /* We have to produce designation sequences if any now. */
df7492f9
KH
3487 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3488 bol_designation = 0;
3489 /* We are sure that designation sequences are all ASCII bytes. */
3490 produced_chars += dst - dst_prev;
4ed46869 3491 }
ec6d2bb8 3492
df7492f9 3493 c = *charbuf++;
4ed46869 3494
b73bfc1c
KH
3495 /* Now encode the character C. */
3496 if (c < 0x20 || c == 0x7F)
3497 {
df7492f9
KH
3498 if (c == '\n'
3499 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3500 {
df7492f9
KH
3501 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3502 ENCODE_RESET_PLANE_AND_REGISTER ();
3503 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3504 {
df7492f9
KH
3505 int i;
3506
3507 for (i = 0; i < 4; i++)
3508 CODING_ISO_DESIGNATION (coding, i)
3509 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3510 }
df7492f9
KH
3511 bol_designation
3512 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3513 }
df7492f9
KH
3514 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3515 ENCODE_RESET_PLANE_AND_REGISTER ();
3516 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3517 }
df7492f9 3518 else if (ASCII_CHAR_P (c))
88993dfd 3519 {
df7492f9
KH
3520 if (ascii_compatible)
3521 EMIT_ONE_ASCII_BYTE (c);
3522 else
3523 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii), c);
88993dfd 3524 }
b73bfc1c 3525 else
df7492f9
KH
3526 {
3527 struct charset *charset = char_charset (c, charset_list, NULL);
b73bfc1c 3528
df7492f9
KH
3529 if (!charset)
3530 {
41cbe562
KH
3531 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3532 {
3533 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3534 charset = CHARSET_FROM_ID (charset_ascii);
3535 }
3536 else
3537 {
3538 c = coding->default_char;
3539 charset = char_charset (c, charset_list, NULL);
3540 }
df7492f9
KH
3541 }
3542 ENCODE_ISO_CHARACTER (charset, c);
3543 }
84fbb8a0 3544 }
b73bfc1c 3545
df7492f9
KH
3546 if (coding->mode & CODING_MODE_LAST_BLOCK
3547 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3548 {
3549 ASSURE_DESTINATION (safe_room);
3550 ENCODE_RESET_PLANE_AND_REGISTER ();
3551 }
3552 coding->result = CODING_RESULT_SUCCESS;
3553 CODING_ISO_BOL (coding) = bol_designation;
3554 coding->produced_char += produced_chars;
3555 coding->produced = dst - coding->destination;
3556 return 0;
4ed46869
KH
3557}
3558
3559\f
df7492f9 3560/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3561
df7492f9 3562/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3563 quite widely. So, for the moment, Emacs supports them in the bare
3564 C code. But, in the future, they may be supported only by CCL. */
3565
3566/* SJIS is a coding system encoding three character sets: ASCII, right
3567 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3568 as is. A character of charset katakana-jisx0201 is encoded by
3569 "position-code + 0x80". A character of charset japanese-jisx0208
3570 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3571 so that it fit in the range below.
4ed46869
KH
3572
3573 --- CODE RANGE of SJIS ---
3574 (character set) (range)
3575 ASCII 0x00 .. 0x7F
df7492f9 3576 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3577 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3578 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3579 -------------------------------
3580
3581*/
3582
3583/* BIG5 is a coding system encoding two character sets: ASCII and
3584 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3585 character set and is encoded in two-byte.
4ed46869
KH
3586
3587 --- CODE RANGE of BIG5 ---
3588 (character set) (range)
3589 ASCII 0x00 .. 0x7F
3590 Big5 (1st byte) 0xA1 .. 0xFE
3591 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3592 --------------------------
3593
df7492f9 3594 */
4ed46869
KH
3595
3596/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3597 Check if a text is encoded in SJIS. If it is, return
df7492f9 3598 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3599
0a28aafb 3600static int
df7492f9
KH
3601detect_coding_sjis (coding, mask)
3602 struct coding_system *coding;
3603 int *mask;
4ed46869 3604{
df7492f9
KH
3605 unsigned char *src = coding->source, *src_base = src;
3606 unsigned char *src_end = coding->source + coding->src_bytes;
3607 int multibytep = coding->src_multibyte;
3608 int consumed_chars = 0;
3609 int found = 0;
b73bfc1c 3610 int c;
df7492f9
KH
3611
3612 /* A coding system of this category is always ASCII compatible. */
3613 src += coding->head_ascii;
4ed46869 3614
b73bfc1c 3615 while (1)
4ed46869 3616 {
df7492f9 3617 ONE_MORE_BYTE (c);
682169fe
KH
3618 if (c < 0x80)
3619 continue;
df7492f9 3620 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3621 {
df7492f9 3622 ONE_MORE_BYTE (c);
682169fe 3623 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9
KH
3624 break;
3625 found = 1;
4ed46869 3626 }
df7492f9
KH
3627 else if (c >= 0xA0 && c < 0xE0)
3628 found = 1;
3629 else
3630 break;
4ed46869 3631 }
df7492f9
KH
3632 *mask &= ~CATEGORY_MASK_SJIS;
3633 return 0;
3634
3635 no_more_source:
3636 if (!found)
3637 return 0;
3638 *mask &= CATEGORY_MASK_SJIS;
3639 return 1;
4ed46869
KH
3640}
3641
3642/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3643 Check if a text is encoded in BIG5. If it is, return
df7492f9 3644 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3645
0a28aafb 3646static int
df7492f9
KH
3647detect_coding_big5 (coding, mask)
3648 struct coding_system *coding;
3649 int *mask;
4ed46869 3650{
df7492f9
KH
3651 unsigned char *src = coding->source, *src_base = src;
3652 unsigned char *src_end = coding->source + coding->src_bytes;
3653 int multibytep = coding->src_multibyte;
3654 int consumed_chars = 0;
3655 int found = 0;
b73bfc1c 3656 int c;
fa42c37f 3657
df7492f9
KH
3658 /* A coding system of this category is always ASCII compatible. */
3659 src += coding->head_ascii;
fa42c37f 3660
b73bfc1c 3661 while (1)
fa42c37f 3662 {
df7492f9
KH
3663 ONE_MORE_BYTE (c);
3664 if (c < 0x80)
fa42c37f 3665 continue;
df7492f9 3666 if (c >= 0xA1)
fa42c37f 3667 {
df7492f9
KH
3668 ONE_MORE_BYTE (c);
3669 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3670 return 0;
df7492f9 3671 found = 1;
fa42c37f 3672 }
df7492f9
KH
3673 else
3674 break;
fa42c37f 3675 }
df7492f9 3676 *mask &= ~CATEGORY_MASK_BIG5;
fa42c37f 3677 return 0;
df7492f9
KH
3678
3679 no_more_source:
3680 if (!found)
3681 return 0;
3682 *mask &= CATEGORY_MASK_BIG5;
3683 return 1;
fa42c37f
KH
3684}
3685
4ed46869
KH
3686/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3687 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3688
b73bfc1c 3689static void
df7492f9 3690decode_coding_sjis (coding)
4ed46869 3691 struct coding_system *coding;
4ed46869 3692{
df7492f9
KH
3693 unsigned char *src = coding->source + coding->consumed;
3694 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3695 unsigned char *src_base;
df7492f9
KH
3696 int *charbuf = coding->charbuf;
3697 int *charbuf_end = charbuf + coding->charbuf_size;
3698 int consumed_chars = 0, consumed_chars_base;
3699 int multibytep = coding->src_multibyte;
3700 struct charset *charset_roman, *charset_kanji, *charset_kana;
3701 Lisp_Object attrs, eol_type, charset_list, val;
a5d301df 3702
df7492f9
KH
3703 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3704
3705 val = charset_list;
3706 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3707 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3708 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3709
b73bfc1c 3710 while (1)
4ed46869 3711 {
df7492f9 3712 int c, c1;
b73bfc1c
KH
3713
3714 src_base = src;
df7492f9
KH
3715 consumed_chars_base = consumed_chars;
3716
3717 if (charbuf >= charbuf_end)
3718 break;
3719
3720 ONE_MORE_BYTE (c);
b73bfc1c 3721
df7492f9 3722 if (c == '\r')
4ed46869 3723 {
df7492f9 3724 if (EQ (eol_type, Qdos))
4ed46869 3725 {
df7492f9
KH
3726 if (src == src_end)
3727 goto no_more_source;
3728 if (*src == '\n')
3729 ONE_MORE_BYTE (c);
4ed46869 3730 }
df7492f9
KH
3731 else if (EQ (eol_type, Qmac))
3732 c = '\n';
4ed46869 3733 }
54f78171 3734 else
df7492f9
KH
3735 {
3736 struct charset *charset;
3737
3738 if (c < 0x80)
3739 charset = charset_roman;
3740 else
4ed46869 3741 {
df7492f9
KH
3742 if (c >= 0xF0)
3743 goto invalid_code;
3744 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3745 {
54f78171 3746 /* SJIS -> JISX0208 */
df7492f9
KH
3747 ONE_MORE_BYTE (c1);
3748 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3749 goto invalid_code;
3750 c = (c << 8) | c1;
3751 SJIS_TO_JIS (c);
3752 charset = charset_kanji;
5e34de15 3753 }
fb88bf2d 3754 else
b73bfc1c 3755 /* SJIS -> JISX0201-Kana */
df7492f9
KH
3756 charset = charset_kana;
3757 }
3758 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3759 }
3760 *charbuf++ = c;
3761 continue;
3762
3763 invalid_code:
3764 src = src_base;
3765 consumed_chars = consumed_chars_base;
3766 ONE_MORE_BYTE (c);
3767 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3768 coding->errors++;
3769 }
3770
3771 no_more_source:
3772 coding->consumed_char += consumed_chars_base;
3773 coding->consumed = src_base - coding->source;
3774 coding->charbuf_used = charbuf - coding->charbuf;
3775}
3776
3777static void
3778decode_coding_big5 (coding)
3779 struct coding_system *coding;
3780{
3781 unsigned char *src = coding->source + coding->consumed;
3782 unsigned char *src_end = coding->source + coding->src_bytes;
3783 unsigned char *src_base;
3784 int *charbuf = coding->charbuf;
3785 int *charbuf_end = charbuf + coding->charbuf_size;
3786 int consumed_chars = 0, consumed_chars_base;
3787 int multibytep = coding->src_multibyte;
3788 struct charset *charset_roman, *charset_big5;
3789 Lisp_Object attrs, eol_type, charset_list, val;
3790
3791 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3792 val = charset_list;
3793 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3794 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3795
3796 while (1)
3797 {
3798 int c, c1;
3799
3800 src_base = src;
3801 consumed_chars_base = consumed_chars;
3802
3803 if (charbuf >= charbuf_end)
3804 break;
3805
3806 ONE_MORE_BYTE (c);
3807
3808 if (c == '\r')
3809 {
3810 if (EQ (eol_type, Qdos))
3811 {
3812 if (src == src_end)
3813 goto no_more_source;
3814 if (*src == '\n')
3815 ONE_MORE_BYTE (c);
4ed46869 3816 }
df7492f9
KH
3817 else if (EQ (eol_type, Qmac))
3818 c = '\n';
3819 }
3820 else
3821 {
3822 struct charset *charset;
3823 if (c < 0x80)
3824 charset = charset_roman;
fb88bf2d 3825 else
fb88bf2d 3826 {
54f78171 3827 /* BIG5 -> Big5 */
df7492f9
KH
3828 if (c < 0xA1 || c > 0xFE)
3829 goto invalid_code;
3830 ONE_MORE_BYTE (c1);
3831 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3832 goto invalid_code;
3833 c = c << 8 | c1;
3834 charset = charset_big5;
4ed46869 3835 }
df7492f9 3836 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 3837 }
4ed46869 3838
df7492f9 3839 *charbuf++ = c;
fb88bf2d
KH
3840 continue;
3841
df7492f9 3842 invalid_code:
4ed46869 3843 src = src_base;
df7492f9
KH
3844 consumed_chars = consumed_chars_base;
3845 ONE_MORE_BYTE (c);
3846 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3847 coding->errors++;
fb88bf2d 3848 }
d46c5b12 3849
df7492f9
KH
3850 no_more_source:
3851 coding->consumed_char += consumed_chars_base;
3852 coding->consumed = src_base - coding->source;
3853 coding->charbuf_used = charbuf - coding->charbuf;
3854}
3855
3856/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3857 This function can encode charsets `ascii', `katakana-jisx0201',
3858 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3859 are sure that all these charsets are registered as official charset
3860 (i.e. do not have extended leading-codes). Characters of other
3861 charsets are produced without any encoding. If SJIS_P is 1, encode
3862 SJIS text, else encode BIG5 text. */
3863
3864static int
3865encode_coding_sjis (coding)
3866 struct coding_system *coding;
3867{
3868 int multibytep = coding->dst_multibyte;
3869 int *charbuf = coding->charbuf;
3870 int *charbuf_end = charbuf + coding->charbuf_used;
3871 unsigned char *dst = coding->destination + coding->produced;
3872 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3873 int safe_room = 4;
3874 int produced_chars = 0;
3875 Lisp_Object attrs, eol_type, charset_list, val;
3876 int ascii_compatible;
3877 struct charset *charset_roman, *charset_kanji, *charset_kana;
3878 int c;
3879
3880 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3881 val = charset_list;
3882 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3883 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3884 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3885
3886 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3887
3888 while (charbuf < charbuf_end)
3889 {
3890 ASSURE_DESTINATION (safe_room);
3891 c = *charbuf++;
3892 /* Now encode the character C. */
3893 if (ASCII_CHAR_P (c) && ascii_compatible)
3894 EMIT_ONE_ASCII_BYTE (c);
3895 else
3896 {
3897 unsigned code;
3898 struct charset *charset = char_charset (c, charset_list, &code);
3899
3900 if (!charset)
3901 {
41cbe562
KH
3902 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3903 {
3904 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3905 charset = CHARSET_FROM_ID (charset_ascii);
3906 }
3907 else
3908 {
3909 c = coding->default_char;
3910 charset = char_charset (c, charset_list, &code);
3911 }
df7492f9
KH
3912 }
3913 if (code == CHARSET_INVALID_CODE (charset))
3914 abort ();
3915 if (charset == charset_kanji)
3916 {
3917 int c1, c2;
3918 JIS_TO_SJIS (code);
3919 c1 = code >> 8, c2 = code & 0xFF;
3920 EMIT_TWO_BYTES (c1, c2);
3921 }
3922 else if (charset == charset_kana)
3923 EMIT_ONE_BYTE (code | 0x80);
3924 else
3925 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3926 }
3927 }
3928 coding->result = CODING_RESULT_SUCCESS;
3929 coding->produced_char += produced_chars;
3930 coding->produced = dst - coding->destination;
3931 return 0;
3932}
3933
3934static int
3935encode_coding_big5 (coding)
3936 struct coding_system *coding;
3937{
3938 int multibytep = coding->dst_multibyte;
3939 int *charbuf = coding->charbuf;
3940 int *charbuf_end = charbuf + coding->charbuf_used;
3941 unsigned char *dst = coding->destination + coding->produced;
3942 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3943 int safe_room = 4;
3944 int produced_chars = 0;
3945 Lisp_Object attrs, eol_type, charset_list, val;
3946 int ascii_compatible;
3947 struct charset *charset_roman, *charset_big5;
3948 int c;
3949
3950 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3951 val = charset_list;
3952 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3953 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3954 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3955
3956 while (charbuf < charbuf_end)
3957 {
3958 ASSURE_DESTINATION (safe_room);
3959 c = *charbuf++;
3960 /* Now encode the character C. */
3961 if (ASCII_CHAR_P (c) && ascii_compatible)
3962 EMIT_ONE_ASCII_BYTE (c);
3963 else
3964 {
3965 unsigned code;
3966 struct charset *charset = char_charset (c, charset_list, &code);
3967
3968 if (! charset)
3969 {
41cbe562
KH
3970 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3971 {
3972 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3973 charset = CHARSET_FROM_ID (charset_ascii);
3974 }
3975 else
3976 {
3977 c = coding->default_char;
3978 charset = char_charset (c, charset_list, &code);
3979 }
df7492f9
KH
3980 }
3981 if (code == CHARSET_INVALID_CODE (charset))
3982 abort ();
3983 if (charset == charset_big5)
3984 {
3985 int c1, c2;
3986
3987 c1 = code >> 8, c2 = code & 0xFF;
3988 EMIT_TWO_BYTES (c1, c2);
3989 }
3990 else
3991 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3992 }
3993 }
3994 coding->result = CODING_RESULT_SUCCESS;
3995 coding->produced_char += produced_chars;
3996 coding->produced = dst - coding->destination;
3997 return 0;
3998}
3999
4000\f
4001/*** 10. CCL handlers ***/
4002
4003/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4004 Check if a text is encoded in a coding system of which
4005 encoder/decoder are written in CCL program. If it is, return
4006 CATEGORY_MASK_CCL, else return 0. */
4007
4008static int
4009detect_coding_ccl (coding, mask)
4010 struct coding_system *coding;
4011 int *mask;
4012{
4013 unsigned char *src = coding->source, *src_base = src;
4014 unsigned char *src_end = coding->source + coding->src_bytes;
4015 int multibytep = coding->src_multibyte;
4016 int consumed_chars = 0;
4017 int found = 0;
4018 unsigned char *valids = CODING_CCL_VALIDS (coding);
4019 int head_ascii = coding->head_ascii;
4020 Lisp_Object attrs;
4021
4022 coding = &coding_categories[coding_category_ccl];
4023 attrs = CODING_ID_ATTRS (coding->id);
4024 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4025 src += head_ascii;
4026
4027 while (1)
4028 {
4029 int c;
4030 ONE_MORE_BYTE (c);
4031 if (! valids[c])
4032 break;
4033 if (!found && valids[c] > 1)
4034 found = 1;
4035 }
4036 *mask &= ~CATEGORY_MASK_CCL;
4037 return 0;
4038
4039 no_more_source:
4040 if (!found)
4041 return 0;
4042 *mask &= CATEGORY_MASK_CCL;
4043 return 1;
4044}
4045
4046static void
4047decode_coding_ccl (coding)
4048 struct coding_system *coding;
4049{
4050 unsigned char *src = coding->source + coding->consumed;
4051 unsigned char *src_end = coding->source + coding->src_bytes;
4052 int *charbuf = coding->charbuf;
4053 int *charbuf_end = charbuf + coding->charbuf_size;
4054 int consumed_chars = 0;
4055 int multibytep = coding->src_multibyte;
4056 struct ccl_program ccl;
4057 int source_charbuf[1024];
4058 int source_byteidx[1024];
4059
4060 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4061
4062 while (src < src_end)
4063 {
4064 unsigned char *p = src;
4065 int *source, *source_end;
4066 int i = 0;
4067
4068 if (multibytep)
4069 while (i < 1024 && p < src_end)
4070 {
4071 source_byteidx[i] = p - src;
4072 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4073 }
4074 else
4075 while (i < 1024 && p < src_end)
4076 source_charbuf[i++] = *p++;
4077
4078 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4079 ccl.last_block = 1;
4080
4081 source = source_charbuf;
4082 source_end = source + i;
4083 while (source < source_end)
4084 {
4085 ccl_driver (&ccl, source, charbuf,
4086 source_end - source, charbuf_end - charbuf);
4087 source += ccl.consumed;
4088 charbuf += ccl.produced;
4089 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4090 break;
4091 }
4092 if (source < source_end)
4093 src += source_byteidx[source - source_charbuf];
4094 else
4095 src = p;
4096 consumed_chars += source - source_charbuf;
4097
4098 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4099 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4100 break;
4101 }
4102
4103 switch (ccl.status)
4104 {
4105 case CCL_STAT_SUSPEND_BY_SRC:
4106 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4107 break;
4108 case CCL_STAT_SUSPEND_BY_DST:
4109 break;
4110 case CCL_STAT_QUIT:
4111 case CCL_STAT_INVALID_CMD:
4112 coding->result = CODING_RESULT_INTERRUPT;
4113 break;
4114 default:
4115 coding->result = CODING_RESULT_SUCCESS;
4116 break;
4117 }
4118 coding->consumed_char += consumed_chars;
4119 coding->consumed = src - coding->source;
4120 coding->charbuf_used = charbuf - coding->charbuf;
4121}
4122
4123static int
4124encode_coding_ccl (coding)
4125 struct coding_system *coding;
4126{
4127 struct ccl_program ccl;
4128 int multibytep = coding->dst_multibyte;
4129 int *charbuf = coding->charbuf;
4130 int *charbuf_end = charbuf + coding->charbuf_used;
4131 unsigned char *dst = coding->destination + coding->produced;
4132 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4133 unsigned char *adjusted_dst_end = dst_end - 1;
4134 int destination_charbuf[1024];
4135 int i, produced_chars = 0;
4136
4137 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4138
4139 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4140 ccl.dst_multibyte = coding->dst_multibyte;
4141
4142 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4143 {
4144 int dst_bytes = dst_end - dst;
4145 if (dst_bytes > 1024)
4146 dst_bytes = 1024;
4147
4148 ccl_driver (&ccl, charbuf, destination_charbuf,
4149 charbuf_end - charbuf, dst_bytes);
4150 charbuf += ccl.consumed;
4151 if (multibytep)
4152 for (i = 0; i < ccl.produced; i++)
4153 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4154 else
4155 {
4156 for (i = 0; i < ccl.produced; i++)
4157 *dst++ = destination_charbuf[i] & 0xFF;
4158 produced_chars += ccl.produced;
4159 }
4160 }
4161
4162 switch (ccl.status)
4163 {
4164 case CCL_STAT_SUSPEND_BY_SRC:
4165 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4166 break;
4167 case CCL_STAT_SUSPEND_BY_DST:
4168 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4169 break;
4170 case CCL_STAT_QUIT:
4171 case CCL_STAT_INVALID_CMD:
4172 coding->result = CODING_RESULT_INTERRUPT;
4173 break;
4174 default:
4175 coding->result = CODING_RESULT_SUCCESS;
4176 break;
4177 }
4178
4179 coding->produced_char += produced_chars;
4180 coding->produced = dst - coding->destination;
4181 return 0;
4ed46869
KH
4182}
4183
df7492f9
KH
4184
4185\f
4186/*** 10, 11. no-conversion handlers ***/
4187
4188/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4189
b73bfc1c 4190static void
df7492f9 4191decode_coding_raw_text (coding)
4ed46869 4192 struct coding_system *coding;
4ed46869 4193{
df7492f9 4194 coding->chars_at_source = 1;
2c78b7e1
KH
4195 coding->consumed_char = 0;
4196 coding->consumed = 0;
df7492f9
KH
4197 coding->result = CODING_RESULT_SUCCESS;
4198}
4ed46869 4199
df7492f9
KH
4200static int
4201encode_coding_raw_text (coding)
4202 struct coding_system *coding;
4203{
4204 int multibytep = coding->dst_multibyte;
4205 int *charbuf = coding->charbuf;
4206 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4207 unsigned char *dst = coding->destination + coding->produced;
4208 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4209 int produced_chars = 0;
4210 int c;
a5d301df 4211
df7492f9 4212 if (multibytep)
b73bfc1c 4213 {
df7492f9 4214 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4215
df7492f9
KH
4216 if (coding->src_multibyte)
4217 while (charbuf < charbuf_end)
4218 {
4219 ASSURE_DESTINATION (safe_room);
4220 c = *charbuf++;
4221 if (ASCII_CHAR_P (c))
4222 EMIT_ONE_ASCII_BYTE (c);
4223 else if (CHAR_BYTE8_P (c))
4224 {
4225 c = CHAR_TO_BYTE8 (c);
4226 EMIT_ONE_BYTE (c);
4227 }
4228 else
4229 {
4230 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4231
df7492f9
KH
4232 CHAR_STRING_ADVANCE (c, p1);
4233 while (p0 < p1)
4234 EMIT_ONE_BYTE (*p0);
4235 }
4236 }
b73bfc1c 4237 else
df7492f9
KH
4238 while (charbuf < charbuf_end)
4239 {
4240 ASSURE_DESTINATION (safe_room);
4241 c = *charbuf++;
4242 EMIT_ONE_BYTE (c);
4243 }
4244 }
4245 else
4246 {
4247 if (coding->src_multibyte)
b73bfc1c 4248 {
df7492f9
KH
4249 int safe_room = MAX_MULTIBYTE_LENGTH;
4250
4251 while (charbuf < charbuf_end)
b73bfc1c 4252 {
df7492f9
KH
4253 ASSURE_DESTINATION (safe_room);
4254 c = *charbuf++;
4255 if (ASCII_CHAR_P (c))
4256 *dst++ = c;
4257 else if (CHAR_BYTE8_P (c))
4258 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4259 else
df7492f9
KH
4260 CHAR_STRING_ADVANCE (c, dst);
4261 produced_chars++;
b73bfc1c 4262 }
4ed46869 4263 }
df7492f9
KH
4264 else
4265 {
4266 ASSURE_DESTINATION (charbuf_end - charbuf);
4267 while (charbuf < charbuf_end && dst < dst_end)
4268 *dst++ = *charbuf++;
4269 produced_chars = dst - (coding->destination + coding->dst_bytes);
4270 }
4ed46869 4271 }
df7492f9
KH
4272 coding->result = CODING_RESULT_SUCCESS;
4273 coding->produced_char += produced_chars;
4274 coding->produced = dst - coding->destination;
4275 return 0;
4ed46869
KH
4276}
4277
0a28aafb 4278static int
df7492f9
KH
4279detect_coding_charset (coding, mask)
4280 struct coding_system *coding;
4281 int *mask;
1397dc18 4282{
df7492f9
KH
4283 unsigned char *src = coding->source, *src_base = src;
4284 unsigned char *src_end = coding->source + coding->src_bytes;
4285 int multibytep = coding->src_multibyte;
4286 int consumed_chars = 0;
4287 Lisp_Object attrs, valids;
1397dc18 4288
df7492f9
KH
4289 coding = &coding_categories[coding_category_charset];
4290 attrs = CODING_ID_ATTRS (coding->id);
4291 valids = AREF (attrs, coding_attr_charset_valids);
4292
4293 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4294 src += coding->head_ascii;
1397dc18 4295
b73bfc1c 4296 while (1)
1397dc18 4297 {
df7492f9 4298 int c;
1397dc18 4299
df7492f9
KH
4300 ONE_MORE_BYTE (c);
4301 if (NILP (AREF (valids, c)))
4302 break;
4303 }
4304 *mask &= ~CATEGORY_MASK_CHARSET;
4305 return 0;
4ed46869 4306
df7492f9
KH
4307 no_more_source:
4308 *mask &= CATEGORY_MASK_CHARSET;
4309 return 1;
4310}
4ed46869 4311
b73bfc1c 4312static void
df7492f9 4313decode_coding_charset (coding)
4ed46869 4314 struct coding_system *coding;
4ed46869 4315{
df7492f9
KH
4316 unsigned char *src = coding->source + coding->consumed;
4317 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4318 unsigned char *src_base;
df7492f9
KH
4319 int *charbuf = coding->charbuf;
4320 int *charbuf_end = charbuf + coding->charbuf_size;
4321 int consumed_chars = 0, consumed_chars_base;
4322 int multibytep = coding->src_multibyte;
4eb6d3f1 4323 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9
KH
4324
4325 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4326 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4327
df7492f9 4328 while (1)
4ed46869 4329 {
4eb6d3f1 4330 int c;
df7492f9
KH
4331
4332 src_base = src;
4333 consumed_chars_base = consumed_chars;
b73bfc1c 4334
df7492f9
KH
4335 if (charbuf >= charbuf_end)
4336 break;
4337
4eb6d3f1 4338 ONE_MORE_BYTE (c);
df7492f9 4339 if (c == '\r')
d46c5b12 4340 {
c7c66a95
KH
4341 /* Here we assume that no charset maps '\r' to something
4342 else. */
df7492f9 4343 if (EQ (eol_type, Qdos))
b73bfc1c 4344 {
4eb6d3f1
KH
4345 if (src < src_end
4346 && *src == '\n')
df7492f9 4347 ONE_MORE_BYTE (c);
b73bfc1c 4348 }
df7492f9 4349 else if (EQ (eol_type, Qmac))
b73bfc1c 4350 c = '\n';
d46c5b12 4351 }
df7492f9 4352 else
d46c5b12 4353 {
4eb6d3f1
KH
4354 Lisp_Object val;
4355 struct charset *charset;
c7c66a95 4356 int dim;
acb2a965
KH
4357 int len = 1;
4358 unsigned code = c;
4eb6d3f1
KH
4359
4360 val = AREF (valids, c);
4361 if (NILP (val))
4362 goto invalid_code;
c7c66a95 4363 if (INTEGERP (val))
4eb6d3f1 4364 {
c7c66a95
KH
4365 charset = CHARSET_FROM_ID (XFASTINT (val));
4366 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4367 while (len < dim)
4eb6d3f1 4368 {
acb2a965
KH
4369 ONE_MORE_BYTE (c);
4370 code = (code << 8) | c;
f9d71dcd 4371 len++;
4eb6d3f1 4372 }
c7c66a95
KH
4373 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4374 charset, code, c);
4375 }
4376 else
4377 {
4378 /* VAL is a list of charset IDs. It is assured that the
4379 list is sorted by charset dimensions (smaller one
4380 comes first). */
c7c66a95
KH
4381 while (CONSP (val))
4382 {
4383 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4384 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4385 while (len < dim)
c7c66a95 4386 {
acb2a965
KH
4387 ONE_MORE_BYTE (c);
4388 code = (code << 8) | c;
f9d71dcd 4389 len++;
c7c66a95 4390 }
c7c66a95
KH
4391 CODING_DECODE_CHAR (coding, src, src_base,
4392 src_end, charset, code, c);
4393 if (c >= 0)
4394 break;
4395 val = XCDR (val);
4396 }
4eb6d3f1 4397 }
df7492f9
KH
4398 if (c < 0)
4399 goto invalid_code;
d46c5b12 4400 }
df7492f9
KH
4401 *charbuf++ = c;
4402 continue;
4403
4404 invalid_code:
4405 src = src_base;
4406 consumed_chars = consumed_chars_base;
4407 ONE_MORE_BYTE (c);
4408 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4409 coding->errors++;
4ed46869
KH
4410 }
4411
df7492f9
KH
4412 no_more_source:
4413 coding->consumed_char += consumed_chars_base;
4414 coding->consumed = src_base - coding->source;
4415 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4416}
4417
df7492f9
KH
4418static int
4419encode_coding_charset (coding)
4ed46869 4420 struct coding_system *coding;
4ed46869 4421{
df7492f9
KH
4422 int multibytep = coding->dst_multibyte;
4423 int *charbuf = coding->charbuf;
4424 int *charbuf_end = charbuf + coding->charbuf_used;
4425 unsigned char *dst = coding->destination + coding->produced;
4426 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4427 int safe_room = MAX_MULTIBYTE_LENGTH;
4428 int produced_chars = 0;
df7492f9
KH
4429 Lisp_Object attrs, eol_type, charset_list;
4430 int ascii_compatible;
b73bfc1c 4431 int c;
b73bfc1c 4432
df7492f9 4433 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4434 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4435
df7492f9 4436 while (charbuf < charbuf_end)
4ed46869 4437 {
4eb6d3f1 4438 struct charset *charset;
df7492f9
KH
4439 unsigned code;
4440
4441 ASSURE_DESTINATION (safe_room);
4442 c = *charbuf++;
4443 if (ascii_compatible && ASCII_CHAR_P (c))
4444 EMIT_ONE_ASCII_BYTE (c);
d46c5b12 4445 else
4eb6d3f1
KH
4446 {
4447 charset = char_charset (c, charset_list, &code);
4448 if (charset)
4449 {
4450 if (CHARSET_DIMENSION (charset) == 1)
4451 EMIT_ONE_BYTE (code);
4452 else if (CHARSET_DIMENSION (charset) == 2)
4453 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4454 else if (CHARSET_DIMENSION (charset) == 3)
4455 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4456 else
4457 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4458 (code >> 8) & 0xFF, code & 0xFF);
4459 }
4460 else
41cbe562
KH
4461 {
4462 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4463 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4464 else
4465 c = coding->default_char;
4466 EMIT_ONE_BYTE (c);
4467 }
4eb6d3f1 4468 }
4ed46869
KH
4469 }
4470
df7492f9
KH
4471 coding->result = CODING_RESULT_SUCCESS;
4472 coding->produced_char += produced_chars;
4473 coding->produced = dst - coding->destination;
4474 return 0;
4ed46869
KH
4475}
4476
4477\f
1397dc18 4478/*** 7. C library functions ***/
4ed46869 4479
df7492f9 4480/* In Emacs Lisp, coding system is represented by a Lisp symbol which
4ed46869 4481 has a property `coding-system'. The value of this property is a
df7492f9 4482 vector of length 5 (called as coding-vector). Among elements of
4ed46869
KH
4483 this vector, the first (element[0]) and the fifth (element[4])
4484 carry important information for decoding/encoding. Before
4485 decoding/encoding, this information should be set in fields of a
4486 structure of type `coding_system'.
4487
df7492f9 4488 A value of property `coding-system' can be a symbol of another
4ed46869
KH
4489 subsidiary coding-system. In that case, Emacs gets coding-vector
4490 from that symbol.
4491
4492 `element[0]' contains information to be set in `coding->type'. The
4493 value and its meaning is as follows:
4494
0ef69138
KH
4495 0 -- coding_type_emacs_mule
4496 1 -- coding_type_sjis
df7492f9 4497 2 -- coding_type_iso_2022
0ef69138
KH
4498 3 -- coding_type_big5
4499 4 -- coding_type_ccl encoder/decoder written in CCL
4500 nil -- coding_type_no_conversion
4501 t -- coding_type_undecided (automatic conversion on decoding,
4502 no-conversion on encoding)
4ed46869
KH
4503
4504 `element[4]' contains information to be set in `coding->flags' and
4505 `coding->spec'. The meaning varies by `coding->type'.
4506
df7492f9 4507 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4ed46869
KH
4508 of length 32 (of which the first 13 sub-elements are used now).
4509 Meanings of these sub-elements are:
4510
df7492f9
KH
4511 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4512 If the value is an integer of valid charset, the charset is
4513 assumed to be designated to graphic register N initially.
4ed46869 4514
df7492f9
KH
4515 If the value is minus, it is a minus value of charset which
4516 reserves graphic register N, which means that the charset is
4517 not designated initially but should be designated to graphic
4518 register N just before encoding a character in that charset.
1397dc18 4519
df7492f9
KH
4520 If the value is nil, graphic register N is never used on
4521 encoding.
4522
4523 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4524 Each value takes t or nil. See the section ISO2022 of
4525 `coding.h' for more information.
1397dc18 4526
df7492f9
KH
4527 If `coding->type' is `coding_type_big5', element[4] is t to denote
4528 BIG5-ETen or nil to denote BIG5-HKU.
4ed46869 4529
df7492f9 4530 If `coding->type' takes the other value, element[4] is ignored.
27901516 4531
df7492f9
KH
4532 Emacs Lisp's coding system also carries information about format of
4533 end-of-line in a value of property `eol-type'. If the value is
4534 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4535 it is not integer, it should be a vector of subsidiary coding
4536 systems of which property `eol-type' has one of above values.
4ed46869 4537
df7492f9 4538*/
4ed46869 4539
df7492f9
KH
4540/* Setup coding context CODING from information about CODING_SYSTEM.
4541 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4542 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4543
4544void
df7492f9
KH
4545setup_coding_system (coding_system, coding)
4546 Lisp_Object coding_system;
ec6d2bb8
KH
4547 struct coding_system *coding;
4548{
df7492f9
KH
4549 Lisp_Object attrs;
4550 Lisp_Object eol_type;
4551 Lisp_Object coding_type;
4552 Lisp_Object val;
ec6d2bb8 4553
df7492f9
KH
4554 if (NILP (coding_system))
4555 coding_system = Qno_conversion;
4556
4557 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4558
4559 attrs = CODING_ID_ATTRS (coding->id);
4560 eol_type = CODING_ID_EOL_TYPE (coding->id);
4561
4562 coding->mode = 0;
4563 coding->head_ascii = -1;
4564 coding->common_flags
4565 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4566
4567 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4568 coding->max_charset_id = XSTRING (val)->size - 1;
4569 coding->safe_charsets = (char *) XSTRING (val)->data;
4570 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4571
4572 coding_type = CODING_ATTR_TYPE (attrs);
4573 if (EQ (coding_type, Qundecided))
4574 {
4575 coding->detector = NULL;
4576 coding->decoder = decode_coding_raw_text;
4577 coding->encoder = encode_coding_raw_text;
4578 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4579 }
4580 else if (EQ (coding_type, Qiso_2022))
4581 {
4582 int i;
4583 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4584
4585 /* Invoke graphic register 0 to plane 0. */
4586 CODING_ISO_INVOCATION (coding, 0) = 0;
4587 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4588 CODING_ISO_INVOCATION (coding, 1)
4589 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4590 /* Setup the initial status of designation. */
4591 for (i = 0; i < 4; i++)
4592 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4593 /* Not single shifting initially. */
4594 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4595 /* Beginning of buffer should also be regarded as bol. */
4596 CODING_ISO_BOL (coding) = 1;
4597 coding->detector = detect_coding_iso_2022;
4598 coding->decoder = decode_coding_iso_2022;
4599 coding->encoder = encode_coding_iso_2022;
4600 if (flags & CODING_ISO_FLAG_SAFE)
4601 coding->mode |= CODING_MODE_SAFE_ENCODING;
4602 coding->common_flags
4603 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4604 | CODING_REQUIRE_FLUSHING_MASK);
4605 if (flags & CODING_ISO_FLAG_COMPOSITION)
4606 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4607 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4608 {
4609 setup_iso_safe_charsets (attrs);
4610 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4611 coding->max_charset_id = XSTRING (val)->size - 1;
4612 coding->safe_charsets = (char *) XSTRING (val)->data;
4613 }
4614 CODING_ISO_FLAGS (coding) = flags;
4615 }
4616 else if (EQ (coding_type, Qcharset))
4617 {
4618 coding->detector = detect_coding_charset;
4619 coding->decoder = decode_coding_charset;
4620 coding->encoder = encode_coding_charset;
4621 coding->common_flags
4622 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4623 }
4624 else if (EQ (coding_type, Qutf_8))
4625 {
4626 coding->detector = detect_coding_utf_8;
4627 coding->decoder = decode_coding_utf_8;
4628 coding->encoder = encode_coding_utf_8;
4629 coding->common_flags
4630 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4631 }
4632 else if (EQ (coding_type, Qutf_16))
4633 {
4634 val = AREF (attrs, coding_attr_utf_16_bom);
4635 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4636 : EQ (val, Qt) ? utf_16_with_bom
4637 : utf_16_without_bom);
4638 val = AREF (attrs, coding_attr_utf_16_endian);
4639 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4640 : utf_16_little_endian);
e19c3639 4641 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4642 coding->detector = detect_coding_utf_16;
4643 coding->decoder = decode_coding_utf_16;
4644 coding->encoder = encode_coding_utf_16;
4645 coding->common_flags
4646 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4647 }
4648 else if (EQ (coding_type, Qccl))
4649 {
4650 coding->detector = detect_coding_ccl;
4651 coding->decoder = decode_coding_ccl;
4652 coding->encoder = encode_coding_ccl;
4653 coding->common_flags
4654 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4655 | CODING_REQUIRE_FLUSHING_MASK);
4656 }
4657 else if (EQ (coding_type, Qemacs_mule))
4658 {
4659 coding->detector = detect_coding_emacs_mule;
4660 coding->decoder = decode_coding_emacs_mule;
4661 coding->encoder = encode_coding_emacs_mule;
4662 coding->common_flags
4663 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4664 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4665 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4666 {
4667 Lisp_Object tail, safe_charsets;
4668 int max_charset_id = 0;
4669
4670 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4671 tail = XCDR (tail))
4672 if (max_charset_id < XFASTINT (XCAR (tail)))
4673 max_charset_id = XFASTINT (XCAR (tail));
4674 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4675 make_number (255));
4676 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4677 tail = XCDR (tail))
4678 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4679 coding->max_charset_id = max_charset_id;
4680 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4681 }
4682 }
4683 else if (EQ (coding_type, Qshift_jis))
4684 {
4685 coding->detector = detect_coding_sjis;
4686 coding->decoder = decode_coding_sjis;
4687 coding->encoder = encode_coding_sjis;
4688 coding->common_flags
4689 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4690 }
4691 else if (EQ (coding_type, Qbig5))
4692 {
4693 coding->detector = detect_coding_big5;
4694 coding->decoder = decode_coding_big5;
4695 coding->encoder = encode_coding_big5;
4696 coding->common_flags
4697 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4698 }
4699 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4700 {
df7492f9
KH
4701 coding->detector = NULL;
4702 coding->decoder = decode_coding_raw_text;
4703 coding->encoder = encode_coding_raw_text;
4704 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4705 }
df7492f9
KH
4706
4707 return;
ec6d2bb8
KH
4708}
4709
df7492f9
KH
4710/* Return raw-text or one of its subsidiaries that has the same
4711 eol_type as CODING-SYSTEM. */
ec6d2bb8 4712
df7492f9
KH
4713Lisp_Object
4714raw_text_coding_system (coding_system)
4715 Lisp_Object coding_system;
ec6d2bb8 4716{
0be8721c 4717 Lisp_Object spec, attrs;
df7492f9
KH
4718 Lisp_Object eol_type, raw_text_eol_type;
4719
4720 spec = CODING_SYSTEM_SPEC (coding_system);
4721 attrs = AREF (spec, 0);
4722
4723 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4724 return coding_system;
ec6d2bb8 4725
df7492f9
KH
4726 eol_type = AREF (spec, 2);
4727 if (VECTORP (eol_type))
4728 return Qraw_text;
4729 spec = CODING_SYSTEM_SPEC (Qraw_text);
4730 raw_text_eol_type = AREF (spec, 2);
4731 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4732 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4733 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4734}
4735
54f78171 4736
df7492f9
KH
4737/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4738 does, return one of the subsidiary that has the same eol-spec as
4739 PARENT. Otherwise, return CODING_SYSTEM. */
4740
4741Lisp_Object
4742coding_inherit_eol_type (coding_system, parent)
b74e4686 4743 Lisp_Object coding_system, parent;
54f78171 4744{
df7492f9 4745 Lisp_Object spec, attrs, eol_type;
54f78171 4746
df7492f9
KH
4747 spec = CODING_SYSTEM_SPEC (coding_system);
4748 attrs = AREF (spec, 0);
4749 eol_type = AREF (spec, 2);
4750 if (VECTORP (eol_type))
4751 {
4752 Lisp_Object parent_spec;
df7492f9
KH
4753 Lisp_Object parent_eol_type;
4754
4755 parent_spec
4756 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4757 parent_eol_type = AREF (parent_spec, 2);
4758 if (EQ (parent_eol_type, Qunix))
4759 coding_system = AREF (eol_type, 0);
4760 else if (EQ (parent_eol_type, Qdos))
4761 coding_system = AREF (eol_type, 1);
4762 else if (EQ (parent_eol_type, Qmac))
4763 coding_system = AREF (eol_type, 2);
54f78171 4764 }
df7492f9 4765 return coding_system;
54f78171
KH
4766}
4767
4ed46869
KH
4768/* Emacs has a mechanism to automatically detect a coding system if it
4769 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4770 it's impossible to distinguish some coding systems accurately
4771 because they use the same range of codes. So, at first, coding
4772 systems are categorized into 7, those are:
4773
0ef69138 4774 o coding-category-emacs-mule
4ed46869
KH
4775
4776 The category for a coding system which has the same code range
4777 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 4778 symbol) `emacs-mule' by default.
4ed46869
KH
4779
4780 o coding-category-sjis
4781
4782 The category for a coding system which has the same code range
4783 as SJIS. Assigned the coding-system (Lisp
7717c392 4784 symbol) `japanese-shift-jis' by default.
4ed46869
KH
4785
4786 o coding-category-iso-7
4787
4788 The category for a coding system which has the same code range
7717c392 4789 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
4790 shift and single shift functions. This can encode/decode all
4791 charsets. Assigned the coding-system (Lisp symbol)
4792 `iso-2022-7bit' by default.
4793
4794 o coding-category-iso-7-tight
4795
4796 Same as coding-category-iso-7 except that this can
4797 encode/decode only the specified charsets.
4ed46869
KH
4798
4799 o coding-category-iso-8-1
4800
4801 The category for a coding system which has the same code range
4802 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4803 for DIMENSION1 charset. This doesn't use any locking shift
4804 and single shift functions. Assigned the coding-system (Lisp
4805 symbol) `iso-latin-1' by default.
4ed46869
KH
4806
4807 o coding-category-iso-8-2
4808
4809 The category for a coding system which has the same code range
4810 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4811 for DIMENSION2 charset. This doesn't use any locking shift
4812 and single shift functions. Assigned the coding-system (Lisp
4813 symbol) `japanese-iso-8bit' by default.
4ed46869 4814
7717c392 4815 o coding-category-iso-7-else
4ed46869
KH
4816
4817 The category for a coding system which has the same code range
df7492f9 4818 as ISO2022 of 7-bit environemnt but uses locking shift or
7717c392
KH
4819 single shift functions. Assigned the coding-system (Lisp
4820 symbol) `iso-2022-7bit-lock' by default.
4821
4822 o coding-category-iso-8-else
4823
4824 The category for a coding system which has the same code range
df7492f9 4825 as ISO2022 of 8-bit environemnt but uses locking shift or
7717c392
KH
4826 single shift functions. Assigned the coding-system (Lisp
4827 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
4828
4829 o coding-category-big5
4830
4831 The category for a coding system which has the same code range
4832 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 4833 `cn-big5' by default.
4ed46869 4834
fa42c37f
KH
4835 o coding-category-utf-8
4836
4837 The category for a coding system which has the same code range
4838 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4839 symbol) `utf-8' by default.
4840
4841 o coding-category-utf-16-be
4842
4843 The category for a coding system in which a text has an
4844 Unicode signature (cf. Unicode Standard) in the order of BIG
4845 endian at the head. Assigned the coding-system (Lisp symbol)
4846 `utf-16-be' by default.
4847
4848 o coding-category-utf-16-le
4849
4850 The category for a coding system in which a text has an
4851 Unicode signature (cf. Unicode Standard) in the order of
4852 LITTLE endian at the head. Assigned the coding-system (Lisp
4853 symbol) `utf-16-le' by default.
4854
1397dc18
KH
4855 o coding-category-ccl
4856
4857 The category for a coding system of which encoder/decoder is
4858 written in CCL programs. The default value is nil, i.e., no
4859 coding system is assigned.
4860
4ed46869
KH
4861 o coding-category-binary
4862
4863 The category for a coding system not categorized in any of the
4864 above. Assigned the coding-system (Lisp symbol)
e0e989f6 4865 `no-conversion' by default.
4ed46869
KH
4866
4867 Each of them is a Lisp symbol and the value is an actual
df7492f9 4868 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
4869 What Emacs does actually is to detect a category of coding system.
4870 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 4871 decide only one possible category, it selects a category of the
4ed46869
KH
4872 highest priority. Priorities of categories are also specified by a
4873 user in a Lisp variable `coding-category-list'.
4874
4875*/
4876
df7492f9
KH
4877#define EOL_SEEN_NONE 0
4878#define EOL_SEEN_LF 1
4879#define EOL_SEEN_CR 2
4880#define EOL_SEEN_CRLF 4
4ed46869 4881
df7492f9
KH
4882/* Detect how end-of-line of a text of length CODING->src_bytes
4883 pointed by CODING->source is encoded. Return one of
4884 EOL_SEEN_XXX. */
4ed46869 4885
bc4bc72a
RS
4886#define MAX_EOL_CHECK_COUNT 3
4887
d46c5b12 4888static int
df7492f9
KH
4889detect_eol (coding, source, src_bytes)
4890 struct coding_system *coding;
d46c5b12 4891 unsigned char *source;
df7492f9 4892 EMACS_INT src_bytes;
4ed46869 4893{
df7492f9 4894 Lisp_Object attrs, coding_type;
d46c5b12 4895 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 4896 unsigned char c;
df7492f9
KH
4897 int total = 0;
4898 int eol_seen = EOL_SEEN_NONE;
4ed46869 4899
df7492f9
KH
4900 attrs = CODING_ID_ATTRS (coding->id);
4901 coding_type = CODING_ATTR_TYPE (attrs);
d46c5b12 4902
df7492f9 4903 if (EQ (coding_type, Qccl))
4ed46869 4904 {
df7492f9 4905 int msb, lsb;
fa42c37f 4906
df7492f9
KH
4907 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4908 lsb = 1 - msb;
fa42c37f 4909
df7492f9 4910 while (src + 1 < src_end)
fa42c37f 4911 {
df7492f9
KH
4912 c = src[lsb];
4913 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 4914 {
df7492f9
KH
4915 int this_eol;
4916
4917 if (c == '\n')
4918 this_eol = EOL_SEEN_LF;
4919 else if (src + 3 >= src_end
4920 || src[msb + 2] != 0
4921 || src[lsb + 2] != '\n')
4922 this_eol = EOL_SEEN_CR;
fa42c37f 4923 else
df7492f9
KH
4924 this_eol = EOL_SEEN_CRLF;
4925
4926 if (eol_seen == EOL_SEEN_NONE)
4927 /* This is the first end-of-line. */
4928 eol_seen = this_eol;
4929 else if (eol_seen != this_eol)
fa42c37f 4930 {
df7492f9
KH
4931 /* The found type is different from what found before. */
4932 eol_seen = EOL_SEEN_LF;
4933 break;
fa42c37f 4934 }
df7492f9
KH
4935 if (++total == MAX_EOL_CHECK_COUNT)
4936 break;
fa42c37f 4937 }
df7492f9 4938 src += 2;
fa42c37f 4939 }
df7492f9 4940 }
d46c5b12 4941 else
27901516 4942 {
df7492f9 4943 while (src < src_end)
27901516 4944 {
df7492f9
KH
4945 c = *src++;
4946 if (c == '\n' || c == '\r')
4947 {
4948 int this_eol;
d46c5b12 4949
df7492f9
KH
4950 if (c == '\n')
4951 this_eol = EOL_SEEN_LF;
4952 else if (src >= src_end || *src != '\n')
4953 this_eol = EOL_SEEN_CR;
4954 else
4955 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 4956
df7492f9
KH
4957 if (eol_seen == EOL_SEEN_NONE)
4958 /* This is the first end-of-line. */
4959 eol_seen = this_eol;
4960 else if (eol_seen != this_eol)
4961 {
4962 /* The found type is different from what found before. */
4963 eol_seen = EOL_SEEN_LF;
4964 break;
4965 }
4966 if (++total == MAX_EOL_CHECK_COUNT)
4967 break;
4968 }
4969 }
73be902c 4970 }
df7492f9 4971 return eol_seen;
73be902c
KH
4972}
4973
df7492f9 4974
73be902c 4975static void
df7492f9
KH
4976adjust_coding_eol_type (coding, eol_seen)
4977 struct coding_system *coding;
4978 int eol_seen;
73be902c 4979{
0be8721c 4980 Lisp_Object eol_type;
df7492f9
KH
4981
4982 eol_type = CODING_ID_EOL_TYPE (coding->id);
4983 if (eol_seen & EOL_SEEN_LF)
4984 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4985 else if (eol_type & EOL_SEEN_CRLF)
4986 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4987 else if (eol_type & EOL_SEEN_CR)
4988 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
4989}
4990
df7492f9
KH
4991/* Detect how a text specified in CODING is encoded. If a coding
4992 system is detected, update fields of CODING by the detected coding
4993 system. */
4994
4995void
4996detect_coding (coding)
d46c5b12 4997 struct coding_system *coding;
d46c5b12 4998{
df7492f9
KH
4999 unsigned char *src, *src_end;
5000 Lisp_Object attrs, coding_type;
d46c5b12 5001
df7492f9
KH
5002 coding->consumed = coding->consumed_char = 0;
5003 coding->produced = coding->produced_char = 0;
5004 coding_set_source (coding);
1c3478b0 5005
df7492f9 5006 src_end = coding->source + coding->src_bytes;
1c3478b0 5007
df7492f9
KH
5008 /* If we have not yet decided the text encoding type, detect it
5009 now. */
5010 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5011 {
df7492f9
KH
5012 int mask = CATEGORY_MASK_ANY;
5013 int c, i;
5014
5015 for (src = coding->source; src < src_end; src++)
5016 {
5017 c = *src;
5018 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5019 || c == ISO_CODE_SI
5020 || c == ISO_CODE_SO)))
5021 break;
5022 }
5023 coding->head_ascii = src - (coding->source + coding->consumed);
5024
5025 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5026 {
df7492f9
KH
5027 int detected = 0;
5028
5029 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5030 {
df7492f9
KH
5031 enum coding_category category = coding_priorities[i];
5032 struct coding_system *this = coding_categories + category;
5033
5034 if (category >= coding_category_raw_text
5035 || detected & (1 << category))
5036 continue;
5037
5038 if (this->id < 0)
1c3478b0 5039 {
df7492f9
KH
5040 /* No coding system of this category is defined. */
5041 mask &= ~(1 << category);
5042 }
5043 else
5044 {
5045 detected |= detected_mask[category];
5046 if ((*(this->detector)) (coding, &mask))
5047 break;
1c3478b0
KH
5048 }
5049 }
df7492f9
KH
5050 if (! mask)
5051 setup_coding_system (Qraw_text, coding);
5052 else if (mask != CATEGORY_MASK_ANY)
5053 for (i = 0; i < coding_category_raw_text; i++)
5054 {
5055 enum coding_category category = coding_priorities[i];
5056 struct coding_system *this = coding_categories + category;
5057
5058 if (mask & (1 << category))
5059 {
5060 setup_coding_system (CODING_ID_NAME (this->id), coding);
5061 break;
5062 }
5063 }
1c3478b0 5064 }
b73bfc1c 5065 }
69f76525 5066
df7492f9
KH
5067 attrs = CODING_ID_ATTRS (coding->id);
5068 coding_type = CODING_ATTR_TYPE (attrs);
5069
5070 /* If we have not yet decided the EOL type, detect it now. But, the
5071 detection is impossible for a CCL based coding system, in which
5072 case, we detct the EOL type after decoding. */
5073 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5074 && ! EQ (coding_type, Qccl))
d46c5b12 5075 {
df7492f9
KH
5076 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5077
5078 if (eol_seen != EOL_SEEN_NONE)
5079 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5080 }
4ed46869
KH
5081}
5082
aaaf0b1e
KH
5083
5084static void
df7492f9 5085decode_eol (coding)
aaaf0b1e 5086 struct coding_system *coding;
aaaf0b1e 5087{
df7492f9 5088 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5089 {
df7492f9
KH
5090 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5091 unsigned char *pend = p + coding->produced;
5092 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5093
df7492f9 5094 for (; p < pend; p++)
aaaf0b1e 5095 {
df7492f9
KH
5096 if (*p == '\n')
5097 eol_seen |= EOL_SEEN_LF;
5098 else if (*p == '\r')
aaaf0b1e 5099 {
df7492f9 5100 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5101 {
df7492f9
KH
5102 eol_seen |= EOL_SEEN_CRLF;
5103 p++;
aaaf0b1e 5104 }
aaaf0b1e 5105 else
df7492f9 5106 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5107 }
aaaf0b1e 5108 }
df7492f9
KH
5109 if (eol_seen != EOL_SEEN_NONE)
5110 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5111 }
aaaf0b1e 5112
df7492f9
KH
5113 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5114 {
5115 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5116 unsigned char *pend = p + coding->produced;
5117
5118 for (; p < pend; p++)
5119 if (*p == '\r')
5120 *p = '\n';
5121 }
5122 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5123 {
5124 unsigned char *p, *pbeg, *pend;
5125 Lisp_Object undo_list;
5126
5127 move_gap_both (coding->dst_pos + coding->produced_char,
5128 coding->dst_pos_byte + coding->produced);
5129 undo_list = current_buffer->undo_list;
5130 current_buffer->undo_list = Qt;
5131 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5132 current_buffer->undo_list = undo_list;
5133 pbeg = GPT_ADDR;
5134 pend = pbeg + coding->produced;
5135
5136 for (p = pend - 1; p >= pbeg; p--)
5137 if (*p == '\r')
5138 {
5139 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5140 pend--;
5141 }
5142 coding->produced_char -= coding->produced - (pend - pbeg);
5143 coding->produced = pend - pbeg;
5144 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5145 }
5146}
5147
df7492f9
KH
5148static void
5149translate_chars (coding, table)
4ed46869 5150 struct coding_system *coding;
df7492f9 5151 Lisp_Object table;
4ed46869 5152{
df7492f9
KH
5153 int *charbuf = coding->charbuf;
5154 int *charbuf_end = charbuf + coding->charbuf_used;
5155 int c;
5156
5157 if (coding->chars_at_source)
5158 return;
4ed46869 5159
df7492f9 5160 while (charbuf < charbuf_end)
8844fa83 5161 {
df7492f9
KH
5162 c = *charbuf;
5163 if (c < 0)
5164 charbuf += c;
5165 else
5166 *charbuf++ = translate_char (table, c);
8844fa83 5167 }
df7492f9 5168}
4ed46869 5169
df7492f9
KH
5170static int
5171produce_chars (coding)
5172 struct coding_system *coding;
5173{
5174 unsigned char *dst = coding->destination + coding->produced;
5175 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5176 int produced;
5177 int produced_chars = 0;
b73bfc1c 5178
df7492f9 5179 if (! coding->chars_at_source)
4ed46869 5180 {
df7492f9
KH
5181 /* Characters are in coding->charbuf. */
5182 int *buf = coding->charbuf;
5183 int *buf_end = buf + coding->charbuf_used;
5184 unsigned char *adjusted_dst_end;
4ed46869 5185
df7492f9
KH
5186 if (BUFFERP (coding->src_object)
5187 && EQ (coding->src_object, coding->dst_object))
5188 dst_end = coding->source + coding->consumed;
5189 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5190
df7492f9
KH
5191 while (buf < buf_end)
5192 {
5193 int c = *buf++;
5194
5195 if (dst >= adjusted_dst_end)
5196 {
5197 dst = alloc_destination (coding,
5198 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5199 dst);
5200 dst_end = coding->destination + coding->dst_bytes;
5201 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5202 }
5203 if (c >= 0)
5204 {
5205 if (coding->dst_multibyte
5206 || ! CHAR_BYTE8_P (c))
5207 CHAR_STRING_ADVANCE (c, dst);
5208 else
5209 *dst++ = CHAR_TO_BYTE8 (c);
5210 produced_chars++;
5211 }
5212 else
5213 /* This is an annotation data. */
5214 buf -= c + 1;
5215 }
5216 }
5217 else
5218 {
df7492f9
KH
5219 unsigned char *src = coding->source;
5220 unsigned char *src_end = src + coding->src_bytes;
5221 Lisp_Object eol_type;
b73bfc1c 5222
df7492f9 5223 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5224
df7492f9 5225 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5226 {
df7492f9
KH
5227 if (coding->src_multibyte)
5228 {
71c81426 5229 int multibytep = 1;
df7492f9 5230 int consumed_chars;
d46c5b12 5231
df7492f9
KH
5232 while (1)
5233 {
5234 unsigned char *src_base = src;
5235 int c;
b73bfc1c 5236
df7492f9
KH
5237 ONE_MORE_BYTE (c);
5238 if (c == '\r')
5239 {
5240 if (EQ (eol_type, Qdos))
5241 {
5242 if (src < src_end
5243 && *src == '\n')
5244 c = *src++;
5245 }
5246 else if (EQ (eol_type, Qmac))
5247 c = '\n';
5248 }
5249 if (dst == dst_end)
5250 {
2c78b7e1 5251 coding->consumed = src - coding->source;
b73bfc1c 5252
2c78b7e1
KH
5253 if (EQ (coding->src_object, coding->dst_object))
5254 dst_end = src;
5255 if (dst == dst_end)
5256 {
5257 dst = alloc_destination (coding, src_end - src + 1,
5258 dst);
5259 dst_end = coding->destination + coding->dst_bytes;
5260 coding_set_source (coding);
5261 src = coding->source + coding->consumed;
5262 src_end = coding->source + coding->src_bytes;
5263 }
df7492f9
KH
5264 }
5265 *dst++ = c;
5266 produced_chars++;
5267 }
5268 no_more_source:
5269 ;
5270 }
5271 else
5272 while (src < src_end)
5273 {
71c81426 5274 int multibytep = 1;
df7492f9 5275 int c = *src++;
b73bfc1c 5276
df7492f9
KH
5277 if (c == '\r')
5278 {
5279 if (EQ (eol_type, Qdos))
5280 {
5281 if (src < src_end
5282 && *src == '\n')
5283 c = *src++;
5284 }
5285 else if (EQ (eol_type, Qmac))
5286 c = '\n';
5287 }
5288 if (dst >= dst_end - 1)
5289 {
2c78b7e1 5290 coding->consumed = src - coding->source;
df7492f9 5291
2c78b7e1
KH
5292 if (EQ (coding->src_object, coding->dst_object))
5293 dst_end = src;
5294 if (dst >= dst_end - 1)
5295 {
5296 dst = alloc_destination (coding, src_end - src + 2,
5297 dst);
5298 dst_end = coding->destination + coding->dst_bytes;
5299 coding_set_source (coding);
5300 src = coding->source + coding->consumed;
5301 src_end = coding->source + coding->src_bytes;
5302 }
df7492f9
KH
5303 }
5304 EMIT_ONE_BYTE (c);
5305 }
d46c5b12 5306 }
df7492f9
KH
5307 else
5308 {
5309 if (!EQ (coding->src_object, coding->dst_object))
5310 {
5311 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5312
df7492f9
KH
5313 if (require > 0)
5314 {
5315 EMACS_INT offset = src - coding->source;
5316
5317 dst = alloc_destination (coding, require, dst);
5318 coding_set_source (coding);
5319 src = coding->source + offset;
5320 src_end = coding->source + coding->src_bytes;
5321 }
5322 }
5323 produced_chars = coding->src_chars;
5324 while (src < src_end)
5325 {
5326 int c = *src++;
5327
5328 if (c == '\r')
5329 {
5330 if (EQ (eol_type, Qdos))
5331 {
5332 if (src < src_end
5333 && *src == '\n')
5334 c = *src++;
5335 produced_chars--;
5336 }
5337 else if (EQ (eol_type, Qmac))
5338 c = '\n';
5339 }
5340 *dst++ = c;
5341 }
5342 }
2c78b7e1
KH
5343 coding->consumed = coding->src_bytes;
5344 coding->consumed_char = coding->src_chars;
b73bfc1c 5345 }
4ed46869 5346
df7492f9
KH
5347 produced = dst - (coding->destination + coding->produced);
5348 if (BUFFERP (coding->dst_object))
5349 insert_from_gap (produced_chars, produced);
5350 coding->produced += produced;
5351 coding->produced_char += produced_chars;
5352 return produced_chars;
b73bfc1c 5353}
52d41803 5354
df7492f9
KH
5355/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5356 or
5357 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5358 */
4ed46869 5359
df7492f9
KH
5360static INLINE void
5361produce_composition (coding, charbuf)
4ed46869 5362 struct coding_system *coding;
df7492f9 5363 int *charbuf;
4ed46869 5364{
df7492f9
KH
5365 Lisp_Object buffer;
5366 int len;
5367 EMACS_INT pos;
5368 enum composition_method method;
5369 int cmp_len;
5370 Lisp_Object components;
5371
5372 buffer = coding->dst_object;
5373 len = -charbuf[0];
5374 pos = coding->dst_pos + charbuf[1];
5375 method = (enum composition_method) (charbuf[3]);
5376 cmp_len = charbuf[4];
5377
5378 if (method == COMPOSITION_RELATIVE)
5379 components = Qnil;
5380 else
d46c5b12 5381 {
df7492f9
KH
5382 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5383 int i;
4ed46869 5384
df7492f9
KH
5385 len -= 5;
5386 charbuf += 5;
5387 for (i = 0; i < len; i++)
5388 args[i] = make_number (charbuf[i]);
5389 components = (method == COMPOSITION_WITH_ALTCHARS
5390 ? Fstring (len, args) : Fvector (len, args));
5391 }
5392 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5393}
b73bfc1c 5394
df7492f9
KH
5395static int *
5396save_composition_data (buf, buf_end, prop)
5397 int *buf, *buf_end;
5398 Lisp_Object prop;
5399{
5400 enum composition_method method = COMPOSITION_METHOD (prop);
5401 int cmp_len = COMPOSITION_LENGTH (prop);
4ed46869 5402
df7492f9
KH
5403 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5404 return NULL;
d46c5b12 5405
df7492f9
KH
5406 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5407 buf[2] = method;
5408 buf[3] = cmp_len;
b73bfc1c 5409
df7492f9
KH
5410 if (method == COMPOSITION_RELATIVE)
5411 buf[0] = 4;
5412 else
b73bfc1c 5413 {
df7492f9
KH
5414 Lisp_Object components;
5415 int len, i;
b73bfc1c 5416
df7492f9
KH
5417 components = COMPOSITION_COMPONENTS (prop);
5418 if (VECTORP (components))
d46c5b12 5419 {
df7492f9
KH
5420 len = XVECTOR (components)->size;
5421 for (i = 0; i < len; i++)
5422 buf[4 + i] = XINT (AREF (components, i));
5423 }
5424 else if (STRINGP (components))
5425 {
5426 int i_byte;
b73bfc1c 5427
df7492f9
KH
5428 len = XSTRING (components)->size;
5429 i = i_byte = 0;
5430 while (i < len)
5431 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5432 }
5433 else if (INTEGERP (components))
5434 {
5435 len = 1;
5436 buf[4] = XINT (components);
5437 }
5438 else if (CONSP (components))
5439 {
5440 for (len = 0; CONSP (components);
5441 len++, components = XCDR (components))
5442 buf[4 + len] = XINT (XCAR (components));
d46c5b12 5443 }
df7492f9
KH
5444 else
5445 abort ();
5446 buf[0] = 4 + len;
4ed46869 5447 }
df7492f9 5448 return (buf + buf[0]);
4ed46869
KH
5449}
5450
df7492f9
KH
5451#define CHARBUF_SIZE 0x4000
5452
5453#define ALLOC_CONVERSION_WORK_AREA(coding) \
5454 do { \
5455 int size = CHARBUF_SIZE;; \
5456 \
5457 coding->charbuf = NULL; \
5458 while (size > 1024) \
5459 { \
5460 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5461 if (coding->charbuf) \
5462 break; \
5463 size >>= 1; \
5464 } \
5465 if (! coding->charbuf) \
5466 { \
5467 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5468 return coding->result; \
5469 } \
5470 coding->charbuf_size = size; \
5471 } while (0)
4ed46869 5472
d46c5b12
KH
5473
5474static void
df7492f9 5475produce_annotation (coding)
d46c5b12 5476 struct coding_system *coding;
d46c5b12 5477{
df7492f9
KH
5478 int *charbuf = coding->charbuf;
5479 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5480
df7492f9 5481 while (charbuf < charbuf_end)
d46c5b12 5482 {
df7492f9
KH
5483 if (*charbuf >= 0)
5484 charbuf++;
d46c5b12 5485 else
d46c5b12 5486 {
df7492f9
KH
5487 int len = -*charbuf;
5488 switch (charbuf[2])
5489 {
5490 case CODING_ANNOTATE_COMPOSITION_MASK:
5491 produce_composition (coding, charbuf);
5492 break;
5493 default:
5494 abort ();
5495 }
5496 charbuf += len;
d46c5b12 5497 }
df7492f9
KH
5498 }
5499}
d46c5b12 5500
df7492f9
KH
5501/* Decode the data at CODING->src_object into CODING->dst_object.
5502 CODING->src_object is a buffer, a string, or nil.
5503 CODING->dst_object is a buffer.
de79a6a5 5504
df7492f9
KH
5505 If CODING->src_object is a buffer, it must be the current buffer.
5506 In this case, if CODING->src_pos is positive, it is a position of
5507 the source text in the buffer, otherwise, the source text is in the
5508 gap area of the buffer, and CODING->src_pos specifies the offset of
5509 the text from GPT (which must be the same as PT). If this is the
5510 same buffer as CODING->dst_object, CODING->src_pos must be
5511 negative.
b73bfc1c 5512
df7492f9
KH
5513 If CODING->src_object is a string, CODING->src_pos in an index to
5514 that string.
d46c5b12 5515
df7492f9
KH
5516 If CODING->src_object is nil, CODING->source must already point to
5517 the non-relocatable memory area. In this case, CODING->src_pos is
5518 an offset from CODING->source.
d46c5b12 5519
df7492f9
KH
5520 The decoded data is inserted at the current point of the buffer
5521 CODING->dst_object.
5522*/
5523
5524static int
5525decode_coding (coding)
d46c5b12 5526 struct coding_system *coding;
d46c5b12 5527{
df7492f9 5528 Lisp_Object attrs;
d46c5b12 5529
df7492f9
KH
5530 if (BUFFERP (coding->src_object)
5531 && coding->src_pos > 0
5532 && coding->src_pos < GPT
5533 && coding->src_pos + coding->src_chars > GPT)
5534 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5535
df7492f9 5536 if (BUFFERP (coding->dst_object))
88993dfd 5537 {
df7492f9
KH
5538 if (current_buffer != XBUFFER (coding->dst_object))
5539 set_buffer_internal (XBUFFER (coding->dst_object));
5540 if (GPT != PT)
5541 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5542 }
5543
df7492f9
KH
5544 coding->consumed = coding->consumed_char = 0;
5545 coding->produced = coding->produced_char = 0;
5546 coding->chars_at_source = 0;
5547 coding->result = CODING_RESULT_SUCCESS;
5548 coding->errors = 0;
5549
5550 ALLOC_CONVERSION_WORK_AREA (coding);
5551
5552 attrs = CODING_ID_ATTRS (coding->id);
5553
5554 do
d46c5b12 5555 {
df7492f9
KH
5556 coding_set_source (coding);
5557 coding->annotated = 0;
5558 (*(coding->decoder)) (coding);
5559 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5560 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5561 coding_set_destination (coding);
5562 produce_chars (coding);
5563 if (coding->annotated)
5564 produce_annotation (coding);
d46c5b12 5565 }
df7492f9
KH
5566 while (coding->consumed < coding->src_bytes
5567 && ! coding->result);
d46c5b12 5568
df7492f9
KH
5569 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5570 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5571 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5572 decode_eol (coding);
d46c5b12 5573
df7492f9
KH
5574 coding->carryover_bytes = 0;
5575 if (coding->consumed < coding->src_bytes)
d46c5b12 5576 {
df7492f9
KH
5577 int nbytes = coding->src_bytes - coding->consumed;
5578 unsigned char *src;
5579
5580 coding_set_source (coding);
5581 coding_set_destination (coding);
5582 src = coding->source + coding->consumed;
5583
5584 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5585 {
df7492f9
KH
5586 /* Flush out unprocessed data as binary chars. We are sure
5587 that the number of data is less than the size of
5588 coding->charbuf. */
5589 int *charbuf = coding->charbuf;
5590
5591 while (nbytes-- > 0)
d46c5b12 5592 {
df7492f9
KH
5593 int c = *src++;
5594 *charbuf++ = (c & 0x80 ? - c : c);
d46c5b12 5595 }
df7492f9 5596 produce_chars (coding);
d46c5b12 5597 }
d46c5b12 5598 else
df7492f9
KH
5599 {
5600 /* Record unprocessed bytes in coding->carryover. We are
5601 sure that the number of data is less than the size of
5602 coding->carryover. */
5603 unsigned char *p = coding->carryover;
5604
5605 coding->carryover_bytes = nbytes;
5606 while (nbytes-- > 0)
5607 *p++ = *src++;
5608 }
5609 coding->consumed = coding->src_bytes;
5610 }
b73bfc1c 5611
df7492f9 5612 return coding->result;
d46c5b12
KH
5613}
5614
df7492f9
KH
5615static void
5616consume_chars (coding)
5617 struct coding_system *coding;
5618{
5619 int *buf = coding->charbuf;
5620 /* -1 is to compensate for CRLF. */
5621 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5622 unsigned char *src = coding->source + coding->consumed;
5623 int pos = coding->src_pos + coding->consumed_char;
5624 int end_pos = coding->src_pos + coding->src_chars;
5625 int multibytep = coding->src_multibyte;
5626 Lisp_Object eol_type;
5627 int c;
5628 int start, end, stop;
5629 Lisp_Object object, prop;
88993dfd 5630
df7492f9
KH
5631 eol_type = CODING_ID_EOL_TYPE (coding->id);
5632 if (VECTORP (eol_type))
5633 eol_type = Qunix;
88993dfd 5634
df7492f9 5635 object = coding->src_object;
b843d1ae 5636
df7492f9
KH
5637 /* Note: composition handling is not yet implemented. */
5638 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5639
df7492f9
KH
5640 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5641 && find_composition (pos, end_pos, &start, &end, &prop, object)
5642 && end <= end_pos
5643 && (start >= pos
5644 || (find_composition (end, end_pos, &start, &end, &prop, object)
5645 && end <= end_pos)))
5646 stop = start;
5647 else
5648 stop = end_pos;
ec6d2bb8 5649
df7492f9 5650 while (buf < buf_end)
ec6d2bb8 5651 {
df7492f9 5652 if (pos == stop)
ec6d2bb8 5653 {
df7492f9 5654 int *p;
ec6d2bb8 5655
df7492f9
KH
5656 if (pos == end_pos)
5657 break;
5658 p = save_composition_data (buf, buf_end, prop);
5659 if (p == NULL)
5660 break;
5661 buf = p;
5662 if (find_composition (end, end_pos, &start, &end, &prop, object)
5663 && end <= end_pos)
5664 stop = start;
5665 else
5666 stop = end_pos;
5667 }
5668
5669 if (! multibytep)
5670 c = *src++;
5671 else
5672 c = STRING_CHAR_ADVANCE (src);
5673 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5674 c = '\n';
5675 if (! EQ (eol_type, Qunix))
5676 {
5677 if (c == '\n')
5678 {
5679 if (EQ (eol_type, Qdos))
5680 *buf++ = '\r';
5681 else
5682 c = '\r';
ec6d2bb8 5683 }
ec6d2bb8 5684 }
df7492f9
KH
5685 *buf++ = c;
5686 pos++;
ec6d2bb8 5687 }
ec6d2bb8 5688
df7492f9
KH
5689 coding->consumed = src - coding->source;
5690 coding->consumed_char = pos - coding->src_pos;
5691 coding->charbuf_used = buf - coding->charbuf;
5692 coding->chars_at_source = 0;
ec6d2bb8
KH
5693}
5694
ec6d2bb8 5695
df7492f9
KH
5696/* Encode the text at CODING->src_object into CODING->dst_object.
5697 CODING->src_object is a buffer or a string.
5698 CODING->dst_object is a buffer or nil.
5699
5700 If CODING->src_object is a buffer, it must be the current buffer.
5701 In this case, if CODING->src_pos is positive, it is a position of
5702 the source text in the buffer, otherwise. the source text is in the
5703 gap area of the buffer, and coding->src_pos specifies the offset of
5704 the text from GPT (which must be the same as PT). If this is the
5705 same buffer as CODING->dst_object, CODING->src_pos must be
5706 negative and CODING should not have `pre-write-conversion'.
5707
5708 If CODING->src_object is a string, CODING should not have
5709 `pre-write-conversion'.
5710
5711 If CODING->dst_object is a buffer, the encoded data is inserted at
5712 the current point of that buffer.
5713
5714 If CODING->dst_object is nil, the encoded data is placed at the
5715 memory area specified by CODING->destination. */
5716
5717static int
5718encode_coding (coding)
ec6d2bb8 5719 struct coding_system *coding;
ec6d2bb8 5720{
df7492f9 5721 Lisp_Object attrs;
ec6d2bb8 5722
df7492f9 5723 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 5724
df7492f9 5725 if (BUFFERP (coding->dst_object))
ec6d2bb8 5726 {
df7492f9
KH
5727 set_buffer_internal (XBUFFER (coding->dst_object));
5728 coding->dst_multibyte
5729 = ! NILP (current_buffer->enable_multibyte_characters);
5730 }
ec6d2bb8 5731
df7492f9
KH
5732 coding->consumed = coding->consumed_char = 0;
5733 coding->produced = coding->produced_char = 0;
5734 coding->result = CODING_RESULT_SUCCESS;
5735 coding->errors = 0;
ec6d2bb8 5736
df7492f9 5737 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 5738
df7492f9
KH
5739 do {
5740 coding_set_source (coding);
5741 consume_chars (coding);
5742
5743 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5744 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5745
5746 coding_set_destination (coding);
5747 (*(coding->encoder)) (coding);
5748 } while (coding->consumed_char < coding->src_chars);
5749
5750 if (BUFFERP (coding->dst_object))
5751 insert_from_gap (coding->produced_char, coding->produced);
5752
5753 return (coding->result);
ec6d2bb8
KH
5754}
5755
df7492f9 5756/* Work buffer */
fb88bf2d 5757
df7492f9
KH
5758/* List of currently used working buffer. */
5759Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 5760
df7492f9
KH
5761/* A working buffer used by the top level conversion. */
5762Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 5763
4ed46869 5764
df7492f9
KH
5765/* Return a working buffer that can be freely used by the following
5766 code conversion. MULTIBYTEP specifies the multibyteness of the
5767 buffer. */
b73bfc1c 5768
df7492f9
KH
5769Lisp_Object
5770make_conversion_work_buffer (multibytep)
5771 int multibytep;
5772{
5773 struct buffer *current = current_buffer;
5774 Lisp_Object buf;
d46c5b12 5775
df7492f9 5776 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 5777 {
df7492f9
KH
5778 if (NILP (Vcode_conversion_reused_work_buf))
5779 Vcode_conversion_reused_work_buf
5780 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5781 Vcode_conversion_work_buf_list
5782 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 5783 }
df7492f9 5784 else
d46c5b12 5785 {
df7492f9
KH
5786 int depth = Flength (Vcode_conversion_work_buf_list);
5787 char str[128];
e077cc80 5788
df7492f9
KH
5789 sprintf (str, " *code-conversion-work*<%d>", depth);
5790 Vcode_conversion_work_buf_list
5791 = Fcons (Fget_buffer_create (build_string (str)),
5792 Vcode_conversion_work_buf_list);
d46c5b12 5793 }
d46c5b12 5794
df7492f9
KH
5795 buf = XCAR (Vcode_conversion_work_buf_list);
5796 set_buffer_internal (XBUFFER (buf));
5797 current_buffer->undo_list = Qt;
5798 Ferase_buffer ();
5799 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5800 set_buffer_internal (current);
5801 return buf;
5802}
d46c5b12 5803
df7492f9 5804static struct coding_system *saved_coding;
d46c5b12 5805
df7492f9
KH
5806Lisp_Object
5807code_conversion_restore (info)
5808 Lisp_Object info;
5809{
5810 int depth = Flength (Vcode_conversion_work_buf_list);
5811 Lisp_Object buf;
d46c5b12 5812
df7492f9 5813 if (depth > 0)
d46c5b12 5814 {
df7492f9
KH
5815 buf = XCAR (Vcode_conversion_work_buf_list);
5816 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5817 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5818 Fkill_buffer (buf);
5819 }
d46c5b12 5820
df7492f9
KH
5821 if (saved_coding->dst_object == Qt
5822 && saved_coding->destination)
5823 xfree (saved_coding->destination);
b843d1ae 5824
df7492f9
KH
5825 return save_excursion_restore (info);
5826}
d46c5b12 5827
12410ef1 5828
df7492f9
KH
5829int
5830decode_coding_gap (coding, chars, bytes)
5831 struct coding_system *coding;
5832 EMACS_INT chars, bytes;
5833{
5834 int count = specpdl_ptr - specpdl;
fb88bf2d 5835
df7492f9
KH
5836 saved_coding = coding;
5837 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 5838
df7492f9
KH
5839 coding->src_object = Fcurrent_buffer ();
5840 coding->src_chars = chars;
5841 coding->src_bytes = bytes;
5842 coding->src_pos = -chars;
5843 coding->src_pos_byte = -bytes;
5844 coding->src_multibyte = chars < bytes;
5845 coding->dst_object = coding->src_object;
5846 coding->dst_pos = PT;
5847 coding->dst_pos_byte = PT_BYTE;
71c81426 5848 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4956c225 5849
df7492f9
KH
5850 if (CODING_REQUIRE_DETECTION (coding))
5851 detect_coding (coding);
5852
5853 decode_coding (coding);
d46c5b12 5854
df7492f9
KH
5855 unbind_to (count, Qnil);
5856 return coding->result;
5857}
d46c5b12 5858
df7492f9
KH
5859int
5860encode_coding_gap (coding, chars, bytes)
5861 struct coding_system *coding;
5862 EMACS_INT chars, bytes;
5863{
5864 int count = specpdl_ptr - specpdl;
5865 Lisp_Object buffer;
d46c5b12 5866
df7492f9
KH
5867 saved_coding = coding;
5868 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 5869
df7492f9
KH
5870 buffer = Fcurrent_buffer ();
5871 coding->src_object = buffer;
5872 coding->src_chars = chars;
5873 coding->src_bytes = bytes;
5874 coding->src_pos = -chars;
5875 coding->src_pos_byte = -bytes;
5876 coding->src_multibyte = chars < bytes;
5877 coding->dst_object = coding->src_object;
5878 coding->dst_pos = PT;
5879 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 5880
df7492f9 5881 encode_coding (coding);
f2558efd 5882
df7492f9
KH
5883 unbind_to (count, Qnil);
5884 return coding->result;
5885}
b73bfc1c 5886
d46c5b12 5887
df7492f9
KH
5888/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5889 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 5890
df7492f9 5891 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 5892
df7492f9
KH
5893 If it is a buffer, the text is at point of the buffer. FROM and TO
5894 are positions in the buffer.
ec6d2bb8 5895
df7492f9
KH
5896 If it is a string, the text is at the beginning of the string.
5897 FROM and TO are indices to the string.
ec6d2bb8 5898
df7492f9
KH
5899 If it is nil, the text is at coding->source. FROM and TO are
5900 indices to coding->source.
ec6d2bb8 5901
df7492f9 5902 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 5903
df7492f9
KH
5904 If it is a buffer, the decoded text is inserted at point of the
5905 buffer. If the buffer is the same as SRC_OBJECT, the source text
5906 is deleted.
d46c5b12 5907
df7492f9
KH
5908 If it is Qt, a string is made from the decoded text, and
5909 set in CODING->dst_object.
d46c5b12 5910
df7492f9
KH
5911 If it is Qnil, the decoded text is stored at CODING->destination.
5912 The called must allocate CODING->dst_bytes bytes at
5913 CODING->destination by xmalloc. If the decoded text is longer than
5914 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5915 */
d46c5b12 5916
df7492f9
KH
5917void
5918decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5919 dst_object)
5920 struct coding_system *coding;
5921 Lisp_Object src_object;
5922 EMACS_INT from, from_byte, to, to_byte;
5923 Lisp_Object dst_object;
5924{
5925 int count = specpdl_ptr - specpdl;
5926 unsigned char *destination;
5927 EMACS_INT dst_bytes;
5928 EMACS_INT chars = to - from;
5929 EMACS_INT bytes = to_byte - from_byte;
5930 Lisp_Object attrs;
d46c5b12 5931
df7492f9
KH
5932 saved_coding = coding;
5933 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 5934
df7492f9
KH
5935 if (NILP (dst_object))
5936 {
5937 destination = coding->destination;
5938 dst_bytes = coding->dst_bytes;
5939 }
93dec019 5940
df7492f9
KH
5941 coding->src_object = src_object;
5942 coding->src_chars = chars;
5943 coding->src_bytes = bytes;
5944 coding->src_multibyte = chars < bytes;
70ad9fc4 5945
df7492f9
KH
5946 if (STRINGP (src_object))
5947 {
5948 coding->src_pos = from;
5949 coding->src_pos_byte = from_byte;
5950 }
5951 else if (BUFFERP (src_object))
5952 {
5953 set_buffer_internal (XBUFFER (src_object));
5954 if (from != GPT)
5955 move_gap_both (from, from_byte);
5956 if (EQ (src_object, dst_object))
fb88bf2d 5957 {
df7492f9
KH
5958 TEMP_SET_PT_BOTH (from, from_byte);
5959 del_range_both (from, from_byte, to, to_byte, 1);
5960 coding->src_pos = -chars;
5961 coding->src_pos_byte = -bytes;
fb88bf2d 5962 }
df7492f9 5963 else
fb88bf2d 5964 {
df7492f9
KH
5965 coding->src_pos = from;
5966 coding->src_pos_byte = from_byte;
fb88bf2d 5967 }
d46c5b12 5968 }
fb88bf2d 5969
df7492f9
KH
5970 if (CODING_REQUIRE_DETECTION (coding))
5971 detect_coding (coding);
5972 attrs = CODING_ID_ATTRS (coding->id);
5973
5974 if (! NILP (CODING_ATTR_POST_READ (attrs))
5975 || EQ (dst_object, Qt))
b73bfc1c 5976 {
df7492f9
KH
5977 coding->dst_object = make_conversion_work_buffer (1);
5978 coding->dst_pos = BEG;
5979 coding->dst_pos_byte = BEG_BYTE;
5980 coding->dst_multibyte = 1;
b73bfc1c 5981 }
df7492f9 5982 else if (BUFFERP (dst_object))
12410ef1 5983 {
df7492f9
KH
5984 coding->dst_object = dst_object;
5985 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5986 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5987 coding->dst_multibyte
5988 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 5989 }
72d1a715 5990 else
df7492f9
KH
5991 {
5992 coding->dst_object = Qnil;
5993 coding->dst_multibyte = 1;
5994 }
5995
5996 decode_coding (coding);
4ed46869 5997
df7492f9
KH
5998 if (BUFFERP (coding->dst_object))
5999 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6000
df7492f9 6001 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6002 {
df7492f9
KH
6003 struct gcpro gcpro1, gcpro2;
6004 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6005 Lisp_Object val;
4ed46869 6006
c0cc7f7f 6007 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6008 GCPRO2 (coding->src_object, coding->dst_object);
6009 val = call1 (CODING_ATTR_POST_READ (attrs),
6010 make_number (coding->produced_char));
6011 UNGCPRO;
6012 CHECK_NATNUM (val);
6013 coding->produced_char += Z - prev_Z;
6014 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6015 }
4ed46869 6016
df7492f9 6017 if (EQ (dst_object, Qt))
ec6d2bb8 6018 {
df7492f9
KH
6019 coding->dst_object = Fbuffer_string ();
6020 }
6021 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6022 {
6023 set_buffer_internal (XBUFFER (coding->dst_object));
6024 if (dst_bytes < coding->produced)
6025 {
6026 destination
6027 = (unsigned char *) xrealloc (destination, coding->produced);
6028 if (! destination)
6029 {
6030 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6031 unbind_to (count, Qnil);
6032 return;
6033 }
6034 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6035 move_gap_both (BEGV, BEGV_BYTE);
6036 bcopy (BEGV_ADDR, destination, coding->produced);
6037 coding->destination = destination;
6038 }
ec6d2bb8 6039 }
2b4f9037 6040
df7492f9 6041 unbind_to (count, Qnil);
d46c5b12
KH
6042}
6043
df7492f9
KH
6044
6045void
6046encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6047 dst_object)
b73bfc1c 6048 struct coding_system *coding;
df7492f9
KH
6049 Lisp_Object src_object;
6050 EMACS_INT from, from_byte, to, to_byte;
6051 Lisp_Object dst_object;
b73bfc1c
KH
6052{
6053 int count = specpdl_ptr - specpdl;
df7492f9
KH
6054 EMACS_INT chars = to - from;
6055 EMACS_INT bytes = to_byte - from_byte;
6056 Lisp_Object attrs;
6057
6058 saved_coding = coding;
6059 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6060
6061 coding->src_object = src_object;
6062 coding->src_chars = chars;
6063 coding->src_bytes = bytes;
6064 coding->src_multibyte = chars < bytes;
6065
6066 attrs = CODING_ID_ATTRS (coding->id);
6067
6068 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6069 {
df7492f9
KH
6070 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6071 set_buffer_internal (XBUFFER (coding->src_object));
6072 if (STRINGP (src_object))
6073 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6074 else if (BUFFERP (src_object))
6075 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6076 else
6077 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6078
6079 if (EQ (src_object, dst_object))
6080 {
6081 set_buffer_internal (XBUFFER (src_object));
6082 del_range_both (from, from_byte, to, to_byte, 1);
6083 set_buffer_internal (XBUFFER (coding->src_object));
6084 }
6085
ac87bbef
KH
6086 call2 (CODING_ATTR_PRE_WRITE (attrs),
6087 make_number (BEG), make_number (Z));
6088 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6089 if (BEG != GPT)
6090 move_gap_both (BEG, BEG_BYTE);
6091 coding->src_chars = Z - BEG;
6092 coding->src_bytes = Z_BYTE - BEG_BYTE;
6093 coding->src_pos = BEG;
6094 coding->src_pos_byte = BEG_BYTE;
6095 coding->src_multibyte = Z < Z_BYTE;
6096 }
6097 else if (STRINGP (src_object))
6098 {
6099 coding->src_pos = from;
6100 coding->src_pos_byte = from_byte;
6101 }
6102 else if (BUFFERP (src_object))
d46c5b12 6103 {
df7492f9
KH
6104 set_buffer_internal (XBUFFER (src_object));
6105 if (from != GPT)
6106 move_gap_both (from, from_byte);
6107 if (EQ (src_object, dst_object))
d46c5b12 6108 {
df7492f9
KH
6109 del_range_both (from, from_byte, to, to_byte, 1);
6110 coding->src_pos = -chars;
6111 coding->src_pos_byte = -bytes;
d46c5b12 6112 }
df7492f9 6113 else
d46c5b12 6114 {
df7492f9
KH
6115 coding->src_pos = from;
6116 coding->src_pos_byte = from_byte;
d46c5b12
KH
6117 }
6118 }
4ed46869 6119
df7492f9 6120 if (BUFFERP (dst_object))
d46c5b12 6121 {
df7492f9
KH
6122 coding->dst_object = dst_object;
6123 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6124 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6125 coding->dst_multibyte
6126 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6127 }
df7492f9 6128 else if (EQ (dst_object, Qt))
4956c225 6129 {
df7492f9 6130 coding->dst_object = Qnil;
df7492f9 6131 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6132 if (coding->dst_bytes == 0)
6133 coding->dst_bytes = 1;
6134 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6135 coding->dst_multibyte = 0;
4956c225 6136 }
df7492f9 6137 else
78108bcd 6138 {
df7492f9
KH
6139 coding->dst_object = Qnil;
6140 coding->dst_multibyte = 0;
78108bcd
KH
6141 }
6142
df7492f9 6143 encode_coding (coding);
4ed46869 6144
df7492f9 6145 if (EQ (dst_object, Qt))
4ed46869 6146 {
df7492f9
KH
6147 if (BUFFERP (coding->dst_object))
6148 coding->dst_object = Fbuffer_string ();
6149 else
73be902c 6150 {
df7492f9
KH
6151 coding->dst_object
6152 = make_unibyte_string ((char *) coding->destination,
6153 coding->produced);
6154 xfree (coding->destination);
73be902c 6155 }
4ed46869 6156 }
d46c5b12 6157
df7492f9 6158 unbind_to (count, Qnil);
b73bfc1c
KH
6159}
6160
df7492f9 6161
b73bfc1c 6162Lisp_Object
df7492f9 6163preferred_coding_system ()
b73bfc1c 6164{
df7492f9 6165 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6166
df7492f9 6167 return CODING_ID_NAME (id);
4ed46869
KH
6168}
6169
6170\f
6171#ifdef emacs
1397dc18 6172/*** 8. Emacs Lisp library functions ***/
4ed46869 6173
4ed46869 6174DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6175 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6176See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6177about coding-system objects. */)
6178 (obj)
4ed46869
KH
6179 Lisp_Object obj;
6180{
df7492f9 6181 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6182}
6183
9d991de8
RS
6184DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6185 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6186 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6187 (prompt)
4ed46869
KH
6188 Lisp_Object prompt;
6189{
e0e989f6 6190 Lisp_Object val;
9d991de8
RS
6191 do
6192 {
4608c386
KH
6193 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6194 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6195 }
6196 while (XSTRING (val)->size == 0);
e0e989f6 6197 return (Fintern (val, Qnil));
4ed46869
KH
6198}
6199
9b787f3e 6200DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6201 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6202If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6203 (prompt, default_coding_system)
9b787f3e 6204 Lisp_Object prompt, default_coding_system;
4ed46869 6205{
f44d27ce 6206 Lisp_Object val;
9b787f3e
RS
6207 if (SYMBOLP (default_coding_system))
6208 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6209 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6210 Qt, Qnil, Qcoding_system_history,
6211 default_coding_system, Qnil);
e0e989f6 6212 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6213}
6214
6215DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6216 1, 1, 0,
48b0f3ae
PJ
6217 doc: /* Check validity of CODING-SYSTEM.
6218If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6219It is valid if it is a symbol with a non-nil `coding-system' property.
6220The value of property should be a vector of length 5. */)
df7492f9 6221 (coding_system)
4ed46869
KH
6222 Lisp_Object coding_system;
6223{
b7826503 6224 CHECK_SYMBOL (coding_system);
4ed46869
KH
6225 if (!NILP (Fcoding_system_p (coding_system)))
6226 return coding_system;
6227 while (1)
02ba4723 6228 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6229}
df7492f9 6230
3a73fa5d 6231\f
d46c5b12 6232Lisp_Object
df7492f9 6233detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6234 unsigned char *src;
6235 int src_bytes, highest;
0a28aafb 6236 int multibytep;
df7492f9 6237 Lisp_Object coding_system;
4ed46869 6238{
df7492f9
KH
6239 unsigned char *src_end = src + src_bytes;
6240 int mask = CATEGORY_MASK_ANY;
6241 int detected = 0;
6242 int c, i;
6243 Lisp_Object attrs, eol_type;
6244 Lisp_Object val;
6245 struct coding_system coding;
6246
6247 if (NILP (coding_system))
6248 coding_system = Qundecided;
6249 setup_coding_system (coding_system, &coding);
6250 attrs = CODING_ID_ATTRS (coding.id);
6251 eol_type = CODING_ID_EOL_TYPE (coding.id);
4ed46869 6252
df7492f9
KH
6253 coding.source = src;
6254 coding.src_bytes = src_bytes;
6255 coding.src_multibyte = multibytep;
6256 coding.consumed = 0;
4ed46869 6257
df7492f9 6258 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
4ed46869 6259 {
df7492f9 6260 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
4ed46869 6261 }
df7492f9 6262 else
4ed46869 6263 {
df7492f9
KH
6264 coding_system = Qnil;
6265 for (; src < src_end; src++)
4ed46869 6266 {
df7492f9
KH
6267 c = *src;
6268 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6269 || c == ISO_CODE_SI
6270 || c == ISO_CODE_SO)))
d46c5b12 6271 break;
4ed46869 6272 }
df7492f9
KH
6273 coding.head_ascii = src - coding.source;
6274
6275 if (src < src_end)
6276 for (i = 0; i < coding_category_raw_text; i++)
6277 {
6278 enum coding_category category = coding_priorities[i];
6279 struct coding_system *this = coding_categories + category;
6280
6281 if (category >= coding_category_raw_text
6282 || detected & (1 << category))
6283 continue;
6284
6285 if (this->id < 0)
6286 {
6287 /* No coding system of this category is defined. */
6288 mask &= ~(1 << category);
6289 }
6290 else
6291 {
6292 detected |= detected_mask[category];
6293 if ((*(coding_categories[category].detector)) (&coding, &mask)
6294 && highest)
6295 {
6296 mask &= detected_mask[category];
6297 break;
6298 }
6299 }
6300 }
4ed46869 6301 }
4ed46869 6302
df7492f9
KH
6303 if (!mask)
6304 val = Fcons (make_number (coding_category_raw_text), Qnil);
6305 else if (mask == CATEGORY_MASK_ANY)
6306 val = Fcons (make_number (coding_category_undecided), Qnil);
6307 else if (highest)
4ed46869 6308 {
df7492f9
KH
6309 for (i = 0; i < coding_category_raw_text; i++)
6310 if (mask & (1 << coding_priorities[i]))
6311 {
6312 val = Fcons (make_number (coding_priorities[i]), Qnil);
6313 break;
6314 }
6315 }
6316 else
6317 {
6318 val = Qnil;
6319 for (i = coding_category_raw_text - 1; i >= 0; i--)
6320 if (mask & (1 << coding_priorities[i]))
6321 val = Fcons (make_number (coding_priorities[i]), val);
4ed46869 6322 }
df7492f9
KH
6323
6324 {
6325 int one_byte_eol = -1, two_byte_eol = -1;
6326 Lisp_Object tail;
6327
6328 for (tail = val; CONSP (tail); tail = XCDR (tail))
6329 {
6330 struct coding_system *this
6331 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6332 : &coding);
6333 int this_eol;
6334
6335 attrs = CODING_ID_ATTRS (this->id);
6336 eol_type = CODING_ID_EOL_TYPE (this->id);
6337 XSETCAR (tail, CODING_ID_NAME (this->id));
6338 if (VECTORP (eol_type))
6339 {
6340 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6341 {
6342 if (two_byte_eol < 0)
6343 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6344 this_eol = two_byte_eol;
6345 }
6346 else
6347 {
6348 if (one_byte_eol < 0)
6349 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6350 this_eol = one_byte_eol;
6351 }
6352 if (this_eol == EOL_SEEN_LF)
6353 XSETCAR (tail, AREF (eol_type, 0));
6354 else if (this_eol == EOL_SEEN_CRLF)
6355 XSETCAR (tail, AREF (eol_type, 1));
6356 else if (this_eol == EOL_SEEN_CR)
6357 XSETCAR (tail, AREF (eol_type, 2));
6358 }
6359 }
6360 }
6361
03699b14 6362 return (highest ? XCAR (val) : val);
93dec019 6363}
4ed46869 6364
df7492f9 6365
d46c5b12
KH
6366DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6367 2, 3, 0,
48b0f3ae
PJ
6368 doc: /* Detect coding system of the text in the region between START and END.
6369Return a list of possible coding systems ordered by priority.
6370
6371If only ASCII characters are found, it returns a list of single element
6372`undecided' or its subsidiary coding system according to a detected
6373end-of-line format.
6374
6375If optional argument HIGHEST is non-nil, return the coding system of
6376highest priority. */)
6377 (start, end, highest)
d46c5b12
KH
6378 Lisp_Object start, end, highest;
6379{
6380 int from, to;
6381 int from_byte, to_byte;
6289dd10 6382
b7826503
PJ
6383 CHECK_NUMBER_COERCE_MARKER (start);
6384 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6385
d46c5b12
KH
6386 validate_region (&start, &end);
6387 from = XINT (start), to = XINT (end);
6388 from_byte = CHAR_TO_BYTE (from);
6389 to_byte = CHAR_TO_BYTE (to);
6289dd10 6390
d46c5b12
KH
6391 if (from < GPT && to >= GPT)
6392 move_gap_both (to, to_byte);
c210f766 6393
d46c5b12 6394 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6395 to_byte - from_byte,
0a28aafb
KH
6396 !NILP (highest),
6397 !NILP (current_buffer
df7492f9
KH
6398 ->enable_multibyte_characters),
6399 Qnil);
d46c5b12 6400}
6289dd10 6401
d46c5b12
KH
6402DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6403 1, 2, 0,
48b0f3ae
PJ
6404 doc: /* Detect coding system of the text in STRING.
6405Return a list of possible coding systems ordered by priority.
6406
6407If only ASCII characters are found, it returns a list of single element
6408`undecided' or its subsidiary coding system according to a detected
6409end-of-line format.
6410
6411If optional argument HIGHEST is non-nil, return the coding system of
6412highest priority. */)
6413 (string, highest)
d46c5b12
KH
6414 Lisp_Object string, highest;
6415{
b7826503 6416 CHECK_STRING (string);
4ed46869 6417
d46c5b12 6418 return detect_coding_system (XSTRING (string)->data,
df7492f9 6419 STRING_BYTES (XSTRING (string)),
0a28aafb 6420 !NILP (highest),
df7492f9
KH
6421 STRING_MULTIBYTE (string),
6422 Qnil);
4ed46869
KH
6423}
6424
05e6f5dc 6425
df7492f9
KH
6426static INLINE int
6427char_encodable_p (c, attrs)
6428 int c;
6429 Lisp_Object attrs;
05e6f5dc 6430{
df7492f9 6431 Lisp_Object tail;
df7492f9 6432 struct charset *charset;
05e6f5dc 6433
df7492f9
KH
6434 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6435 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6436 {
df7492f9
KH
6437 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6438 if (CHAR_CHARSET_P (c, charset))
6439 break;
05e6f5dc 6440 }
df7492f9 6441 return (! NILP (tail));
05e6f5dc
KH
6442}
6443
6444
df7492f9
KH
6445/* Return a list of coding systems that safely encode the text between
6446 START and END. If EXCLUDE is non-nil, it is a list of coding
6447 systems not to check. The returned list doesn't contain any such
6448 coding systems. In any case, If the text contains only ASCII or is
6449 unibyte, return t. */
6450
6451DEFUN ("find-coding-systems-region-internal",
6452 Ffind_coding_systems_region_internal,
6453 Sfind_coding_systems_region_internal, 2, 3, 0,
6454 doc: /* Internal use only. */)
6455 (start, end, exclude)
6456 Lisp_Object start, end, exclude;
6457{
6458 Lisp_Object coding_attrs_list, safe_codings;
6459 EMACS_INT start_byte, end_byte;
6460 unsigned char *p, *pbeg, *pend;
6461 int c;
6462 Lisp_Object tail, elt;
05e6f5dc 6463
df7492f9
KH
6464 if (STRINGP (start))
6465 {
6466 if (!STRING_MULTIBYTE (start)
6467 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6468 return Qt;
6469 start_byte = 0;
6470 end_byte = STRING_BYTES (XSTRING (start));
6471 }
6472 else
6473 {
6474 CHECK_NUMBER_COERCE_MARKER (start);
6475 CHECK_NUMBER_COERCE_MARKER (end);
6476 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6477 args_out_of_range (start, end);
6478 if (NILP (current_buffer->enable_multibyte_characters))
6479 return Qt;
6480 start_byte = CHAR_TO_BYTE (XINT (start));
6481 end_byte = CHAR_TO_BYTE (XINT (end));
6482 if (XINT (end) - XINT (start) == end_byte - start_byte)
6483 return Qt;
05e6f5dc 6484
df7492f9
KH
6485 if (start < GPT && end > GPT)
6486 {
6487 if ((GPT - start) < (end - GPT))
6488 move_gap_both (start, start_byte);
6489 else
6490 move_gap_both (end, end_byte);
6491 }
6492 }
05e6f5dc 6493
df7492f9
KH
6494 coding_attrs_list = Qnil;
6495 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6496 if (NILP (exclude)
6497 || NILP (Fmemq (XCAR (tail), exclude)))
6498 {
6499 Lisp_Object attrs;
05e6f5dc 6500
df7492f9
KH
6501 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6502 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6503 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6504 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6505 }
6506
6507 if (STRINGP (start))
6508 p = pbeg = XSTRING (start)->data;
6509 else
6510 p = pbeg = BYTE_POS_ADDR (start_byte);
6511 pend = p + (end_byte - start_byte);
6512
6513 while (p < pend && ASCII_BYTE_P (*p)) p++;
6514 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6515
6516 while (p < pend)
6517 {
df7492f9
KH
6518 if (ASCII_BYTE_P (*p))
6519 p++;
6520 else
6521 {
6522 c = STRING_CHAR_ADVANCE (p);
6523
6524 charset_map_loaded = 0;
6525 for (tail = coding_attrs_list; CONSP (tail);)
6526 {
6527 elt = XCAR (tail);
6528 if (NILP (elt))
6529 tail = XCDR (tail);
6530 else if (char_encodable_p (c, elt))
6531 tail = XCDR (tail);
6532 else if (CONSP (XCDR (tail)))
6533 {
6534 XSETCAR (tail, XCAR (XCDR (tail)));
6535 XSETCDR (tail, XCDR (XCDR (tail)));
6536 }
6537 else
6538 {
6539 XSETCAR (tail, Qnil);
6540 tail = XCDR (tail);
6541 }
6542 }
6543 if (charset_map_loaded)
6544 {
6545 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6546
df7492f9
KH
6547 if (STRINGP (start))
6548 pbeg = XSTRING (start)->data;
6549 else
6550 pbeg = BYTE_POS_ADDR (start_byte);
6551 p = pbeg + p_offset;
6552 pend = pbeg + pend_offset;
6553 }
6554 }
05e6f5dc 6555 }
df7492f9
KH
6556
6557 safe_codings = Qnil;
6558 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6559 if (! NILP (XCAR (tail)))
6560 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6561
05e6f5dc
KH
6562 return safe_codings;
6563}
6564
6565
df7492f9
KH
6566DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6567 Scheck_coding_systems_region, 3, 3, 0,
6568 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6569
df7492f9
KH
6570START and END are buffer positions specifying the region.
6571CODING-SYSTEM-LIST is a list of coding systems to check.
6572
6573The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6574CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6575whole region, POS0, POS1, ... are buffer positions where non-encodable
6576characters are found.
6577
6578If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6579value is nil.
6580
6581START may be a string. In that case, check if the string is
6582encodable, and the value contains indices to the string instead of
6583buffer positions. END is ignored. */)
6584 (start, end, coding_system_list)
6585 Lisp_Object start, end, coding_system_list;
05e6f5dc 6586{
df7492f9
KH
6587 Lisp_Object list;
6588 EMACS_INT start_byte, end_byte;
6589 int pos;
6590 unsigned char *p, *pbeg, *pend;
6591 int c;
6592 Lisp_Object tail, elt;
05e6f5dc
KH
6593
6594 if (STRINGP (start))
6595 {
df7492f9
KH
6596 if (!STRING_MULTIBYTE (start)
6597 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6598 return Qnil;
6599 start_byte = 0;
6600 end_byte = STRING_BYTES (XSTRING (start));
6601 pos = 0;
05e6f5dc
KH
6602 }
6603 else
6604 {
b7826503
PJ
6605 CHECK_NUMBER_COERCE_MARKER (start);
6606 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
6607 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6608 args_out_of_range (start, end);
6609 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
6610 return Qnil;
6611 start_byte = CHAR_TO_BYTE (XINT (start));
6612 end_byte = CHAR_TO_BYTE (XINT (end));
6613 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 6614 return Qt;
df7492f9
KH
6615
6616 if (start < GPT && end > GPT)
6617 {
6618 if ((GPT - start) < (end - GPT))
6619 move_gap_both (start, start_byte);
6620 else
6621 move_gap_both (end, end_byte);
6622 }
6623 pos = start;
6624 }
6625
6626 list = Qnil;
6627 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6628 {
6629 elt = XCAR (tail);
6630 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6631 Qnil)),
6632 list);
05e6f5dc
KH
6633 }
6634
df7492f9
KH
6635 if (STRINGP (start))
6636 p = pbeg = XSTRING (start)->data;
6637 else
6638 p = pbeg = BYTE_POS_ADDR (start_byte);
6639 pend = p + (end_byte - start_byte);
6640
6641 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6642 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6643
6644 while (p < pend)
05e6f5dc 6645 {
df7492f9
KH
6646 if (ASCII_BYTE_P (*p))
6647 p++;
6648 else
05e6f5dc 6649 {
df7492f9
KH
6650 c = STRING_CHAR_ADVANCE (p);
6651
6652 charset_map_loaded = 0;
6653 for (tail = list; CONSP (tail); tail = XCDR (tail))
6654 {
6655 elt = XCDR (XCAR (tail));
6656 if (! char_encodable_p (c, XCAR (elt)))
6657 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6658 }
6659 if (charset_map_loaded)
6660 {
6661 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6662
6663 if (STRINGP (start))
6664 pbeg = XSTRING (start)->data;
6665 else
6666 pbeg = BYTE_POS_ADDR (start_byte);
6667 p = pbeg + p_offset;
6668 pend = pbeg + pend_offset;
6669 }
05e6f5dc 6670 }
df7492f9 6671 pos++;
05e6f5dc
KH
6672 }
6673
df7492f9
KH
6674 tail = list;
6675 list = Qnil;
6676 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 6677 {
df7492f9
KH
6678 elt = XCAR (tail);
6679 if (CONSP (XCDR (XCDR (elt))))
6680 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6681 list);
05e6f5dc 6682 }
df7492f9
KH
6683
6684 return list;
05e6f5dc
KH
6685}
6686
6687
df7492f9 6688
4031e2bf 6689Lisp_Object
df7492f9
KH
6690code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6691 Lisp_Object start, end, coding_system, dst_object;
6692 int encodep, norecord;
3a73fa5d
RS
6693{
6694 struct coding_system coding;
df7492f9
KH
6695 EMACS_INT from, from_byte, to, to_byte;
6696 Lisp_Object src_object;
3a73fa5d 6697
b7826503
PJ
6698 CHECK_NUMBER_COERCE_MARKER (start);
6699 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
6700 if (NILP (coding_system))
6701 coding_system = Qno_conversion;
6702 else
6703 CHECK_CODING_SYSTEM (coding_system);
6704 src_object = Fcurrent_buffer ();
6705 if (NILP (dst_object))
6706 dst_object = src_object;
6707 else if (! EQ (dst_object, Qt))
6708 CHECK_BUFFER (dst_object);
3a73fa5d 6709
d46c5b12
KH
6710 validate_region (&start, &end);
6711 from = XFASTINT (start);
df7492f9 6712 from_byte = CHAR_TO_BYTE (from);
d46c5b12 6713 to = XFASTINT (end);
df7492f9 6714 to_byte = CHAR_TO_BYTE (to);
d46c5b12 6715
df7492f9
KH
6716 setup_coding_system (coding_system, &coding);
6717 coding.mode |= CODING_MODE_LAST_BLOCK;
6718
6719 if (encodep)
6720 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6721 dst_object);
6722 else
6723 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6724 dst_object);
6725 if (! norecord)
6726 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 6727
df7492f9
KH
6728 if (coding.result != CODING_RESULT_SUCCESS)
6729 error ("Code conversion error: %d", coding.result);
3a73fa5d 6730
df7492f9
KH
6731 return (BUFFERP (dst_object)
6732 ? make_number (coding.produced_char)
6733 : coding.dst_object);
4031e2bf
KH
6734}
6735
df7492f9 6736
4031e2bf 6737DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 6738 3, 4, "r\nzCoding system: ",
48b0f3ae 6739 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
6740When called from a program, takes four arguments:
6741 START, END, CODING-SYSTEM, and DESTINATION.
6742START and END are buffer positions.
6743
6744Optional 4th arguments DESTINATION specifies where the decoded text goes.
6745If nil, the region between START and END is replace by the decoded text.
6746If buffer, the decoded text is inserted in the buffer.
6747If t, the decoded text is returned.
6748
48b0f3ae
PJ
6749This function sets `last-coding-system-used' to the precise coding system
6750used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6751not fully specified.)
6752It returns the length of the decoded text. */)
df7492f9
KH
6753 (start, end, coding_system, destination)
6754 Lisp_Object start, end, coding_system, destination;
4031e2bf 6755{
df7492f9 6756 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
6757}
6758
6759DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
6760 3, 4, "r\nzCoding system: ",
6761 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
6762When called from a program, takes three arguments:
6763START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
6764
6765Optional 4th arguments DESTINATION specifies where the encoded text goes.
6766If nil, the region between START and END is replace by the encoded text.
6767If buffer, the encoded text is inserted in the buffer.
6768If t, the encoded text is returned.
6769
48b0f3ae
PJ
6770This function sets `last-coding-system-used' to the precise coding system
6771used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6772not fully specified.)
6773It returns the length of the encoded text. */)
df7492f9
KH
6774 (start, end, coding_system, destination)
6775 Lisp_Object start, end, coding_system, destination;
3a73fa5d 6776{
df7492f9 6777 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 6778}
3a73fa5d 6779
4031e2bf 6780Lisp_Object
df7492f9
KH
6781code_convert_string (string, coding_system, dst_object,
6782 encodep, nocopy, norecord)
6783 Lisp_Object string, coding_system, dst_object;
6784 int encodep, nocopy, norecord;
4031e2bf
KH
6785{
6786 struct coding_system coding;
df7492f9 6787 EMACS_INT chars, bytes;
3a73fa5d 6788
b7826503 6789 CHECK_STRING (string);
d46c5b12 6790 if (NILP (coding_system))
df7492f9
KH
6791 {
6792 if (! norecord)
6793 Vlast_coding_system_used = Qno_conversion;
6794 if (NILP (dst_object))
6795 return (nocopy ? Fcopy_sequence (string) : string);
6796 }
4ed46869 6797
df7492f9
KH
6798 if (NILP (coding_system))
6799 coding_system = Qno_conversion;
6800 else
6801 CHECK_CODING_SYSTEM (coding_system);
6802 if (NILP (dst_object))
6803 dst_object = Qt;
6804 else if (! EQ (dst_object, Qt))
6805 CHECK_BUFFER (dst_object);
5f1cd180 6806
df7492f9 6807 setup_coding_system (coding_system, &coding);
d46c5b12 6808 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
6809 chars = XSTRING (string)->size;
6810 bytes = STRING_BYTES (XSTRING (string));
6811 if (encodep)
6812 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6813 else
6814 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6815 if (! norecord)
6816 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 6817
df7492f9
KH
6818 if (coding.result != CODING_RESULT_SUCCESS)
6819 error ("Code conversion error: %d", coding.result);
4ed46869 6820
df7492f9
KH
6821 return (BUFFERP (dst_object)
6822 ? make_number (coding.produced_char)
6823 : coding.dst_object);
4ed46869
KH
6824}
6825
4031e2bf 6826
ecec61c1 6827/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
6828 Do not set Vlast_coding_system_used.
6829
6830 This function is called only from macros DECODE_FILE and
6831 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
6832
6833Lisp_Object
6834code_convert_string_norecord (string, coding_system, encodep)
6835 Lisp_Object string, coding_system;
6836 int encodep;
6837{
0be8721c 6838 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 6839}
ecec61c1 6840
ecec61c1 6841
df7492f9
KH
6842DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6843 2, 4, 0,
6844 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6845
6846Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6847if the decoding operation is trivial.
ecec61c1 6848
df7492f9
KH
6849Optional fourth arg BUFFER non-nil meant that the decoded text is
6850inserted in BUFFER instead of returned as a astring. In this case,
6851the return value is BUFFER.
ecec61c1 6852
df7492f9
KH
6853This function sets `last-coding-system-used' to the precise coding system
6854used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6855not fully specified. */)
6856 (string, coding_system, nocopy, buffer)
6857 Lisp_Object string, coding_system, nocopy, buffer;
6858{
6859 return code_convert_string (string, coding_system, buffer,
6860 0, ! NILP (nocopy), 0);
6861}
6862
6863DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6864 2, 4, 0,
6865 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6866
6867Optional third arg NOCOPY non-nil means it is OK to return STRING
6868itself if the encoding operation is trivial.
6869
6870Optional fourth arg BUFFER non-nil meant that the encoded text is
6871inserted in BUFFER instead of returned as a astring. In this case,
6872the return value is BUFFER.
6873
6874This function sets `last-coding-system-used' to the precise coding system
6875used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6876not fully specified.) */)
6877 (string, coding_system, nocopy, buffer)
6878 Lisp_Object string, coding_system, nocopy, buffer;
6879{
6880 return code_convert_string (string, coding_system, buffer,
6881 nocopy, ! NILP (nocopy), 1);
ecec61c1 6882}
df7492f9 6883
3a73fa5d 6884\f
4ed46869 6885DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6886 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6887Return the corresponding character. */)
6888 (code)
4ed46869
KH
6889 Lisp_Object code;
6890{
df7492f9
KH
6891 Lisp_Object spec, attrs, val;
6892 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6893 int c;
6894
6895 CHECK_NATNUM (code);
6896 c = XFASTINT (code);
6897 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6898 attrs = AREF (spec, 0);
4ed46869 6899
df7492f9
KH
6900 if (ASCII_BYTE_P (c)
6901 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6902 return code;
6903
6904 val = CODING_ATTR_CHARSET_LIST (attrs);
6905 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
6906 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6907 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
6908
6909 if (c <= 0x7F)
6910 charset = charset_roman;
6911 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 6912 {
df7492f9
KH
6913 charset = charset_kana;
6914 c -= 0x80;
55ab7be3
KH
6915 }
6916 else
6917 {
004068e4 6918 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
6919
6920 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6921 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6922 error ("Invalid code: %d", code);
6923 SJIS_TO_JIS (c);
6924 charset = charset_kanji;
55ab7be3 6925 }
df7492f9
KH
6926 c = DECODE_CHAR (charset, c);
6927 if (c < 0)
6928 error ("Invalid code: %d", code);
6929 return make_number (c);
4ed46869
KH
6930}
6931
df7492f9 6932
4ed46869 6933DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6934 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6935Return the corresponding code in SJIS. */)
6936 (ch)
df7492f9 6937 Lisp_Object ch;
4ed46869 6938{
df7492f9
KH
6939 Lisp_Object spec, attrs, charset_list;
6940 int c;
6941 struct charset *charset;
6942 unsigned code;
4ed46869 6943
df7492f9
KH
6944 CHECK_CHARACTER (ch);
6945 c = XFASTINT (ch);
6946 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6947 attrs = AREF (spec, 0);
6948
6949 if (ASCII_CHAR_P (c)
6950 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6951 return ch;
6952
6953 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6954 charset = char_charset (c, charset_list, &code);
6955 if (code == CHARSET_INVALID_CODE (charset))
6956 error ("Can't encode by shift_jis encoding: %d", c);
6957 JIS_TO_SJIS (code);
6958
6959 return make_number (code);
4ed46869
KH
6960}
6961
6962DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
6963 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6964Return the corresponding character. */)
6965 (code)
4ed46869
KH
6966 Lisp_Object code;
6967{
df7492f9
KH
6968 Lisp_Object spec, attrs, val;
6969 struct charset *charset_roman, *charset_big5, *charset;
6970 int c;
4ed46869 6971
df7492f9
KH
6972 CHECK_NATNUM (code);
6973 c = XFASTINT (code);
6974 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6975 attrs = AREF (spec, 0);
6976
6977 if (ASCII_BYTE_P (c)
6978 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6979 return code;
6980
6981 val = CODING_ATTR_CHARSET_LIST (attrs);
6982 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6983 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6984
6985 if (c <= 0x7F)
6986 charset = charset_roman;
c28a9453
KH
6987 else
6988 {
df7492f9
KH
6989 int b1 = c >> 8, b2 = c & 0x7F;
6990 if (b1 < 0xA1 || b1 > 0xFE
6991 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6992 error ("Invalid code: %d", code);
6993 charset = charset_big5;
c28a9453 6994 }
df7492f9
KH
6995 c = DECODE_CHAR (charset, (unsigned )c);
6996 if (c < 0)
6997 error ("Invalid code: %d", code);
6998 return make_number (c);
4ed46869
KH
6999}
7000
7001DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7002 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7003Return the corresponding character code in Big5. */)
7004 (ch)
4ed46869
KH
7005 Lisp_Object ch;
7006{
df7492f9
KH
7007 Lisp_Object spec, attrs, charset_list;
7008 struct charset *charset;
7009 int c;
7010 unsigned code;
7011
7012 CHECK_CHARACTER (ch);
7013 c = XFASTINT (ch);
7014 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7015 attrs = AREF (spec, 0);
7016 if (ASCII_CHAR_P (c)
7017 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7018 return ch;
7019
7020 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7021 charset = char_charset (c, charset_list, &code);
7022 if (code == CHARSET_INVALID_CODE (charset))
7023 error ("Can't encode by Big5 encoding: %d", c);
7024
7025 return make_number (code);
4ed46869 7026}
df7492f9 7027
3a73fa5d 7028\f
1ba9e4ab
KH
7029DEFUN ("set-terminal-coding-system-internal",
7030 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7031 Sset_terminal_coding_system_internal, 1, 1, 0,
7032 doc: /* Internal use only. */)
7033 (coding_system)
b74e4686 7034 Lisp_Object coding_system;
4ed46869 7035{
b7826503 7036 CHECK_SYMBOL (coding_system);
df7492f9
KH
7037 setup_coding_system (Fcheck_coding_system (coding_system),
7038 &terminal_coding);
7039
70c22245 7040 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7041 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7042 /* Characer composition should be disabled. */
7043 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7044 terminal_coding.src_multibyte = 1;
7045 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7046 return Qnil;
7047}
7048
c4825358
KH
7049DEFUN ("set-safe-terminal-coding-system-internal",
7050 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7051 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7052 doc: /* Internal use only. */)
48b0f3ae 7053 (coding_system)
b74e4686 7054 Lisp_Object coding_system;
c4825358 7055{
b7826503 7056 CHECK_SYMBOL (coding_system);
c4825358
KH
7057 setup_coding_system (Fcheck_coding_system (coding_system),
7058 &safe_terminal_coding);
df7492f9
KH
7059 /* Characer composition should be disabled. */
7060 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7061 safe_terminal_coding.src_multibyte = 1;
7062 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7063 return Qnil;
7064}
7065
4ed46869
KH
7066DEFUN ("terminal-coding-system",
7067 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7068 doc: /* Return coding system specified for terminal output. */)
7069 ()
4ed46869 7070{
df7492f9 7071 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7072}
7073
1ba9e4ab
KH
7074DEFUN ("set-keyboard-coding-system-internal",
7075 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7076 Sset_keyboard_coding_system_internal, 1, 1, 0,
7077 doc: /* Internal use only. */)
7078 (coding_system)
4ed46869
KH
7079 Lisp_Object coding_system;
7080{
b7826503 7081 CHECK_SYMBOL (coding_system);
df7492f9
KH
7082 setup_coding_system (Fcheck_coding_system (coding_system),
7083 &keyboard_coding);
7084 /* Characer composition should be disabled. */
7085 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7086 return Qnil;
7087}
7088
7089DEFUN ("keyboard-coding-system",
7090 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7091 doc: /* Return coding system specified for decoding keyboard input. */)
7092 ()
4ed46869 7093{
df7492f9 7094 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7095}
7096
7097\f
a5d301df
KH
7098DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7099 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7100 doc: /* Choose a coding system for an operation based on the target name.
7101The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7102DECODING-SYSTEM is the coding system to use for decoding
7103\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7104for encoding (in case OPERATION does encoding).
7105
7106The first argument OPERATION specifies an I/O primitive:
7107 For file I/O, `insert-file-contents' or `write-region'.
7108 For process I/O, `call-process', `call-process-region', or `start-process'.
7109 For network I/O, `open-network-stream'.
7110
7111The remaining arguments should be the same arguments that were passed
7112to the primitive. Depending on which primitive, one of those arguments
7113is selected as the TARGET. For example, if OPERATION does file I/O,
7114whichever argument specifies the file name is TARGET.
7115
7116TARGET has a meaning which depends on OPERATION:
7117 For file I/O, TARGET is a file name.
7118 For process I/O, TARGET is a process name.
7119 For network I/O, TARGET is a service name or a port number
7120
7121This function looks up what specified for TARGET in,
7122`file-coding-system-alist', `process-coding-system-alist',
7123or `network-coding-system-alist' depending on OPERATION.
7124They may specify a coding system, a cons of coding systems,
7125or a function symbol to call.
7126In the last case, we call the function with one argument,
7127which is a list of all the arguments given to this function.
7128
7129usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7130 (nargs, args)
4ed46869
KH
7131 int nargs;
7132 Lisp_Object *args;
7133{
7134 Lisp_Object operation, target_idx, target, val;
7135 register Lisp_Object chain;
7136
7137 if (nargs < 2)
7138 error ("Too few arguments");
7139 operation = args[0];
7140 if (!SYMBOLP (operation)
7141 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7142 error ("Invalid first arguement");
4ed46869
KH
7143 if (nargs < 1 + XINT (target_idx))
7144 error ("Too few arguments for operation: %s",
7145 XSYMBOL (operation)->name->data);
7146 target = args[XINT (target_idx) + 1];
7147 if (!(STRINGP (target)
7148 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7149 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7150
2e34157c
RS
7151 chain = ((EQ (operation, Qinsert_file_contents)
7152 || EQ (operation, Qwrite_region))
02ba4723 7153 ? Vfile_coding_system_alist
2e34157c 7154 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7155 ? Vnetwork_coding_system_alist
7156 : Vprocess_coding_system_alist));
4ed46869
KH
7157 if (NILP (chain))
7158 return Qnil;
7159
03699b14 7160 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7161 {
f44d27ce 7162 Lisp_Object elt;
4ed46869 7163
df7492f9 7164 elt = XCAR (chain);
4ed46869
KH
7165 if (CONSP (elt)
7166 && ((STRINGP (target)
03699b14
KR
7167 && STRINGP (XCAR (elt))
7168 && fast_string_match (XCAR (elt), target) >= 0)
7169 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7170 {
03699b14 7171 val = XCDR (elt);
b19fd4c5
KH
7172 /* Here, if VAL is both a valid coding system and a valid
7173 function symbol, we return VAL as a coding system. */
02ba4723
KH
7174 if (CONSP (val))
7175 return val;
7176 if (! SYMBOLP (val))
7177 return Qnil;
7178 if (! NILP (Fcoding_system_p (val)))
7179 return Fcons (val, val);
b19fd4c5
KH
7180 if (! NILP (Ffboundp (val)))
7181 {
7182 val = call1 (val, Flist (nargs, args));
7183 if (CONSP (val))
7184 return val;
7185 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7186 return Fcons (val, val);
7187 }
02ba4723
KH
7188 return Qnil;
7189 }
4ed46869
KH
7190 }
7191 return Qnil;
7192}
7193
df7492f9
KH
7194DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7195 Sset_coding_system_priority, 1, MANY, 0,
da7db224 7196 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7197usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7198 (nargs, args)
7199 int nargs;
7200 Lisp_Object *args;
7201{
7202 int i, j;
7203 int changed[coding_category_max];
7204 enum coding_category priorities[coding_category_max];
7205
7206 bzero (changed, sizeof changed);
7207
7208 for (i = j = 0; i < nargs; i++)
7209 {
7210 enum coding_category category;
7211 Lisp_Object spec, attrs;
7212
7213 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7214 attrs = AREF (spec, 0);
7215 category = XINT (CODING_ATTR_CATEGORY (attrs));
7216 if (changed[category])
7217 /* Ignore this coding system because a coding system of the
7218 same category already had a higher priority. */
7219 continue;
7220 changed[category] = 1;
7221 priorities[j++] = category;
7222 if (coding_categories[category].id >= 0
7223 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7224 setup_coding_system (args[i], &coding_categories[category]);
7225 }
7226
7227 /* Now we have decided top J priorities. Reflect the order of the
7228 original priorities to the remaining priorities. */
7229
7230 for (i = j, j = 0; i < coding_category_max; i++, j++)
7231 {
7232 while (j < coding_category_max
7233 && changed[coding_priorities[j]])
7234 j++;
7235 if (j == coding_category_max)
7236 abort ();
7237 priorities[i] = coding_priorities[j];
7238 }
7239
7240 bcopy (priorities, coding_priorities, sizeof priorities);
7241 return Qnil;
7242}
7243
7244DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7245 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7246 doc: /* Return a list of coding systems ordered by their priorities.
7247HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7248 (highestp)
7249 Lisp_Object highestp;
d46c5b12
KH
7250{
7251 int i;
df7492f9 7252 Lisp_Object val;
d46c5b12 7253
df7492f9 7254 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7255 {
df7492f9
KH
7256 enum coding_category category = coding_priorities[i];
7257 int id = coding_categories[category].id;
7258 Lisp_Object attrs;
7259
7260 if (id < 0)
7261 continue;
7262 attrs = CODING_ID_ATTRS (id);
7263 if (! NILP (highestp))
7264 return CODING_ATTR_BASE_NAME (attrs);
7265 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7266 }
7267 return Fnreverse (val);
7268}
7269
f0064e1f
DL
7270static char *suffixes[] = { "-unix", "-dos", "-mac" };
7271
df7492f9
KH
7272static Lisp_Object
7273make_subsidiaries (base)
7274 Lisp_Object base;
7275{
7276 Lisp_Object subsidiaries;
df7492f9
KH
7277 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7278 char *buf = (char *) alloca (base_name_len + 6);
7279 int i;
7280
7281 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7282 subsidiaries = Fmake_vector (make_number (3), Qnil);
7283 for (i = 0; i < 3; i++)
7284 {
7285 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7286 ASET (subsidiaries, i, intern (buf));
7287 }
7288 return subsidiaries;
7289}
7290
7291
7292DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7293 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7294 doc: /* For internal use only.
7295usage: (define-coding-system-internal ...) */)
df7492f9
KH
7296 (nargs, args)
7297 int nargs;
7298 Lisp_Object *args;
7299{
7300 Lisp_Object name;
7301 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7302 Lisp_Object attrs; /* Vector of attributes. */
7303 Lisp_Object eol_type;
7304 Lisp_Object aliases;
7305 Lisp_Object coding_type, charset_list, safe_charsets;
7306 enum coding_category category;
7307 Lisp_Object tail, val;
7308 int max_charset_id = 0;
7309 int i;
7310
7311 if (nargs < coding_arg_max)
7312 goto short_args;
7313
7314 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7315
7316 name = args[coding_arg_name];
7317 CHECK_SYMBOL (name);
7318 CODING_ATTR_BASE_NAME (attrs) = name;
7319
7320 val = args[coding_arg_mnemonic];
7321 if (! STRINGP (val))
7322 CHECK_CHARACTER (val);
7323 CODING_ATTR_MNEMONIC (attrs) = val;
7324
7325 coding_type = args[coding_arg_coding_type];
7326 CHECK_SYMBOL (coding_type);
7327 CODING_ATTR_TYPE (attrs) = coding_type;
7328
7329 charset_list = args[coding_arg_charset_list];
7330 if (SYMBOLP (charset_list))
7331 {
7332 if (EQ (charset_list, Qiso_2022))
7333 {
7334 if (! EQ (coding_type, Qiso_2022))
7335 error ("Invalid charset-list");
7336 charset_list = Viso_2022_charset_list;
7337 }
7338 else if (EQ (charset_list, Qemacs_mule))
7339 {
7340 if (! EQ (coding_type, Qemacs_mule))
7341 error ("Invalid charset-list");
7342 charset_list = Vemacs_mule_charset_list;
7343 }
7344 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7345 if (max_charset_id < XFASTINT (XCAR (tail)))
7346 max_charset_id = XFASTINT (XCAR (tail));
7347 }
7348 else
7349 {
7350 charset_list = Fcopy_sequence (charset_list);
7351 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7352 {
7353 struct charset *charset;
7354
7355 val = Fcar (tail);
7356 CHECK_CHARSET_GET_CHARSET (val, charset);
7357 if (EQ (coding_type, Qiso_2022)
7358 ? CHARSET_ISO_FINAL (charset) < 0
7359 : EQ (coding_type, Qemacs_mule)
7360 ? CHARSET_EMACS_MULE_ID (charset) < 0
7361 : 0)
7362 error ("Can't handle charset `%s'",
7363 XSYMBOL (CHARSET_NAME (charset))->name->data);
7364
7365 XCAR (tail) = make_number (charset->id);
7366 if (max_charset_id < charset->id)
7367 max_charset_id = charset->id;
7368 }
7369 }
7370 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7371
7372 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7373 make_number (255));
7374 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7375 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7376 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7377
7378 val = args[coding_arg_decode_translation_table];
7379 if (! NILP (val))
7380 CHECK_CHAR_TABLE (val);
7381 CODING_ATTR_DECODE_TBL (attrs) = val;
7382
7383 val = args[coding_arg_encode_translation_table];
7384 if (! NILP (val))
7385 CHECK_CHAR_TABLE (val);
7386 CODING_ATTR_ENCODE_TBL (attrs) = val;
7387
7388 val = args[coding_arg_post_read_conversion];
7389 CHECK_SYMBOL (val);
7390 CODING_ATTR_POST_READ (attrs) = val;
7391
7392 val = args[coding_arg_pre_write_conversion];
7393 CHECK_SYMBOL (val);
7394 CODING_ATTR_PRE_WRITE (attrs) = val;
7395
7396 val = args[coding_arg_default_char];
7397 if (NILP (val))
7398 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7399 else
7400 {
7401 CHECK_CHARACTER (val);
7402 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7403 }
7404
7405 val = args[coding_arg_plist];
7406 CHECK_LIST (val);
7407 CODING_ATTR_PLIST (attrs) = val;
7408
7409 if (EQ (coding_type, Qcharset))
7410 {
c7c66a95
KH
7411 /* Generate a lisp vector of 256 elements. Each element is nil,
7412 integer, or a list of charset IDs.
7413
7414 If Nth element is nil, the byte code N is invalid in this
7415 coding system.
7416
7417 If Nth element is a number NUM, N is the first byte of a
7418 charset whose ID is NUM.
7419
7420 If Nth element is a list of charset IDs, N is the first byte
7421 of one of them. The list is sorted by dimensions of the
7422 charsets. A charset of smaller dimension comes firtst.
7423 */
df7492f9
KH
7424 val = Fmake_vector (make_number (256), Qnil);
7425
7426 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7427 {
c7c66a95
KH
7428 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7429 int dim = CHARSET_DIMENSION (charset);
7430 int idx = (dim - 1) * 4;
7431
15d143f7
KH
7432 for (i = charset->code_space[idx];
7433 i <= charset->code_space[idx + 1]; i++)
7434 {
c7c66a95
KH
7435 Lisp_Object tmp, tmp2;
7436 int dim2;
7437
7438 tmp = AREF (val, i);
7439 if (NILP (tmp))
7440 tmp = XCAR (tail);
7441 else if (NUMBERP (tmp))
7442 {
7443 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7444 if (dim < dim2)
c7c66a95 7445 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7446 else
7447 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7448 }
15d143f7 7449 else
c7c66a95
KH
7450 {
7451 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7452 {
7453 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7454 if (dim < dim2)
7455 break;
7456 }
7457 if (NILP (tmp2))
7458 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7459 else
7460 {
7461 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7462 XSETCAR (tmp2, XCAR (tail));
7463 }
7464 }
7465 ASET (val, i, tmp);
15d143f7 7466 }
df7492f9
KH
7467 }
7468 ASET (attrs, coding_attr_charset_valids, val);
7469 category = coding_category_charset;
7470 }
7471 else if (EQ (coding_type, Qccl))
7472 {
7473 Lisp_Object valids;
7474
7475 if (nargs < coding_arg_ccl_max)
7476 goto short_args;
7477
7478 val = args[coding_arg_ccl_decoder];
7479 CHECK_CCL_PROGRAM (val);
7480 if (VECTORP (val))
7481 val = Fcopy_sequence (val);
7482 ASET (attrs, coding_attr_ccl_decoder, val);
7483
7484 val = args[coding_arg_ccl_encoder];
7485 CHECK_CCL_PROGRAM (val);
7486 if (VECTORP (val))
7487 val = Fcopy_sequence (val);
7488 ASET (attrs, coding_attr_ccl_encoder, val);
7489
7490 val = args[coding_arg_ccl_valids];
7491 valids = Fmake_string (make_number (256), make_number (0));
7492 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7493 {
7494 val = Fcar (tail);
7495 if (INTEGERP (val))
7496 ASET (valids, XINT (val), 1);
7497 else
7498 {
7499 int from, to;
7500
7501 CHECK_CONS (val);
7502 CHECK_NUMBER (XCAR (val));
7503 CHECK_NUMBER (XCDR (val));
7504 from = XINT (XCAR (val));
7505 to = XINT (XCDR (val));
7506 for (i = from; i <= to; i++)
7507 ASET (valids, i, 1);
7508 }
7509 }
7510 ASET (attrs, coding_attr_ccl_valids, valids);
7511
7512 category = coding_category_ccl;
7513 }
7514 else if (EQ (coding_type, Qutf_16))
7515 {
7516 Lisp_Object bom, endian;
7517
7518 if (nargs < coding_arg_utf16_max)
7519 goto short_args;
7520
7521 bom = args[coding_arg_utf16_bom];
7522 if (! NILP (bom) && ! EQ (bom, Qt))
7523 {
7524 CHECK_CONS (bom);
7525 CHECK_CODING_SYSTEM (XCAR (bom));
7526 CHECK_CODING_SYSTEM (XCDR (bom));
7527 }
7528 ASET (attrs, coding_attr_utf_16_bom, bom);
7529
7530 endian = args[coding_arg_utf16_endian];
7531 ASET (attrs, coding_attr_utf_16_endian, endian);
7532
7533 category = (CONSP (bom)
7534 ? coding_category_utf_16_auto
7535 : NILP (bom)
7536 ? (NILP (endian)
7537 ? coding_category_utf_16_be_nosig
7538 : coding_category_utf_16_le_nosig)
7539 : (NILP (endian)
7540 ? coding_category_utf_16_be
7541 : coding_category_utf_16_le));
7542 }
7543 else if (EQ (coding_type, Qiso_2022))
7544 {
7545 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7546 int i, id;
1397dc18 7547
df7492f9
KH
7548 if (nargs < coding_arg_iso2022_max)
7549 goto short_args;
7550
7551 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7552 CHECK_VECTOR (initial);
7553 for (i = 0; i < 4; i++)
7554 {
7555 val = Faref (initial, make_number (i));
7556 if (! NILP (val))
7557 {
7558 CHECK_CHARSET_GET_ID (val, id);
7559 ASET (initial, i, make_number (id));
7560 }
7561 else
7562 ASET (initial, i, make_number (-1));
7563 }
7564
7565 reg_usage = args[coding_arg_iso2022_reg_usage];
7566 CHECK_CONS (reg_usage);
7567 CHECK_NATNUM (XCAR (reg_usage));
7568 CHECK_NATNUM (XCDR (reg_usage));
7569
7570 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7571 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 7572 {
df7492f9
KH
7573 int id;
7574
7575 val = Fcar (tail);
7576 CHECK_CONS (val);
7577 CHECK_CHARSET_GET_ID (XCAR (val), id);
7578 CHECK_NATNUM (XCDR (val));
7579 if (XINT (XCDR (val)) >= 4)
7580 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7581 XCAR (val) = make_number (id);
1397dc18 7582 }
df7492f9
KH
7583
7584 flags = args[coding_arg_iso2022_flags];
7585 CHECK_NATNUM (flags);
7586 i = XINT (flags);
7587 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7588 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7589
7590 ASET (attrs, coding_attr_iso_initial, initial);
7591 ASET (attrs, coding_attr_iso_usage, reg_usage);
7592 ASET (attrs, coding_attr_iso_request, request);
7593 ASET (attrs, coding_attr_iso_flags, flags);
7594 setup_iso_safe_charsets (attrs);
7595
7596 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7597 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7598 | CODING_ISO_FLAG_SINGLE_SHIFT))
7599 ? coding_category_iso_7_else
7600 : EQ (args[coding_arg_charset_list], Qiso_2022)
7601 ? coding_category_iso_7
7602 : coding_category_iso_7_tight);
7603 else
7604 {
7605 int id = XINT (AREF (initial, 1));
7606
7607 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7608 | CODING_ISO_FLAG_SINGLE_SHIFT))
7609 || EQ (args[coding_arg_charset_list], Qiso_2022)
7610 || id < 0)
7611 ? coding_category_iso_8_else
7612 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7613 ? coding_category_iso_8_1
7614 : coding_category_iso_8_2);
7615 }
7616 }
7617 else if (EQ (coding_type, Qemacs_mule))
7618 {
7619 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7620 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7621
7622 category = coding_category_emacs_mule;
7623 }
7624 else if (EQ (coding_type, Qshift_jis))
7625 {
7626
7627 struct charset *charset;
7628
7629 if (XINT (Flength (charset_list)) != 3)
7630 error ("There should be just three charsets");
7631
7632 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7633 if (CHARSET_DIMENSION (charset) != 1)
7634 error ("Dimension of charset %s is not one",
7635 XSYMBOL (CHARSET_NAME (charset))->name->data);
7636
7637 charset_list = XCDR (charset_list);
7638 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7639 if (CHARSET_DIMENSION (charset) != 1)
7640 error ("Dimension of charset %s is not one",
7641 XSYMBOL (CHARSET_NAME (charset))->name->data);
7642
7643 charset_list = XCDR (charset_list);
7644 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7645 if (CHARSET_DIMENSION (charset) != 2)
7646 error ("Dimension of charset %s is not two",
7647 XSYMBOL (CHARSET_NAME (charset))->name->data);
7648
7649 category = coding_category_sjis;
7650 Vsjis_coding_system = name;
7651 }
7652 else if (EQ (coding_type, Qbig5))
7653 {
7654 struct charset *charset;
7655
7656 if (XINT (Flength (charset_list)) != 2)
7657 error ("There should be just two charsets");
7658
7659 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7660 if (CHARSET_DIMENSION (charset) != 1)
7661 error ("Dimension of charset %s is not one",
7662 XSYMBOL (CHARSET_NAME (charset))->name->data);
7663
7664 charset_list = XCDR (charset_list);
7665 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7666 if (CHARSET_DIMENSION (charset) != 2)
7667 error ("Dimension of charset %s is not two",
7668 XSYMBOL (CHARSET_NAME (charset))->name->data);
7669
7670 category = coding_category_big5;
7671 Vbig5_coding_system = name;
7672 }
7673 else if (EQ (coding_type, Qraw_text))
7674 category = coding_category_raw_text;
7675 else if (EQ (coding_type, Qutf_8))
7676 category = coding_category_utf_8;
7677 else if (EQ (coding_type, Qundecided))
7678 category = coding_category_undecided;
7679 else
7680 error ("Invalid coding system type: %s",
7681 XSYMBOL (coding_type)->name->data);
7682
7683 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7684
7685 eol_type = args[coding_arg_eol_type];
7686 if (! NILP (eol_type)
7687 && ! EQ (eol_type, Qunix)
7688 && ! EQ (eol_type, Qdos)
7689 && ! EQ (eol_type, Qmac))
7690 error ("Invalid eol-type");
7691
7692 aliases = Fcons (name, Qnil);
7693
7694 if (NILP (eol_type))
7695 {
7696 eol_type = make_subsidiaries (name);
7697 for (i = 0; i < 3; i++)
1397dc18 7698 {
df7492f9
KH
7699 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7700
7701 this_name = AREF (eol_type, i);
7702 this_aliases = Fcons (this_name, Qnil);
7703 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7704 this_spec = Fmake_vector (make_number (3), attrs);
7705 ASET (this_spec, 1, this_aliases);
7706 ASET (this_spec, 2, this_eol_type);
7707 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7708 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7709 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7710 Vcoding_system_alist);
1397dc18 7711 }
d46c5b12 7712 }
1397dc18 7713
df7492f9
KH
7714 spec_vec = Fmake_vector (make_number (3), attrs);
7715 ASET (spec_vec, 1, aliases);
7716 ASET (spec_vec, 2, eol_type);
7717
7718 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7719 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7720 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7721 Vcoding_system_alist);
7722
7723 {
7724 int id = coding_categories[category].id;
7725
7726 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7727 setup_coding_system (name, &coding_categories[category]);
7728 }
7729
d46c5b12 7730 return Qnil;
df7492f9
KH
7731
7732 short_args:
7733 return Fsignal (Qwrong_number_of_arguments,
7734 Fcons (intern ("define-coding-system-internal"),
7735 make_number (nargs)));
d46c5b12
KH
7736}
7737
da7db224
DL
7738/* Fixme: should this record the alias relationships for
7739 diagnostics? */
df7492f9
KH
7740DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7741 Sdefine_coding_system_alias, 2, 2, 0,
7742 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7743 (alias, coding_system)
7744 Lisp_Object alias, coding_system;
66cfb530 7745{
df7492f9 7746 Lisp_Object spec, aliases, eol_type;
84d60297 7747
df7492f9
KH
7748 CHECK_SYMBOL (alias);
7749 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7750 aliases = AREF (spec, 1);
7751 while (!NILP (XCDR (aliases)))
7752 aliases = XCDR (aliases);
7753 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 7754
df7492f9
KH
7755 eol_type = AREF (spec, 2);
7756 if (VECTORP (eol_type))
66cfb530 7757 {
df7492f9
KH
7758 Lisp_Object subsidiaries;
7759 int i;
7760
7761 subsidiaries = make_subsidiaries (alias);
7762 for (i = 0; i < 3; i++)
7763 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7764 AREF (eol_type, i));
7765
7766 ASET (spec, 2, subsidiaries);
66cfb530 7767 }
df7492f9
KH
7768
7769 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
7770 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7771 Vcoding_system_alist);
66cfb530
KH
7772
7773 return Qnil;
7774}
7775
df7492f9
KH
7776DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7777 1, 1, 0,
7778 doc: /* Return the base of CODING-SYSTEM.
da7db224 7779Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
7780 (coding_system)
7781 Lisp_Object coding_system;
7782{
7783 Lisp_Object spec, attrs;
7784
7785 if (NILP (coding_system))
7786 return (Qno_conversion);
7787 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7788 attrs = AREF (spec, 0);
7789 return CODING_ATTR_BASE_NAME (attrs);
7790}
7791
7792DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7793 1, 1, 0,
7794 doc: "Return the property list of CODING-SYSTEM.")
7795 (coding_system)
7796 Lisp_Object coding_system;
7797{
7798 Lisp_Object spec, attrs;
7799
7800 if (NILP (coding_system))
7801 coding_system = Qno_conversion;
7802 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7803 attrs = AREF (spec, 0);
7804 return CODING_ATTR_PLIST (attrs);
7805}
7806
7807
7808DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7809 1, 1, 0,
da7db224 7810 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
7811 (coding_system)
7812 Lisp_Object coding_system;
7813{
7814 Lisp_Object spec;
7815
7816 if (NILP (coding_system))
7817 coding_system = Qno_conversion;
7818 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 7819 return AREF (spec, 1);
df7492f9
KH
7820}
7821
7822DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7823 Scoding_system_eol_type, 1, 1, 0,
7824 doc: /* Return eol-type of CODING-SYSTEM.
7825An eol-type is integer 0, 1, 2, or a vector of coding systems.
7826
7827Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7828and CR respectively.
7829
7830A vector value indicates that a format of end-of-line should be
7831detected automatically. Nth element of the vector is the subsidiary
7832coding system whose eol-type is N. */)
7833 (coding_system)
7834 Lisp_Object coding_system;
7835{
7836 Lisp_Object spec, eol_type;
7837 int n;
7838
7839 if (NILP (coding_system))
7840 coding_system = Qno_conversion;
7841 if (! CODING_SYSTEM_P (coding_system))
7842 return Qnil;
7843 spec = CODING_SYSTEM_SPEC (coding_system);
7844 eol_type = AREF (spec, 2);
7845 if (VECTORP (eol_type))
7846 return Fcopy_sequence (eol_type);
7847 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7848 return make_number (n);
7849}
7850
4ed46869
KH
7851#endif /* emacs */
7852
7853\f
1397dc18 7854/*** 9. Post-amble ***/
4ed46869 7855
dfcf069d 7856void
4ed46869
KH
7857init_coding_once ()
7858{
7859 int i;
7860
df7492f9
KH
7861 for (i = 0; i < coding_category_max; i++)
7862 {
7863 coding_categories[i].id = -1;
7864 coding_priorities[i] = i;
7865 }
4ed46869
KH
7866
7867 /* ISO2022 specific initialize routine. */
7868 for (i = 0; i < 0x20; i++)
b73bfc1c 7869 iso_code_class[i] = ISO_control_0;
4ed46869
KH
7870 for (i = 0x21; i < 0x7F; i++)
7871 iso_code_class[i] = ISO_graphic_plane_0;
7872 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 7873 iso_code_class[i] = ISO_control_1;
4ed46869
KH
7874 for (i = 0xA1; i < 0xFF; i++)
7875 iso_code_class[i] = ISO_graphic_plane_1;
7876 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7877 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7878 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7879 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7880 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7881 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7882 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7883 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7884 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7885 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7886
b843d1ae 7887 inhibit_pre_post_conversion = 0;
df7492f9
KH
7888
7889 for (i = 0; i < 256; i++)
7890 {
7891 emacs_mule_bytes[i] = 1;
7892 }
781d7a48
KH
7893 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
7894 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
7895 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
7896 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
7897}
7898
7899#ifdef emacs
7900
dfcf069d 7901void
e0e989f6
KH
7902syms_of_coding ()
7903{
df7492f9
KH
7904 staticpro (&Vcoding_system_hash_table);
7905 Vcoding_system_hash_table = Fmakehash (Qeq);
7906
7907 staticpro (&Vsjis_coding_system);
7908 Vsjis_coding_system = Qnil;
7909
7910 staticpro (&Vbig5_coding_system);
7911 Vbig5_coding_system = Qnil;
7912
7913 staticpro (&Vcode_conversion_work_buf_list);
7914 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 7915
df7492f9
KH
7916 staticpro (&Vcode_conversion_reused_work_buf);
7917 Vcode_conversion_reused_work_buf = Qnil;
7918
7919 DEFSYM (Qcharset, "charset");
7920 DEFSYM (Qtarget_idx, "target-idx");
7921 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
7922 Fset (Qcoding_system_history, Qnil);
7923
9ce27fde 7924 /* Target FILENAME is the first argument. */
e0e989f6 7925 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 7926 /* Target FILENAME is the third argument. */
e0e989f6
KH
7927 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7928
df7492f9 7929 DEFSYM (Qcall_process, "call-process");
9ce27fde 7930 /* Target PROGRAM is the first argument. */
e0e989f6
KH
7931 Fput (Qcall_process, Qtarget_idx, make_number (0));
7932
df7492f9 7933 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 7934 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7935 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7936
df7492f9 7937 DEFSYM (Qstart_process, "start-process");
9ce27fde 7938 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7939 Fput (Qstart_process, Qtarget_idx, make_number (2));
7940
df7492f9 7941 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 7942 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
7943 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7944
df7492f9
KH
7945 DEFSYM (Qcoding_system, "coding-system");
7946 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 7947
df7492f9
KH
7948 DEFSYM (Qeol_type, "eol-type");
7949 DEFSYM (Qunix, "unix");
7950 DEFSYM (Qdos, "dos");
4ed46869 7951
df7492f9
KH
7952 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7953 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7954 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7955 DEFSYM (Qdefault_char, "default-char");
7956 DEFSYM (Qundecided, "undecided");
7957 DEFSYM (Qno_conversion, "no-conversion");
7958 DEFSYM (Qraw_text, "raw-text");
4ed46869 7959
df7492f9 7960 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 7961
df7492f9 7962 DEFSYM (Qutf_8, "utf-8");
27901516 7963
df7492f9
KH
7964 DEFSYM (Qutf_16, "utf-16");
7965 DEFSYM (Qutf_16_be, "utf-16-be");
7966 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7967 DEFSYM (Qutf_16_le, "utf-16-l3");
7968 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7969 DEFSYM (Qsignature, "signature");
7970 DEFSYM (Qendian, "endian");
7971 DEFSYM (Qbig, "big");
7972 DEFSYM (Qlittle, "little");
27901516 7973
df7492f9
KH
7974 DEFSYM (Qshift_jis, "shift-jis");
7975 DEFSYM (Qbig5, "big5");
4ed46869 7976
df7492f9 7977 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 7978
df7492f9 7979 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
7980 Fput (Qcoding_system_error, Qerror_conditions,
7981 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7982 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 7983 build_string ("Invalid coding system"));
4ed46869 7984
df7492f9
KH
7985 /* Intern this now in case it isn't already done.
7986 Setting this variable twice is harmless.
7987 But don't staticpro it here--that is done in alloc.c. */
7988 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 7989
df7492f9 7990 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 7991 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
7992 DEFSYM (Qtranslation_table_id, "translation-table-id");
7993 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7994 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 7995
df7492f9 7996 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 7997
df7492f9 7998 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 7999
df7492f9
KH
8000 Vcoding_category_table
8001 = Fmake_vector (make_number (coding_category_max), Qnil);
8002 staticpro (&Vcoding_category_table);
8003 /* Followings are target of code detection. */
8004 ASET (Vcoding_category_table, coding_category_iso_7,
8005 intern ("coding-category-iso-7"));
8006 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8007 intern ("coding-category-iso-7-tight"));
8008 ASET (Vcoding_category_table, coding_category_iso_8_1,
8009 intern ("coding-category-iso-8-1"));
8010 ASET (Vcoding_category_table, coding_category_iso_8_2,
8011 intern ("coding-category-iso-8-2"));
8012 ASET (Vcoding_category_table, coding_category_iso_7_else,
8013 intern ("coding-category-iso-7-else"));
8014 ASET (Vcoding_category_table, coding_category_iso_8_else,
8015 intern ("coding-category-iso-8-else"));
8016 ASET (Vcoding_category_table, coding_category_utf_8,
8017 intern ("coding-category-utf-8"));
8018 ASET (Vcoding_category_table, coding_category_utf_16_be,
8019 intern ("coding-category-utf-16-be"));
8020 ASET (Vcoding_category_table, coding_category_utf_16_le,
8021 intern ("coding-category-utf-16-le"));
8022 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8023 intern ("coding-category-utf-16-be-nosig"));
8024 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8025 intern ("coding-category-utf-16-le-nosig"));
8026 ASET (Vcoding_category_table, coding_category_charset,
8027 intern ("coding-category-charset"));
8028 ASET (Vcoding_category_table, coding_category_sjis,
8029 intern ("coding-category-sjis"));
8030 ASET (Vcoding_category_table, coding_category_big5,
8031 intern ("coding-category-big5"));
8032 ASET (Vcoding_category_table, coding_category_ccl,
8033 intern ("coding-category-ccl"));
8034 ASET (Vcoding_category_table, coding_category_emacs_mule,
8035 intern ("coding-category-emacs-mule"));
8036 /* Followings are NOT target of code detection. */
8037 ASET (Vcoding_category_table, coding_category_raw_text,
8038 intern ("coding-category-raw-text"));
8039 ASET (Vcoding_category_table, coding_category_undecided,
8040 intern ("coding-category-undecided"));
70c22245 8041
4ed46869
KH
8042 defsubr (&Scoding_system_p);
8043 defsubr (&Sread_coding_system);
8044 defsubr (&Sread_non_nil_coding_system);
8045 defsubr (&Scheck_coding_system);
8046 defsubr (&Sdetect_coding_region);
d46c5b12 8047 defsubr (&Sdetect_coding_string);
05e6f5dc 8048 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8049 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8050 defsubr (&Sdecode_coding_region);
8051 defsubr (&Sencode_coding_region);
8052 defsubr (&Sdecode_coding_string);
8053 defsubr (&Sencode_coding_string);
8054 defsubr (&Sdecode_sjis_char);
8055 defsubr (&Sencode_sjis_char);
8056 defsubr (&Sdecode_big5_char);
8057 defsubr (&Sencode_big5_char);
1ba9e4ab 8058 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8059 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8060 defsubr (&Sterminal_coding_system);
1ba9e4ab 8061 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8062 defsubr (&Skeyboard_coding_system);
a5d301df 8063 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8064 defsubr (&Sset_coding_system_priority);
8065 defsubr (&Sdefine_coding_system_internal);
8066 defsubr (&Sdefine_coding_system_alias);
8067 defsubr (&Scoding_system_base);
8068 defsubr (&Scoding_system_plist);
8069 defsubr (&Scoding_system_aliases);
8070 defsubr (&Scoding_system_eol_type);
8071 defsubr (&Scoding_system_priority_list);
4ed46869 8072
4608c386 8073 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8074 doc: /* List of coding systems.
8075
8076Do not alter the value of this variable manually. This variable should be
df7492f9 8077updated by the functions `define-coding-system' and
48b0f3ae 8078`define-coding-system-alias'. */);
4608c386
KH
8079 Vcoding_system_list = Qnil;
8080
8081 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8082 doc: /* Alist of coding system names.
8083Each element is one element list of coding system name.
8084This variable is given to `completing-read' as TABLE argument.
8085
8086Do not alter the value of this variable manually. This variable should be
8087updated by the functions `make-coding-system' and
8088`define-coding-system-alias'. */);
4608c386
KH
8089 Vcoding_system_alist = Qnil;
8090
4ed46869 8091 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8092 doc: /* List of coding-categories (symbols) ordered by priority.
8093
8094On detecting a coding system, Emacs tries code detection algorithms
8095associated with each coding-category one by one in this order. When
8096one algorithm agrees with a byte sequence of source text, the coding
8097system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8098 {
8099 int i;
8100
8101 Vcoding_category_list = Qnil;
df7492f9 8102 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8103 Vcoding_category_list
d46c5b12
KH
8104 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8105 Vcoding_category_list);
4ed46869
KH
8106 }
8107
8108 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8109 doc: /* Specify the coding system for read operations.
8110It is useful to bind this variable with `let', but do not set it globally.
8111If the value is a coding system, it is used for decoding on read operation.
8112If not, an appropriate element is used from one of the coding system alists:
8113There are three such tables, `file-coding-system-alist',
8114`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8115 Vcoding_system_for_read = Qnil;
8116
8117 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8118 doc: /* Specify the coding system for write operations.
8119Programs bind this variable with `let', but you should not set it globally.
8120If the value is a coding system, it is used for encoding of output,
8121when writing it to a file and when sending it to a file or subprocess.
8122
8123If this does not specify a coding system, an appropriate element
8124is used from one of the coding system alists:
8125There are three such tables, `file-coding-system-alist',
8126`process-coding-system-alist', and `network-coding-system-alist'.
8127For output to files, if the above procedure does not specify a coding system,
8128the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8129 Vcoding_system_for_write = Qnil;
8130
8131 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8132 doc: /*
8133Coding system used in the latest file or process I/O. */);
4ed46869
KH
8134 Vlast_coding_system_used = Qnil;
8135
9ce27fde 8136 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8137 doc: /*
8138*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8139See info node `Coding Systems' and info node `Text and Binary' concerning
8140such conversion. */);
9ce27fde
KH
8141 inhibit_eol_conversion = 0;
8142
ed29121d 8143 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8144 doc: /*
8145Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8146Bind it to t if the process output is to be treated as if it were a file
8147read from some filesystem. */);
ed29121d
EZ
8148 inherit_process_coding_system = 0;
8149
02ba4723 8150 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8151 doc: /*
8152Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8153The format is ((PATTERN . VAL) ...),
8154where PATTERN is a regular expression matching a file name,
8155VAL is a coding system, a cons of coding systems, or a function symbol.
8156If VAL is a coding system, it is used for both decoding and encoding
8157the file contents.
8158If VAL is a cons of coding systems, the car part is used for decoding,
8159and the cdr part is used for encoding.
8160If VAL is a function symbol, the function must return a coding system
0192762c
DL
8161or a cons of coding systems which are used as above. The function gets
8162the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8163
8164See also the function `find-operation-coding-system'
8165and the variable `auto-coding-alist'. */);
02ba4723
KH
8166 Vfile_coding_system_alist = Qnil;
8167
8168 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8169 doc: /*
8170Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8171The format is ((PATTERN . VAL) ...),
8172where PATTERN is a regular expression matching a program name,
8173VAL is a coding system, a cons of coding systems, or a function symbol.
8174If VAL is a coding system, it is used for both decoding what received
8175from the program and encoding what sent to the program.
8176If VAL is a cons of coding systems, the car part is used for decoding,
8177and the cdr part is used for encoding.
8178If VAL is a function symbol, the function must return a coding system
8179or a cons of coding systems which are used as above.
8180
8181See also the function `find-operation-coding-system'. */);
02ba4723
KH
8182 Vprocess_coding_system_alist = Qnil;
8183
8184 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8185 doc: /*
8186Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8187The format is ((PATTERN . VAL) ...),
8188where PATTERN is a regular expression matching a network service name
8189or is a port number to connect to,
8190VAL is a coding system, a cons of coding systems, or a function symbol.
8191If VAL is a coding system, it is used for both decoding what received
8192from the network stream and encoding what sent to the network stream.
8193If VAL is a cons of coding systems, the car part is used for decoding,
8194and the cdr part is used for encoding.
8195If VAL is a function symbol, the function must return a coding system
8196or a cons of coding systems which are used as above.
8197
8198See also the function `find-operation-coding-system'. */);
02ba4723 8199 Vnetwork_coding_system_alist = Qnil;
4ed46869 8200
68c45bf0 8201 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8202 doc: /* Coding system to use with system messages.
8203Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8204 Vlocale_coding_system = Qnil;
8205
005f0d35 8206 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8207 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8208 doc: /*
8209*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8210 eol_mnemonic_unix = build_string (":");
4ed46869 8211
7722baf9 8212 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8213 doc: /*
8214*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8215 eol_mnemonic_dos = build_string ("\\");
4ed46869 8216
7722baf9 8217 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8218 doc: /*
8219*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8220 eol_mnemonic_mac = build_string ("/");
4ed46869 8221
7722baf9 8222 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8223 doc: /*
8224*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8225 eol_mnemonic_undecided = build_string (":");
4ed46869 8226
84fbb8a0 8227 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8228 doc: /*
8229*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8230 Venable_character_translation = Qt;
bdd9fb48 8231
f967223b 8232 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8233 &Vstandard_translation_table_for_decode,
8234 doc: /* Table for translating characters while decoding. */);
f967223b 8235 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8236
f967223b 8237 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8238 &Vstandard_translation_table_for_encode,
8239 doc: /* Table for translating characters while encoding. */);
f967223b 8240 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8241
df7492f9 8242 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8243 doc: /* Alist of charsets vs revision numbers.
8244While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8245designate it with the escape sequence identifying revision (cdr part
8246of the element). */);
8247 Vcharset_revision_table = Qnil;
02ba4723
KH
8248
8249 DEFVAR_LISP ("default-process-coding-system",
8250 &Vdefault_process_coding_system,
48b0f3ae
PJ
8251 doc: /* Cons of coding systems used for process I/O by default.
8252The car part is used for decoding a process output,
8253the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8254 Vdefault_process_coding_system = Qnil;
c4825358 8255
3f003981 8256 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8257 doc: /*
8258Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8259This is a vector of length 256.
8260If Nth element is non-nil, the existence of code N in a file
8261\(or output of subprocess) doesn't prevent it to be detected as
8262a coding system of ISO 2022 variant which has a flag
8263`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8264or reading output of a subprocess.
8265Only 128th through 159th elements has a meaning. */);
3f003981 8266 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8267
8268 DEFVAR_LISP ("select-safe-coding-system-function",
8269 &Vselect_safe_coding_system_function,
df7492f9
KH
8270 doc: /*
8271Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8272
8273If set, this function is called to force a user to select a proper
8274coding system which can encode the text in the case that a default
8275coding system used in each operation can't encode the text.
8276
8277The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8278 Vselect_safe_coding_system_function = Qnil;
8279
22ab2303 8280 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8281 &inhibit_iso_escape_detection,
df7492f9
KH
8282 doc: /*
8283If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8284
8285By default, on reading a file, Emacs tries to detect how the text is
8286encoded. This code detection is sensitive to escape sequences. If
8287the sequence is valid as ISO2022, the code is determined as one of
8288the ISO2022 encodings, and the file is decoded by the corresponding
8289coding system (e.g. `iso-2022-7bit').
8290
8291However, there may be a case that you want to read escape sequences in
8292a file as is. In such a case, you can set this variable to non-nil.
8293Then, as the code detection ignores any escape sequences, no file is
8294detected as encoded in some ISO2022 encoding. The result is that all
8295escape sequences become visible in a buffer.
8296
8297The default value is nil, and it is strongly recommended not to change
8298it. That is because many Emacs Lisp source files that contain
8299non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8300in Emacs's distribution, and they won't be decoded correctly on
8301reading if you suppress escape sequence detection.
8302
8303The other way to read escape sequences in a file without decoding is
8304to explicitly specify some coding system that doesn't use ISO2022's
8305escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8306 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8307
8308 {
8309 Lisp_Object args[coding_arg_max];
8310 Lisp_Object plist[14];
8311 int i;
8312
8313 for (i = 0; i < coding_arg_max; i++)
8314 args[i] = Qnil;
8315
8316 plist[0] = intern (":name");
8317 plist[1] = args[coding_arg_name] = Qno_conversion;
8318 plist[2] = intern (":mnemonic");
8319 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8320 plist[4] = intern (":coding-type");
8321 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8322 plist[6] = intern (":ascii-compatible-p");
8323 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8324 plist[8] = intern (":default-char");
8325 plist[9] = args[coding_arg_default_char] = make_number (0);
8326 plist[10] = intern (":docstring");
8327 plist[11] = build_string ("Do no conversion.\n\
8328\n\
8329When you visit a file with this coding, the file is read into a\n\
8330unibyte buffer as is, thus each byte of a file is treated as a\n\
8331character.");
8332 plist[12] = intern (":eol-type");
8333 plist[13] = args[coding_arg_eol_type] = Qunix;
8334 args[coding_arg_plist] = Flist (14, plist);
8335 Fdefine_coding_system_internal (coding_arg_max, args);
8336 }
8337
8338 setup_coding_system (Qno_conversion, &keyboard_coding);
8339 setup_coding_system (Qno_conversion, &terminal_coding);
8340 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8341}
8342
68c45bf0
PE
8343char *
8344emacs_strerror (error_number)
8345 int error_number;
8346{
8347 char *str;
8348
ca9c0567 8349 synchronize_system_messages_locale ();
68c45bf0
PE
8350 str = strerror (error_number);
8351
8352 if (! NILP (Vlocale_coding_system))
8353 {
8354 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8355 Vlocale_coding_system,
8356 0);
8357 str = (char *) XSTRING (dec)->data;
8358 }
8359
8360 return str;
8361}
8362
4ed46869 8363#endif /* emacs */