(Finsert_byte): Return a proper value.
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcharset_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
5a936b46 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9
KH
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9
KH
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
4ed46869 162{
df7492f9
KH
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
4ed46869
KH
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
df7492f9
KH
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
d46c5b12 197
df7492f9
KH
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
d46c5b12 202
df7492f9 203 Below is the template of these functions. */
d46c5b12 204
4ed46869 205#if 0
b73bfc1c 206static void
df7492f9 207decode_coding_XXXX (coding)
4ed46869 208 struct coding_system *coding;
4ed46869 209{
df7492f9
KH
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
df7492f9
KH
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
d46c5b12 252
df7492f9
KH
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 257
df7492f9
KH
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
d46c5b12 261
df7492f9 262 Below is a template of these functions. */
4ed46869 263#if 0
b73bfc1c 264static void
df7492f9 265encode_coding_XXX (coding)
4ed46869 266 struct coding_system *coding;
4ed46869 267{
df7492f9
KH
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
4ed46869
KH
285}
286#endif
287
4ed46869
KH
288\f
289/*** 1. Preamble ***/
290
68c45bf0 291#include <config.h>
4ed46869
KH
292#include <stdio.h>
293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
302
df7492f9 303Lisp_Object Vcoding_system_hash_table;
4ed46869 304
df7492f9 305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
306Lisp_Object Qunix, Qdos;
307extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
308Lisp_Object Qbuffer_file_coding_system;
309Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 310Lisp_Object Qdefault_char;
27901516 311Lisp_Object Qno_conversion, Qundecided;
df7492f9
KH
312Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
313Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
314Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 315Lisp_Object Qcoding_system_history;
1397dc18 316Lisp_Object Qvalid_codes;
4ed46869
KH
317
318extern Lisp_Object Qinsert_file_contents, Qwrite_region;
319Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
320Lisp_Object Qstart_process, Qopen_network_stream;
321Lisp_Object Qtarget_idx;
322
d46c5b12
KH
323Lisp_Object Vselect_safe_coding_system_function;
324
7722baf9
EZ
325/* Mnemonic string for each format of end-of-line. */
326Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
327/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 328 decided. */
7722baf9 329Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
330
331#ifdef emacs
332
4608c386
KH
333Lisp_Object Vcoding_system_list, Vcoding_system_alist;
334
335Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 336
d46c5b12
KH
337/* Coding system emacs-mule and raw-text are for converting only
338 end-of-line format. */
339Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 340
4ed46869
KH
341/* Coding-systems are handed between Emacs Lisp programs and C internal
342 routines by the following three variables. */
343/* Coding-system for reading files and receiving data from process. */
344Lisp_Object Vcoding_system_for_read;
345/* Coding-system for writing files and sending data to process. */
346Lisp_Object Vcoding_system_for_write;
347/* Coding-system actually used in the latest I/O. */
348Lisp_Object Vlast_coding_system_used;
349
c4825358 350/* A vector of length 256 which contains information about special
94487c4e 351 Latin codes (especially for dealing with Microsoft codes). */
3f003981 352Lisp_Object Vlatin_extra_code_table;
c4825358 353
9ce27fde
KH
354/* Flag to inhibit code conversion of end-of-line format. */
355int inhibit_eol_conversion;
356
74383408
KH
357/* Flag to inhibit ISO2022 escape sequence detection. */
358int inhibit_iso_escape_detection;
359
ed29121d
EZ
360/* Flag to make buffer-file-coding-system inherit from process-coding. */
361int inherit_process_coding_system;
362
c4825358 363/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
364struct coding_system terminal_coding;
365
c4825358
KH
366/* Coding system to be used to encode text for terminal display when
367 terminal coding system is nil. */
368struct coding_system safe_terminal_coding;
369
370/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
371struct coding_system keyboard_coding;
372
02ba4723
KH
373Lisp_Object Vfile_coding_system_alist;
374Lisp_Object Vprocess_coding_system_alist;
375Lisp_Object Vnetwork_coding_system_alist;
4ed46869 376
68c45bf0
PE
377Lisp_Object Vlocale_coding_system;
378
4ed46869
KH
379#endif /* emacs */
380
f967223b
KH
381/* Flag to tell if we look up translation table on character code
382 conversion. */
84fbb8a0 383Lisp_Object Venable_character_translation;
f967223b
KH
384/* Standard translation table to look up on decoding (reading). */
385Lisp_Object Vstandard_translation_table_for_decode;
386/* Standard translation table to look up on encoding (writing). */
387Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 388
f967223b
KH
389Lisp_Object Qtranslation_table;
390Lisp_Object Qtranslation_table_id;
391Lisp_Object Qtranslation_table_for_decode;
392Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
393
394/* Alist of charsets vs revision number. */
df7492f9 395static Lisp_Object Vcharset_revision_table;
4ed46869 396
02ba4723
KH
397/* Default coding systems used for process I/O. */
398Lisp_Object Vdefault_process_coding_system;
399
b843d1ae
KH
400/* Global flag to tell that we can't call post-read-conversion and
401 pre-write-conversion functions. Usually the value is zero, but it
402 is set to 1 temporarily while such functions are running. This is
403 to avoid infinite recursive call. */
404static int inhibit_pre_post_conversion;
405
df7492f9
KH
406/* Two special coding systems. */
407Lisp_Object Vsjis_coding_system;
408Lisp_Object Vbig5_coding_system;
409
410
411static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
412static void decode_coding_utf_8 P_ ((struct coding_system *));
413static int encode_coding_utf_8 P_ ((struct coding_system *));
414
415static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
419static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
423static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
424static void decode_coding_emacs_mule P_ ((struct coding_system *));
425static int encode_coding_emacs_mule P_ ((struct coding_system *));
426
427static int detect_coding_sjis P_ ((struct coding_system *, int *));
428static void decode_coding_sjis P_ ((struct coding_system *));
429static int encode_coding_sjis P_ ((struct coding_system *));
430
431static int detect_coding_big5 P_ ((struct coding_system *, int *));
432static void decode_coding_big5 P_ ((struct coding_system *));
433static int encode_coding_big5 P_ ((struct coding_system *));
434
435static int detect_coding_ccl P_ ((struct coding_system *, int *));
436static void decode_coding_ccl P_ ((struct coding_system *));
437static int encode_coding_ccl P_ ((struct coding_system *));
438
439static void decode_coding_raw_text P_ ((struct coding_system *));
440static int encode_coding_raw_text P_ ((struct coding_system *));
441
442
443/* ISO2022 section */
444
445#define CODING_ISO_INITIAL(coding, reg) \
446 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
447 coding_attr_iso_initial), \
448 reg)))
449
450
451#define CODING_ISO_REQUEST(coding, charset_id) \
452 ((charset_id <= (coding)->max_charset_id \
453 ? (coding)->safe_charsets[charset_id] \
454 : -1))
455
456
457#define CODING_ISO_FLAGS(coding) \
458 ((coding)->spec.iso_2022.flags)
459#define CODING_ISO_DESIGNATION(coding, reg) \
460 ((coding)->spec.iso_2022.current_designation[reg])
461#define CODING_ISO_INVOCATION(coding, plane) \
462 ((coding)->spec.iso_2022.current_invocation[plane])
463#define CODING_ISO_SINGLE_SHIFTING(coding) \
464 ((coding)->spec.iso_2022.single_shifting)
465#define CODING_ISO_BOL(coding) \
466 ((coding)->spec.iso_2022.bol)
467#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
468 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
469
470/* Control characters of ISO2022. */
471 /* code */ /* function */
472#define ISO_CODE_LF 0x0A /* line-feed */
473#define ISO_CODE_CR 0x0D /* carriage-return */
474#define ISO_CODE_SO 0x0E /* shift-out */
475#define ISO_CODE_SI 0x0F /* shift-in */
476#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
477#define ISO_CODE_ESC 0x1B /* escape */
478#define ISO_CODE_SS2 0x8E /* single-shift-2 */
479#define ISO_CODE_SS3 0x8F /* single-shift-3 */
480#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
481
482/* All code (1-byte) of ISO2022 is classified into one of the
483 followings. */
484enum iso_code_class_type
485 {
486 ISO_control_0, /* Control codes in the range
487 0x00..0x1F and 0x7F, except for the
488 following 5 codes. */
489 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
490 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
491 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
492 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
493 ISO_escape, /* ISO_CODE_SO (0x1B) */
494 ISO_control_1, /* Control codes in the range
495 0x80..0x9F, except for the
496 following 3 codes. */
497 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
498 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
499 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
500 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
501 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
502 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
503 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
504 };
05e6f5dc 505
df7492f9
KH
506/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
507 `iso-flags' attribute of an iso2022 coding system. */
93dec019 508
df7492f9
KH
509/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
510 instead of the correct short-form sequence (e.g. ESC $ A). */
511#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 512
df7492f9
KH
513/* If set, reset graphic planes and registers at end-of-line to the
514 initial state. */
515#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 516
df7492f9
KH
517/* If set, reset graphic planes and registers before any control
518 characters to the initial state. */
519#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 520
df7492f9
KH
521/* If set, encode by 7-bit environment. */
522#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 523
df7492f9
KH
524/* If set, use locking-shift function. */
525#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 526
df7492f9
KH
527/* If set, use single-shift function. Overwrite
528 CODING_ISO_FLAG_LOCKING_SHIFT. */
529#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 530
df7492f9
KH
531/* If set, use designation escape sequence. */
532#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 533
df7492f9
KH
534/* If set, produce revision number sequence. */
535#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 536
df7492f9
KH
537/* If set, produce ISO6429's direction specifying sequence. */
538#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 539
df7492f9
KH
540/* If set, assume designation states are reset at beginning of line on
541 output. */
542#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 543
df7492f9
KH
544/* If set, designation sequence should be placed at beginning of line
545 on output. */
546#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 547
df7492f9
KH
548/* If set, do not encode unsafe charactes on output. */
549#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 550
df7492f9
KH
551/* If set, extra latin codes (128..159) are accepted as a valid code
552 on input. */
553#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 554
df7492f9 555#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 556
df7492f9 557#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 558
bf16eb23
KH
559#define CODING_ISO_FLAG_USE_ROMAN 0x8000
560
561#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
562
563#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 564
df7492f9
KH
565/* A character to be produced on output if encoding of the original
566 character is prohibited by CODING_ISO_FLAG_SAFE. */
567#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 568
aa72b389 569
df7492f9
KH
570/* UTF-16 section */
571#define CODING_UTF_16_BOM(coding) \
572 ((coding)->spec.utf_16.bom)
4ed46869 573
df7492f9
KH
574#define CODING_UTF_16_ENDIAN(coding) \
575 ((coding)->spec.utf_16.endian)
4ed46869 576
df7492f9
KH
577#define CODING_UTF_16_SURROGATE(coding) \
578 ((coding)->spec.utf_16.surrogate)
4ed46869 579
4ed46869 580
df7492f9
KH
581/* CCL section */
582#define CODING_CCL_DECODER(coding) \
583 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
584#define CODING_CCL_ENCODER(coding) \
585 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
586#define CODING_CCL_VALIDS(coding) \
587 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
588 ->data)
4ed46869 589
5a936b46 590/* Index for each coding category in `coding_categories' */
4ed46869 591
df7492f9
KH
592enum coding_category
593 {
594 coding_category_iso_7,
595 coding_category_iso_7_tight,
596 coding_category_iso_8_1,
597 coding_category_iso_8_2,
598 coding_category_iso_7_else,
599 coding_category_iso_8_else,
600 coding_category_utf_8,
601 coding_category_utf_16_auto,
602 coding_category_utf_16_be,
603 coding_category_utf_16_le,
604 coding_category_utf_16_be_nosig,
605 coding_category_utf_16_le_nosig,
606 coding_category_charset,
607 coding_category_sjis,
608 coding_category_big5,
609 coding_category_ccl,
610 coding_category_emacs_mule,
611 /* All above are targets of code detection. */
612 coding_category_raw_text,
613 coding_category_undecided,
614 coding_category_max
615 };
616
617/* Definitions of flag bits used in detect_coding_XXXX. */
618#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
619#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
620#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
621#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
622#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
623#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
624#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
625#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
626#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
627#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
628#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
629#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
630#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
631#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
632#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
633#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
634
635/* This value is returned if detect_coding_mask () find nothing other
636 than ASCII characters. */
637#define CATEGORY_MASK_ANY \
638 (CATEGORY_MASK_ISO_7 \
639 | CATEGORY_MASK_ISO_7_TIGHT \
640 | CATEGORY_MASK_ISO_8_1 \
641 | CATEGORY_MASK_ISO_8_2 \
642 | CATEGORY_MASK_ISO_7_ELSE \
643 | CATEGORY_MASK_ISO_8_ELSE \
644 | CATEGORY_MASK_UTF_8 \
645 | CATEGORY_MASK_UTF_16_BE \
646 | CATEGORY_MASK_UTF_16_LE \
647 | CATEGORY_MASK_UTF_16_BE_NOSIG \
648 | CATEGORY_MASK_UTF_16_LE_NOSIG \
649 | CATEGORY_MASK_CHARSET \
650 | CATEGORY_MASK_SJIS \
651 | CATEGORY_MASK_BIG5 \
652 | CATEGORY_MASK_CCL \
653 | CATEGORY_MASK_EMACS_MULE)
654
655
656#define CATEGORY_MASK_ISO_7BIT \
657 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
658
659#define CATEGORY_MASK_ISO_8BIT \
660 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
661
662#define CATEGORY_MASK_ISO_ELSE \
663 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
664
665#define CATEGORY_MASK_ISO_ESCAPE \
666 (CATEGORY_MASK_ISO_7 \
667 | CATEGORY_MASK_ISO_7_TIGHT \
668 | CATEGORY_MASK_ISO_7_ELSE \
669 | CATEGORY_MASK_ISO_8_ELSE)
670
671#define CATEGORY_MASK_ISO \
672 ( CATEGORY_MASK_ISO_7BIT \
673 | CATEGORY_MASK_ISO_8BIT \
674 | CATEGORY_MASK_ISO_ELSE)
675
676#define CATEGORY_MASK_UTF_16 \
677 (CATEGORY_MASK_UTF_16_BE \
678 | CATEGORY_MASK_UTF_16_LE \
679 | CATEGORY_MASK_UTF_16_BE_NOSIG \
680 | CATEGORY_MASK_UTF_16_LE_NOSIG)
681
682
683/* List of symbols `coding-category-xxx' ordered by priority. This
684 variable is exposed to Emacs Lisp. */
685static Lisp_Object Vcoding_category_list;
686
687/* Table of coding categories (Lisp symbols). This variable is for
688 internal use oly. */
689static Lisp_Object Vcoding_category_table;
690
691/* Table of coding-categories ordered by priority. */
692static enum coding_category coding_priorities[coding_category_max];
693
694/* Nth element is a coding context for the coding system bound to the
695 Nth coding category. */
696static struct coding_system coding_categories[coding_category_max];
697
698static int detected_mask[coding_category_raw_text] =
699 { CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_ISO,
702 CATEGORY_MASK_ISO,
703 CATEGORY_MASK_ISO,
704 CATEGORY_MASK_ISO,
705 CATEGORY_MASK_UTF_8,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_UTF_16,
708 CATEGORY_MASK_UTF_16,
709 CATEGORY_MASK_UTF_16,
710 CATEGORY_MASK_UTF_16,
711 CATEGORY_MASK_CHARSET,
712 CATEGORY_MASK_SJIS,
713 CATEGORY_MASK_BIG5,
714 CATEGORY_MASK_CCL,
715 CATEGORY_MASK_EMACS_MULE
716 };
717
718/*** Commonly used macros and functions ***/
719
720#ifndef min
721#define min(a, b) ((a) < (b) ? (a) : (b))
722#endif
723#ifndef max
724#define max(a, b) ((a) > (b) ? (a) : (b))
725#endif
4ed46869 726
df7492f9
KH
727#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
728 do { \
729 attrs = CODING_ID_ATTRS (coding->id); \
730 eol_type = CODING_ID_EOL_TYPE (coding->id); \
731 if (VECTORP (eol_type)) \
732 eol_type = Qunix; \
733 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
734 } while (0)
4ed46869 735
4ed46869 736
df7492f9
KH
737/* Safely get one byte from the source text pointed by SRC which ends
738 at SRC_END, and set C to that byte. If there are not enough bytes
739 in the source, it jumps to `no_more_source'. The caller
740 should declare and set these variables appropriately in advance:
741 src, src_end, multibytep
742*/
aa72b389 743
df7492f9 744#define ONE_MORE_BYTE(c) \
aa72b389 745 do { \
df7492f9
KH
746 if (src == src_end) \
747 { \
748 if (src_base < src) \
749 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
750 goto no_more_source; \
751 } \
752 c = *src++; \
753 if (multibytep && (c & 0x80)) \
754 { \
755 if ((c & 0xFE) != 0xC0) \
756 error ("Undecodable char found"); \
757 c = ((c & 1) << 6) | *src++; \
758 } \
759 consumed_chars++; \
aa72b389
KH
760 } while (0)
761
aa72b389 762
df7492f9
KH
763#define ONE_MORE_BYTE_NO_CHECK(c) \
764 do { \
765 c = *src++; \
766 if (multibytep && (c & 0x80)) \
767 { \
768 if ((c & 0xFE) != 0xC0) \
769 error ("Undecodable char found"); \
770 c = ((c & 1) << 6) | *src++; \
771 } \
781d7a48 772 consumed_chars++; \
aa72b389
KH
773 } while (0)
774
aa72b389 775
df7492f9
KH
776/* Store a byte C in the place pointed by DST and increment DST to the
777 next free point, and increment PRODUCED_CHARS. The caller should
778 assure that C is 0..127, and declare and set the variable `dst'
779 appropriately in advance.
780*/
aa72b389
KH
781
782
df7492f9
KH
783#define EMIT_ONE_ASCII_BYTE(c) \
784 do { \
785 produced_chars++; \
786 *dst++ = (c); \
787 } while (0)
aa72b389 788
aa72b389 789
df7492f9 790/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
aa72b389 791
df7492f9
KH
792#define EMIT_TWO_ASCII_BYTES(c1, c2) \
793 do { \
794 produced_chars += 2; \
795 *dst++ = (c1), *dst++ = (c2); \
796 } while (0)
aa72b389 797
df7492f9
KH
798
799/* Store a byte C in the place pointed by DST and increment DST to the
800 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
801 nonzero, store in an appropriate multibyte from. The caller should
802 declare and set the variables `dst' and `multibytep' appropriately
803 in advance. */
804
805#define EMIT_ONE_BYTE(c) \
806 do { \
807 produced_chars++; \
808 if (multibytep) \
809 { \
810 int ch = (c); \
811 if (ch >= 0x80) \
812 ch = BYTE8_TO_CHAR (ch); \
813 CHAR_STRING_ADVANCE (ch, dst); \
814 } \
815 else \
816 *dst++ = (c); \
aa72b389
KH
817 } while (0)
818
819
df7492f9 820/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 821
e19c3639
KH
822#define EMIT_TWO_BYTES(c1, c2) \
823 do { \
824 produced_chars += 2; \
825 if (multibytep) \
826 { \
827 int ch; \
828 \
829 ch = (c1); \
830 if (ch >= 0x80) \
831 ch = BYTE8_TO_CHAR (ch); \
832 CHAR_STRING_ADVANCE (ch, dst); \
833 ch = (c2); \
834 if (ch >= 0x80) \
835 ch = BYTE8_TO_CHAR (ch); \
836 CHAR_STRING_ADVANCE (ch, dst); \
837 } \
838 else \
839 { \
840 *dst++ = (c1); \
841 *dst++ = (c2); \
842 } \
aa72b389
KH
843 } while (0)
844
845
df7492f9
KH
846#define EMIT_THREE_BYTES(c1, c2, c3) \
847 do { \
848 EMIT_ONE_BYTE (c1); \
849 EMIT_TWO_BYTES (c2, c3); \
850 } while (0)
aa72b389 851
aa72b389 852
df7492f9
KH
853#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
854 do { \
855 EMIT_TWO_BYTES (c1, c2); \
856 EMIT_TWO_BYTES (c3, c4); \
857 } while (0)
aa72b389 858
aa72b389 859
df7492f9
KH
860#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
861 do { \
862 charset_map_loaded = 0; \
863 c = DECODE_CHAR (charset, code); \
864 if (charset_map_loaded) \
865 { \
866 unsigned char *orig = coding->source; \
867 EMACS_INT offset; \
868 \
869 coding_set_source (coding); \
870 offset = coding->source - orig; \
871 src += offset; \
872 src_base += offset; \
873 src_end += offset; \
874 } \
875 } while (0)
aa72b389 876
aa72b389 877
df7492f9
KH
878#define ASSURE_DESTINATION(bytes) \
879 do { \
880 if (dst + (bytes) >= dst_end) \
881 { \
882 int more_bytes = charbuf_end - charbuf + (bytes); \
883 \
884 dst = alloc_destination (coding, more_bytes, dst); \
885 dst_end = coding->destination + coding->dst_bytes; \
886 } \
887 } while (0)
b1887814 888
df7492f9
KH
889
890
891static void
892coding_set_source (coding)
893 struct coding_system *coding;
894{
895 if (BUFFERP (coding->src_object))
896 {
897 if (coding->src_pos < 0)
898 coding->source = GAP_END_ADDR + coding->src_pos_byte;
899 else
900 {
e19c3639 901 struct buffer *buf = XBUFFER (coding->src_object);
e19c3639
KH
902 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
903 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
904
905 coding->source = beg_addr + coding->src_pos_byte - 1;
906 if (coding->src_pos_byte >= gpt_byte)
907 coding->source += BUF_GAP_SIZE (buf);
aa72b389
KH
908 }
909 }
df7492f9 910 else if (STRINGP (coding->src_object))
aa72b389 911 {
df7492f9
KH
912 coding->source = (XSTRING (coding->src_object)->data
913 + coding->src_pos_byte);
914 }
915 else
916 /* Otherwise, the source is C string and is never relocated
917 automatically. Thus we don't have to update anything. */
918 ;
919}
920
921static void
922coding_set_destination (coding)
923 struct coding_system *coding;
924{
925 if (BUFFERP (coding->dst_object))
926 {
df7492f9 927 if (coding->src_pos < 0)
28f67a95
KH
928 {
929 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
930 coding->dst_bytes = (GAP_END_ADDR
931 - (coding->src_bytes - coding->consumed)
932 - coding->destination);
933 }
df7492f9 934 else
28f67a95
KH
935 {
936 /* We are sure that coding->dst_pos_byte is before the gap
937 of the buffer. */
938 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
939 + coding->dst_pos_byte - 1);
940 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
941 - coding->destination);
942 }
df7492f9
KH
943 }
944 else
945 /* Otherwise, the destination is C string and is never relocated
946 automatically. Thus we don't have to update anything. */
947 ;
948}
949
950
951static void
952coding_alloc_by_realloc (coding, bytes)
953 struct coding_system *coding;
954 EMACS_INT bytes;
955{
956 coding->destination = (unsigned char *) xrealloc (coding->destination,
957 coding->dst_bytes + bytes);
958 coding->dst_bytes += bytes;
959}
960
961static void
962coding_alloc_by_making_gap (coding, bytes)
963 struct coding_system *coding;
964 EMACS_INT bytes;
965{
2c78b7e1
KH
966 if (BUFFERP (coding->dst_object)
967 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
968 {
969 EMACS_INT add = coding->src_bytes - coding->consumed;
970
971 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
972 make_gap (bytes);
973 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
974 }
975 else
976 {
2c78b7e1
KH
977 Lisp_Object this_buffer;
978
979 this_buffer = Fcurrent_buffer ();
df7492f9
KH
980 set_buffer_internal (XBUFFER (coding->dst_object));
981 make_gap (bytes);
982 set_buffer_internal (XBUFFER (this_buffer));
983 }
984}
985
986
987static unsigned char *
988alloc_destination (coding, nbytes, dst)
989 struct coding_system *coding;
990 int nbytes;
991 unsigned char *dst;
992{
993 EMACS_INT offset = dst - coding->destination;
994
995 if (BUFFERP (coding->dst_object))
996 coding_alloc_by_making_gap (coding, nbytes);
997 else
998 coding_alloc_by_realloc (coding, nbytes);
999 coding->result = CODING_RESULT_SUCCESS;
1000 coding_set_destination (coding);
1001 dst = coding->destination + offset;
1002 return dst;
1003}
aa72b389 1004
df7492f9
KH
1005\f
1006/*** 2. Emacs' internal format (emacs-utf-8) ***/
1007
1008
1009
1010\f
1011/*** 3. UTF-8 ***/
1012
1013/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1014 Check if a text is encoded in UTF-8. If it is, return
1015 CATEGORY_MASK_UTF_8, else return 0. */
1016
1017#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1018#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1019#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1020#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1021#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1022#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1023
1024static int
1025detect_coding_utf_8 (coding, mask)
1026 struct coding_system *coding;
1027 int *mask;
1028{
1029 unsigned char *src = coding->source, *src_base = src;
1030 unsigned char *src_end = coding->source + coding->src_bytes;
1031 int multibytep = coding->src_multibyte;
1032 int consumed_chars = 0;
1033 int found = 0;
89528eb3 1034 int incomplete;
df7492f9
KH
1035
1036 /* A coding system of this category is always ASCII compatible. */
1037 src += coding->head_ascii;
1038
1039 while (1)
1040 {
1041 int c, c1, c2, c3, c4;
1042
89528eb3 1043 incomplete = 0;
df7492f9
KH
1044 ONE_MORE_BYTE (c);
1045 if (UTF_8_1_OCTET_P (c))
1046 continue;
89528eb3 1047 incomplete = 1;
df7492f9
KH
1048 ONE_MORE_BYTE (c1);
1049 if (! UTF_8_EXTRA_OCTET_P (c1))
1050 break;
1051 if (UTF_8_2_OCTET_LEADING_P (c))
1052 {
1053 found++;
1054 continue;
1055 }
1056 ONE_MORE_BYTE (c2);
1057 if (! UTF_8_EXTRA_OCTET_P (c2))
1058 break;
1059 if (UTF_8_3_OCTET_LEADING_P (c))
1060 {
1061 found++;
1062 continue;
1063 }
1064 ONE_MORE_BYTE (c3);
1065 if (! UTF_8_EXTRA_OCTET_P (c3))
1066 break;
1067 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1068 {
df7492f9
KH
1069 found++;
1070 continue;
1071 }
1072 ONE_MORE_BYTE (c4);
1073 if (! UTF_8_EXTRA_OCTET_P (c4))
1074 break;
1075 if (UTF_8_5_OCTET_LEADING_P (c))
1076 {
1077 found++;
1078 continue;
1079 }
1080 break;
1081 }
1082 *mask &= ~CATEGORY_MASK_UTF_8;
1083 return 0;
1084
1085 no_more_source:
89528eb3
KH
1086 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1087 {
1088 *mask &= ~CATEGORY_MASK_UTF_8;
1089 return 0;
1090 }
1091 return found;
df7492f9
KH
1092}
1093
1094
1095static void
1096decode_coding_utf_8 (coding)
1097 struct coding_system *coding;
1098{
1099 unsigned char *src = coding->source + coding->consumed;
1100 unsigned char *src_end = coding->source + coding->src_bytes;
1101 unsigned char *src_base;
1102 int *charbuf = coding->charbuf;
1103 int *charbuf_end = charbuf + coding->charbuf_size;
1104 int consumed_chars = 0, consumed_chars_base;
1105 int multibytep = coding->src_multibyte;
1106 Lisp_Object attr, eol_type, charset_list;
1107
1108 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1109
1110 while (1)
1111 {
1112 int c, c1, c2, c3, c4, c5;
1113
1114 src_base = src;
1115 consumed_chars_base = consumed_chars;
1116
1117 if (charbuf >= charbuf_end)
1118 break;
1119
1120 ONE_MORE_BYTE (c1);
1121 if (UTF_8_1_OCTET_P(c1))
1122 {
1123 c = c1;
1124 if (c == '\r')
aa72b389 1125 {
df7492f9
KH
1126 if (EQ (eol_type, Qdos))
1127 {
1128 if (src == src_end)
1129 goto no_more_source;
1130 if (*src == '\n')
1131 ONE_MORE_BYTE (c);
1132 }
1133 else if (EQ (eol_type, Qmac))
1134 c = '\n';
aa72b389 1135 }
aa72b389 1136 }
df7492f9 1137 else
aa72b389 1138 {
df7492f9
KH
1139 ONE_MORE_BYTE (c2);
1140 if (! UTF_8_EXTRA_OCTET_P (c2))
1141 goto invalid_code;
1142 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1143 {
1144 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1145 /* Reject overlong sequences here and below. Encoders
1146 producing them are incorrect, they can be misleading,
1147 and they mess up read/write invariance. */
1148 if (c < 128)
1149 goto invalid_code;
1150 }
df7492f9 1151 else
aa72b389 1152 {
df7492f9
KH
1153 ONE_MORE_BYTE (c3);
1154 if (! UTF_8_EXTRA_OCTET_P (c3))
1155 goto invalid_code;
1156 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1157 {
1158 c = (((c1 & 0xF) << 12)
1159 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1160 if (c < 0x800
1161 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1162 goto invalid_code;
1163 }
df7492f9
KH
1164 else
1165 {
1166 ONE_MORE_BYTE (c4);
1167 if (! UTF_8_EXTRA_OCTET_P (c4))
1168 goto invalid_code;
1169 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1170 {
df7492f9
KH
1171 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1172 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1173 if (c < 0x10000)
1174 goto invalid_code;
1175 }
df7492f9
KH
1176 else
1177 {
1178 ONE_MORE_BYTE (c5);
1179 if (! UTF_8_EXTRA_OCTET_P (c5))
1180 goto invalid_code;
1181 if (UTF_8_5_OCTET_LEADING_P (c1))
1182 {
1183 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1184 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1185 | (c5 & 0x3F));
b0edb2c5 1186 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1187 goto invalid_code;
1188 }
1189 else
1190 goto invalid_code;
1191 }
1192 }
aa72b389 1193 }
aa72b389 1194 }
df7492f9
KH
1195
1196 *charbuf++ = c;
1197 continue;
1198
1199 invalid_code:
1200 src = src_base;
1201 consumed_chars = consumed_chars_base;
1202 ONE_MORE_BYTE (c);
1203 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1204 coding->errors++;
aa72b389
KH
1205 }
1206
df7492f9
KH
1207 no_more_source:
1208 coding->consumed_char += consumed_chars_base;
1209 coding->consumed = src_base - coding->source;
1210 coding->charbuf_used = charbuf - coding->charbuf;
1211}
1212
1213
1214static int
1215encode_coding_utf_8 (coding)
1216 struct coding_system *coding;
1217{
1218 int multibytep = coding->dst_multibyte;
1219 int *charbuf = coding->charbuf;
1220 int *charbuf_end = charbuf + coding->charbuf_used;
1221 unsigned char *dst = coding->destination + coding->produced;
1222 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1223 int produced_chars = 0;
df7492f9
KH
1224 int c;
1225
1226 if (multibytep)
aa72b389 1227 {
df7492f9
KH
1228 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1229
1230 while (charbuf < charbuf_end)
aa72b389 1231 {
df7492f9
KH
1232 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1233
1234 ASSURE_DESTINATION (safe_room);
1235 c = *charbuf++;
28f67a95
KH
1236 if (CHAR_BYTE8_P (c))
1237 {
1238 c = CHAR_TO_BYTE8 (c);
1239 EMIT_ONE_BYTE (c);
1240 }
1241 else
1242 {
1243 CHAR_STRING_ADVANCE (c, pend);
1244 for (p = str; p < pend; p++)
1245 EMIT_ONE_BYTE (*p);
1246 }
aa72b389 1247 }
aa72b389 1248 }
df7492f9
KH
1249 else
1250 {
1251 int safe_room = MAX_MULTIBYTE_LENGTH;
1252
1253 while (charbuf < charbuf_end)
1254 {
1255 ASSURE_DESTINATION (safe_room);
1256 c = *charbuf++;
1257 dst += CHAR_STRING (c, dst);
1258 produced_chars++;
1259 }
1260 }
1261 coding->result = CODING_RESULT_SUCCESS;
1262 coding->produced_char += produced_chars;
1263 coding->produced = dst - coding->destination;
1264 return 0;
aa72b389
KH
1265}
1266
4ed46869 1267
df7492f9
KH
1268/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1269 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1270 Little Endian (otherwise). If it is, return
1271 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1272 else return 0. */
1273
1274#define UTF_16_HIGH_SURROGATE_P(val) \
1275 (((val) & 0xFC00) == 0xD800)
1276
1277#define UTF_16_LOW_SURROGATE_P(val) \
1278 (((val) & 0xFC00) == 0xDC00)
1279
1280#define UTF_16_INVALID_P(val) \
1281 (((val) == 0xFFFE) \
1282 || ((val) == 0xFFFF) \
1283 || UTF_16_LOW_SURROGATE_P (val))
1284
1285
1286static int
1287detect_coding_utf_16 (coding, mask)
b73bfc1c 1288 struct coding_system *coding;
df7492f9 1289 int *mask;
b73bfc1c 1290{
df7492f9
KH
1291 unsigned char *src = coding->source, *src_base = src;
1292 unsigned char *src_end = coding->source + coding->src_bytes;
1293 int multibytep = coding->src_multibyte;
1294 int consumed_chars = 0;
1295 int c1, c2;
1296
89528eb3
KH
1297 *mask &= ~CATEGORY_MASK_UTF_16;
1298
df7492f9
KH
1299 ONE_MORE_BYTE (c1);
1300 ONE_MORE_BYTE (c2);
4ed46869 1301
df7492f9 1302 if ((c1 == 0xFF) && (c2 == 0xFE))
89528eb3 1303 *mask |= CATEGORY_MASK_UTF_16_LE;
df7492f9 1304 else if ((c1 == 0xFE) && (c2 == 0xFF))
89528eb3
KH
1305 *mask |= CATEGORY_MASK_UTF_16_BE;
1306 else
1307 *mask |= CATEGORY_MASK_UTF_16_BE_NOSIG | CATEGORY_MASK_UTF_16_LE_NOSIG;
1308 return 1;
1309
df7492f9
KH
1310 no_more_source:
1311 return 0;
1312}
ec6d2bb8 1313
df7492f9
KH
1314static void
1315decode_coding_utf_16 (coding)
1316 struct coding_system *coding;
1317{
1318 unsigned char *src = coding->source + coding->consumed;
1319 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1320 unsigned char *src_base;
df7492f9
KH
1321 int *charbuf = coding->charbuf;
1322 int *charbuf_end = charbuf + coding->charbuf_size;
1323 int consumed_chars = 0, consumed_chars_base;
1324 int multibytep = coding->src_multibyte;
1325 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1326 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1327 int surrogate = CODING_UTF_16_SURROGATE (coding);
1328 Lisp_Object attr, eol_type, charset_list;
1329
1330 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1331
1332 if (bom != utf_16_without_bom)
1333 {
1334 int c, c1, c2;
4af310db 1335
df7492f9
KH
1336 src_base = src;
1337 ONE_MORE_BYTE (c1);
1338 ONE_MORE_BYTE (c2);
e19c3639 1339 c = (c1 << 8) | c2;
df7492f9
KH
1340 if (bom == utf_16_with_bom)
1341 {
1342 if (endian == utf_16_big_endian
1343 ? c != 0xFFFE : c != 0xFEFF)
4af310db 1344 {
df7492f9
KH
1345 /* We are sure that there's enouph room at CHARBUF. */
1346 *charbuf++ = c1;
1347 *charbuf++ = c2;
1348 coding->errors++;
4af310db 1349 }
4af310db 1350 }
df7492f9 1351 else
4af310db 1352 {
df7492f9
KH
1353 if (c == 0xFFFE)
1354 CODING_UTF_16_ENDIAN (coding)
1355 = endian = utf_16_big_endian;
1356 else if (c == 0xFEFF)
1357 CODING_UTF_16_ENDIAN (coding)
1358 = endian = utf_16_little_endian;
1359 else
4af310db 1360 {
df7492f9
KH
1361 CODING_UTF_16_ENDIAN (coding)
1362 = endian = utf_16_big_endian;
1363 src = src_base;
4af310db 1364 }
4af310db 1365 }
df7492f9
KH
1366 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1367 }
1368
1369 while (1)
1370 {
1371 int c, c1, c2;
1372
1373 src_base = src;
1374 consumed_chars_base = consumed_chars;
1375
1376 if (charbuf + 2 >= charbuf_end)
1377 break;
1378
1379 ONE_MORE_BYTE (c1);
1380 ONE_MORE_BYTE (c2);
1381 c = (endian == utf_16_big_endian
e19c3639 1382 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1383 if (surrogate)
aa72b389 1384 {
df7492f9 1385 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1386 {
df7492f9
KH
1387 if (endian == utf_16_big_endian)
1388 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1389 else
1390 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1391 *charbuf++ = c1;
1392 *charbuf++ = c2;
1393 coding->errors++;
1394 if (UTF_16_HIGH_SURROGATE_P (c))
1395 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1396 else
1397 *charbuf++ = c;
aa72b389 1398 }
df7492f9
KH
1399 else
1400 {
1401 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1402 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1403 *charbuf++ = c;
1404 }
1405 }
1406 else
1407 {
1408 if (UTF_16_HIGH_SURROGATE_P (c))
1409 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1410 else
1411 *charbuf++ = c;
1412 }
1413 }
1414
1415 no_more_source:
1416 coding->consumed_char += consumed_chars_base;
1417 coding->consumed = src_base - coding->source;
1418 coding->charbuf_used = charbuf - coding->charbuf;
1419}
1420
1421static int
1422encode_coding_utf_16 (coding)
1423 struct coding_system *coding;
1424{
1425 int multibytep = coding->dst_multibyte;
1426 int *charbuf = coding->charbuf;
1427 int *charbuf_end = charbuf + coding->charbuf_used;
1428 unsigned char *dst = coding->destination + coding->produced;
1429 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1430 int safe_room = 8;
1431 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1432 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1433 int produced_chars = 0;
1434 Lisp_Object attrs, eol_type, charset_list;
1435 int c;
1436
1437 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1438
1439 if (bom == utf_16_with_bom)
1440 {
1441 ASSURE_DESTINATION (safe_room);
1442 if (big_endian)
1443 EMIT_TWO_BYTES (0xFF, 0xFE);
1444 else
1445 EMIT_TWO_BYTES (0xFE, 0xFF);
1446 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1447 }
1448
1449 while (charbuf < charbuf_end)
1450 {
1451 ASSURE_DESTINATION (safe_room);
1452 c = *charbuf++;
e19c3639
KH
1453 if (c >= MAX_UNICODE_CHAR)
1454 c = coding->default_char;
df7492f9
KH
1455
1456 if (c < 0x10000)
1457 {
1458 if (big_endian)
1459 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1460 else
1461 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1462 }
1463 else
1464 {
1465 int c1, c2;
1466
1467 c -= 0x10000;
1468 c1 = (c >> 10) + 0xD800;
1469 c2 = (c & 0x3FF) + 0xDC00;
1470 if (big_endian)
1471 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1472 else
1473 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1474 }
1475 }
1476 coding->result = CODING_RESULT_SUCCESS;
1477 coding->produced = dst - coding->destination;
1478 coding->produced_char += produced_chars;
1479 return 0;
1480}
1481
1482\f
1483/*** 6. Old Emacs' internal format (emacs-mule) ***/
1484
1485/* Emacs' internal format for representation of multiple character
1486 sets is a kind of multi-byte encoding, i.e. characters are
1487 represented by variable-length sequences of one-byte codes.
1488
1489 ASCII characters and control characters (e.g. `tab', `newline') are
1490 represented by one-byte sequences which are their ASCII codes, in
1491 the range 0x00 through 0x7F.
1492
1493 8-bit characters of the range 0x80..0x9F are represented by
1494 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1495 code + 0x20).
1496
1497 8-bit characters of the range 0xA0..0xFF are represented by
1498 one-byte sequences which are their 8-bit code.
1499
1500 The other characters are represented by a sequence of `base
1501 leading-code', optional `extended leading-code', and one or two
1502 `position-code's. The length of the sequence is determined by the
1503 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1504 whereas extended leading-code and position-code take the range 0xA0
1505 through 0xFF. See `charset.h' for more details about leading-code
1506 and position-code.
1507
1508 --- CODE RANGE of Emacs' internal format ---
1509 character set range
1510 ------------- -----
1511 ascii 0x00..0x7F
1512 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1513 eight-bit-graphic 0xA0..0xBF
1514 ELSE 0x81..0x9D + [0xA0..0xFF]+
1515 ---------------------------------------------
1516
1517 As this is the internal character representation, the format is
1518 usually not used externally (i.e. in a file or in a data sent to a
1519 process). But, it is possible to have a text externally in this
1520 format (i.e. by encoding by the coding system `emacs-mule').
1521
1522 In that case, a sequence of one-byte codes has a slightly different
1523 form.
1524
1525 At first, all characters in eight-bit-control are represented by
1526 one-byte sequences which are their 8-bit code.
1527
1528 Next, character composition data are represented by the byte
1529 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1530 where,
1531 METHOD is 0xF0 plus one of composition method (enum
1532 composition_method),
1533
1534 BYTES is 0xA0 plus a byte length of this composition data,
1535
1536 CHARS is 0x20 plus a number of characters composed by this
1537 data,
1538
1539 COMPONENTs are characters of multibye form or composition
1540 rules encoded by two-byte of ASCII codes.
1541
1542 In addition, for backward compatibility, the following formats are
1543 also recognized as composition data on decoding.
1544
1545 0x80 MSEQ ...
1546 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1547
1548 Here,
1549 MSEQ is a multibyte form but in these special format:
1550 ASCII: 0xA0 ASCII_CODE+0x80,
1551 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1552 RULE is a one byte code of the range 0xA0..0xF0 that
1553 represents a composition rule.
1554 */
1555
1556char emacs_mule_bytes[256];
1557
df7492f9 1558int
781d7a48 1559emacs_mule_char (coding, src, nbytes, nchars)
df7492f9 1560 struct coding_system *coding;
781d7a48 1561 unsigned char *src;
df7492f9
KH
1562 int *nbytes, *nchars;
1563{
df7492f9
KH
1564 unsigned char *src_end = coding->source + coding->src_bytes;
1565 int multibytep = coding->src_multibyte;
1566 unsigned char *src_base = src;
1567 struct charset *charset;
1568 unsigned code;
1569 int c;
1570 int consumed_chars = 0;
1571
1572 ONE_MORE_BYTE (c);
df7492f9
KH
1573 switch (emacs_mule_bytes[c])
1574 {
1575 case 2:
1576 if (! (charset = emacs_mule_charset[c]))
1577 goto invalid_code;
1578 ONE_MORE_BYTE (c);
1579 code = c & 0x7F;
1580 break;
1581
1582 case 3:
7c78e542
KH
1583 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1584 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
b73bfc1c 1585 {
df7492f9
KH
1586 ONE_MORE_BYTE (c);
1587 if (! (charset = emacs_mule_charset[c]))
1588 goto invalid_code;
1589 ONE_MORE_BYTE (c);
1590 code = c & 0x7F;
b73bfc1c
KH
1591 }
1592 else
1593 {
df7492f9
KH
1594 if (! (charset = emacs_mule_charset[c]))
1595 goto invalid_code;
1596 ONE_MORE_BYTE (c);
781d7a48 1597 code = (c & 0x7F) << 8;
df7492f9
KH
1598 ONE_MORE_BYTE (c);
1599 code |= c & 0x7F;
1600 }
1601 break;
1602
1603 case 4:
781d7a48 1604 ONE_MORE_BYTE (c);
df7492f9
KH
1605 if (! (charset = emacs_mule_charset[c]))
1606 goto invalid_code;
1607 ONE_MORE_BYTE (c);
781d7a48 1608 code = (c & 0x7F) << 8;
df7492f9
KH
1609 ONE_MORE_BYTE (c);
1610 code |= c & 0x7F;
1611 break;
1612
1613 case 1:
1614 code = c;
9d123124
KH
1615 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1616 ? charset_ascii : charset_eight_bit);
df7492f9
KH
1617 break;
1618
1619 default:
1620 abort ();
1621 }
1622 c = DECODE_CHAR (charset, code);
1623 if (c < 0)
1624 goto invalid_code;
1625 *nbytes = src - src_base;
1626 *nchars = consumed_chars;
1627 return c;
1628
1629 no_more_source:
1630 return -2;
1631
1632 invalid_code:
1633 return -1;
1634}
1635
1636
1637/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1638 Check if a text is encoded in `emacs-mule'. */
1639
1640static int
1641detect_coding_emacs_mule (coding, mask)
1642 struct coding_system *coding;
1643 int *mask;
1644{
1645 unsigned char *src = coding->source, *src_base = src;
1646 unsigned char *src_end = coding->source + coding->src_bytes;
1647 int multibytep = coding->src_multibyte;
1648 int consumed_chars = 0;
1649 int c;
1650 int found = 0;
89528eb3 1651 int incomplete;
df7492f9
KH
1652
1653 /* A coding system of this category is always ASCII compatible. */
1654 src += coding->head_ascii;
1655
1656 while (1)
1657 {
89528eb3 1658 incomplete = 0;
df7492f9 1659 ONE_MORE_BYTE (c);
89528eb3 1660 incomplete = 1;
df7492f9
KH
1661
1662 if (c == 0x80)
1663 {
1664 /* Perhaps the start of composite character. We simple skip
1665 it because analyzing it is too heavy for detecting. But,
1666 at least, we check that the composite character
1667 constitues of more than 4 bytes. */
1668 unsigned char *src_base;
1669
1670 repeat:
1671 src_base = src;
1672 do
1673 {
1674 ONE_MORE_BYTE (c);
1675 }
1676 while (c >= 0xA0);
1677
1678 if (src - src_base <= 4)
1679 break;
1680 found = 1;
1681 if (c == 0x80)
1682 goto repeat;
b73bfc1c 1683 }
df7492f9
KH
1684
1685 if (c < 0x80)
b73bfc1c 1686 {
df7492f9
KH
1687 if (c < 0x20
1688 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1689 break;
1690 }
1691 else
1692 {
1693 unsigned char *src_base = src - 1;
1694
1695 do
1696 {
1697 ONE_MORE_BYTE (c);
1698 }
1699 while (c >= 0xA0);
1700 if (src - src_base != emacs_mule_bytes[*src_base])
1701 break;
1702 found = 1;
4ed46869
KH
1703 }
1704 }
df7492f9
KH
1705 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1706 return 0;
1707
1708 no_more_source:
89528eb3
KH
1709 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1710 {
1711 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1712 return 0;
1713 }
1714 return found;
4ed46869
KH
1715}
1716
b73bfc1c 1717
df7492f9
KH
1718/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1719
1720/* Decode a character represented as a component of composition
1721 sequence of Emacs 20/21 style at SRC. Set C to that character and
1722 update SRC to the head of next character (or an encoded composition
1723 rule). If SRC doesn't points a composition component, set C to -1.
1724 If SRC points an invalid byte sequence, global exit by a return
1725 value 0. */
1726
1727#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1728 if (1) \
1729 { \
1730 int c; \
1731 int nbytes, nchars; \
1732 \
1733 if (src == src_end) \
1734 break; \
781d7a48 1735 c = emacs_mule_char (coding, src, &nbytes, &nchars); \
df7492f9
KH
1736 if (c < 0) \
1737 { \
1738 if (c == -2) \
1739 break; \
1740 goto invalid_code; \
1741 } \
1742 *buf++ = c; \
1743 src += nbytes; \
1744 consumed_chars += nchars; \
1745 } \
1746 else
1747
1748
1749/* Decode a composition rule represented as a component of composition
781d7a48
KH
1750 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1751 and increment BUF. If SRC points an invalid byte sequence, set C
1752 to -1. */
df7492f9 1753
781d7a48 1754#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
1755 do { \
1756 int c, gref, nref; \
1757 \
781d7a48 1758 if (src >= src_end) \
df7492f9
KH
1759 goto invalid_code; \
1760 ONE_MORE_BYTE_NO_CHECK (c); \
781d7a48 1761 c -= 0x20; \
df7492f9
KH
1762 if (c < 0 || c >= 81) \
1763 goto invalid_code; \
1764 \
1765 gref = c / 9, nref = c % 9; \
1766 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1767 } while (0)
1768
1769
781d7a48
KH
1770/* Decode a composition rule represented as a component of composition
1771 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1772 and increment BUF. If SRC points an invalid byte sequence, set C
1773 to -1. */
1774
1775#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1776 do { \
1777 int gref, nref; \
1778 \
1779 if (src + 1>= src_end) \
1780 goto invalid_code; \
1781 ONE_MORE_BYTE_NO_CHECK (gref); \
1782 gref -= 0x20; \
1783 ONE_MORE_BYTE_NO_CHECK (nref); \
1784 nref -= 0x20; \
1785 if (gref < 0 || gref >= 81 \
1786 || nref < 0 || nref >= 81) \
1787 goto invalid_code; \
1788 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1789 } while (0)
1790
1791
df7492f9
KH
1792#define ADD_COMPOSITION_DATA(buf, method, nchars) \
1793 do { \
1794 *buf++ = -5; \
1795 *buf++ = coding->produced_char + char_offset; \
1796 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1797 *buf++ = method; \
1798 *buf++ = nchars; \
1799 } while (0)
aa72b389 1800
df7492f9
KH
1801
1802#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 1803 do { \
df7492f9 1804 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 1805 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
1806 the byte length of this composition information, CHARS is the \
1807 number of characters composed by this composition. */ \
781d7a48
KH
1808 enum composition_method method = c - 0xF2; \
1809 int *charbuf_base = charbuf; \
df7492f9
KH
1810 int consumed_chars_limit; \
1811 int nbytes, nchars; \
1812 \
1813 ONE_MORE_BYTE (c); \
1814 nbytes = c - 0xA0; \
1815 if (nbytes < 3) \
1816 goto invalid_code; \
1817 ONE_MORE_BYTE (c); \
1818 nchars = c - 0xA0; \
1819 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1820 consumed_chars_limit = consumed_chars_base + nbytes; \
1821 if (method != COMPOSITION_RELATIVE) \
aa72b389 1822 { \
df7492f9
KH
1823 int i = 0; \
1824 while (consumed_chars < consumed_chars_limit) \
aa72b389 1825 { \
df7492f9 1826 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 1827 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
1828 else \
1829 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 1830 i++; \
aa72b389 1831 } \
df7492f9
KH
1832 if (consumed_chars < consumed_chars_limit) \
1833 goto invalid_code; \
781d7a48 1834 charbuf_base[0] -= i; \
aa72b389
KH
1835 } \
1836 } while (0)
93dec019 1837
aa72b389 1838
df7492f9
KH
1839#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1840 do { \
1841 /* Emacs 20 style format for relative composition. */ \
1842 /* Store multibyte form of characters to be composed. */ \
1843 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1844 int *buf = components; \
1845 int i, j; \
1846 \
1847 src = src_base; \
1848 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1849 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1850 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1851 if (i < 2) \
1852 goto invalid_code; \
1853 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1854 for (j = 0; j < i; j++) \
1855 *charbuf++ = components[j]; \
1856 } while (0)
1857
1858
1859#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1860 do { \
1861 /* Emacs 20 style format for rule-base composition. */ \
1862 /* Store multibyte form of characters to be composed. */ \
1863 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1864 int *buf = components; \
1865 int i, j; \
1866 \
1867 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1868 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1869 { \
781d7a48 1870 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
df7492f9
KH
1871 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1872 } \
1873 if (i < 1 || (buf - components) % 2 == 0) \
1874 goto invalid_code; \
1875 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1876 goto no_more_source; \
1877 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1878 for (j = 0; j < i; j++) \
1879 *charbuf++ = components[j]; \
1880 for (j = 0; j < i; j += 2) \
1881 *charbuf++ = components[j]; \
1882 } while (0)
1883
aa72b389
KH
1884
1885static void
df7492f9 1886decode_coding_emacs_mule (coding)
aa72b389 1887 struct coding_system *coding;
aa72b389 1888{
df7492f9
KH
1889 unsigned char *src = coding->source + coding->consumed;
1890 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1891 unsigned char *src_base;
df7492f9
KH
1892 int *charbuf = coding->charbuf;
1893 int *charbuf_end = charbuf + coding->charbuf_size;
1894 int consumed_chars = 0, consumed_chars_base;
1895 int char_offset = 0;
1896 int multibytep = coding->src_multibyte;
1897 Lisp_Object attrs, eol_type, charset_list;
aa72b389 1898
df7492f9 1899 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1900
aa72b389
KH
1901 while (1)
1902 {
df7492f9
KH
1903 int c;
1904
aa72b389 1905 src_base = src;
df7492f9
KH
1906 consumed_chars_base = consumed_chars;
1907
1908 if (charbuf >= charbuf_end)
1909 break;
aa72b389 1910
df7492f9
KH
1911 ONE_MORE_BYTE (c);
1912
1913 if (c < 0x80)
aa72b389 1914 {
df7492f9
KH
1915 if (c == '\r')
1916 {
1917 if (EQ (eol_type, Qdos))
1918 {
1919 if (src == src_end)
1920 goto no_more_source;
1921 if (*src == '\n')
1922 ONE_MORE_BYTE (c);
1923 }
1924 else if (EQ (eol_type, Qmac))
1925 c = '\n';
1926 }
1927 *charbuf++ = c;
1928 char_offset++;
aa72b389 1929 }
df7492f9
KH
1930 else if (c == 0x80)
1931 {
1932 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1933 break;
1934 ONE_MORE_BYTE (c);
781d7a48
KH
1935 if (c - 0xF2 >= COMPOSITION_RELATIVE
1936 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1937 DECODE_EMACS_MULE_21_COMPOSITION (c);
1938 else if (c < 0xC0)
1939 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1940 else if (c == 0xFF)
1941 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1942 else
1943 goto invalid_code;
781d7a48 1944 coding->annotated = 1;
df7492f9
KH
1945 }
1946 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1947 {
1948 int nbytes, nchars;
781d7a48
KH
1949 src = src_base;
1950 consumed_chars = consumed_chars_base;
1951 c = emacs_mule_char (coding, src, &nbytes, &nchars);
df7492f9
KH
1952 if (c < 0)
1953 {
1954 if (c == -2)
1955 break;
1956 goto invalid_code;
1957 }
1958 *charbuf++ = c;
781d7a48
KH
1959 src += nbytes;
1960 consumed_chars += nchars;
df7492f9
KH
1961 char_offset++;
1962 }
1963 continue;
1964
1965 invalid_code:
1966 src = src_base;
1967 consumed_chars = consumed_chars_base;
1968 ONE_MORE_BYTE (c);
1969 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1970 coding->errors++;
1971 }
1972
1973 no_more_source:
1974 coding->consumed_char += consumed_chars_base;
1975 coding->consumed = src_base - coding->source;
1976 coding->charbuf_used = charbuf - coding->charbuf;
1977}
1978
1979
1980#define EMACS_MULE_LEADING_CODES(id, codes) \
1981 do { \
1982 if (id < 0xA0) \
1983 codes[0] = id, codes[1] = 0; \
1984 else if (id < 0xE0) \
1985 codes[0] = 0x9A, codes[1] = id; \
1986 else if (id < 0xF0) \
1987 codes[0] = 0x9B, codes[1] = id; \
1988 else if (id < 0xF5) \
1989 codes[0] = 0x9C, codes[1] = id; \
1990 else \
1991 codes[0] = 0x9D, codes[1] = id; \
1992 } while (0);
1993
aa72b389 1994
df7492f9
KH
1995static int
1996encode_coding_emacs_mule (coding)
1997 struct coding_system *coding;
1998{
1999 int multibytep = coding->dst_multibyte;
2000 int *charbuf = coding->charbuf;
2001 int *charbuf_end = charbuf + coding->charbuf_used;
2002 unsigned char *dst = coding->destination + coding->produced;
2003 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2004 int safe_room = 8;
df7492f9
KH
2005 int produced_chars = 0;
2006 Lisp_Object attrs, eol_type, charset_list;
2007 int c;
2008
2009 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2010
2011 while (charbuf < charbuf_end)
2012 {
2013 ASSURE_DESTINATION (safe_room);
2014 c = *charbuf++;
2015 if (ASCII_CHAR_P (c))
2016 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2017 else if (CHAR_BYTE8_P (c))
2018 {
2019 c = CHAR_TO_BYTE8 (c);
2020 EMIT_ONE_BYTE (c);
2021 }
df7492f9 2022 else
aa72b389 2023 {
df7492f9
KH
2024 struct charset *charset;
2025 unsigned code;
2026 int dimension;
2027 int emacs_mule_id;
2028 unsigned char leading_codes[2];
2029
2030 charset = char_charset (c, charset_list, &code);
2031 if (! charset)
2032 {
2033 c = coding->default_char;
2034 if (ASCII_CHAR_P (c))
2035 {
2036 EMIT_ONE_ASCII_BYTE (c);
2037 continue;
2038 }
2039 charset = char_charset (c, charset_list, &code);
2040 }
2041 dimension = CHARSET_DIMENSION (charset);
2042 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2043 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2044 EMIT_ONE_BYTE (leading_codes[0]);
2045 if (leading_codes[1])
2046 EMIT_ONE_BYTE (leading_codes[1]);
2047 if (dimension == 1)
2048 EMIT_ONE_BYTE (code);
aa72b389 2049 else
df7492f9
KH
2050 {
2051 EMIT_ONE_BYTE (code >> 8);
2052 EMIT_ONE_BYTE (code & 0xFF);
2053 }
aa72b389 2054 }
aa72b389 2055 }
df7492f9
KH
2056 coding->result = CODING_RESULT_SUCCESS;
2057 coding->produced_char += produced_chars;
2058 coding->produced = dst - coding->destination;
2059 return 0;
aa72b389 2060}
b73bfc1c 2061
4ed46869 2062\f
df7492f9 2063/*** 7. ISO2022 handlers ***/
4ed46869
KH
2064
2065/* The following note describes the coding system ISO2022 briefly.
39787efd 2066 Since the intention of this note is to help understand the
5a936b46 2067 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2068 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46
DL
2069 original document of ISO2022. This is equivalent to the standard
2070 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2071
2072 ISO2022 provides many mechanisms to encode several character sets
5a936b46 2073 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2074 is encoded using bytes less than 128. This may make the encoded
2075 text a little bit longer, but the text passes more easily through
5a936b46
DL
2076 several types of gateway, some of which strip off the MSB (Most
2077 Significant Bit).
b73bfc1c 2078
5a936b46
DL
2079 There are two kinds of character sets: control character sets and
2080 graphic character sets. The former contain control characters such
4ed46869 2081 as `newline' and `escape' to provide control functions (control
39787efd 2082 functions are also provided by escape sequences). The latter
5a936b46 2083 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2084 two control character sets and many graphic character sets.
2085
2086 Graphic character sets are classified into one of the following
39787efd
KH
2087 four classes, according to the number of bytes (DIMENSION) and
2088 number of characters in one dimension (CHARS) of the set:
2089 - DIMENSION1_CHARS94
2090 - DIMENSION1_CHARS96
2091 - DIMENSION2_CHARS94
2092 - DIMENSION2_CHARS96
2093
2094 In addition, each character set is assigned an identification tag,
5a936b46 2095 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2096 hereafter). The <F> of each character set is decided by ECMA(*)
2097 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2098 (0x30..0x3F are for private use only).
4ed46869
KH
2099
2100 Note (*): ECMA = European Computer Manufacturers Association
2101
5a936b46 2102 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2103 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2104 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2105 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2106 o DIMENSION2_CHARS96 -- none for the moment
2107
39787efd 2108 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2109 C0 [0x00..0x1F] -- control character plane 0
2110 GL [0x20..0x7F] -- graphic character plane 0
2111 C1 [0x80..0x9F] -- control character plane 1
2112 GR [0xA0..0xFF] -- graphic character plane 1
2113
2114 A control character set is directly designated and invoked to C0 or
39787efd
KH
2115 C1 by an escape sequence. The most common case is that:
2116 - ISO646's control character set is designated/invoked to C0, and
2117 - ISO6429's control character set is designated/invoked to C1,
2118 and usually these designations/invocations are omitted in encoded
2119 text. In a 7-bit environment, only C0 can be used, and a control
2120 character for C1 is encoded by an appropriate escape sequence to
2121 fit into the environment. All control characters for C1 are
2122 defined to have corresponding escape sequences.
4ed46869
KH
2123
2124 A graphic character set is at first designated to one of four
2125 graphic registers (G0 through G3), then these graphic registers are
2126 invoked to GL or GR. These designations and invocations can be
2127 done independently. The most common case is that G0 is invoked to
39787efd
KH
2128 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2129 these invocations and designations are omitted in encoded text.
2130 In a 7-bit environment, only GL can be used.
4ed46869 2131
39787efd
KH
2132 When a graphic character set of CHARS94 is invoked to GL, codes
2133 0x20 and 0x7F of the GL area work as control characters SPACE and
2134 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2135 be used.
4ed46869
KH
2136
2137 There are two ways of invocation: locking-shift and single-shift.
2138 With locking-shift, the invocation lasts until the next different
39787efd
KH
2139 invocation, whereas with single-shift, the invocation affects the
2140 following character only and doesn't affect the locking-shift
2141 state. Invocations are done by the following control characters or
2142 escape sequences:
4ed46869
KH
2143
2144 ----------------------------------------------------------------------
39787efd 2145 abbrev function cntrl escape seq description
4ed46869 2146 ----------------------------------------------------------------------
39787efd
KH
2147 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2148 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2149 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2150 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2151 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2152 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2153 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2154 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2155 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2156 ----------------------------------------------------------------------
39787efd
KH
2157 (*) These are not used by any known coding system.
2158
2159 Control characters for these functions are defined by macros
2160 ISO_CODE_XXX in `coding.h'.
4ed46869 2161
39787efd 2162 Designations are done by the following escape sequences:
4ed46869
KH
2163 ----------------------------------------------------------------------
2164 escape sequence description
2165 ----------------------------------------------------------------------
2166 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2167 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2168 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2169 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2170 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2171 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2172 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2173 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2174 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2175 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2176 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2177 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2178 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2179 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2180 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2181 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2182 ----------------------------------------------------------------------
2183
2184 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2185 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2186
2187 Note (*): Although these designations are not allowed in ISO2022,
2188 Emacs accepts them on decoding, and produces them on encoding
39787efd 2189 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2190 7-bit environment, non-locking-shift, and non-single-shift.
2191
2192 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2193 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2194
5a936b46 2195 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2196 same multilingual text in ISO2022. Actually, there exist many
2197 coding systems such as Compound Text (used in X11's inter client
5a936b46
DL
2198 communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2199 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2200 localized platforms), and all of these are variants of ISO2022.
2201
2202 In addition to the above, Emacs handles two more kinds of escape
2203 sequences: ISO6429's direction specification and Emacs' private
2204 sequence for specifying character composition.
2205
39787efd 2206 ISO6429's direction specification takes the following form:
4ed46869
KH
2207 o CSI ']' -- end of the current direction
2208 o CSI '0' ']' -- end of the current direction
2209 o CSI '1' ']' -- start of left-to-right text
2210 o CSI '2' ']' -- start of right-to-left text
2211 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2212 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2213
2214 Character composition specification takes the following form:
ec6d2bb8
KH
2215 o ESC '0' -- start relative composition
2216 o ESC '1' -- end composition
2217 o ESC '2' -- start rule-base composition (*)
2218 o ESC '3' -- start relative composition with alternate chars (**)
2219 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2220 Since these are not standard escape sequences of any ISO standard,
5a936b46 2221 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2222
5a936b46
DL
2223 (*) This form is used only in Emacs 20.7 and older versions,
2224 but newer versions can safely decode it.
2225 (**) This form is used only in Emacs 21.1 and newer versions,
2226 and older versions can't decode it.
ec6d2bb8 2227
5a936b46 2228 Here's a list of example usages of these composition escape
b73bfc1c 2229 sequences (categorized by `enum composition_method').
ec6d2bb8 2230
b73bfc1c 2231 COMPOSITION_RELATIVE:
ec6d2bb8 2232 ESC 0 CHAR [ CHAR ] ESC 1
5a936b46 2233 COMPOSITION_WITH_RULE:
ec6d2bb8 2234 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2235 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2236 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2237 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2238 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2239
2240enum iso_code_class_type iso_code_class[256];
2241
df7492f9
KH
2242#define SAFE_CHARSET_P(coding, id) \
2243 ((id) <= (coding)->max_charset_id \
2244 && (coding)->safe_charsets[id] >= 0)
2245
2246
2247#define SHIFT_OUT_OK(category) \
2248 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2249
2250static void
f0064e1f
DL
2251setup_iso_safe_charsets (attrs)
2252 Lisp_Object attrs;
df7492f9
KH
2253{
2254 Lisp_Object charset_list, safe_charsets;
2255 Lisp_Object request;
2256 Lisp_Object reg_usage;
2257 Lisp_Object tail;
2258 int reg94, reg96;
2259 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2260 int max_charset_id;
2261
2262 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2263 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2264 && ! EQ (charset_list, Viso_2022_charset_list))
2265 {
2266 CODING_ATTR_CHARSET_LIST (attrs)
2267 = charset_list = Viso_2022_charset_list;
2268 ASET (attrs, coding_attr_safe_charsets, Qnil);
2269 }
2270
2271 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2272 return;
2273
2274 max_charset_id = 0;
2275 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2276 {
2277 int id = XINT (XCAR (tail));
2278 if (max_charset_id < id)
2279 max_charset_id = id;
2280 }
2281
2282 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2283 make_number (255));
2284 request = AREF (attrs, coding_attr_iso_request);
2285 reg_usage = AREF (attrs, coding_attr_iso_usage);
2286 reg94 = XINT (XCAR (reg_usage));
2287 reg96 = XINT (XCDR (reg_usage));
2288
2289 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2290 {
2291 Lisp_Object id;
2292 Lisp_Object reg;
2293 struct charset *charset;
2294
2295 id = XCAR (tail);
2296 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2297 reg = Fcdr (Fassq (id, request));
df7492f9
KH
2298 if (! NILP (reg))
2299 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2300 else if (charset->iso_chars_96)
2301 {
2302 if (reg96 < 4)
2303 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2304 }
2305 else
2306 {
2307 if (reg94 < 4)
2308 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2309 }
2310 }
2311 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2312}
d46c5b12 2313
d46c5b12 2314
4ed46869 2315/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
df7492f9 2316 Check if a text is encoded in ISO2022. If it is, returns an
4ed46869 2317 integer in which appropriate flag bits any of:
df7492f9
KH
2318 CATEGORY_MASK_ISO_7
2319 CATEGORY_MASK_ISO_7_TIGHT
2320 CATEGORY_MASK_ISO_8_1
2321 CATEGORY_MASK_ISO_8_2
2322 CATEGORY_MASK_ISO_7_ELSE
2323 CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
2324 are set. If a code which should never appear in ISO2022 is found,
2325 returns 0. */
2326
0a28aafb 2327static int
df7492f9
KH
2328detect_coding_iso_2022 (coding, mask)
2329 struct coding_system *coding;
2330 int *mask;
4ed46869 2331{
df7492f9
KH
2332 unsigned char *src = coding->source, *src_base = src;
2333 unsigned char *src_end = coding->source + coding->src_bytes;
2334 int multibytep = coding->src_multibyte;
2335 int mask_iso = CATEGORY_MASK_ISO;
2336 int mask_found = 0, mask_8bit_found = 0;
f46869e4 2337 int reg[4], shift_out = 0, single_shifting = 0;
df7492f9
KH
2338 int id;
2339 int c, c1;
2340 int consumed_chars = 0;
2341 int i;
2342
2343 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2344 {
2345 struct coding_system *this = &(coding_categories[i]);
2346 Lisp_Object attrs, val;
2347
2348 attrs = CODING_ID_ATTRS (this->id);
2349 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2350 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2351 setup_iso_safe_charsets (attrs);
2352 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2353 this->max_charset_id = XSTRING (val)->size - 1;
2354 this->safe_charsets = (char *) XSTRING (val)->data;
2355 }
2356
2357 /* A coding system of this category is always ASCII compatible. */
2358 src += coding->head_ascii;
3f003981 2359
df7492f9
KH
2360 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2361 while (mask_iso && src < src_end)
4ed46869 2362 {
df7492f9 2363 ONE_MORE_BYTE (c);
4ed46869
KH
2364 switch (c)
2365 {
2366 case ISO_CODE_ESC:
74383408
KH
2367 if (inhibit_iso_escape_detection)
2368 break;
f46869e4 2369 single_shifting = 0;
df7492f9 2370 ONE_MORE_BYTE (c);
d46c5b12 2371 if (c >= '(' && c <= '/')
4ed46869 2372 {
bf9cdd4e 2373 /* Designation sequence for a charset of dimension 1. */
df7492f9 2374 ONE_MORE_BYTE (c1);
d46c5b12 2375 if (c1 < ' ' || c1 >= 0x80
df7492f9 2376 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2377 /* Invalid designation sequence. Just ignore. */
2378 break;
df7492f9 2379 reg[(c - '(') % 4] = id;
bf9cdd4e
KH
2380 }
2381 else if (c == '$')
2382 {
2383 /* Designation sequence for a charset of dimension 2. */
df7492f9 2384 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2385 if (c >= '@' && c <= 'B')
2386 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
df7492f9 2387 reg[0] = id = iso_charset_table[1][0][c];
bf9cdd4e 2388 else if (c >= '(' && c <= '/')
bcf26d6a 2389 {
df7492f9 2390 ONE_MORE_BYTE (c1);
d46c5b12 2391 if (c1 < ' ' || c1 >= 0x80
df7492f9 2392 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2393 /* Invalid designation sequence. Just ignore. */
2394 break;
df7492f9 2395 reg[(c - '(') % 4] = id;
bcf26d6a 2396 }
bf9cdd4e 2397 else
d46c5b12
KH
2398 /* Invalid designation sequence. Just ignore. */
2399 break;
2400 }
ae9ff118 2401 else if (c == 'N' || c == 'O')
d46c5b12 2402 {
ae9ff118 2403 /* ESC <Fe> for SS2 or SS3. */
df7492f9 2404 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 2405 break;
4ed46869 2406 }
ec6d2bb8
KH
2407 else if (c >= '0' && c <= '4')
2408 {
2409 /* ESC <Fp> for start/end composition. */
df7492f9 2410 mask_found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2411 break;
2412 }
bf9cdd4e 2413 else
df7492f9
KH
2414 {
2415 /* Invalid escape sequence. */
2416 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2417 break;
2418 }
d46c5b12
KH
2419
2420 /* We found a valid designation sequence for CHARSET. */
df7492f9
KH
2421 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2422 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2423 id))
2424 mask_found |= CATEGORY_MASK_ISO_7;
d46c5b12 2425 else
df7492f9
KH
2426 mask_iso &= ~CATEGORY_MASK_ISO_7;
2427 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2428 id))
2429 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2430 else
df7492f9
KH
2431 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2432 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2433 id))
2434 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2435 else
df7492f9
KH
2436 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2437 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2438 id))
2439 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2440 else
df7492f9 2441 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2442 break;
2443
4ed46869 2444 case ISO_CODE_SO:
74383408
KH
2445 if (inhibit_iso_escape_detection)
2446 break;
f46869e4 2447 single_shifting = 0;
d46c5b12
KH
2448 if (shift_out == 0
2449 && (reg[1] >= 0
df7492f9
KH
2450 || SHIFT_OUT_OK (coding_category_iso_7_else)
2451 || SHIFT_OUT_OK (coding_category_iso_8_else)))
d46c5b12
KH
2452 {
2453 /* Locking shift out. */
df7492f9
KH
2454 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2455 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2456 }
e0e989f6 2457 break;
df7492f9 2458
d46c5b12 2459 case ISO_CODE_SI:
74383408
KH
2460 if (inhibit_iso_escape_detection)
2461 break;
f46869e4 2462 single_shifting = 0;
d46c5b12
KH
2463 if (shift_out == 1)
2464 {
2465 /* Locking shift in. */
df7492f9
KH
2466 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2467 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2468 }
2469 break;
2470
4ed46869 2471 case ISO_CODE_CSI:
f46869e4 2472 single_shifting = 0;
4ed46869
KH
2473 case ISO_CODE_SS2:
2474 case ISO_CODE_SS3:
3f003981 2475 {
df7492f9 2476 int newmask = CATEGORY_MASK_ISO_8_ELSE;
3f003981 2477
89528eb3 2478 mask_8bit_found = 1;
74383408
KH
2479 if (inhibit_iso_escape_detection)
2480 break;
70c22245
KH
2481 if (c != ISO_CODE_CSI)
2482 {
df7492f9
KH
2483 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2484 & CODING_ISO_FLAG_SINGLE_SHIFT)
2485 newmask |= CATEGORY_MASK_ISO_8_1;
2486 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2487 & CODING_ISO_FLAG_SINGLE_SHIFT)
2488 newmask |= CATEGORY_MASK_ISO_8_2;
f46869e4 2489 single_shifting = 1;
70c22245 2490 }
3f003981
KH
2491 if (VECTORP (Vlatin_extra_code_table)
2492 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2493 {
df7492f9
KH
2494 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2495 & CODING_ISO_FLAG_LATIN_EXTRA)
2496 newmask |= CATEGORY_MASK_ISO_8_1;
2497 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2498 & CODING_ISO_FLAG_LATIN_EXTRA)
2499 newmask |= CATEGORY_MASK_ISO_8_2;
3f003981 2500 }
df7492f9 2501 mask_iso &= newmask;
d46c5b12 2502 mask_found |= newmask;
3f003981
KH
2503 }
2504 break;
4ed46869
KH
2505
2506 default:
2507 if (c < 0x80)
f46869e4
KH
2508 {
2509 single_shifting = 0;
2510 break;
2511 }
4ed46869 2512 else if (c < 0xA0)
c4825358 2513 {
f46869e4 2514 single_shifting = 0;
df7492f9 2515 mask_8bit_found = 1;
3f003981
KH
2516 if (VECTORP (Vlatin_extra_code_table)
2517 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 2518 {
3f003981
KH
2519 int newmask = 0;
2520
df7492f9
KH
2521 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2522 & CODING_ISO_FLAG_LATIN_EXTRA)
2523 newmask |= CATEGORY_MASK_ISO_8_1;
2524 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2525 & CODING_ISO_FLAG_LATIN_EXTRA)
2526 newmask |= CATEGORY_MASK_ISO_8_2;
2527 mask_iso &= newmask;
d46c5b12 2528 mask_found |= newmask;
c4825358 2529 }
3f003981
KH
2530 else
2531 return 0;
c4825358 2532 }
4ed46869
KH
2533 else
2534 {
df7492f9
KH
2535 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2536 | CATEGORY_MASK_ISO_7_ELSE);
2537 mask_found |= CATEGORY_MASK_ISO_8_1;
2538 mask_8bit_found = 1;
f46869e4
KH
2539 /* Check the length of succeeding codes of the range
2540 0xA0..0FF. If the byte length is odd, we exclude
df7492f9 2541 CATEGORY_MASK_ISO_8_2. We can check this only
f46869e4 2542 when we are not single shifting. */
b73bfc1c 2543 if (!single_shifting
df7492f9 2544 && mask_iso & CATEGORY_MASK_ISO_8_2)
f46869e4 2545 {
e17de821 2546 int i = 1;
b73bfc1c
KH
2547 while (src < src_end)
2548 {
df7492f9 2549 ONE_MORE_BYTE (c);
b73bfc1c
KH
2550 if (c < 0xA0)
2551 break;
2552 i++;
2553 }
2554
2555 if (i & 1 && src < src_end)
df7492f9 2556 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
f46869e4 2557 else
df7492f9 2558 mask_found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2559 }
4ed46869
KH
2560 }
2561 break;
2562 }
2563 }
df7492f9
KH
2564 no_more_source:
2565 if (!mask_iso)
2566 {
2567 *mask &= ~CATEGORY_MASK_ISO;
2568 return 0;
2569 }
2570 if (!mask_found)
2571 return 0;
89528eb3
KH
2572 *mask &= ~CATEGORY_MASK_ISO;
2573 *mask |= mask_iso & mask_found;
df7492f9
KH
2574 if (! mask_8bit_found)
2575 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2576 return 1;
4ed46869
KH
2577}
2578
4ed46869
KH
2579
2580/* Set designation state into CODING. */
df7492f9
KH
2581#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2582 do { \
2583 int id, prev; \
2584 \
2585 if (final < '0' || final >= 128 \
2586 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2587 || !SAFE_CHARSET_P (coding, id)) \
2588 { \
2589 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2590 goto invalid_code; \
2591 } \
2592 prev = CODING_ISO_DESIGNATION (coding, reg); \
bf16eb23
KH
2593 if (id == charset_jisx0201_roman) \
2594 { \
2595 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2596 id = charset_ascii; \
2597 } \
2598 else if (id == charset_jisx0208_1978) \
2599 { \
2600 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2601 id = charset_jisx0208; \
2602 } \
df7492f9
KH
2603 CODING_ISO_DESIGNATION (coding, reg) = id; \
2604 /* If there was an invalid designation to REG previously, and this \
2605 designation is ASCII to REG, we should keep this designation \
2606 sequence. */ \
2607 if (prev == -2 && id == charset_ascii) \
2608 goto invalid_code; \
4ed46869
KH
2609 } while (0)
2610
d46c5b12 2611
df7492f9
KH
2612#define MAYBE_FINISH_COMPOSITION() \
2613 do { \
2614 int i; \
2615 if (composition_state == COMPOSING_NO) \
2616 break; \
2617 /* It is assured that we have enough room for producing \
2618 characters stored in the table `components'. */ \
2619 if (charbuf + component_idx > charbuf_end) \
2620 goto no_more_source; \
2621 composition_state = COMPOSING_NO; \
2622 if (method == COMPOSITION_RELATIVE \
2623 || method == COMPOSITION_WITH_ALTCHARS) \
2624 { \
2625 for (i = 0; i < component_idx; i++) \
2626 *charbuf++ = components[i]; \
2627 char_offset += component_idx; \
2628 } \
2629 else \
2630 { \
2631 for (i = 0; i < component_idx; i += 2) \
2632 *charbuf++ = components[i]; \
2633 char_offset += (component_idx / 2) + 1; \
2634 } \
2635 } while (0)
2636
d46c5b12 2637
aa72b389
KH
2638/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2639 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2640 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2641 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2642 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2643 */
ec6d2bb8 2644
df7492f9
KH
2645#define DECODE_COMPOSITION_START(c1) \
2646 do { \
2647 if (c1 == '0' \
781d7a48 2648 && composition_state == COMPOSING_COMPONENT_RULE) \
df7492f9
KH
2649 { \
2650 component_len = component_idx; \
2651 composition_state = COMPOSING_CHAR; \
2652 } \
2653 else \
2654 { \
2655 unsigned char *p; \
2656 \
2657 MAYBE_FINISH_COMPOSITION (); \
2658 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2659 goto no_more_source; \
2660 for (p = src; p < src_end - 1; p++) \
2661 if (*p == ISO_CODE_ESC && p[1] == '1') \
2662 break; \
2663 if (p == src_end - 1) \
2664 { \
2665 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2666 goto invalid_code; \
2667 goto no_more_source; \
2668 } \
2669 \
2670 /* This is surely the start of a composition. */ \
2671 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2672 : c1 == '2' ? COMPOSITION_WITH_RULE \
2673 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2674 : COMPOSITION_WITH_RULE_ALTCHARS); \
2675 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2676 : COMPOSING_COMPONENT_CHAR); \
2677 component_idx = component_len = 0; \
2678 } \
ec6d2bb8
KH
2679 } while (0)
2680
ec6d2bb8 2681
df7492f9
KH
2682/* Handle compositoin end sequence ESC 1. */
2683
2684#define DECODE_COMPOSITION_END() \
ec6d2bb8 2685 do { \
df7492f9
KH
2686 int nchars = (component_len > 0 ? component_idx - component_len \
2687 : method == COMPOSITION_RELATIVE ? component_idx \
2688 : (component_idx + 1) / 2); \
2689 int i; \
2690 int *saved_charbuf = charbuf; \
2691 \
2692 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2693 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 2694 { \
df7492f9
KH
2695 if (component_len == 0) \
2696 for (i = 0; i < component_idx; i++) \
2697 *charbuf++ = components[i]; \
2698 else \
2699 for (i = 0; i < component_len; i++) \
2700 *charbuf++ = components[i]; \
2701 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 2702 } \
df7492f9
KH
2703 if (method == COMPOSITION_WITH_RULE) \
2704 for (i = 0; i < component_idx; i += 2, char_offset++) \
2705 *charbuf++ = components[i]; \
ec6d2bb8 2706 else \
df7492f9
KH
2707 for (i = component_len; i < component_idx; i++, char_offset++) \
2708 *charbuf++ = components[i]; \
2709 coding->annotated = 1; \
2710 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
2711 } while (0)
2712
df7492f9 2713
ec6d2bb8
KH
2714/* Decode a composition rule from the byte C1 (and maybe one more byte
2715 from SRC) and store one encoded composition rule in
2716 coding->cmp_data. */
2717
2718#define DECODE_COMPOSITION_RULE(c1) \
2719 do { \
ec6d2bb8
KH
2720 (c1) -= 32; \
2721 if (c1 < 81) /* old format (before ver.21) */ \
2722 { \
2723 int gref = (c1) / 9; \
2724 int nref = (c1) % 9; \
2725 if (gref == 4) gref = 10; \
2726 if (nref == 4) nref = 10; \
df7492f9 2727 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
ec6d2bb8 2728 } \
b73bfc1c 2729 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
2730 { \
2731 ONE_MORE_BYTE (c2); \
df7492f9 2732 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
ec6d2bb8 2733 } \
df7492f9
KH
2734 else \
2735 c1 = 0; \
ec6d2bb8 2736 } while (0)
88993dfd 2737
d46c5b12 2738
4ed46869
KH
2739/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2740
b73bfc1c 2741static void
df7492f9 2742decode_coding_iso_2022 (coding)
4ed46869 2743 struct coding_system *coding;
4ed46869 2744{
df7492f9
KH
2745 unsigned char *src = coding->source + coding->consumed;
2746 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2747 unsigned char *src_base;
df7492f9
KH
2748 int *charbuf = coding->charbuf;
2749 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2750 int consumed_chars = 0, consumed_chars_base;
2751 int char_offset = 0;
2752 int multibytep = coding->src_multibyte;
2753 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2754 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2755 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2756 struct charset *charset;
2757 int c;
2758 /* For handling composition sequence. */
2759#define COMPOSING_NO 0
2760#define COMPOSING_CHAR 1
2761#define COMPOSING_RULE 2
2762#define COMPOSING_COMPONENT_CHAR 3
2763#define COMPOSING_COMPONENT_RULE 4
2764
2765 int composition_state = COMPOSING_NO;
2766 enum composition_method method;
2767 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2768 int component_idx;
2769 int component_len;
2770 Lisp_Object attrs, eol_type, charset_list;
2771
2772 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2773 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2774
2775 while (1)
4ed46869 2776 {
b73bfc1c
KH
2777 int c1, c2;
2778
2779 src_base = src;
df7492f9
KH
2780 consumed_chars_base = consumed_chars;
2781
2782 if (charbuf >= charbuf_end)
2783 break;
2784
b73bfc1c 2785 ONE_MORE_BYTE (c1);
4ed46869 2786
ec6d2bb8 2787 /* We produce no character or one character. */
4ed46869
KH
2788 switch (iso_code_class [c1])
2789 {
2790 case ISO_0x20_or_0x7F:
df7492f9 2791 if (composition_state != COMPOSING_NO)
ec6d2bb8 2792 {
df7492f9
KH
2793 if (composition_state == COMPOSING_RULE
2794 || composition_state == COMPOSING_COMPONENT_RULE)
2795 {
2796 DECODE_COMPOSITION_RULE (c1);
2797 components[component_idx++] = c1;
2798 composition_state--;
2799 continue;
2800 }
ec6d2bb8 2801 }
df7492f9
KH
2802 if (charset_id_0 < 0
2803 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2804 /* This is SPACE or DEL. */
2805 charset = CHARSET_FROM_ID (charset_ascii);
2806 else
2807 charset = CHARSET_FROM_ID (charset_id_0);
2808 break;
4ed46869
KH
2809
2810 case ISO_graphic_plane_0:
781d7a48 2811 if (composition_state != COMPOSING_NO)
b73bfc1c 2812 {
781d7a48
KH
2813 if (composition_state == COMPOSING_RULE
2814 || composition_state == COMPOSING_COMPONENT_RULE)
2815 {
2816 DECODE_COMPOSITION_RULE (c1);
2817 components[component_idx++] = c1;
2818 composition_state--;
2819 continue;
2820 }
b73bfc1c 2821 }
df7492f9 2822 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2823 break;
2824
2825 case ISO_0xA0_or_0xFF:
df7492f9
KH
2826 if (charset_id_1 < 0
2827 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2828 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2829 goto invalid_code;
4ed46869
KH
2830 /* This is a graphic character, we fall down ... */
2831
2832 case ISO_graphic_plane_1:
df7492f9
KH
2833 if (charset_id_1 < 0)
2834 goto invalid_code;
2835 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2836 break;
2837
2838 case ISO_carriage_return:
df7492f9 2839 if (c1 == '\r')
4ed46869 2840 {
df7492f9 2841 if (EQ (eol_type, Qdos))
4ed46869 2842 {
df7492f9
KH
2843 if (src == src_end)
2844 goto no_more_source;
2845 if (*src == '\n')
2846 ONE_MORE_BYTE (c1);
4ed46869 2847 }
df7492f9
KH
2848 else if (EQ (eol_type, Qmac))
2849 c1 = '\n';
4ed46869 2850 }
df7492f9
KH
2851 /* fall through */
2852
2853 case ISO_control_0:
2854 MAYBE_FINISH_COMPOSITION ();
2855 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2856 break;
2857
df7492f9
KH
2858 case ISO_control_1:
2859 MAYBE_FINISH_COMPOSITION ();
2860 goto invalid_code;
2861
4ed46869 2862 case ISO_shift_out:
df7492f9
KH
2863 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2864 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2865 goto invalid_code;
2866 CODING_ISO_INVOCATION (coding, 0) = 1;
2867 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2868 continue;
4ed46869
KH
2869
2870 case ISO_shift_in:
df7492f9
KH
2871 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2872 goto invalid_code;
2873 CODING_ISO_INVOCATION (coding, 0) = 0;
2874 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2875 continue;
4ed46869
KH
2876
2877 case ISO_single_shift_2_7:
2878 case ISO_single_shift_2:
df7492f9
KH
2879 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2880 goto invalid_code;
4ed46869
KH
2881 /* SS2 is handled as an escape sequence of ESC 'N' */
2882 c1 = 'N';
2883 goto label_escape_sequence;
2884
2885 case ISO_single_shift_3:
df7492f9
KH
2886 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2887 goto invalid_code;
4ed46869
KH
2888 /* SS2 is handled as an escape sequence of ESC 'O' */
2889 c1 = 'O';
2890 goto label_escape_sequence;
2891
2892 case ISO_control_sequence_introducer:
2893 /* CSI is handled as an escape sequence of ESC '[' ... */
2894 c1 = '[';
2895 goto label_escape_sequence;
2896
2897 case ISO_escape:
2898 ONE_MORE_BYTE (c1);
2899 label_escape_sequence:
df7492f9 2900 /* Escape sequences handled here are invocation,
4ed46869
KH
2901 designation, direction specification, and character
2902 composition specification. */
2903 switch (c1)
2904 {
2905 case '&': /* revision of following character set */
2906 ONE_MORE_BYTE (c1);
2907 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2908 goto invalid_code;
4ed46869
KH
2909 ONE_MORE_BYTE (c1);
2910 if (c1 != ISO_CODE_ESC)
df7492f9 2911 goto invalid_code;
4ed46869
KH
2912 ONE_MORE_BYTE (c1);
2913 goto label_escape_sequence;
2914
2915 case '$': /* designation of 2-byte character set */
df7492f9
KH
2916 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2917 goto invalid_code;
4ed46869
KH
2918 ONE_MORE_BYTE (c1);
2919 if (c1 >= '@' && c1 <= 'B')
2920 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2921 or JISX0208.1980 */
df7492f9 2922 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2923 }
2924 else if (c1 >= 0x28 && c1 <= 0x2B)
2925 { /* designation of DIMENSION2_CHARS94 character set */
2926 ONE_MORE_BYTE (c2);
df7492f9 2927 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2928 }
2929 else if (c1 >= 0x2C && c1 <= 0x2F)
2930 { /* designation of DIMENSION2_CHARS96 character set */
2931 ONE_MORE_BYTE (c2);
df7492f9 2932 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2933 }
2934 else
df7492f9 2935 goto invalid_code;
b73bfc1c 2936 /* We must update these variables now. */
df7492f9
KH
2937 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2938 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2939 continue;
4ed46869
KH
2940
2941 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2942 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2943 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2944 goto invalid_code;
2945 CODING_ISO_INVOCATION (coding, 0) = 2;
2946 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2947 continue;
4ed46869
KH
2948
2949 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2950 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2951 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2952 goto invalid_code;
2953 CODING_ISO_INVOCATION (coding, 0) = 3;
2954 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2955 continue;
4ed46869
KH
2956
2957 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2958 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2959 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2960 goto invalid_code;
2961 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 2962 ONE_MORE_BYTE (c1);
e7046a18 2963 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2964 goto invalid_code;
4ed46869
KH
2965 break;
2966
2967 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
2968 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2969 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2970 goto invalid_code;
2971 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 2972 ONE_MORE_BYTE (c1);
e7046a18 2973 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2974 goto invalid_code;
4ed46869
KH
2975 break;
2976
ec6d2bb8 2977 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
2978 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2979 goto invalid_code;
ec6d2bb8 2980 DECODE_COMPOSITION_START (c1);
b73bfc1c 2981 continue;
4ed46869 2982
ec6d2bb8 2983 case '1': /* end composition */
df7492f9
KH
2984 if (composition_state == COMPOSING_NO)
2985 goto invalid_code;
2986 DECODE_COMPOSITION_END ();
b73bfc1c 2987 continue;
4ed46869
KH
2988
2989 case '[': /* specification of direction */
df7492f9
KH
2990 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2991 goto invalid_code;
4ed46869 2992 /* For the moment, nested direction is not supported.
d46c5b12 2993 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 2994 left-to-right, and nozero means right-to-left. */
4ed46869
KH
2995 ONE_MORE_BYTE (c1);
2996 switch (c1)
2997 {
2998 case ']': /* end of the current direction */
d46c5b12 2999 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3000
3001 case '0': /* end of the current direction */
3002 case '1': /* start of left-to-right direction */
3003 ONE_MORE_BYTE (c1);
3004 if (c1 == ']')
d46c5b12 3005 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3006 else
df7492f9 3007 goto invalid_code;
4ed46869
KH
3008 break;
3009
3010 case '2': /* start of right-to-left direction */
3011 ONE_MORE_BYTE (c1);
3012 if (c1 == ']')
d46c5b12 3013 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3014 else
df7492f9 3015 goto invalid_code;
4ed46869
KH
3016 break;
3017
3018 default:
df7492f9 3019 goto invalid_code;
4ed46869 3020 }
b73bfc1c 3021 continue;
4ed46869
KH
3022
3023 default:
df7492f9
KH
3024 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3025 goto invalid_code;
4ed46869
KH
3026 if (c1 >= 0x28 && c1 <= 0x2B)
3027 { /* designation of DIMENSION1_CHARS94 character set */
3028 ONE_MORE_BYTE (c2);
df7492f9 3029 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3030 }
3031 else if (c1 >= 0x2C && c1 <= 0x2F)
3032 { /* designation of DIMENSION1_CHARS96 character set */
3033 ONE_MORE_BYTE (c2);
df7492f9 3034 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3035 }
3036 else
df7492f9 3037 goto invalid_code;
b73bfc1c 3038 /* We must update these variables now. */
df7492f9
KH
3039 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3040 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3041 continue;
4ed46869 3042 }
b73bfc1c 3043 }
4ed46869 3044
b73bfc1c 3045 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3046 Produce a decoded character while getting 2nd position code
3047 C2 if necessary. */
3048 c1 &= 0x7F;
3049 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3050 {
3051 ONE_MORE_BYTE (c2);
df7492f9 3052 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3053 /* C2 is not in a valid range. */
df7492f9
KH
3054 goto invalid_code;
3055 c1 = (c1 << 8) | (c2 & 0x7F);
3056 if (CHARSET_DIMENSION (charset) > 2)
3057 {
3058 ONE_MORE_BYTE (c2);
3059 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3060 /* C2 is not in a valid range. */
3061 goto invalid_code;
3062 c1 = (c1 << 8) | (c2 & 0x7F);
3063 }
3064 }
3065
3066 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3067 if (c < 0)
3068 {
3069 MAYBE_FINISH_COMPOSITION ();
3070 for (; src_base < src; src_base++, char_offset++)
3071 {
3072 if (ASCII_BYTE_P (*src_base))
3073 *charbuf++ = *src_base;
3074 else
3075 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3076 }
3077 }
3078 else if (composition_state == COMPOSING_NO)
3079 {
3080 *charbuf++ = c;
3081 char_offset++;
4ed46869 3082 }
df7492f9 3083 else
781d7a48
KH
3084 {
3085 components[component_idx++] = c;
3086 if (method == COMPOSITION_WITH_RULE
3087 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3088 && composition_state == COMPOSING_COMPONENT_CHAR))
3089 composition_state++;
3090 }
4ed46869
KH
3091 continue;
3092
df7492f9
KH
3093 invalid_code:
3094 MAYBE_FINISH_COMPOSITION ();
4ed46869 3095 src = src_base;
df7492f9
KH
3096 consumed_chars = consumed_chars_base;
3097 ONE_MORE_BYTE (c);
3098 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3099 coding->errors++;
4ed46869 3100 }
fb88bf2d 3101
df7492f9
KH
3102 no_more_source:
3103 coding->consumed_char += consumed_chars_base;
3104 coding->consumed = src_base - coding->source;
3105 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3106}
3107
b73bfc1c 3108
f4dee582 3109/* ISO2022 encoding stuff. */
4ed46869
KH
3110
3111/*
f4dee582 3112 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3113 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3114 variant has the following specifications:
df7492f9 3115 1. Initial designation to G0 thru G3.
4ed46869
KH
3116 2. Allows short-form designation?
3117 3. ASCII should be designated to G0 before control characters?
3118 4. ASCII should be designated to G0 at end of line?
3119 5. 7-bit environment or 8-bit environment?
3120 6. Use locking-shift?
3121 7. Use Single-shift?
3122 And the following two are only for Japanese:
3123 8. Use ASCII in place of JIS0201-1976-Roman?
3124 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3125 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3126 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3127 details.
4ed46869
KH
3128*/
3129
3130/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
3131 register REG at DST, and increment DST. If <final-char> of CHARSET is
3132 '@', 'A', or 'B' and the coding system CODING allows, produce
3133 designation sequence of short-form. */
4ed46869
KH
3134
3135#define ENCODE_DESIGNATION(charset, reg, coding) \
3136 do { \
df7492f9 3137 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
4ed46869
KH
3138 char *intermediate_char_94 = "()*+"; \
3139 char *intermediate_char_96 = ",-./"; \
df7492f9
KH
3140 int revision = -1; \
3141 int c; \
3142 \
3143 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
c197f191 3144 revision = CHARSET_ISO_REVISION (charset); \
df7492f9
KH
3145 \
3146 if (revision >= 0) \
70c22245 3147 { \
df7492f9
KH
3148 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3149 EMIT_ONE_BYTE ('@' + revision); \
4ed46869 3150 } \
df7492f9 3151 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4ed46869
KH
3152 if (CHARSET_DIMENSION (charset) == 1) \
3153 { \
df7492f9
KH
3154 if (! CHARSET_ISO_CHARS_96 (charset)) \
3155 c = intermediate_char_94[reg]; \
4ed46869 3156 else \
df7492f9
KH
3157 c = intermediate_char_96[reg]; \
3158 EMIT_ONE_ASCII_BYTE (c); \
4ed46869
KH
3159 } \
3160 else \
3161 { \
df7492f9
KH
3162 EMIT_ONE_ASCII_BYTE ('$'); \
3163 if (! CHARSET_ISO_CHARS_96 (charset)) \
4ed46869 3164 { \
df7492f9 3165 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
b73bfc1c
KH
3166 || reg != 0 \
3167 || final_char < '@' || final_char > 'B') \
df7492f9 3168 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
4ed46869
KH
3169 } \
3170 else \
df7492f9 3171 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
4ed46869 3172 } \
df7492f9
KH
3173 EMIT_ONE_ASCII_BYTE (final_char); \
3174 \
3175 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
4ed46869
KH
3176 } while (0)
3177
df7492f9 3178
4ed46869
KH
3179/* The following two macros produce codes (control character or escape
3180 sequence) for ISO2022 single-shift functions (single-shift-2 and
3181 single-shift-3). */
3182
df7492f9
KH
3183#define ENCODE_SINGLE_SHIFT_2 \
3184 do { \
3185 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3186 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3187 else \
3188 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3189 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3190 } while (0)
3191
df7492f9
KH
3192
3193#define ENCODE_SINGLE_SHIFT_3 \
3194 do { \
3195 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3196 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3197 else \
3198 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3199 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3200 } while (0)
3201
df7492f9 3202
4ed46869
KH
3203/* The following four macros produce codes (control character or
3204 escape sequence) for ISO2022 locking-shift functions (shift-in,
3205 shift-out, locking-shift-2, and locking-shift-3). */
3206
df7492f9
KH
3207#define ENCODE_SHIFT_IN \
3208 do { \
3209 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3210 CODING_ISO_INVOCATION (coding, 0) = 0; \
4ed46869
KH
3211 } while (0)
3212
df7492f9
KH
3213
3214#define ENCODE_SHIFT_OUT \
3215 do { \
3216 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3217 CODING_ISO_INVOCATION (coding, 0) = 1; \
4ed46869
KH
3218 } while (0)
3219
df7492f9
KH
3220
3221#define ENCODE_LOCKING_SHIFT_2 \
3222 do { \
3223 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3224 CODING_ISO_INVOCATION (coding, 0) = 2; \
4ed46869
KH
3225 } while (0)
3226
df7492f9
KH
3227
3228#define ENCODE_LOCKING_SHIFT_3 \
3229 do { \
3230 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3231 CODING_ISO_INVOCATION (coding, 0) = 3; \
4ed46869
KH
3232 } while (0)
3233
df7492f9 3234
f4dee582
RS
3235/* Produce codes for a DIMENSION1 character whose character set is
3236 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
3237 sequences are also produced in advance if necessary. */
3238
6e85d753
KH
3239#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3240 do { \
df7492f9 3241 int id = CHARSET_ID (charset); \
bf16eb23
KH
3242 \
3243 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3244 && id == charset_ascii) \
3245 { \
3246 id = charset_jisx0201_roman; \
3247 charset = CHARSET_FROM_ID (id); \
3248 } \
3249 \
df7492f9 3250 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3251 { \
df7492f9
KH
3252 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3253 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753 3254 else \
df7492f9
KH
3255 EMIT_ONE_BYTE (c1 | 0x80); \
3256 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3257 break; \
3258 } \
df7492f9 3259 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3260 { \
df7492f9 3261 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753
KH
3262 break; \
3263 } \
df7492f9 3264 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3265 { \
df7492f9 3266 EMIT_ONE_BYTE (c1 | 0x80); \
6e85d753
KH
3267 break; \
3268 } \
6e85d753
KH
3269 else \
3270 /* Since CHARSET is not yet invoked to any graphic planes, we \
3271 must invoke it, or, at first, designate it to some graphic \
3272 register. Then repeat the loop to actually produce the \
3273 character. */ \
df7492f9
KH
3274 dst = encode_invocation_designation (charset, coding, dst, \
3275 &produced_chars); \
4ed46869
KH
3276 } while (1)
3277
df7492f9 3278
f4dee582
RS
3279/* Produce codes for a DIMENSION2 character whose character set is
3280 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3281 invocation codes are also produced in advance if necessary. */
3282
6e85d753
KH
3283#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3284 do { \
df7492f9 3285 int id = CHARSET_ID (charset); \
bf16eb23
KH
3286 \
3287 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3288 && id == charset_jisx0208) \
3289 { \
3290 id = charset_jisx0208_1978; \
3291 charset = CHARSET_FROM_ID (id); \
3292 } \
3293 \
df7492f9 3294 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3295 { \
df7492f9
KH
3296 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3297 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753 3298 else \
df7492f9
KH
3299 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3300 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3301 break; \
3302 } \
df7492f9 3303 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3304 { \
df7492f9 3305 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753
KH
3306 break; \
3307 } \
df7492f9 3308 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3309 { \
df7492f9 3310 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
6e85d753
KH
3311 break; \
3312 } \
6e85d753
KH
3313 else \
3314 /* Since CHARSET is not yet invoked to any graphic planes, we \
3315 must invoke it, or, at first, designate it to some graphic \
3316 register. Then repeat the loop to actually produce the \
3317 character. */ \
df7492f9
KH
3318 dst = encode_invocation_designation (charset, coding, dst, \
3319 &produced_chars); \
4ed46869
KH
3320 } while (1)
3321
05e6f5dc 3322
df7492f9
KH
3323#define ENCODE_ISO_CHARACTER(charset, c) \
3324 do { \
3325 int code = ENCODE_CHAR ((charset),(c)); \
3326 \
3327 if (CHARSET_DIMENSION (charset) == 1) \
3328 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3329 else \
3330 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
84fbb8a0 3331 } while (0)
bdd9fb48 3332
05e6f5dc 3333
4ed46869 3334/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3335 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3336 Return new DST. */
3337
3338unsigned char *
df7492f9
KH
3339encode_invocation_designation (charset, coding, dst, p_nchars)
3340 struct charset *charset;
4ed46869
KH
3341 struct coding_system *coding;
3342 unsigned char *dst;
df7492f9 3343 int *p_nchars;
4ed46869 3344{
df7492f9
KH
3345 int multibytep = coding->dst_multibyte;
3346 int produced_chars = *p_nchars;
4ed46869 3347 int reg; /* graphic register number */
df7492f9 3348 int id = CHARSET_ID (charset);
4ed46869
KH
3349
3350 /* At first, check designations. */
3351 for (reg = 0; reg < 4; reg++)
df7492f9 3352 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3353 break;
3354
3355 if (reg >= 4)
3356 {
3357 /* CHARSET is not yet designated to any graphic registers. */
3358 /* At first check the requested designation. */
df7492f9
KH
3359 reg = CODING_ISO_REQUEST (coding, id);
3360 if (reg < 0)
1ba9e4ab
KH
3361 /* Since CHARSET requests no special designation, designate it
3362 to graphic register 0. */
4ed46869
KH
3363 reg = 0;
3364
3365 ENCODE_DESIGNATION (charset, reg, coding);
3366 }
3367
df7492f9
KH
3368 if (CODING_ISO_INVOCATION (coding, 0) != reg
3369 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3370 {
3371 /* Since the graphic register REG is not invoked to any graphic
3372 planes, invoke it to graphic plane 0. */
3373 switch (reg)
3374 {
3375 case 0: /* graphic register 0 */
3376 ENCODE_SHIFT_IN;
3377 break;
3378
3379 case 1: /* graphic register 1 */
3380 ENCODE_SHIFT_OUT;
3381 break;
3382
3383 case 2: /* graphic register 2 */
df7492f9 3384 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3385 ENCODE_SINGLE_SHIFT_2;
3386 else
3387 ENCODE_LOCKING_SHIFT_2;
3388 break;
3389
3390 case 3: /* graphic register 3 */
df7492f9 3391 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3392 ENCODE_SINGLE_SHIFT_3;
3393 else
3394 ENCODE_LOCKING_SHIFT_3;
3395 break;
3396 }
3397 }
b73bfc1c 3398
df7492f9 3399 *p_nchars = produced_chars;
4ed46869
KH
3400 return dst;
3401}
3402
df7492f9
KH
3403/* The following three macros produce codes for indicating direction
3404 of text. */
3405#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
ec6d2bb8 3406 do { \
df7492f9
KH
3407 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3408 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
ec6d2bb8 3409 else \
df7492f9 3410 EMIT_ONE_BYTE (ISO_CODE_CSI); \
ec6d2bb8
KH
3411 } while (0)
3412
ec6d2bb8 3413
df7492f9
KH
3414#define ENCODE_DIRECTION_R2L() \
3415 do { \
3416 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3417 EMIT_TWO_ASCII_BYTES ('2', ']'); \
ec6d2bb8
KH
3418 } while (0)
3419
ec6d2bb8 3420
df7492f9 3421#define ENCODE_DIRECTION_L2R() \
ec6d2bb8 3422 do { \
df7492f9
KH
3423 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3424 EMIT_TWO_ASCII_BYTES ('0', ']'); \
4ed46869
KH
3425 } while (0)
3426
4ed46869
KH
3427
3428/* Produce codes for designation and invocation to reset the graphic
3429 planes and registers to initial state. */
df7492f9
KH
3430#define ENCODE_RESET_PLANE_AND_REGISTER() \
3431 do { \
3432 int reg; \
3433 struct charset *charset; \
3434 \
3435 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3436 ENCODE_SHIFT_IN; \
3437 for (reg = 0; reg < 4; reg++) \
3438 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3439 && (CODING_ISO_DESIGNATION (coding, reg) \
3440 != CODING_ISO_INITIAL (coding, reg))) \
3441 { \
3442 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3443 ENCODE_DESIGNATION (charset, reg, coding); \
3444 } \
4ed46869
KH
3445 } while (0)
3446
df7492f9 3447
bdd9fb48 3448/* Produce designation sequences of charsets in the line started from
b73bfc1c 3449 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3450
3451 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3452 find all the necessary designations. */
3453
b73bfc1c 3454static unsigned char *
df7492f9 3455encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3456 struct coding_system *coding;
df7492f9
KH
3457 int *charbuf, *charbuf_end;
3458 unsigned char *dst;
e0e989f6 3459{
df7492f9 3460 struct charset *charset;
bdd9fb48
KH
3461 /* Table of charsets to be designated to each graphic register. */
3462 int r[4];
df7492f9
KH
3463 int c, found = 0, reg;
3464 int produced_chars = 0;
3465 int multibytep = coding->dst_multibyte;
3466 Lisp_Object attrs;
3467 Lisp_Object charset_list;
3468
3469 attrs = CODING_ID_ATTRS (coding->id);
3470 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3471 if (EQ (charset_list, Qiso_2022))
3472 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3473
3474 for (reg = 0; reg < 4; reg++)
3475 r[reg] = -1;
3476
b73bfc1c 3477 while (found < 4)
e0e989f6 3478 {
df7492f9
KH
3479 int id;
3480
3481 c = *charbuf++;
b73bfc1c
KH
3482 if (c == '\n')
3483 break;
df7492f9
KH
3484 charset = char_charset (c, charset_list, NULL);
3485 id = CHARSET_ID (charset);
3486 reg = CODING_ISO_REQUEST (coding, id);
3487 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3488 {
3489 found++;
df7492f9 3490 r[reg] = id;
bdd9fb48 3491 }
bdd9fb48
KH
3492 }
3493
3494 if (found)
3495 {
3496 for (reg = 0; reg < 4; reg++)
3497 if (r[reg] >= 0
df7492f9
KH
3498 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3499 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3500 }
b73bfc1c
KH
3501
3502 return dst;
e0e989f6
KH
3503}
3504
4ed46869
KH
3505/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3506
df7492f9
KH
3507static int
3508encode_coding_iso_2022 (coding)
4ed46869 3509 struct coding_system *coding;
4ed46869 3510{
df7492f9
KH
3511 int multibytep = coding->dst_multibyte;
3512 int *charbuf = coding->charbuf;
3513 int *charbuf_end = charbuf + coding->charbuf_used;
3514 unsigned char *dst = coding->destination + coding->produced;
3515 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3516 int safe_room = 16;
3517 int bol_designation
3518 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3519 && CODING_ISO_BOL (coding));
3520 int produced_chars = 0;
3521 Lisp_Object attrs, eol_type, charset_list;
3522 int ascii_compatible;
b73bfc1c 3523 int c;
05e6f5dc 3524
df7492f9 3525 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4
KH
3526 setup_iso_safe_charsets (attrs);
3527 coding->safe_charsets
3528 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3529
df7492f9 3530 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3531
df7492f9 3532 while (charbuf < charbuf_end)
4ed46869 3533 {
df7492f9 3534 ASSURE_DESTINATION (safe_room);
b73bfc1c 3535
df7492f9 3536 if (bol_designation)
b73bfc1c 3537 {
df7492f9 3538 unsigned char *dst_prev = dst;
4ed46869 3539
bdd9fb48 3540 /* We have to produce designation sequences if any now. */
df7492f9
KH
3541 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3542 bol_designation = 0;
3543 /* We are sure that designation sequences are all ASCII bytes. */
3544 produced_chars += dst - dst_prev;
4ed46869 3545 }
ec6d2bb8 3546
df7492f9 3547 c = *charbuf++;
4ed46869 3548
b73bfc1c
KH
3549 /* Now encode the character C. */
3550 if (c < 0x20 || c == 0x7F)
3551 {
df7492f9
KH
3552 if (c == '\n'
3553 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3554 {
df7492f9
KH
3555 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3556 ENCODE_RESET_PLANE_AND_REGISTER ();
3557 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3558 {
df7492f9
KH
3559 int i;
3560
3561 for (i = 0; i < 4; i++)
3562 CODING_ISO_DESIGNATION (coding, i)
3563 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3564 }
df7492f9
KH
3565 bol_designation
3566 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3567 }
df7492f9
KH
3568 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3569 ENCODE_RESET_PLANE_AND_REGISTER ();
3570 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3571 }
df7492f9 3572 else if (ASCII_CHAR_P (c))
88993dfd 3573 {
df7492f9
KH
3574 if (ascii_compatible)
3575 EMIT_ONE_ASCII_BYTE (c);
3576 else
bf16eb23
KH
3577 {
3578 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3579 ENCODE_ISO_CHARACTER (charset, c);
3580 }
88993dfd 3581 }
16eafb5d
KH
3582 else if (CHAR_BYTE8_P (c))
3583 {
3584 c = CHAR_TO_BYTE8 (c);
3585 EMIT_ONE_BYTE (c);
3586 }
b73bfc1c 3587 else
df7492f9
KH
3588 {
3589 struct charset *charset = char_charset (c, charset_list, NULL);
b73bfc1c 3590
df7492f9
KH
3591 if (!charset)
3592 {
41cbe562
KH
3593 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3594 {
3595 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3596 charset = CHARSET_FROM_ID (charset_ascii);
3597 }
3598 else
3599 {
3600 c = coding->default_char;
3601 charset = char_charset (c, charset_list, NULL);
3602 }
df7492f9
KH
3603 }
3604 ENCODE_ISO_CHARACTER (charset, c);
3605 }
84fbb8a0 3606 }
b73bfc1c 3607
df7492f9
KH
3608 if (coding->mode & CODING_MODE_LAST_BLOCK
3609 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3610 {
3611 ASSURE_DESTINATION (safe_room);
3612 ENCODE_RESET_PLANE_AND_REGISTER ();
3613 }
3614 coding->result = CODING_RESULT_SUCCESS;
3615 CODING_ISO_BOL (coding) = bol_designation;
3616 coding->produced_char += produced_chars;
3617 coding->produced = dst - coding->destination;
3618 return 0;
4ed46869
KH
3619}
3620
3621\f
df7492f9 3622/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3623
df7492f9 3624/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3625 quite widely. So, for the moment, Emacs supports them in the bare
3626 C code. But, in the future, they may be supported only by CCL. */
3627
3628/* SJIS is a coding system encoding three character sets: ASCII, right
3629 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3630 as is. A character of charset katakana-jisx0201 is encoded by
3631 "position-code + 0x80". A character of charset japanese-jisx0208
3632 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3633 so that it fit in the range below.
4ed46869
KH
3634
3635 --- CODE RANGE of SJIS ---
3636 (character set) (range)
3637 ASCII 0x00 .. 0x7F
df7492f9 3638 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3639 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3640 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3641 -------------------------------
3642
3643*/
3644
3645/* BIG5 is a coding system encoding two character sets: ASCII and
3646 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3647 character set and is encoded in two-byte.
4ed46869
KH
3648
3649 --- CODE RANGE of BIG5 ---
3650 (character set) (range)
3651 ASCII 0x00 .. 0x7F
3652 Big5 (1st byte) 0xA1 .. 0xFE
3653 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3654 --------------------------
3655
df7492f9 3656 */
4ed46869
KH
3657
3658/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3659 Check if a text is encoded in SJIS. If it is, return
df7492f9 3660 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3661
0a28aafb 3662static int
df7492f9
KH
3663detect_coding_sjis (coding, mask)
3664 struct coding_system *coding;
3665 int *mask;
4ed46869 3666{
df7492f9
KH
3667 unsigned char *src = coding->source, *src_base = src;
3668 unsigned char *src_end = coding->source + coding->src_bytes;
3669 int multibytep = coding->src_multibyte;
3670 int consumed_chars = 0;
3671 int found = 0;
b73bfc1c 3672 int c;
89528eb3 3673 int incomplete;
df7492f9
KH
3674
3675 /* A coding system of this category is always ASCII compatible. */
3676 src += coding->head_ascii;
4ed46869 3677
b73bfc1c 3678 while (1)
4ed46869 3679 {
89528eb3 3680 incomplete = 0;
df7492f9 3681 ONE_MORE_BYTE (c);
89528eb3 3682 incomplete = 1;
682169fe
KH
3683 if (c < 0x80)
3684 continue;
df7492f9 3685 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3686 {
df7492f9 3687 ONE_MORE_BYTE (c);
682169fe 3688 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9
KH
3689 break;
3690 found = 1;
4ed46869 3691 }
df7492f9
KH
3692 else if (c >= 0xA0 && c < 0xE0)
3693 found = 1;
3694 else
3695 break;
4ed46869 3696 }
df7492f9
KH
3697 *mask &= ~CATEGORY_MASK_SJIS;
3698 return 0;
3699
3700 no_more_source:
89528eb3
KH
3701 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3702 {
3703 *mask &= ~CATEGORY_MASK_SJIS;
3704 return 0;
3705 }
3706 return found;
4ed46869
KH
3707}
3708
3709/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3710 Check if a text is encoded in BIG5. If it is, return
df7492f9 3711 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3712
0a28aafb 3713static int
df7492f9
KH
3714detect_coding_big5 (coding, mask)
3715 struct coding_system *coding;
3716 int *mask;
4ed46869 3717{
df7492f9
KH
3718 unsigned char *src = coding->source, *src_base = src;
3719 unsigned char *src_end = coding->source + coding->src_bytes;
3720 int multibytep = coding->src_multibyte;
3721 int consumed_chars = 0;
3722 int found = 0;
b73bfc1c 3723 int c;
89528eb3 3724 int incomplete;
fa42c37f 3725
df7492f9
KH
3726 /* A coding system of this category is always ASCII compatible. */
3727 src += coding->head_ascii;
fa42c37f 3728
b73bfc1c 3729 while (1)
fa42c37f 3730 {
89528eb3 3731 incomplete = 0;
df7492f9 3732 ONE_MORE_BYTE (c);
89528eb3 3733 incomplete = 1;
df7492f9 3734 if (c < 0x80)
fa42c37f 3735 continue;
df7492f9 3736 if (c >= 0xA1)
fa42c37f 3737 {
df7492f9
KH
3738 ONE_MORE_BYTE (c);
3739 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3740 return 0;
df7492f9 3741 found = 1;
fa42c37f 3742 }
df7492f9
KH
3743 else
3744 break;
fa42c37f 3745 }
df7492f9 3746 *mask &= ~CATEGORY_MASK_BIG5;
fa42c37f 3747 return 0;
df7492f9
KH
3748
3749 no_more_source:
89528eb3
KH
3750 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3751 {
3752 *mask &= ~CATEGORY_MASK_BIG5;
3753 return 0;
3754 }
3755 return found;
fa42c37f
KH
3756}
3757
4ed46869
KH
3758/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3759 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3760
b73bfc1c 3761static void
df7492f9 3762decode_coding_sjis (coding)
4ed46869 3763 struct coding_system *coding;
4ed46869 3764{
df7492f9
KH
3765 unsigned char *src = coding->source + coding->consumed;
3766 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3767 unsigned char *src_base;
df7492f9
KH
3768 int *charbuf = coding->charbuf;
3769 int *charbuf_end = charbuf + coding->charbuf_size;
3770 int consumed_chars = 0, consumed_chars_base;
3771 int multibytep = coding->src_multibyte;
3772 struct charset *charset_roman, *charset_kanji, *charset_kana;
3773 Lisp_Object attrs, eol_type, charset_list, val;
a5d301df 3774
df7492f9
KH
3775 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3776
3777 val = charset_list;
3778 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3
KH
3779 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3780 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3781
b73bfc1c 3782 while (1)
4ed46869 3783 {
df7492f9 3784 int c, c1;
b73bfc1c
KH
3785
3786 src_base = src;
df7492f9
KH
3787 consumed_chars_base = consumed_chars;
3788
3789 if (charbuf >= charbuf_end)
3790 break;
3791
3792 ONE_MORE_BYTE (c);
b73bfc1c 3793
df7492f9 3794 if (c == '\r')
4ed46869 3795 {
df7492f9 3796 if (EQ (eol_type, Qdos))
4ed46869 3797 {
df7492f9
KH
3798 if (src == src_end)
3799 goto no_more_source;
3800 if (*src == '\n')
3801 ONE_MORE_BYTE (c);
4ed46869 3802 }
df7492f9
KH
3803 else if (EQ (eol_type, Qmac))
3804 c = '\n';
4ed46869 3805 }
54f78171 3806 else
df7492f9
KH
3807 {
3808 struct charset *charset;
3809
3810 if (c < 0x80)
3811 charset = charset_roman;
3812 else
4ed46869 3813 {
df7492f9
KH
3814 if (c >= 0xF0)
3815 goto invalid_code;
3816 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3817 {
54f78171 3818 /* SJIS -> JISX0208 */
df7492f9
KH
3819 ONE_MORE_BYTE (c1);
3820 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3821 goto invalid_code;
3822 c = (c << 8) | c1;
3823 SJIS_TO_JIS (c);
3824 charset = charset_kanji;
5e34de15 3825 }
fb88bf2d 3826 else
89528eb3
KH
3827 {
3828 /* SJIS -> JISX0201-Kana */
3829 c &= 0x7F;
3830 charset = charset_kana;
3831 }
df7492f9
KH
3832 }
3833 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3834 }
3835 *charbuf++ = c;
3836 continue;
3837
3838 invalid_code:
3839 src = src_base;
3840 consumed_chars = consumed_chars_base;
3841 ONE_MORE_BYTE (c);
3842 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3843 coding->errors++;
3844 }
3845
3846 no_more_source:
3847 coding->consumed_char += consumed_chars_base;
3848 coding->consumed = src_base - coding->source;
3849 coding->charbuf_used = charbuf - coding->charbuf;
3850}
3851
3852static void
3853decode_coding_big5 (coding)
3854 struct coding_system *coding;
3855{
3856 unsigned char *src = coding->source + coding->consumed;
3857 unsigned char *src_end = coding->source + coding->src_bytes;
3858 unsigned char *src_base;
3859 int *charbuf = coding->charbuf;
3860 int *charbuf_end = charbuf + coding->charbuf_size;
3861 int consumed_chars = 0, consumed_chars_base;
3862 int multibytep = coding->src_multibyte;
3863 struct charset *charset_roman, *charset_big5;
3864 Lisp_Object attrs, eol_type, charset_list, val;
3865
3866 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3867 val = charset_list;
3868 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3869 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3870
3871 while (1)
3872 {
3873 int c, c1;
3874
3875 src_base = src;
3876 consumed_chars_base = consumed_chars;
3877
3878 if (charbuf >= charbuf_end)
3879 break;
3880
3881 ONE_MORE_BYTE (c);
3882
3883 if (c == '\r')
3884 {
3885 if (EQ (eol_type, Qdos))
3886 {
3887 if (src == src_end)
3888 goto no_more_source;
3889 if (*src == '\n')
3890 ONE_MORE_BYTE (c);
4ed46869 3891 }
df7492f9
KH
3892 else if (EQ (eol_type, Qmac))
3893 c = '\n';
3894 }
3895 else
3896 {
3897 struct charset *charset;
3898 if (c < 0x80)
3899 charset = charset_roman;
fb88bf2d 3900 else
fb88bf2d 3901 {
54f78171 3902 /* BIG5 -> Big5 */
df7492f9
KH
3903 if (c < 0xA1 || c > 0xFE)
3904 goto invalid_code;
3905 ONE_MORE_BYTE (c1);
3906 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3907 goto invalid_code;
3908 c = c << 8 | c1;
3909 charset = charset_big5;
4ed46869 3910 }
df7492f9 3911 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 3912 }
4ed46869 3913
df7492f9 3914 *charbuf++ = c;
fb88bf2d
KH
3915 continue;
3916
df7492f9 3917 invalid_code:
4ed46869 3918 src = src_base;
df7492f9
KH
3919 consumed_chars = consumed_chars_base;
3920 ONE_MORE_BYTE (c);
3921 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3922 coding->errors++;
fb88bf2d 3923 }
d46c5b12 3924
df7492f9
KH
3925 no_more_source:
3926 coding->consumed_char += consumed_chars_base;
3927 coding->consumed = src_base - coding->source;
3928 coding->charbuf_used = charbuf - coding->charbuf;
3929}
3930
3931/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3932 This function can encode charsets `ascii', `katakana-jisx0201',
3933 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3934 are sure that all these charsets are registered as official charset
3935 (i.e. do not have extended leading-codes). Characters of other
3936 charsets are produced without any encoding. If SJIS_P is 1, encode
3937 SJIS text, else encode BIG5 text. */
3938
3939static int
3940encode_coding_sjis (coding)
3941 struct coding_system *coding;
3942{
3943 int multibytep = coding->dst_multibyte;
3944 int *charbuf = coding->charbuf;
3945 int *charbuf_end = charbuf + coding->charbuf_used;
3946 unsigned char *dst = coding->destination + coding->produced;
3947 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3948 int safe_room = 4;
3949 int produced_chars = 0;
3950 Lisp_Object attrs, eol_type, charset_list, val;
3951 int ascii_compatible;
3952 struct charset *charset_roman, *charset_kanji, *charset_kana;
3953 int c;
3954
3955 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3956 val = charset_list;
3957 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3958 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3959 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3960
3961 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3962
3963 while (charbuf < charbuf_end)
3964 {
3965 ASSURE_DESTINATION (safe_room);
3966 c = *charbuf++;
3967 /* Now encode the character C. */
3968 if (ASCII_CHAR_P (c) && ascii_compatible)
3969 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
3970 else if (CHAR_BYTE8_P (c))
3971 {
3972 c = CHAR_TO_BYTE8 (c);
3973 EMIT_ONE_BYTE (c);
3974 }
df7492f9
KH
3975 else
3976 {
3977 unsigned code;
3978 struct charset *charset = char_charset (c, charset_list, &code);
3979
3980 if (!charset)
3981 {
41cbe562
KH
3982 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3983 {
3984 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3985 charset = CHARSET_FROM_ID (charset_ascii);
3986 }
3987 else
3988 {
3989 c = coding->default_char;
3990 charset = char_charset (c, charset_list, &code);
3991 }
df7492f9
KH
3992 }
3993 if (code == CHARSET_INVALID_CODE (charset))
3994 abort ();
3995 if (charset == charset_kanji)
3996 {
3997 int c1, c2;
3998 JIS_TO_SJIS (code);
3999 c1 = code >> 8, c2 = code & 0xFF;
4000 EMIT_TWO_BYTES (c1, c2);
4001 }
4002 else if (charset == charset_kana)
4003 EMIT_ONE_BYTE (code | 0x80);
4004 else
4005 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4006 }
4007 }
4008 coding->result = CODING_RESULT_SUCCESS;
4009 coding->produced_char += produced_chars;
4010 coding->produced = dst - coding->destination;
4011 return 0;
4012}
4013
4014static int
4015encode_coding_big5 (coding)
4016 struct coding_system *coding;
4017{
4018 int multibytep = coding->dst_multibyte;
4019 int *charbuf = coding->charbuf;
4020 int *charbuf_end = charbuf + coding->charbuf_used;
4021 unsigned char *dst = coding->destination + coding->produced;
4022 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4023 int safe_room = 4;
4024 int produced_chars = 0;
4025 Lisp_Object attrs, eol_type, charset_list, val;
4026 int ascii_compatible;
4027 struct charset *charset_roman, *charset_big5;
4028 int c;
4029
4030 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4031 val = charset_list;
4032 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4033 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4034 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4035
4036 while (charbuf < charbuf_end)
4037 {
4038 ASSURE_DESTINATION (safe_room);
4039 c = *charbuf++;
4040 /* Now encode the character C. */
4041 if (ASCII_CHAR_P (c) && ascii_compatible)
4042 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4043 else if (CHAR_BYTE8_P (c))
4044 {
4045 c = CHAR_TO_BYTE8 (c);
4046 EMIT_ONE_BYTE (c);
4047 }
df7492f9
KH
4048 else
4049 {
4050 unsigned code;
4051 struct charset *charset = char_charset (c, charset_list, &code);
4052
4053 if (! charset)
4054 {
41cbe562
KH
4055 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4056 {
4057 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4058 charset = CHARSET_FROM_ID (charset_ascii);
4059 }
4060 else
4061 {
4062 c = coding->default_char;
4063 charset = char_charset (c, charset_list, &code);
4064 }
df7492f9
KH
4065 }
4066 if (code == CHARSET_INVALID_CODE (charset))
4067 abort ();
4068 if (charset == charset_big5)
4069 {
4070 int c1, c2;
4071
4072 c1 = code >> 8, c2 = code & 0xFF;
4073 EMIT_TWO_BYTES (c1, c2);
4074 }
4075 else
4076 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4077 }
4078 }
4079 coding->result = CODING_RESULT_SUCCESS;
4080 coding->produced_char += produced_chars;
4081 coding->produced = dst - coding->destination;
4082 return 0;
4083}
4084
4085\f
4086/*** 10. CCL handlers ***/
4087
4088/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4089 Check if a text is encoded in a coding system of which
4090 encoder/decoder are written in CCL program. If it is, return
4091 CATEGORY_MASK_CCL, else return 0. */
4092
4093static int
4094detect_coding_ccl (coding, mask)
4095 struct coding_system *coding;
4096 int *mask;
4097{
4098 unsigned char *src = coding->source, *src_base = src;
4099 unsigned char *src_end = coding->source + coding->src_bytes;
4100 int multibytep = coding->src_multibyte;
4101 int consumed_chars = 0;
4102 int found = 0;
4103 unsigned char *valids = CODING_CCL_VALIDS (coding);
4104 int head_ascii = coding->head_ascii;
4105 Lisp_Object attrs;
4106
4107 coding = &coding_categories[coding_category_ccl];
4108 attrs = CODING_ID_ATTRS (coding->id);
4109 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4110 src += head_ascii;
4111
4112 while (1)
4113 {
4114 int c;
4115 ONE_MORE_BYTE (c);
4116 if (! valids[c])
4117 break;
4118 if (!found && valids[c] > 1)
4119 found = 1;
4120 }
4121 *mask &= ~CATEGORY_MASK_CCL;
4122 return 0;
4123
4124 no_more_source:
89528eb3 4125 return found;
df7492f9
KH
4126}
4127
4128static void
4129decode_coding_ccl (coding)
4130 struct coding_system *coding;
4131{
7c78e542 4132 const unsigned char *src = coding->source + coding->consumed;
df7492f9
KH
4133 unsigned char *src_end = coding->source + coding->src_bytes;
4134 int *charbuf = coding->charbuf;
4135 int *charbuf_end = charbuf + coding->charbuf_size;
4136 int consumed_chars = 0;
4137 int multibytep = coding->src_multibyte;
4138 struct ccl_program ccl;
4139 int source_charbuf[1024];
4140 int source_byteidx[1024];
4141
4142 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4143
4144 while (src < src_end)
4145 {
7c78e542 4146 const unsigned char *p = src;
df7492f9
KH
4147 int *source, *source_end;
4148 int i = 0;
4149
4150 if (multibytep)
4151 while (i < 1024 && p < src_end)
4152 {
4153 source_byteidx[i] = p - src;
4154 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4155 }
4156 else
4157 while (i < 1024 && p < src_end)
4158 source_charbuf[i++] = *p++;
4159
4160 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4161 ccl.last_block = 1;
4162
4163 source = source_charbuf;
4164 source_end = source + i;
4165 while (source < source_end)
4166 {
4167 ccl_driver (&ccl, source, charbuf,
4168 source_end - source, charbuf_end - charbuf);
4169 source += ccl.consumed;
4170 charbuf += ccl.produced;
4171 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4172 break;
4173 }
4174 if (source < source_end)
4175 src += source_byteidx[source - source_charbuf];
4176 else
4177 src = p;
4178 consumed_chars += source - source_charbuf;
4179
4180 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4181 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4182 break;
4183 }
4184
4185 switch (ccl.status)
4186 {
4187 case CCL_STAT_SUSPEND_BY_SRC:
4188 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4189 break;
4190 case CCL_STAT_SUSPEND_BY_DST:
4191 break;
4192 case CCL_STAT_QUIT:
4193 case CCL_STAT_INVALID_CMD:
4194 coding->result = CODING_RESULT_INTERRUPT;
4195 break;
4196 default:
4197 coding->result = CODING_RESULT_SUCCESS;
4198 break;
4199 }
4200 coding->consumed_char += consumed_chars;
4201 coding->consumed = src - coding->source;
4202 coding->charbuf_used = charbuf - coding->charbuf;
4203}
4204
4205static int
4206encode_coding_ccl (coding)
4207 struct coding_system *coding;
4208{
4209 struct ccl_program ccl;
4210 int multibytep = coding->dst_multibyte;
4211 int *charbuf = coding->charbuf;
4212 int *charbuf_end = charbuf + coding->charbuf_used;
4213 unsigned char *dst = coding->destination + coding->produced;
4214 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4215 unsigned char *adjusted_dst_end = dst_end - 1;
4216 int destination_charbuf[1024];
4217 int i, produced_chars = 0;
4218
4219 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4220
4221 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4222 ccl.dst_multibyte = coding->dst_multibyte;
4223
4224 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4225 {
4226 int dst_bytes = dst_end - dst;
4227 if (dst_bytes > 1024)
4228 dst_bytes = 1024;
4229
4230 ccl_driver (&ccl, charbuf, destination_charbuf,
4231 charbuf_end - charbuf, dst_bytes);
4232 charbuf += ccl.consumed;
4233 if (multibytep)
4234 for (i = 0; i < ccl.produced; i++)
4235 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4236 else
4237 {
4238 for (i = 0; i < ccl.produced; i++)
4239 *dst++ = destination_charbuf[i] & 0xFF;
4240 produced_chars += ccl.produced;
4241 }
4242 }
4243
4244 switch (ccl.status)
4245 {
4246 case CCL_STAT_SUSPEND_BY_SRC:
4247 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4248 break;
4249 case CCL_STAT_SUSPEND_BY_DST:
4250 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4251 break;
4252 case CCL_STAT_QUIT:
4253 case CCL_STAT_INVALID_CMD:
4254 coding->result = CODING_RESULT_INTERRUPT;
4255 break;
4256 default:
4257 coding->result = CODING_RESULT_SUCCESS;
4258 break;
4259 }
4260
4261 coding->produced_char += produced_chars;
4262 coding->produced = dst - coding->destination;
4263 return 0;
4ed46869
KH
4264}
4265
df7492f9
KH
4266
4267\f
4268/*** 10, 11. no-conversion handlers ***/
4269
4270/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4271
b73bfc1c 4272static void
df7492f9 4273decode_coding_raw_text (coding)
4ed46869 4274 struct coding_system *coding;
4ed46869 4275{
df7492f9 4276 coding->chars_at_source = 1;
2c78b7e1
KH
4277 coding->consumed_char = 0;
4278 coding->consumed = 0;
df7492f9
KH
4279 coding->result = CODING_RESULT_SUCCESS;
4280}
4ed46869 4281
df7492f9
KH
4282static int
4283encode_coding_raw_text (coding)
4284 struct coding_system *coding;
4285{
4286 int multibytep = coding->dst_multibyte;
4287 int *charbuf = coding->charbuf;
4288 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4289 unsigned char *dst = coding->destination + coding->produced;
4290 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4291 int produced_chars = 0;
4292 int c;
a5d301df 4293
df7492f9 4294 if (multibytep)
b73bfc1c 4295 {
df7492f9 4296 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4297
df7492f9
KH
4298 if (coding->src_multibyte)
4299 while (charbuf < charbuf_end)
4300 {
4301 ASSURE_DESTINATION (safe_room);
4302 c = *charbuf++;
4303 if (ASCII_CHAR_P (c))
4304 EMIT_ONE_ASCII_BYTE (c);
4305 else if (CHAR_BYTE8_P (c))
4306 {
4307 c = CHAR_TO_BYTE8 (c);
4308 EMIT_ONE_BYTE (c);
4309 }
4310 else
4311 {
4312 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4313
df7492f9
KH
4314 CHAR_STRING_ADVANCE (c, p1);
4315 while (p0 < p1)
9d123124
KH
4316 {
4317 EMIT_ONE_BYTE (*p0);
4318 p0++;
4319 }
df7492f9
KH
4320 }
4321 }
b73bfc1c 4322 else
df7492f9
KH
4323 while (charbuf < charbuf_end)
4324 {
4325 ASSURE_DESTINATION (safe_room);
4326 c = *charbuf++;
4327 EMIT_ONE_BYTE (c);
4328 }
4329 }
4330 else
4331 {
4332 if (coding->src_multibyte)
b73bfc1c 4333 {
df7492f9
KH
4334 int safe_room = MAX_MULTIBYTE_LENGTH;
4335
4336 while (charbuf < charbuf_end)
b73bfc1c 4337 {
df7492f9
KH
4338 ASSURE_DESTINATION (safe_room);
4339 c = *charbuf++;
4340 if (ASCII_CHAR_P (c))
4341 *dst++ = c;
4342 else if (CHAR_BYTE8_P (c))
4343 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4344 else
df7492f9
KH
4345 CHAR_STRING_ADVANCE (c, dst);
4346 produced_chars++;
b73bfc1c 4347 }
4ed46869 4348 }
df7492f9
KH
4349 else
4350 {
4351 ASSURE_DESTINATION (charbuf_end - charbuf);
4352 while (charbuf < charbuf_end && dst < dst_end)
4353 *dst++ = *charbuf++;
4354 produced_chars = dst - (coding->destination + coding->dst_bytes);
4355 }
4ed46869 4356 }
df7492f9
KH
4357 coding->result = CODING_RESULT_SUCCESS;
4358 coding->produced_char += produced_chars;
4359 coding->produced = dst - coding->destination;
4360 return 0;
4ed46869
KH
4361}
4362
0a28aafb 4363static int
df7492f9
KH
4364detect_coding_charset (coding, mask)
4365 struct coding_system *coding;
4366 int *mask;
1397dc18 4367{
df7492f9
KH
4368 unsigned char *src = coding->source, *src_base = src;
4369 unsigned char *src_end = coding->source + coding->src_bytes;
4370 int multibytep = coding->src_multibyte;
4371 int consumed_chars = 0;
4372 Lisp_Object attrs, valids;
1397dc18 4373
df7492f9
KH
4374 coding = &coding_categories[coding_category_charset];
4375 attrs = CODING_ID_ATTRS (coding->id);
4376 valids = AREF (attrs, coding_attr_charset_valids);
4377
4378 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4379 src += coding->head_ascii;
1397dc18 4380
b73bfc1c 4381 while (1)
1397dc18 4382 {
df7492f9 4383 int c;
1397dc18 4384
df7492f9
KH
4385 ONE_MORE_BYTE (c);
4386 if (NILP (AREF (valids, c)))
4387 break;
4388 }
4389 *mask &= ~CATEGORY_MASK_CHARSET;
4390 return 0;
4ed46869 4391
df7492f9 4392 no_more_source:
df7492f9
KH
4393 return 1;
4394}
4ed46869 4395
b73bfc1c 4396static void
df7492f9 4397decode_coding_charset (coding)
4ed46869 4398 struct coding_system *coding;
4ed46869 4399{
df7492f9
KH
4400 unsigned char *src = coding->source + coding->consumed;
4401 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4402 unsigned char *src_base;
df7492f9
KH
4403 int *charbuf = coding->charbuf;
4404 int *charbuf_end = charbuf + coding->charbuf_size;
4405 int consumed_chars = 0, consumed_chars_base;
4406 int multibytep = coding->src_multibyte;
4eb6d3f1 4407 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9
KH
4408
4409 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4410 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4411
df7492f9 4412 while (1)
4ed46869 4413 {
4eb6d3f1 4414 int c;
df7492f9
KH
4415
4416 src_base = src;
4417 consumed_chars_base = consumed_chars;
b73bfc1c 4418
df7492f9
KH
4419 if (charbuf >= charbuf_end)
4420 break;
4421
4eb6d3f1 4422 ONE_MORE_BYTE (c);
df7492f9 4423 if (c == '\r')
d46c5b12 4424 {
c7c66a95
KH
4425 /* Here we assume that no charset maps '\r' to something
4426 else. */
df7492f9 4427 if (EQ (eol_type, Qdos))
b73bfc1c 4428 {
4eb6d3f1
KH
4429 if (src < src_end
4430 && *src == '\n')
df7492f9 4431 ONE_MORE_BYTE (c);
b73bfc1c 4432 }
df7492f9 4433 else if (EQ (eol_type, Qmac))
b73bfc1c 4434 c = '\n';
d46c5b12 4435 }
df7492f9 4436 else
d46c5b12 4437 {
4eb6d3f1
KH
4438 Lisp_Object val;
4439 struct charset *charset;
c7c66a95 4440 int dim;
acb2a965
KH
4441 int len = 1;
4442 unsigned code = c;
4eb6d3f1
KH
4443
4444 val = AREF (valids, c);
4445 if (NILP (val))
4446 goto invalid_code;
c7c66a95 4447 if (INTEGERP (val))
4eb6d3f1 4448 {
c7c66a95
KH
4449 charset = CHARSET_FROM_ID (XFASTINT (val));
4450 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4451 while (len < dim)
4eb6d3f1 4452 {
acb2a965
KH
4453 ONE_MORE_BYTE (c);
4454 code = (code << 8) | c;
f9d71dcd 4455 len++;
4eb6d3f1 4456 }
c7c66a95
KH
4457 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4458 charset, code, c);
4459 }
4460 else
4461 {
4462 /* VAL is a list of charset IDs. It is assured that the
4463 list is sorted by charset dimensions (smaller one
4464 comes first). */
c7c66a95
KH
4465 while (CONSP (val))
4466 {
4467 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4468 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4469 while (len < dim)
c7c66a95 4470 {
acb2a965
KH
4471 ONE_MORE_BYTE (c);
4472 code = (code << 8) | c;
f9d71dcd 4473 len++;
c7c66a95 4474 }
c7c66a95
KH
4475 CODING_DECODE_CHAR (coding, src, src_base,
4476 src_end, charset, code, c);
4477 if (c >= 0)
4478 break;
4479 val = XCDR (val);
4480 }
4eb6d3f1 4481 }
df7492f9
KH
4482 if (c < 0)
4483 goto invalid_code;
d46c5b12 4484 }
df7492f9
KH
4485 *charbuf++ = c;
4486 continue;
4487
4488 invalid_code:
4489 src = src_base;
4490 consumed_chars = consumed_chars_base;
4491 ONE_MORE_BYTE (c);
4492 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4493 coding->errors++;
4ed46869
KH
4494 }
4495
df7492f9
KH
4496 no_more_source:
4497 coding->consumed_char += consumed_chars_base;
4498 coding->consumed = src_base - coding->source;
4499 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4500}
4501
df7492f9
KH
4502static int
4503encode_coding_charset (coding)
4ed46869 4504 struct coding_system *coding;
4ed46869 4505{
df7492f9
KH
4506 int multibytep = coding->dst_multibyte;
4507 int *charbuf = coding->charbuf;
4508 int *charbuf_end = charbuf + coding->charbuf_used;
4509 unsigned char *dst = coding->destination + coding->produced;
4510 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4511 int safe_room = MAX_MULTIBYTE_LENGTH;
4512 int produced_chars = 0;
df7492f9
KH
4513 Lisp_Object attrs, eol_type, charset_list;
4514 int ascii_compatible;
b73bfc1c 4515 int c;
b73bfc1c 4516
df7492f9 4517 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4518 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4519
df7492f9 4520 while (charbuf < charbuf_end)
4ed46869 4521 {
4eb6d3f1 4522 struct charset *charset;
df7492f9
KH
4523 unsigned code;
4524
4525 ASSURE_DESTINATION (safe_room);
4526 c = *charbuf++;
4527 if (ascii_compatible && ASCII_CHAR_P (c))
4528 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4529 else if (CHAR_BYTE8_P (c))
4530 {
4531 c = CHAR_TO_BYTE8 (c);
4532 EMIT_ONE_BYTE (c);
4533 }
d46c5b12 4534 else
4eb6d3f1
KH
4535 {
4536 charset = char_charset (c, charset_list, &code);
4537 if (charset)
4538 {
4539 if (CHARSET_DIMENSION (charset) == 1)
4540 EMIT_ONE_BYTE (code);
4541 else if (CHARSET_DIMENSION (charset) == 2)
4542 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4543 else if (CHARSET_DIMENSION (charset) == 3)
4544 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4545 else
4546 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4547 (code >> 8) & 0xFF, code & 0xFF);
4548 }
4549 else
41cbe562
KH
4550 {
4551 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4552 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4553 else
4554 c = coding->default_char;
4555 EMIT_ONE_BYTE (c);
4556 }
4eb6d3f1 4557 }
4ed46869
KH
4558 }
4559
df7492f9
KH
4560 coding->result = CODING_RESULT_SUCCESS;
4561 coding->produced_char += produced_chars;
4562 coding->produced = dst - coding->destination;
4563 return 0;
4ed46869
KH
4564}
4565
4566\f
1397dc18 4567/*** 7. C library functions ***/
4ed46869 4568
df7492f9
KH
4569/* Setup coding context CODING from information about CODING_SYSTEM.
4570 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4571 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4572
4573void
df7492f9
KH
4574setup_coding_system (coding_system, coding)
4575 Lisp_Object coding_system;
ec6d2bb8
KH
4576 struct coding_system *coding;
4577{
df7492f9
KH
4578 Lisp_Object attrs;
4579 Lisp_Object eol_type;
4580 Lisp_Object coding_type;
4581 Lisp_Object val;
ec6d2bb8 4582
df7492f9
KH
4583 if (NILP (coding_system))
4584 coding_system = Qno_conversion;
4585
4586 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4587
4588 attrs = CODING_ID_ATTRS (coding->id);
4589 eol_type = CODING_ID_EOL_TYPE (coding->id);
4590
4591 coding->mode = 0;
4592 coding->head_ascii = -1;
4593 coding->common_flags
4594 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4595
4596 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4597 coding->max_charset_id = XSTRING (val)->size - 1;
4598 coding->safe_charsets = (char *) XSTRING (val)->data;
4599 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4600
4601 coding_type = CODING_ATTR_TYPE (attrs);
4602 if (EQ (coding_type, Qundecided))
4603 {
4604 coding->detector = NULL;
4605 coding->decoder = decode_coding_raw_text;
4606 coding->encoder = encode_coding_raw_text;
4607 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4608 }
4609 else if (EQ (coding_type, Qiso_2022))
4610 {
4611 int i;
4612 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4613
4614 /* Invoke graphic register 0 to plane 0. */
4615 CODING_ISO_INVOCATION (coding, 0) = 0;
4616 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4617 CODING_ISO_INVOCATION (coding, 1)
4618 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4619 /* Setup the initial status of designation. */
4620 for (i = 0; i < 4; i++)
4621 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4622 /* Not single shifting initially. */
4623 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4624 /* Beginning of buffer should also be regarded as bol. */
4625 CODING_ISO_BOL (coding) = 1;
4626 coding->detector = detect_coding_iso_2022;
4627 coding->decoder = decode_coding_iso_2022;
4628 coding->encoder = encode_coding_iso_2022;
4629 if (flags & CODING_ISO_FLAG_SAFE)
4630 coding->mode |= CODING_MODE_SAFE_ENCODING;
4631 coding->common_flags
4632 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4633 | CODING_REQUIRE_FLUSHING_MASK);
4634 if (flags & CODING_ISO_FLAG_COMPOSITION)
4635 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4636 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4637 {
4638 setup_iso_safe_charsets (attrs);
4639 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4640 coding->max_charset_id = XSTRING (val)->size - 1;
4641 coding->safe_charsets = (char *) XSTRING (val)->data;
4642 }
4643 CODING_ISO_FLAGS (coding) = flags;
4644 }
4645 else if (EQ (coding_type, Qcharset))
4646 {
4647 coding->detector = detect_coding_charset;
4648 coding->decoder = decode_coding_charset;
4649 coding->encoder = encode_coding_charset;
4650 coding->common_flags
4651 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4652 }
4653 else if (EQ (coding_type, Qutf_8))
4654 {
4655 coding->detector = detect_coding_utf_8;
4656 coding->decoder = decode_coding_utf_8;
4657 coding->encoder = encode_coding_utf_8;
4658 coding->common_flags
4659 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4660 }
4661 else if (EQ (coding_type, Qutf_16))
4662 {
4663 val = AREF (attrs, coding_attr_utf_16_bom);
4664 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4665 : EQ (val, Qt) ? utf_16_with_bom
4666 : utf_16_without_bom);
4667 val = AREF (attrs, coding_attr_utf_16_endian);
4668 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4669 : utf_16_little_endian);
e19c3639 4670 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4671 coding->detector = detect_coding_utf_16;
4672 coding->decoder = decode_coding_utf_16;
4673 coding->encoder = encode_coding_utf_16;
4674 coding->common_flags
4675 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4676 }
4677 else if (EQ (coding_type, Qccl))
4678 {
4679 coding->detector = detect_coding_ccl;
4680 coding->decoder = decode_coding_ccl;
4681 coding->encoder = encode_coding_ccl;
4682 coding->common_flags
4683 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4684 | CODING_REQUIRE_FLUSHING_MASK);
4685 }
4686 else if (EQ (coding_type, Qemacs_mule))
4687 {
4688 coding->detector = detect_coding_emacs_mule;
4689 coding->decoder = decode_coding_emacs_mule;
4690 coding->encoder = encode_coding_emacs_mule;
4691 coding->common_flags
4692 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4693 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4694 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4695 {
4696 Lisp_Object tail, safe_charsets;
4697 int max_charset_id = 0;
4698
4699 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4700 tail = XCDR (tail))
4701 if (max_charset_id < XFASTINT (XCAR (tail)))
4702 max_charset_id = XFASTINT (XCAR (tail));
4703 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4704 make_number (255));
4705 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4706 tail = XCDR (tail))
4707 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4708 coding->max_charset_id = max_charset_id;
4709 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4710 }
4711 }
4712 else if (EQ (coding_type, Qshift_jis))
4713 {
4714 coding->detector = detect_coding_sjis;
4715 coding->decoder = decode_coding_sjis;
4716 coding->encoder = encode_coding_sjis;
4717 coding->common_flags
4718 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4719 }
4720 else if (EQ (coding_type, Qbig5))
4721 {
4722 coding->detector = detect_coding_big5;
4723 coding->decoder = decode_coding_big5;
4724 coding->encoder = encode_coding_big5;
4725 coding->common_flags
4726 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4727 }
4728 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4729 {
df7492f9
KH
4730 coding->detector = NULL;
4731 coding->decoder = decode_coding_raw_text;
4732 coding->encoder = encode_coding_raw_text;
4733 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4734 }
df7492f9
KH
4735
4736 return;
ec6d2bb8
KH
4737}
4738
df7492f9
KH
4739/* Return raw-text or one of its subsidiaries that has the same
4740 eol_type as CODING-SYSTEM. */
ec6d2bb8 4741
df7492f9
KH
4742Lisp_Object
4743raw_text_coding_system (coding_system)
4744 Lisp_Object coding_system;
ec6d2bb8 4745{
0be8721c 4746 Lisp_Object spec, attrs;
df7492f9
KH
4747 Lisp_Object eol_type, raw_text_eol_type;
4748
4749 spec = CODING_SYSTEM_SPEC (coding_system);
4750 attrs = AREF (spec, 0);
4751
4752 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4753 return coding_system;
ec6d2bb8 4754
df7492f9
KH
4755 eol_type = AREF (spec, 2);
4756 if (VECTORP (eol_type))
4757 return Qraw_text;
4758 spec = CODING_SYSTEM_SPEC (Qraw_text);
4759 raw_text_eol_type = AREF (spec, 2);
4760 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4761 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4762 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4763}
4764
54f78171 4765
df7492f9
KH
4766/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4767 does, return one of the subsidiary that has the same eol-spec as
4768 PARENT. Otherwise, return CODING_SYSTEM. */
4769
4770Lisp_Object
4771coding_inherit_eol_type (coding_system, parent)
b74e4686 4772 Lisp_Object coding_system, parent;
54f78171 4773{
df7492f9 4774 Lisp_Object spec, attrs, eol_type;
54f78171 4775
df7492f9
KH
4776 spec = CODING_SYSTEM_SPEC (coding_system);
4777 attrs = AREF (spec, 0);
4778 eol_type = AREF (spec, 2);
4779 if (VECTORP (eol_type))
4780 {
4781 Lisp_Object parent_spec;
df7492f9
KH
4782 Lisp_Object parent_eol_type;
4783
4784 parent_spec
4785 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4786 parent_eol_type = AREF (parent_spec, 2);
4787 if (EQ (parent_eol_type, Qunix))
4788 coding_system = AREF (eol_type, 0);
4789 else if (EQ (parent_eol_type, Qdos))
4790 coding_system = AREF (eol_type, 1);
4791 else if (EQ (parent_eol_type, Qmac))
4792 coding_system = AREF (eol_type, 2);
54f78171 4793 }
df7492f9 4794 return coding_system;
54f78171
KH
4795}
4796
4ed46869
KH
4797/* Emacs has a mechanism to automatically detect a coding system if it
4798 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4799 it's impossible to distinguish some coding systems accurately
4800 because they use the same range of codes. So, at first, coding
4801 systems are categorized into 7, those are:
4802
0ef69138 4803 o coding-category-emacs-mule
4ed46869
KH
4804
4805 The category for a coding system which has the same code range
4806 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 4807 symbol) `emacs-mule' by default.
4ed46869
KH
4808
4809 o coding-category-sjis
4810
4811 The category for a coding system which has the same code range
4812 as SJIS. Assigned the coding-system (Lisp
7717c392 4813 symbol) `japanese-shift-jis' by default.
4ed46869
KH
4814
4815 o coding-category-iso-7
4816
4817 The category for a coding system which has the same code range
7717c392 4818 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
4819 shift and single shift functions. This can encode/decode all
4820 charsets. Assigned the coding-system (Lisp symbol)
4821 `iso-2022-7bit' by default.
4822
4823 o coding-category-iso-7-tight
4824
4825 Same as coding-category-iso-7 except that this can
4826 encode/decode only the specified charsets.
4ed46869
KH
4827
4828 o coding-category-iso-8-1
4829
4830 The category for a coding system which has the same code range
4831 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4832 for DIMENSION1 charset. This doesn't use any locking shift
4833 and single shift functions. Assigned the coding-system (Lisp
4834 symbol) `iso-latin-1' by default.
4ed46869
KH
4835
4836 o coding-category-iso-8-2
4837
4838 The category for a coding system which has the same code range
4839 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4840 for DIMENSION2 charset. This doesn't use any locking shift
4841 and single shift functions. Assigned the coding-system (Lisp
4842 symbol) `japanese-iso-8bit' by default.
4ed46869 4843
7717c392 4844 o coding-category-iso-7-else
4ed46869
KH
4845
4846 The category for a coding system which has the same code range
df7492f9 4847 as ISO2022 of 7-bit environemnt but uses locking shift or
7717c392
KH
4848 single shift functions. Assigned the coding-system (Lisp
4849 symbol) `iso-2022-7bit-lock' by default.
4850
4851 o coding-category-iso-8-else
4852
4853 The category for a coding system which has the same code range
df7492f9 4854 as ISO2022 of 8-bit environemnt but uses locking shift or
7717c392
KH
4855 single shift functions. Assigned the coding-system (Lisp
4856 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
4857
4858 o coding-category-big5
4859
4860 The category for a coding system which has the same code range
4861 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 4862 `cn-big5' by default.
4ed46869 4863
fa42c37f
KH
4864 o coding-category-utf-8
4865
4866 The category for a coding system which has the same code range
4867 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4868 symbol) `utf-8' by default.
4869
4870 o coding-category-utf-16-be
4871
4872 The category for a coding system in which a text has an
4873 Unicode signature (cf. Unicode Standard) in the order of BIG
4874 endian at the head. Assigned the coding-system (Lisp symbol)
4875 `utf-16-be' by default.
4876
4877 o coding-category-utf-16-le
4878
4879 The category for a coding system in which a text has an
4880 Unicode signature (cf. Unicode Standard) in the order of
4881 LITTLE endian at the head. Assigned the coding-system (Lisp
4882 symbol) `utf-16-le' by default.
4883
1397dc18
KH
4884 o coding-category-ccl
4885
4886 The category for a coding system of which encoder/decoder is
4887 written in CCL programs. The default value is nil, i.e., no
4888 coding system is assigned.
4889
4ed46869
KH
4890 o coding-category-binary
4891
4892 The category for a coding system not categorized in any of the
4893 above. Assigned the coding-system (Lisp symbol)
e0e989f6 4894 `no-conversion' by default.
4ed46869
KH
4895
4896 Each of them is a Lisp symbol and the value is an actual
df7492f9 4897 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
4898 What Emacs does actually is to detect a category of coding system.
4899 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 4900 decide only one possible category, it selects a category of the
4ed46869
KH
4901 highest priority. Priorities of categories are also specified by a
4902 user in a Lisp variable `coding-category-list'.
4903
4904*/
4905
df7492f9
KH
4906#define EOL_SEEN_NONE 0
4907#define EOL_SEEN_LF 1
4908#define EOL_SEEN_CR 2
4909#define EOL_SEEN_CRLF 4
4ed46869 4910
df7492f9
KH
4911/* Detect how end-of-line of a text of length CODING->src_bytes
4912 pointed by CODING->source is encoded. Return one of
4913 EOL_SEEN_XXX. */
4ed46869 4914
bc4bc72a
RS
4915#define MAX_EOL_CHECK_COUNT 3
4916
d46c5b12 4917static int
89528eb3 4918detect_eol (source, src_bytes, category)
d46c5b12 4919 unsigned char *source;
df7492f9 4920 EMACS_INT src_bytes;
89528eb3 4921 enum coding_category category;
4ed46869 4922{
d46c5b12 4923 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 4924 unsigned char c;
df7492f9
KH
4925 int total = 0;
4926 int eol_seen = EOL_SEEN_NONE;
4ed46869 4927
89528eb3 4928 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 4929 {
df7492f9 4930 int msb, lsb;
fa42c37f 4931
89528eb3
KH
4932 msb = category == (coding_category_utf_16_le
4933 | coding_category_utf_16_le_nosig);
df7492f9 4934 lsb = 1 - msb;
fa42c37f 4935
df7492f9 4936 while (src + 1 < src_end)
fa42c37f 4937 {
df7492f9
KH
4938 c = src[lsb];
4939 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 4940 {
df7492f9
KH
4941 int this_eol;
4942
4943 if (c == '\n')
4944 this_eol = EOL_SEEN_LF;
4945 else if (src + 3 >= src_end
4946 || src[msb + 2] != 0
4947 || src[lsb + 2] != '\n')
4948 this_eol = EOL_SEEN_CR;
fa42c37f 4949 else
df7492f9
KH
4950 this_eol = EOL_SEEN_CRLF;
4951
4952 if (eol_seen == EOL_SEEN_NONE)
4953 /* This is the first end-of-line. */
4954 eol_seen = this_eol;
4955 else if (eol_seen != this_eol)
fa42c37f 4956 {
df7492f9
KH
4957 /* The found type is different from what found before. */
4958 eol_seen = EOL_SEEN_LF;
4959 break;
fa42c37f 4960 }
df7492f9
KH
4961 if (++total == MAX_EOL_CHECK_COUNT)
4962 break;
fa42c37f 4963 }
df7492f9 4964 src += 2;
fa42c37f 4965 }
df7492f9 4966 }
d46c5b12 4967 else
27901516 4968 {
df7492f9 4969 while (src < src_end)
27901516 4970 {
df7492f9
KH
4971 c = *src++;
4972 if (c == '\n' || c == '\r')
4973 {
4974 int this_eol;
d46c5b12 4975
df7492f9
KH
4976 if (c == '\n')
4977 this_eol = EOL_SEEN_LF;
4978 else if (src >= src_end || *src != '\n')
4979 this_eol = EOL_SEEN_CR;
4980 else
4981 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 4982
df7492f9
KH
4983 if (eol_seen == EOL_SEEN_NONE)
4984 /* This is the first end-of-line. */
4985 eol_seen = this_eol;
4986 else if (eol_seen != this_eol)
4987 {
4988 /* The found type is different from what found before. */
4989 eol_seen = EOL_SEEN_LF;
4990 break;
4991 }
4992 if (++total == MAX_EOL_CHECK_COUNT)
4993 break;
4994 }
4995 }
73be902c 4996 }
df7492f9 4997 return eol_seen;
73be902c
KH
4998}
4999
df7492f9 5000
73be902c 5001static void
df7492f9
KH
5002adjust_coding_eol_type (coding, eol_seen)
5003 struct coding_system *coding;
5004 int eol_seen;
73be902c 5005{
0be8721c 5006 Lisp_Object eol_type;
df7492f9
KH
5007
5008 eol_type = CODING_ID_EOL_TYPE (coding->id);
5009 if (eol_seen & EOL_SEEN_LF)
5010 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6f197c07 5011 else if (eol_seen & EOL_SEEN_CRLF)
df7492f9 5012 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6f197c07 5013 else if (eol_seen & EOL_SEEN_CR)
df7492f9 5014 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
5015}
5016
df7492f9
KH
5017/* Detect how a text specified in CODING is encoded. If a coding
5018 system is detected, update fields of CODING by the detected coding
5019 system. */
5020
5021void
5022detect_coding (coding)
d46c5b12 5023 struct coding_system *coding;
d46c5b12 5024{
df7492f9
KH
5025 unsigned char *src, *src_end;
5026 Lisp_Object attrs, coding_type;
d46c5b12 5027
df7492f9
KH
5028 coding->consumed = coding->consumed_char = 0;
5029 coding->produced = coding->produced_char = 0;
5030 coding_set_source (coding);
1c3478b0 5031
df7492f9 5032 src_end = coding->source + coding->src_bytes;
1c3478b0 5033
df7492f9
KH
5034 /* If we have not yet decided the text encoding type, detect it
5035 now. */
5036 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5037 {
df7492f9
KH
5038 int mask = CATEGORY_MASK_ANY;
5039 int c, i;
5040
5041 for (src = coding->source; src < src_end; src++)
5042 {
5043 c = *src;
5044 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5045 || c == ISO_CODE_SI
5046 || c == ISO_CODE_SO)))
5047 break;
5048 }
5049 coding->head_ascii = src - (coding->source + coding->consumed);
5050
5051 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5052 {
df7492f9
KH
5053 int detected = 0;
5054
5055 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5056 {
df7492f9
KH
5057 enum coding_category category = coding_priorities[i];
5058 struct coding_system *this = coding_categories + category;
5059
df7492f9 5060 if (this->id < 0)
1c3478b0 5061 {
df7492f9
KH
5062 /* No coding system of this category is defined. */
5063 mask &= ~(1 << category);
5064 }
89528eb3
KH
5065 else if (category >= coding_category_raw_text
5066 || detected & (1 << category))
5067 continue;
df7492f9
KH
5068 else
5069 {
5070 detected |= detected_mask[category];
89528eb3
KH
5071 if ((*(this->detector)) (coding, &mask)
5072 && (mask & (1 << category)))
df7492f9 5073 break;
1c3478b0
KH
5074 }
5075 }
df7492f9
KH
5076 if (! mask)
5077 setup_coding_system (Qraw_text, coding);
5078 else if (mask != CATEGORY_MASK_ANY)
5079 for (i = 0; i < coding_category_raw_text; i++)
5080 {
5081 enum coding_category category = coding_priorities[i];
5082 struct coding_system *this = coding_categories + category;
5083
5084 if (mask & (1 << category))
5085 {
5086 setup_coding_system (CODING_ID_NAME (this->id), coding);
5087 break;
5088 }
5089 }
1c3478b0 5090 }
b73bfc1c 5091 }
69f76525 5092
df7492f9
KH
5093 attrs = CODING_ID_ATTRS (coding->id);
5094 coding_type = CODING_ATTR_TYPE (attrs);
5095
5096 /* If we have not yet decided the EOL type, detect it now. But, the
5097 detection is impossible for a CCL based coding system, in which
5098 case, we detct the EOL type after decoding. */
5099 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5100 && ! EQ (coding_type, Qccl))
d46c5b12 5101 {
89528eb3
KH
5102 int eol_seen = detect_eol (coding->source, coding->src_bytes,
5103 XINT (CODING_ATTR_CATEGORY (attrs)));
df7492f9
KH
5104
5105 if (eol_seen != EOL_SEEN_NONE)
5106 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5107 }
4ed46869
KH
5108}
5109
aaaf0b1e
KH
5110
5111static void
df7492f9 5112decode_eol (coding)
aaaf0b1e 5113 struct coding_system *coding;
aaaf0b1e 5114{
df7492f9 5115 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5116 {
df7492f9
KH
5117 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5118 unsigned char *pend = p + coding->produced;
5119 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5120
df7492f9 5121 for (; p < pend; p++)
aaaf0b1e 5122 {
df7492f9
KH
5123 if (*p == '\n')
5124 eol_seen |= EOL_SEEN_LF;
5125 else if (*p == '\r')
aaaf0b1e 5126 {
df7492f9 5127 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5128 {
df7492f9
KH
5129 eol_seen |= EOL_SEEN_CRLF;
5130 p++;
aaaf0b1e 5131 }
aaaf0b1e 5132 else
df7492f9 5133 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5134 }
aaaf0b1e 5135 }
df7492f9
KH
5136 if (eol_seen != EOL_SEEN_NONE)
5137 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5138 }
aaaf0b1e 5139
df7492f9
KH
5140 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5141 {
5142 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5143 unsigned char *pend = p + coding->produced;
5144
5145 for (; p < pend; p++)
5146 if (*p == '\r')
5147 *p = '\n';
5148 }
5149 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5150 {
5151 unsigned char *p, *pbeg, *pend;
5152 Lisp_Object undo_list;
5153
5154 move_gap_both (coding->dst_pos + coding->produced_char,
5155 coding->dst_pos_byte + coding->produced);
5156 undo_list = current_buffer->undo_list;
5157 current_buffer->undo_list = Qt;
c197f191 5158 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5159 current_buffer->undo_list = undo_list;
5160 pbeg = GPT_ADDR;
5161 pend = pbeg + coding->produced;
5162
5163 for (p = pend - 1; p >= pbeg; p--)
5164 if (*p == '\r')
5165 {
5166 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5167 pend--;
5168 }
5169 coding->produced_char -= coding->produced - (pend - pbeg);
5170 coding->produced = pend - pbeg;
5171 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5172 }
5173}
5174
df7492f9
KH
5175static void
5176translate_chars (coding, table)
4ed46869 5177 struct coding_system *coding;
df7492f9 5178 Lisp_Object table;
4ed46869 5179{
df7492f9
KH
5180 int *charbuf = coding->charbuf;
5181 int *charbuf_end = charbuf + coding->charbuf_used;
5182 int c;
5183
5184 if (coding->chars_at_source)
5185 return;
4ed46869 5186
df7492f9 5187 while (charbuf < charbuf_end)
8844fa83 5188 {
df7492f9
KH
5189 c = *charbuf;
5190 if (c < 0)
5191 charbuf += c;
5192 else
5193 *charbuf++ = translate_char (table, c);
8844fa83 5194 }
df7492f9 5195}
4ed46869 5196
df7492f9
KH
5197static int
5198produce_chars (coding)
5199 struct coding_system *coding;
5200{
5201 unsigned char *dst = coding->destination + coding->produced;
5202 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5203 int produced;
5204 int produced_chars = 0;
b73bfc1c 5205
df7492f9 5206 if (! coding->chars_at_source)
4ed46869 5207 {
df7492f9
KH
5208 /* Characters are in coding->charbuf. */
5209 int *buf = coding->charbuf;
5210 int *buf_end = buf + coding->charbuf_used;
5211 unsigned char *adjusted_dst_end;
4ed46869 5212
df7492f9
KH
5213 if (BUFFERP (coding->src_object)
5214 && EQ (coding->src_object, coding->dst_object))
5215 dst_end = coding->source + coding->consumed;
5216 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5217
df7492f9
KH
5218 while (buf < buf_end)
5219 {
5220 int c = *buf++;
5221
5222 if (dst >= adjusted_dst_end)
5223 {
5224 dst = alloc_destination (coding,
5225 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5226 dst);
5227 dst_end = coding->destination + coding->dst_bytes;
5228 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5229 }
5230 if (c >= 0)
5231 {
5232 if (coding->dst_multibyte
5233 || ! CHAR_BYTE8_P (c))
5234 CHAR_STRING_ADVANCE (c, dst);
5235 else
5236 *dst++ = CHAR_TO_BYTE8 (c);
5237 produced_chars++;
5238 }
5239 else
5240 /* This is an annotation data. */
5241 buf -= c + 1;
5242 }
5243 }
5244 else
5245 {
df7492f9
KH
5246 unsigned char *src = coding->source;
5247 unsigned char *src_end = src + coding->src_bytes;
5248 Lisp_Object eol_type;
b73bfc1c 5249
df7492f9 5250 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5251
df7492f9 5252 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5253 {
df7492f9
KH
5254 if (coding->src_multibyte)
5255 {
71c81426 5256 int multibytep = 1;
df7492f9 5257 int consumed_chars;
d46c5b12 5258
df7492f9
KH
5259 while (1)
5260 {
5261 unsigned char *src_base = src;
5262 int c;
b73bfc1c 5263
df7492f9
KH
5264 ONE_MORE_BYTE (c);
5265 if (c == '\r')
5266 {
5267 if (EQ (eol_type, Qdos))
5268 {
5269 if (src < src_end
5270 && *src == '\n')
5271 c = *src++;
5272 }
5273 else if (EQ (eol_type, Qmac))
5274 c = '\n';
5275 }
5276 if (dst == dst_end)
5277 {
2c78b7e1 5278 coding->consumed = src - coding->source;
b73bfc1c 5279
2c78b7e1
KH
5280 if (EQ (coding->src_object, coding->dst_object))
5281 dst_end = src;
5282 if (dst == dst_end)
5283 {
5284 dst = alloc_destination (coding, src_end - src + 1,
5285 dst);
5286 dst_end = coding->destination + coding->dst_bytes;
5287 coding_set_source (coding);
5288 src = coding->source + coding->consumed;
5289 src_end = coding->source + coding->src_bytes;
5290 }
df7492f9
KH
5291 }
5292 *dst++ = c;
5293 produced_chars++;
5294 }
5295 no_more_source:
5296 ;
5297 }
5298 else
5299 while (src < src_end)
5300 {
71c81426 5301 int multibytep = 1;
df7492f9 5302 int c = *src++;
b73bfc1c 5303
df7492f9
KH
5304 if (c == '\r')
5305 {
5306 if (EQ (eol_type, Qdos))
5307 {
5308 if (src < src_end
5309 && *src == '\n')
5310 c = *src++;
5311 }
5312 else if (EQ (eol_type, Qmac))
5313 c = '\n';
5314 }
5315 if (dst >= dst_end - 1)
5316 {
2c78b7e1 5317 coding->consumed = src - coding->source;
df7492f9 5318
2c78b7e1
KH
5319 if (EQ (coding->src_object, coding->dst_object))
5320 dst_end = src;
5321 if (dst >= dst_end - 1)
5322 {
5323 dst = alloc_destination (coding, src_end - src + 2,
5324 dst);
5325 dst_end = coding->destination + coding->dst_bytes;
5326 coding_set_source (coding);
5327 src = coding->source + coding->consumed;
5328 src_end = coding->source + coding->src_bytes;
5329 }
df7492f9
KH
5330 }
5331 EMIT_ONE_BYTE (c);
5332 }
d46c5b12 5333 }
df7492f9
KH
5334 else
5335 {
5336 if (!EQ (coding->src_object, coding->dst_object))
5337 {
5338 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5339
df7492f9
KH
5340 if (require > 0)
5341 {
5342 EMACS_INT offset = src - coding->source;
5343
5344 dst = alloc_destination (coding, require, dst);
5345 coding_set_source (coding);
5346 src = coding->source + offset;
5347 src_end = coding->source + coding->src_bytes;
5348 }
5349 }
5350 produced_chars = coding->src_chars;
5351 while (src < src_end)
5352 {
5353 int c = *src++;
5354
5355 if (c == '\r')
5356 {
5357 if (EQ (eol_type, Qdos))
5358 {
5359 if (src < src_end
5360 && *src == '\n')
5361 c = *src++;
5362 produced_chars--;
5363 }
5364 else if (EQ (eol_type, Qmac))
5365 c = '\n';
5366 }
5367 *dst++ = c;
5368 }
5369 }
2c78b7e1
KH
5370 coding->consumed = coding->src_bytes;
5371 coding->consumed_char = coding->src_chars;
b73bfc1c 5372 }
4ed46869 5373
df7492f9
KH
5374 produced = dst - (coding->destination + coding->produced);
5375 if (BUFFERP (coding->dst_object))
5376 insert_from_gap (produced_chars, produced);
5377 coding->produced += produced;
5378 coding->produced_char += produced_chars;
5379 return produced_chars;
b73bfc1c 5380}
52d41803 5381
df7492f9
KH
5382/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5383 or
5384 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5385 */
4ed46869 5386
df7492f9
KH
5387static INLINE void
5388produce_composition (coding, charbuf)
4ed46869 5389 struct coding_system *coding;
df7492f9 5390 int *charbuf;
4ed46869 5391{
df7492f9
KH
5392 Lisp_Object buffer;
5393 int len;
5394 EMACS_INT pos;
5395 enum composition_method method;
5396 int cmp_len;
5397 Lisp_Object components;
5398
5399 buffer = coding->dst_object;
5400 len = -charbuf[0];
5401 pos = coding->dst_pos + charbuf[1];
5402 method = (enum composition_method) (charbuf[3]);
5403 cmp_len = charbuf[4];
5404
5405 if (method == COMPOSITION_RELATIVE)
5406 components = Qnil;
5407 else
d46c5b12 5408 {
df7492f9
KH
5409 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5410 int i;
4ed46869 5411
df7492f9
KH
5412 len -= 5;
5413 charbuf += 5;
5414 for (i = 0; i < len; i++)
5415 args[i] = make_number (charbuf[i]);
5416 components = (method == COMPOSITION_WITH_ALTCHARS
5417 ? Fstring (len, args) : Fvector (len, args));
5418 }
5419 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5420}
b73bfc1c 5421
df7492f9
KH
5422static int *
5423save_composition_data (buf, buf_end, prop)
5424 int *buf, *buf_end;
5425 Lisp_Object prop;
5426{
5427 enum composition_method method = COMPOSITION_METHOD (prop);
5428 int cmp_len = COMPOSITION_LENGTH (prop);
4ed46869 5429
df7492f9
KH
5430 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5431 return NULL;
d46c5b12 5432
df7492f9
KH
5433 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5434 buf[2] = method;
5435 buf[3] = cmp_len;
b73bfc1c 5436
df7492f9
KH
5437 if (method == COMPOSITION_RELATIVE)
5438 buf[0] = 4;
5439 else
b73bfc1c 5440 {
df7492f9
KH
5441 Lisp_Object components;
5442 int len, i;
b73bfc1c 5443
df7492f9
KH
5444 components = COMPOSITION_COMPONENTS (prop);
5445 if (VECTORP (components))
d46c5b12 5446 {
df7492f9
KH
5447 len = XVECTOR (components)->size;
5448 for (i = 0; i < len; i++)
5449 buf[4 + i] = XINT (AREF (components, i));
5450 }
5451 else if (STRINGP (components))
5452 {
5453 int i_byte;
b73bfc1c 5454
df7492f9
KH
5455 len = XSTRING (components)->size;
5456 i = i_byte = 0;
5457 while (i < len)
5458 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5459 }
5460 else if (INTEGERP (components))
5461 {
5462 len = 1;
5463 buf[4] = XINT (components);
5464 }
5465 else if (CONSP (components))
5466 {
5467 for (len = 0; CONSP (components);
5468 len++, components = XCDR (components))
5469 buf[4 + len] = XINT (XCAR (components));
d46c5b12 5470 }
df7492f9
KH
5471 else
5472 abort ();
5473 buf[0] = 4 + len;
4ed46869 5474 }
df7492f9 5475 return (buf + buf[0]);
4ed46869
KH
5476}
5477
df7492f9
KH
5478#define CHARBUF_SIZE 0x4000
5479
5480#define ALLOC_CONVERSION_WORK_AREA(coding) \
5481 do { \
5482 int size = CHARBUF_SIZE;; \
5483 \
5484 coding->charbuf = NULL; \
5485 while (size > 1024) \
5486 { \
5487 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5488 if (coding->charbuf) \
5489 break; \
5490 size >>= 1; \
5491 } \
5492 if (! coding->charbuf) \
5493 { \
5494 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5495 return coding->result; \
5496 } \
5497 coding->charbuf_size = size; \
5498 } while (0)
4ed46869 5499
d46c5b12
KH
5500
5501static void
df7492f9 5502produce_annotation (coding)
d46c5b12 5503 struct coding_system *coding;
d46c5b12 5504{
df7492f9
KH
5505 int *charbuf = coding->charbuf;
5506 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5507
df7492f9 5508 while (charbuf < charbuf_end)
d46c5b12 5509 {
df7492f9
KH
5510 if (*charbuf >= 0)
5511 charbuf++;
d46c5b12 5512 else
d46c5b12 5513 {
df7492f9
KH
5514 int len = -*charbuf;
5515 switch (charbuf[2])
5516 {
5517 case CODING_ANNOTATE_COMPOSITION_MASK:
5518 produce_composition (coding, charbuf);
5519 break;
5520 default:
5521 abort ();
5522 }
5523 charbuf += len;
d46c5b12 5524 }
df7492f9
KH
5525 }
5526}
d46c5b12 5527
df7492f9
KH
5528/* Decode the data at CODING->src_object into CODING->dst_object.
5529 CODING->src_object is a buffer, a string, or nil.
5530 CODING->dst_object is a buffer.
de79a6a5 5531
df7492f9
KH
5532 If CODING->src_object is a buffer, it must be the current buffer.
5533 In this case, if CODING->src_pos is positive, it is a position of
5534 the source text in the buffer, otherwise, the source text is in the
5535 gap area of the buffer, and CODING->src_pos specifies the offset of
5536 the text from GPT (which must be the same as PT). If this is the
5537 same buffer as CODING->dst_object, CODING->src_pos must be
5538 negative.
b73bfc1c 5539
df7492f9
KH
5540 If CODING->src_object is a string, CODING->src_pos in an index to
5541 that string.
d46c5b12 5542
df7492f9
KH
5543 If CODING->src_object is nil, CODING->source must already point to
5544 the non-relocatable memory area. In this case, CODING->src_pos is
5545 an offset from CODING->source.
d46c5b12 5546
df7492f9
KH
5547 The decoded data is inserted at the current point of the buffer
5548 CODING->dst_object.
5549*/
5550
5551static int
5552decode_coding (coding)
d46c5b12 5553 struct coding_system *coding;
d46c5b12 5554{
df7492f9 5555 Lisp_Object attrs;
d46c5b12 5556
df7492f9
KH
5557 if (BUFFERP (coding->src_object)
5558 && coding->src_pos > 0
5559 && coding->src_pos < GPT
5560 && coding->src_pos + coding->src_chars > GPT)
5561 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5562
df7492f9 5563 if (BUFFERP (coding->dst_object))
88993dfd 5564 {
df7492f9
KH
5565 if (current_buffer != XBUFFER (coding->dst_object))
5566 set_buffer_internal (XBUFFER (coding->dst_object));
5567 if (GPT != PT)
5568 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5569 }
5570
df7492f9
KH
5571 coding->consumed = coding->consumed_char = 0;
5572 coding->produced = coding->produced_char = 0;
5573 coding->chars_at_source = 0;
5574 coding->result = CODING_RESULT_SUCCESS;
5575 coding->errors = 0;
5576
5577 ALLOC_CONVERSION_WORK_AREA (coding);
5578
5579 attrs = CODING_ID_ATTRS (coding->id);
5580
5581 do
d46c5b12 5582 {
df7492f9
KH
5583 coding_set_source (coding);
5584 coding->annotated = 0;
5585 (*(coding->decoder)) (coding);
5586 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
da4109a9
KH
5587 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5588 else if (!NILP (Vstandard_translation_table_for_decode))
5589 translate_chars (coding, Vstandard_translation_table_for_decode);
df7492f9
KH
5590 coding_set_destination (coding);
5591 produce_chars (coding);
5592 if (coding->annotated)
5593 produce_annotation (coding);
d46c5b12 5594 }
df7492f9
KH
5595 while (coding->consumed < coding->src_bytes
5596 && ! coding->result);
d46c5b12 5597
df7492f9
KH
5598 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5599 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5600 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5601 decode_eol (coding);
d46c5b12 5602
df7492f9
KH
5603 coding->carryover_bytes = 0;
5604 if (coding->consumed < coding->src_bytes)
d46c5b12 5605 {
df7492f9
KH
5606 int nbytes = coding->src_bytes - coding->consumed;
5607 unsigned char *src;
5608
5609 coding_set_source (coding);
5610 coding_set_destination (coding);
5611 src = coding->source + coding->consumed;
5612
5613 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5614 {
df7492f9
KH
5615 /* Flush out unprocessed data as binary chars. We are sure
5616 that the number of data is less than the size of
5617 coding->charbuf. */
5618 int *charbuf = coding->charbuf;
5619
5620 while (nbytes-- > 0)
d46c5b12 5621 {
df7492f9
KH
5622 int c = *src++;
5623 *charbuf++ = (c & 0x80 ? - c : c);
d46c5b12 5624 }
df7492f9 5625 produce_chars (coding);
d46c5b12 5626 }
d46c5b12 5627 else
df7492f9
KH
5628 {
5629 /* Record unprocessed bytes in coding->carryover. We are
5630 sure that the number of data is less than the size of
5631 coding->carryover. */
5632 unsigned char *p = coding->carryover;
5633
5634 coding->carryover_bytes = nbytes;
5635 while (nbytes-- > 0)
5636 *p++ = *src++;
5637 }
5638 coding->consumed = coding->src_bytes;
5639 }
b73bfc1c 5640
df7492f9 5641 return coding->result;
d46c5b12
KH
5642}
5643
df7492f9
KH
5644static void
5645consume_chars (coding)
5646 struct coding_system *coding;
5647{
5648 int *buf = coding->charbuf;
5649 /* -1 is to compensate for CRLF. */
5650 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
7c78e542 5651 const unsigned char *src = coding->source + coding->consumed;
df7492f9
KH
5652 int pos = coding->src_pos + coding->consumed_char;
5653 int end_pos = coding->src_pos + coding->src_chars;
5654 int multibytep = coding->src_multibyte;
5655 Lisp_Object eol_type;
5656 int c;
5657 int start, end, stop;
5658 Lisp_Object object, prop;
88993dfd 5659
df7492f9
KH
5660 eol_type = CODING_ID_EOL_TYPE (coding->id);
5661 if (VECTORP (eol_type))
5662 eol_type = Qunix;
88993dfd 5663
df7492f9 5664 object = coding->src_object;
b843d1ae 5665
df7492f9
KH
5666 /* Note: composition handling is not yet implemented. */
5667 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5668
df7492f9
KH
5669 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5670 && find_composition (pos, end_pos, &start, &end, &prop, object)
5671 && end <= end_pos
5672 && (start >= pos
5673 || (find_composition (end, end_pos, &start, &end, &prop, object)
5674 && end <= end_pos)))
5675 stop = start;
5676 else
5677 stop = end_pos;
ec6d2bb8 5678
df7492f9 5679 while (buf < buf_end)
ec6d2bb8 5680 {
df7492f9 5681 if (pos == stop)
ec6d2bb8 5682 {
df7492f9 5683 int *p;
ec6d2bb8 5684
df7492f9
KH
5685 if (pos == end_pos)
5686 break;
5687 p = save_composition_data (buf, buf_end, prop);
5688 if (p == NULL)
5689 break;
5690 buf = p;
5691 if (find_composition (end, end_pos, &start, &end, &prop, object)
5692 && end <= end_pos)
5693 stop = start;
5694 else
5695 stop = end_pos;
5696 }
5697
5698 if (! multibytep)
5699 c = *src++;
5700 else
5701 c = STRING_CHAR_ADVANCE (src);
5702 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5703 c = '\n';
5704 if (! EQ (eol_type, Qunix))
5705 {
5706 if (c == '\n')
5707 {
5708 if (EQ (eol_type, Qdos))
5709 *buf++ = '\r';
5710 else
5711 c = '\r';
ec6d2bb8 5712 }
ec6d2bb8 5713 }
df7492f9
KH
5714 *buf++ = c;
5715 pos++;
ec6d2bb8 5716 }
ec6d2bb8 5717
df7492f9
KH
5718 coding->consumed = src - coding->source;
5719 coding->consumed_char = pos - coding->src_pos;
5720 coding->charbuf_used = buf - coding->charbuf;
5721 coding->chars_at_source = 0;
ec6d2bb8
KH
5722}
5723
ec6d2bb8 5724
df7492f9
KH
5725/* Encode the text at CODING->src_object into CODING->dst_object.
5726 CODING->src_object is a buffer or a string.
5727 CODING->dst_object is a buffer or nil.
5728
5729 If CODING->src_object is a buffer, it must be the current buffer.
5730 In this case, if CODING->src_pos is positive, it is a position of
5731 the source text in the buffer, otherwise. the source text is in the
5732 gap area of the buffer, and coding->src_pos specifies the offset of
5733 the text from GPT (which must be the same as PT). If this is the
5734 same buffer as CODING->dst_object, CODING->src_pos must be
5735 negative and CODING should not have `pre-write-conversion'.
5736
5737 If CODING->src_object is a string, CODING should not have
5738 `pre-write-conversion'.
5739
5740 If CODING->dst_object is a buffer, the encoded data is inserted at
5741 the current point of that buffer.
5742
5743 If CODING->dst_object is nil, the encoded data is placed at the
5744 memory area specified by CODING->destination. */
5745
5746static int
5747encode_coding (coding)
ec6d2bb8 5748 struct coding_system *coding;
ec6d2bb8 5749{
df7492f9 5750 Lisp_Object attrs;
ec6d2bb8 5751
df7492f9 5752 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 5753
df7492f9 5754 if (BUFFERP (coding->dst_object))
ec6d2bb8 5755 {
df7492f9
KH
5756 set_buffer_internal (XBUFFER (coding->dst_object));
5757 coding->dst_multibyte
5758 = ! NILP (current_buffer->enable_multibyte_characters);
5759 }
ec6d2bb8 5760
df7492f9
KH
5761 coding->consumed = coding->consumed_char = 0;
5762 coding->produced = coding->produced_char = 0;
5763 coding->result = CODING_RESULT_SUCCESS;
5764 coding->errors = 0;
ec6d2bb8 5765
df7492f9 5766 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 5767
df7492f9
KH
5768 do {
5769 coding_set_source (coding);
5770 consume_chars (coding);
5771
5772 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
da4109a9
KH
5773 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
5774 else if (!NILP (Vstandard_translation_table_for_encode))
5775 translate_chars (coding, Vstandard_translation_table_for_encode);
df7492f9
KH
5776
5777 coding_set_destination (coding);
5778 (*(coding->encoder)) (coding);
5779 } while (coding->consumed_char < coding->src_chars);
5780
5781 if (BUFFERP (coding->dst_object))
5782 insert_from_gap (coding->produced_char, coding->produced);
5783
5784 return (coding->result);
ec6d2bb8
KH
5785}
5786
df7492f9 5787/* Work buffer */
fb88bf2d 5788
df7492f9
KH
5789/* List of currently used working buffer. */
5790Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 5791
df7492f9
KH
5792/* A working buffer used by the top level conversion. */
5793Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 5794
4ed46869 5795
df7492f9
KH
5796/* Return a working buffer that can be freely used by the following
5797 code conversion. MULTIBYTEP specifies the multibyteness of the
5798 buffer. */
b73bfc1c 5799
df7492f9
KH
5800Lisp_Object
5801make_conversion_work_buffer (multibytep)
5802 int multibytep;
5803{
5804 struct buffer *current = current_buffer;
5805 Lisp_Object buf;
d46c5b12 5806
df7492f9 5807 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 5808 {
df7492f9
KH
5809 if (NILP (Vcode_conversion_reused_work_buf))
5810 Vcode_conversion_reused_work_buf
5811 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5812 Vcode_conversion_work_buf_list
5813 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 5814 }
df7492f9 5815 else
d46c5b12 5816 {
c197f191 5817 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 5818 char str[128];
e077cc80 5819
df7492f9
KH
5820 sprintf (str, " *code-conversion-work*<%d>", depth);
5821 Vcode_conversion_work_buf_list
5822 = Fcons (Fget_buffer_create (build_string (str)),
5823 Vcode_conversion_work_buf_list);
d46c5b12 5824 }
d46c5b12 5825
df7492f9
KH
5826 buf = XCAR (Vcode_conversion_work_buf_list);
5827 set_buffer_internal (XBUFFER (buf));
5828 current_buffer->undo_list = Qt;
5829 Ferase_buffer ();
9d123124 5830 Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
df7492f9
KH
5831 set_buffer_internal (current);
5832 return buf;
5833}
d46c5b12 5834
df7492f9 5835static struct coding_system *saved_coding;
d46c5b12 5836
df7492f9
KH
5837Lisp_Object
5838code_conversion_restore (info)
5839 Lisp_Object info;
5840{
c197f191 5841 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 5842 Lisp_Object buf;
d46c5b12 5843
df7492f9 5844 if (depth > 0)
d46c5b12 5845 {
df7492f9
KH
5846 buf = XCAR (Vcode_conversion_work_buf_list);
5847 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5848 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5849 Fkill_buffer (buf);
5850 }
d46c5b12 5851
c197f191 5852 if (EQ (saved_coding->dst_object, Qt)
df7492f9
KH
5853 && saved_coding->destination)
5854 xfree (saved_coding->destination);
b843d1ae 5855
df7492f9
KH
5856 return save_excursion_restore (info);
5857}
d46c5b12 5858
12410ef1 5859
df7492f9
KH
5860int
5861decode_coding_gap (coding, chars, bytes)
5862 struct coding_system *coding;
5863 EMACS_INT chars, bytes;
5864{
5865 int count = specpdl_ptr - specpdl;
fb88bf2d 5866
df7492f9
KH
5867 saved_coding = coding;
5868 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 5869
df7492f9
KH
5870 coding->src_object = Fcurrent_buffer ();
5871 coding->src_chars = chars;
5872 coding->src_bytes = bytes;
5873 coding->src_pos = -chars;
5874 coding->src_pos_byte = -bytes;
5875 coding->src_multibyte = chars < bytes;
5876 coding->dst_object = coding->src_object;
5877 coding->dst_pos = PT;
5878 coding->dst_pos_byte = PT_BYTE;
71c81426 5879 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4956c225 5880
df7492f9
KH
5881 if (CODING_REQUIRE_DETECTION (coding))
5882 detect_coding (coding);
5883
5884 decode_coding (coding);
d46c5b12 5885
df7492f9
KH
5886 unbind_to (count, Qnil);
5887 return coding->result;
5888}
d46c5b12 5889
df7492f9
KH
5890int
5891encode_coding_gap (coding, chars, bytes)
5892 struct coding_system *coding;
5893 EMACS_INT chars, bytes;
5894{
5895 int count = specpdl_ptr - specpdl;
5896 Lisp_Object buffer;
d46c5b12 5897
df7492f9
KH
5898 saved_coding = coding;
5899 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 5900
df7492f9
KH
5901 buffer = Fcurrent_buffer ();
5902 coding->src_object = buffer;
5903 coding->src_chars = chars;
5904 coding->src_bytes = bytes;
5905 coding->src_pos = -chars;
5906 coding->src_pos_byte = -bytes;
5907 coding->src_multibyte = chars < bytes;
5908 coding->dst_object = coding->src_object;
5909 coding->dst_pos = PT;
5910 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 5911
df7492f9 5912 encode_coding (coding);
f2558efd 5913
df7492f9
KH
5914 unbind_to (count, Qnil);
5915 return coding->result;
5916}
b73bfc1c 5917
d46c5b12 5918
df7492f9
KH
5919/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5920 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 5921
df7492f9 5922 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 5923
df7492f9
KH
5924 If it is a buffer, the text is at point of the buffer. FROM and TO
5925 are positions in the buffer.
ec6d2bb8 5926
df7492f9
KH
5927 If it is a string, the text is at the beginning of the string.
5928 FROM and TO are indices to the string.
ec6d2bb8 5929
df7492f9
KH
5930 If it is nil, the text is at coding->source. FROM and TO are
5931 indices to coding->source.
ec6d2bb8 5932
df7492f9 5933 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 5934
df7492f9
KH
5935 If it is a buffer, the decoded text is inserted at point of the
5936 buffer. If the buffer is the same as SRC_OBJECT, the source text
5937 is deleted.
d46c5b12 5938
df7492f9
KH
5939 If it is Qt, a string is made from the decoded text, and
5940 set in CODING->dst_object.
d46c5b12 5941
df7492f9
KH
5942 If it is Qnil, the decoded text is stored at CODING->destination.
5943 The called must allocate CODING->dst_bytes bytes at
5944 CODING->destination by xmalloc. If the decoded text is longer than
5945 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5946 */
d46c5b12 5947
df7492f9
KH
5948void
5949decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5950 dst_object)
5951 struct coding_system *coding;
5952 Lisp_Object src_object;
5953 EMACS_INT from, from_byte, to, to_byte;
5954 Lisp_Object dst_object;
5955{
5956 int count = specpdl_ptr - specpdl;
5957 unsigned char *destination;
5958 EMACS_INT dst_bytes;
5959 EMACS_INT chars = to - from;
5960 EMACS_INT bytes = to_byte - from_byte;
5961 Lisp_Object attrs;
d46c5b12 5962
df7492f9
KH
5963 saved_coding = coding;
5964 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 5965
df7492f9
KH
5966 if (NILP (dst_object))
5967 {
5968 destination = coding->destination;
5969 dst_bytes = coding->dst_bytes;
5970 }
93dec019 5971
df7492f9
KH
5972 coding->src_object = src_object;
5973 coding->src_chars = chars;
5974 coding->src_bytes = bytes;
5975 coding->src_multibyte = chars < bytes;
70ad9fc4 5976
df7492f9
KH
5977 if (STRINGP (src_object))
5978 {
5979 coding->src_pos = from;
5980 coding->src_pos_byte = from_byte;
5981 }
5982 else if (BUFFERP (src_object))
5983 {
5984 set_buffer_internal (XBUFFER (src_object));
5985 if (from != GPT)
5986 move_gap_both (from, from_byte);
5987 if (EQ (src_object, dst_object))
fb88bf2d 5988 {
df7492f9
KH
5989 TEMP_SET_PT_BOTH (from, from_byte);
5990 del_range_both (from, from_byte, to, to_byte, 1);
5991 coding->src_pos = -chars;
5992 coding->src_pos_byte = -bytes;
fb88bf2d 5993 }
df7492f9 5994 else
fb88bf2d 5995 {
df7492f9
KH
5996 coding->src_pos = from;
5997 coding->src_pos_byte = from_byte;
fb88bf2d 5998 }
d46c5b12 5999 }
fb88bf2d 6000
df7492f9
KH
6001 if (CODING_REQUIRE_DETECTION (coding))
6002 detect_coding (coding);
6003 attrs = CODING_ID_ATTRS (coding->id);
6004
6005 if (! NILP (CODING_ATTR_POST_READ (attrs))
6006 || EQ (dst_object, Qt))
b73bfc1c 6007 {
df7492f9
KH
6008 coding->dst_object = make_conversion_work_buffer (1);
6009 coding->dst_pos = BEG;
6010 coding->dst_pos_byte = BEG_BYTE;
6011 coding->dst_multibyte = 1;
b73bfc1c 6012 }
df7492f9 6013 else if (BUFFERP (dst_object))
12410ef1 6014 {
df7492f9
KH
6015 coding->dst_object = dst_object;
6016 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6017 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6018 coding->dst_multibyte
6019 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 6020 }
72d1a715 6021 else
df7492f9
KH
6022 {
6023 coding->dst_object = Qnil;
6024 coding->dst_multibyte = 1;
6025 }
6026
6027 decode_coding (coding);
4ed46869 6028
df7492f9
KH
6029 if (BUFFERP (coding->dst_object))
6030 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6031
df7492f9 6032 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6033 {
df7492f9
KH
6034 struct gcpro gcpro1, gcpro2;
6035 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6036 Lisp_Object val;
4ed46869 6037
c0cc7f7f 6038 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6039 GCPRO2 (coding->src_object, coding->dst_object);
6040 val = call1 (CODING_ATTR_POST_READ (attrs),
6041 make_number (coding->produced_char));
6042 UNGCPRO;
6043 CHECK_NATNUM (val);
6044 coding->produced_char += Z - prev_Z;
6045 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6046 }
4ed46869 6047
df7492f9 6048 if (EQ (dst_object, Qt))
ec6d2bb8 6049 {
df7492f9
KH
6050 coding->dst_object = Fbuffer_string ();
6051 }
6052 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6053 {
6054 set_buffer_internal (XBUFFER (coding->dst_object));
6055 if (dst_bytes < coding->produced)
6056 {
6057 destination
6058 = (unsigned char *) xrealloc (destination, coding->produced);
6059 if (! destination)
6060 {
6061 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6062 unbind_to (count, Qnil);
6063 return;
6064 }
6065 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6066 move_gap_both (BEGV, BEGV_BYTE);
6067 bcopy (BEGV_ADDR, destination, coding->produced);
6068 coding->destination = destination;
6069 }
ec6d2bb8 6070 }
2b4f9037 6071
df7492f9 6072 unbind_to (count, Qnil);
d46c5b12
KH
6073}
6074
df7492f9
KH
6075
6076void
6077encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6078 dst_object)
b73bfc1c 6079 struct coding_system *coding;
df7492f9
KH
6080 Lisp_Object src_object;
6081 EMACS_INT from, from_byte, to, to_byte;
6082 Lisp_Object dst_object;
b73bfc1c
KH
6083{
6084 int count = specpdl_ptr - specpdl;
df7492f9
KH
6085 EMACS_INT chars = to - from;
6086 EMACS_INT bytes = to_byte - from_byte;
6087 Lisp_Object attrs;
6088
6089 saved_coding = coding;
6090 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6091
6092 coding->src_object = src_object;
6093 coding->src_chars = chars;
6094 coding->src_bytes = bytes;
6095 coding->src_multibyte = chars < bytes;
6096
6097 attrs = CODING_ID_ATTRS (coding->id);
6098
6099 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6100 {
df7492f9
KH
6101 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6102 set_buffer_internal (XBUFFER (coding->src_object));
6103 if (STRINGP (src_object))
6104 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6105 else if (BUFFERP (src_object))
6106 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6107 else
6108 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6109
6110 if (EQ (src_object, dst_object))
6111 {
6112 set_buffer_internal (XBUFFER (src_object));
6113 del_range_both (from, from_byte, to, to_byte, 1);
6114 set_buffer_internal (XBUFFER (coding->src_object));
6115 }
6116
ac87bbef
KH
6117 call2 (CODING_ATTR_PRE_WRITE (attrs),
6118 make_number (BEG), make_number (Z));
6119 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6120 if (BEG != GPT)
6121 move_gap_both (BEG, BEG_BYTE);
6122 coding->src_chars = Z - BEG;
6123 coding->src_bytes = Z_BYTE - BEG_BYTE;
6124 coding->src_pos = BEG;
6125 coding->src_pos_byte = BEG_BYTE;
6126 coding->src_multibyte = Z < Z_BYTE;
6127 }
6128 else if (STRINGP (src_object))
6129 {
6130 coding->src_pos = from;
6131 coding->src_pos_byte = from_byte;
6132 }
6133 else if (BUFFERP (src_object))
d46c5b12 6134 {
df7492f9
KH
6135 set_buffer_internal (XBUFFER (src_object));
6136 if (from != GPT)
6137 move_gap_both (from, from_byte);
6138 if (EQ (src_object, dst_object))
d46c5b12 6139 {
df7492f9
KH
6140 del_range_both (from, from_byte, to, to_byte, 1);
6141 coding->src_pos = -chars;
6142 coding->src_pos_byte = -bytes;
d46c5b12 6143 }
df7492f9 6144 else
d46c5b12 6145 {
df7492f9
KH
6146 coding->src_pos = from;
6147 coding->src_pos_byte = from_byte;
d46c5b12
KH
6148 }
6149 }
4ed46869 6150
df7492f9 6151 if (BUFFERP (dst_object))
d46c5b12 6152 {
df7492f9 6153 coding->dst_object = dst_object;
28f67a95
KH
6154 if (EQ (src_object, dst_object))
6155 {
6156 coding->dst_pos = from;
6157 coding->dst_pos_byte = from_byte;
6158 }
6159 else
6160 {
6161 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6162 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6163 }
df7492f9
KH
6164 coding->dst_multibyte
6165 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6166 }
df7492f9 6167 else if (EQ (dst_object, Qt))
4956c225 6168 {
df7492f9 6169 coding->dst_object = Qnil;
df7492f9 6170 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6171 if (coding->dst_bytes == 0)
6172 coding->dst_bytes = 1;
6173 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6174 coding->dst_multibyte = 0;
4956c225 6175 }
df7492f9 6176 else
78108bcd 6177 {
df7492f9
KH
6178 coding->dst_object = Qnil;
6179 coding->dst_multibyte = 0;
78108bcd
KH
6180 }
6181
df7492f9 6182 encode_coding (coding);
4ed46869 6183
df7492f9 6184 if (EQ (dst_object, Qt))
4ed46869 6185 {
df7492f9
KH
6186 if (BUFFERP (coding->dst_object))
6187 coding->dst_object = Fbuffer_string ();
6188 else
73be902c 6189 {
df7492f9
KH
6190 coding->dst_object
6191 = make_unibyte_string ((char *) coding->destination,
6192 coding->produced);
6193 xfree (coding->destination);
73be902c 6194 }
4ed46869 6195 }
d46c5b12 6196
df7492f9 6197 unbind_to (count, Qnil);
b73bfc1c
KH
6198}
6199
df7492f9 6200
b73bfc1c 6201Lisp_Object
df7492f9 6202preferred_coding_system ()
b73bfc1c 6203{
df7492f9 6204 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6205
df7492f9 6206 return CODING_ID_NAME (id);
4ed46869
KH
6207}
6208
6209\f
6210#ifdef emacs
1397dc18 6211/*** 8. Emacs Lisp library functions ***/
4ed46869 6212
4ed46869 6213DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6214 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6215See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6216about coding-system objects. */)
6217 (obj)
4ed46869
KH
6218 Lisp_Object obj;
6219{
df7492f9 6220 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6221}
6222
9d991de8
RS
6223DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6224 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6225 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6226 (prompt)
4ed46869
KH
6227 Lisp_Object prompt;
6228{
e0e989f6 6229 Lisp_Object val;
9d991de8
RS
6230 do
6231 {
4608c386
KH
6232 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6233 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6234 }
6235 while (XSTRING (val)->size == 0);
e0e989f6 6236 return (Fintern (val, Qnil));
4ed46869
KH
6237}
6238
9b787f3e 6239DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6240 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6241If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6242 (prompt, default_coding_system)
9b787f3e 6243 Lisp_Object prompt, default_coding_system;
4ed46869 6244{
f44d27ce 6245 Lisp_Object val;
9b787f3e
RS
6246 if (SYMBOLP (default_coding_system))
6247 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6248 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6249 Qt, Qnil, Qcoding_system_history,
6250 default_coding_system, Qnil);
e0e989f6 6251 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6252}
6253
6254DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6255 1, 1, 0,
48b0f3ae
PJ
6256 doc: /* Check validity of CODING-SYSTEM.
6257If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6258It is valid if it is a symbol with a non-nil `coding-system' property.
6259The value of property should be a vector of length 5. */)
df7492f9 6260 (coding_system)
4ed46869
KH
6261 Lisp_Object coding_system;
6262{
b7826503 6263 CHECK_SYMBOL (coding_system);
4ed46869
KH
6264 if (!NILP (Fcoding_system_p (coding_system)))
6265 return coding_system;
6266 while (1)
02ba4723 6267 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6268}
df7492f9 6269
3a73fa5d 6270\f
89528eb3
KH
6271/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6272 HIGHEST is nonzero, return the coding system of the highest
6273 priority among the detected coding systems. Otherwize return a
6274 list of detected coding systems sorted by their priorities. If
6275 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6276 multibyte form but contains only ASCII and eight-bit chars.
6277 Otherwise, the bytes are raw bytes.
6278
6279 CODING-SYSTEM controls the detection as below:
6280
6281 If it is nil, detect both text-format and eol-format. If the
6282 text-format part of CODING-SYSTEM is already specified
6283 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6284 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6285 detect only text-format. */
6286
d46c5b12 6287Lisp_Object
df7492f9 6288detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6289 unsigned char *src;
6290 int src_bytes, highest;
0a28aafb 6291 int multibytep;
df7492f9 6292 Lisp_Object coding_system;
4ed46869 6293{
df7492f9
KH
6294 unsigned char *src_end = src + src_bytes;
6295 int mask = CATEGORY_MASK_ANY;
6296 int detected = 0;
6297 int c, i;
6298 Lisp_Object attrs, eol_type;
6299 Lisp_Object val;
6300 struct coding_system coding;
89528eb3 6301 int id;
df7492f9
KH
6302
6303 if (NILP (coding_system))
6304 coding_system = Qundecided;
6305 setup_coding_system (coding_system, &coding);
6306 attrs = CODING_ID_ATTRS (coding.id);
6307 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 6308 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 6309
df7492f9
KH
6310 coding.source = src;
6311 coding.src_bytes = src_bytes;
6312 coding.src_multibyte = multibytep;
6313 coding.consumed = 0;
89528eb3 6314 coding.mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6315
89528eb3
KH
6316 /* At first, detect text-format if necessary. */
6317 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
4ed46869 6318 {
df7492f9 6319 for (; src < src_end; src++)
4ed46869 6320 {
df7492f9 6321 c = *src;
89528eb3
KH
6322 if (c & 0x80
6323 || (c < 0x20 && (c == ISO_CODE_ESC
6324 || c == ISO_CODE_SI
6325 || c == ISO_CODE_SO
6326 /* Most UTF-16 text contains '\0'. */
6327 || !c)))
d46c5b12 6328 break;
4ed46869 6329 }
df7492f9
KH
6330 coding.head_ascii = src - coding.source;
6331
6332 if (src < src_end)
6333 for (i = 0; i < coding_category_raw_text; i++)
6334 {
6335 enum coding_category category = coding_priorities[i];
6336 struct coding_system *this = coding_categories + category;
6337
df7492f9
KH
6338 if (this->id < 0)
6339 {
6340 /* No coding system of this category is defined. */
6341 mask &= ~(1 << category);
6342 }
89528eb3
KH
6343 else if (category >= coding_category_raw_text
6344 || detected & (1 << category))
6345 continue;
df7492f9
KH
6346 else
6347 {
6348 detected |= detected_mask[category];
6349 if ((*(coding_categories[category].detector)) (&coding, &mask)
89528eb3
KH
6350 && highest
6351 && (mask & (1 << category)))
df7492f9 6352 {
89528eb3 6353 mask = 1 << category;
df7492f9
KH
6354 break;
6355 }
6356 }
6357 }
4ed46869 6358
89528eb3
KH
6359 if (!mask)
6360 {
6361 id = coding_categories[coding_category_raw_text].id;
6362 val = Fcons (make_number (id), Qnil);
6363 }
6364 else if (mask == CATEGORY_MASK_ANY)
6365 {
6366 id = coding_categories[coding_category_undecided].id;
6367 val = Fcons (make_number (id), Qnil);
6368 }
6369 else if (highest)
6370 {
6371 for (i = 0; i < coding_category_raw_text; i++)
6372 if (mask & (1 << coding_priorities[i]))
6373 {
6374 id = coding_categories[coding_priorities[i]].id;
6375 val = Fcons (make_number (id), Qnil);
6376 break;
6377 }
6378 }
6379 else
6380 {
6381 val = Qnil;
6382 for (i = coding_category_raw_text - 1; i >= 0; i--)
6383 if (mask & (1 << coding_priorities[i]))
6384 {
6385 id = coding_categories[coding_priorities[i]].id;
6386 val = Fcons (make_number (id), val);
6387 }
6388 }
6389 }
df7492f9
KH
6390 else
6391 {
89528eb3
KH
6392 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6393 val = Fcons (make_number (coding.id), Qnil);
4ed46869 6394 }
df7492f9 6395
89528eb3 6396 /* Then, detect eol-format if necessary. */
df7492f9 6397 {
89528eb3 6398 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
6399 Lisp_Object tail;
6400
89528eb3
KH
6401 if (VECTORP (eol_type))
6402 {
6403 if (mask & ~CATEGORY_MASK_UTF_16)
6404 normal_eol = detect_eol (coding.source, src_bytes,
6405 coding_category_raw_text);
6406 if (mask & (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_BE_NOSIG))
6407 utf_16_be_eol = detect_eol (coding.source, src_bytes,
6408 coding_category_utf_16_be);
6409 if (mask & (CATEGORY_MASK_UTF_16_LE | CATEGORY_MASK_UTF_16_LE_NOSIG))
6410 utf_16_le_eol = detect_eol (coding.source, src_bytes,
6411 coding_category_utf_16_le);
6412 }
6413 else
6414 {
6415 if (EQ (eol_type, Qunix))
6416 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6417 else if (EQ (eol_type, Qdos))
6418 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6419 else
6420 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6421 }
6422
df7492f9
KH
6423 for (tail = val; CONSP (tail); tail = XCDR (tail))
6424 {
89528eb3 6425 enum coding_category category;
df7492f9 6426 int this_eol;
89528eb3
KH
6427
6428 id = XINT (XCAR (tail));
6429 attrs = CODING_ID_ATTRS (id);
6430 category = XINT (CODING_ATTR_CATEGORY (attrs));
6431 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
6432 if (VECTORP (eol_type))
6433 {
89528eb3
KH
6434 if (category == coding_category_utf_16_be
6435 || category == coding_category_utf_16_be_nosig)
6436 this_eol = utf_16_be_eol;
6437 else if (category == coding_category_utf_16_le
6438 || category == coding_category_utf_16_le_nosig)
6439 this_eol = utf_16_le_eol;
df7492f9 6440 else
89528eb3
KH
6441 this_eol = normal_eol;
6442
df7492f9
KH
6443 if (this_eol == EOL_SEEN_LF)
6444 XSETCAR (tail, AREF (eol_type, 0));
6445 else if (this_eol == EOL_SEEN_CRLF)
6446 XSETCAR (tail, AREF (eol_type, 1));
6447 else if (this_eol == EOL_SEEN_CR)
6448 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
6449 else
6450 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 6451 }
89528eb3
KH
6452 else
6453 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
6454 }
6455 }
6456
03699b14 6457 return (highest ? XCAR (val) : val);
93dec019 6458}
4ed46869 6459
df7492f9 6460
d46c5b12
KH
6461DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6462 2, 3, 0,
48b0f3ae
PJ
6463 doc: /* Detect coding system of the text in the region between START and END.
6464Return a list of possible coding systems ordered by priority.
6465
6466If only ASCII characters are found, it returns a list of single element
6467`undecided' or its subsidiary coding system according to a detected
6468end-of-line format.
6469
6470If optional argument HIGHEST is non-nil, return the coding system of
6471highest priority. */)
6472 (start, end, highest)
d46c5b12
KH
6473 Lisp_Object start, end, highest;
6474{
6475 int from, to;
6476 int from_byte, to_byte;
6289dd10 6477
b7826503
PJ
6478 CHECK_NUMBER_COERCE_MARKER (start);
6479 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6480
d46c5b12
KH
6481 validate_region (&start, &end);
6482 from = XINT (start), to = XINT (end);
6483 from_byte = CHAR_TO_BYTE (from);
6484 to_byte = CHAR_TO_BYTE (to);
6289dd10 6485
d46c5b12
KH
6486 if (from < GPT && to >= GPT)
6487 move_gap_both (to, to_byte);
c210f766 6488
d46c5b12 6489 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6490 to_byte - from_byte,
0a28aafb
KH
6491 !NILP (highest),
6492 !NILP (current_buffer
df7492f9
KH
6493 ->enable_multibyte_characters),
6494 Qnil);
d46c5b12 6495}
6289dd10 6496
d46c5b12
KH
6497DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6498 1, 2, 0,
48b0f3ae
PJ
6499 doc: /* Detect coding system of the text in STRING.
6500Return a list of possible coding systems ordered by priority.
6501
6502If only ASCII characters are found, it returns a list of single element
6503`undecided' or its subsidiary coding system according to a detected
6504end-of-line format.
6505
6506If optional argument HIGHEST is non-nil, return the coding system of
6507highest priority. */)
6508 (string, highest)
d46c5b12
KH
6509 Lisp_Object string, highest;
6510{
b7826503 6511 CHECK_STRING (string);
4ed46869 6512
d46c5b12 6513 return detect_coding_system (XSTRING (string)->data,
df7492f9 6514 STRING_BYTES (XSTRING (string)),
0a28aafb 6515 !NILP (highest),
df7492f9
KH
6516 STRING_MULTIBYTE (string),
6517 Qnil);
4ed46869
KH
6518}
6519
05e6f5dc 6520
df7492f9
KH
6521static INLINE int
6522char_encodable_p (c, attrs)
6523 int c;
6524 Lisp_Object attrs;
05e6f5dc 6525{
df7492f9 6526 Lisp_Object tail;
df7492f9 6527 struct charset *charset;
05e6f5dc 6528
df7492f9
KH
6529 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6530 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6531 {
df7492f9
KH
6532 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6533 if (CHAR_CHARSET_P (c, charset))
6534 break;
05e6f5dc 6535 }
df7492f9 6536 return (! NILP (tail));
05e6f5dc
KH
6537}
6538
6539
df7492f9
KH
6540/* Return a list of coding systems that safely encode the text between
6541 START and END. If EXCLUDE is non-nil, it is a list of coding
6542 systems not to check. The returned list doesn't contain any such
48468dac 6543 coding systems. In any case, if the text contains only ASCII or is
df7492f9
KH
6544 unibyte, return t. */
6545
6546DEFUN ("find-coding-systems-region-internal",
6547 Ffind_coding_systems_region_internal,
6548 Sfind_coding_systems_region_internal, 2, 3, 0,
6549 doc: /* Internal use only. */)
6550 (start, end, exclude)
6551 Lisp_Object start, end, exclude;
6552{
6553 Lisp_Object coding_attrs_list, safe_codings;
6554 EMACS_INT start_byte, end_byte;
7c78e542 6555 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
6556 int c;
6557 Lisp_Object tail, elt;
05e6f5dc 6558
df7492f9
KH
6559 if (STRINGP (start))
6560 {
6561 if (!STRING_MULTIBYTE (start)
48468dac 6562 || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
df7492f9
KH
6563 return Qt;
6564 start_byte = 0;
6565 end_byte = STRING_BYTES (XSTRING (start));
6566 }
6567 else
6568 {
6569 CHECK_NUMBER_COERCE_MARKER (start);
6570 CHECK_NUMBER_COERCE_MARKER (end);
6571 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6572 args_out_of_range (start, end);
6573 if (NILP (current_buffer->enable_multibyte_characters))
6574 return Qt;
6575 start_byte = CHAR_TO_BYTE (XINT (start));
6576 end_byte = CHAR_TO_BYTE (XINT (end));
6577 if (XINT (end) - XINT (start) == end_byte - start_byte)
6578 return Qt;
05e6f5dc 6579
df7492f9
KH
6580 if (start < GPT && end > GPT)
6581 {
6582 if ((GPT - start) < (end - GPT))
6583 move_gap_both (start, start_byte);
6584 else
6585 move_gap_both (end, end_byte);
6586 }
6587 }
05e6f5dc 6588
df7492f9
KH
6589 coding_attrs_list = Qnil;
6590 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6591 if (NILP (exclude)
6592 || NILP (Fmemq (XCAR (tail), exclude)))
6593 {
6594 Lisp_Object attrs;
05e6f5dc 6595
df7492f9
KH
6596 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6597 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6598 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6599 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6600 }
6601
6602 if (STRINGP (start))
6603 p = pbeg = XSTRING (start)->data;
6604 else
6605 p = pbeg = BYTE_POS_ADDR (start_byte);
6606 pend = p + (end_byte - start_byte);
6607
6608 while (p < pend && ASCII_BYTE_P (*p)) p++;
6609 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6610
6611 while (p < pend)
6612 {
df7492f9
KH
6613 if (ASCII_BYTE_P (*p))
6614 p++;
6615 else
6616 {
6617 c = STRING_CHAR_ADVANCE (p);
6618
6619 charset_map_loaded = 0;
6620 for (tail = coding_attrs_list; CONSP (tail);)
6621 {
6622 elt = XCAR (tail);
6623 if (NILP (elt))
6624 tail = XCDR (tail);
6625 else if (char_encodable_p (c, elt))
6626 tail = XCDR (tail);
6627 else if (CONSP (XCDR (tail)))
6628 {
6629 XSETCAR (tail, XCAR (XCDR (tail)));
6630 XSETCDR (tail, XCDR (XCDR (tail)));
6631 }
6632 else
6633 {
6634 XSETCAR (tail, Qnil);
6635 tail = XCDR (tail);
6636 }
6637 }
6638 if (charset_map_loaded)
6639 {
6640 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6641
df7492f9
KH
6642 if (STRINGP (start))
6643 pbeg = XSTRING (start)->data;
6644 else
6645 pbeg = BYTE_POS_ADDR (start_byte);
6646 p = pbeg + p_offset;
6647 pend = pbeg + pend_offset;
6648 }
6649 }
05e6f5dc 6650 }
df7492f9
KH
6651
6652 safe_codings = Qnil;
6653 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6654 if (! NILP (XCAR (tail)))
6655 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6656
05e6f5dc
KH
6657 return safe_codings;
6658}
6659
6660
df7492f9
KH
6661DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6662 Scheck_coding_systems_region, 3, 3, 0,
6663 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6664
df7492f9
KH
6665START and END are buffer positions specifying the region.
6666CODING-SYSTEM-LIST is a list of coding systems to check.
6667
6668The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6669CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6670whole region, POS0, POS1, ... are buffer positions where non-encodable
6671characters are found.
6672
6673If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6674value is nil.
6675
6676START may be a string. In that case, check if the string is
6677encodable, and the value contains indices to the string instead of
6678buffer positions. END is ignored. */)
6679 (start, end, coding_system_list)
6680 Lisp_Object start, end, coding_system_list;
05e6f5dc 6681{
df7492f9
KH
6682 Lisp_Object list;
6683 EMACS_INT start_byte, end_byte;
6684 int pos;
7c78e542 6685 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
6686 int c;
6687 Lisp_Object tail, elt;
05e6f5dc
KH
6688
6689 if (STRINGP (start))
6690 {
df7492f9
KH
6691 if (!STRING_MULTIBYTE (start)
6692 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6693 return Qnil;
6694 start_byte = 0;
6695 end_byte = STRING_BYTES (XSTRING (start));
6696 pos = 0;
05e6f5dc
KH
6697 }
6698 else
6699 {
b7826503
PJ
6700 CHECK_NUMBER_COERCE_MARKER (start);
6701 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
6702 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6703 args_out_of_range (start, end);
6704 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
6705 return Qnil;
6706 start_byte = CHAR_TO_BYTE (XINT (start));
6707 end_byte = CHAR_TO_BYTE (XINT (end));
6708 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 6709 return Qt;
df7492f9
KH
6710
6711 if (start < GPT && end > GPT)
6712 {
6713 if ((GPT - start) < (end - GPT))
6714 move_gap_both (start, start_byte);
6715 else
6716 move_gap_both (end, end_byte);
6717 }
6718 pos = start;
6719 }
6720
6721 list = Qnil;
6722 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6723 {
6724 elt = XCAR (tail);
6725 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6726 Qnil)),
6727 list);
05e6f5dc
KH
6728 }
6729
df7492f9
KH
6730 if (STRINGP (start))
6731 p = pbeg = XSTRING (start)->data;
6732 else
6733 p = pbeg = BYTE_POS_ADDR (start_byte);
6734 pend = p + (end_byte - start_byte);
6735
6736 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6737 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6738
6739 while (p < pend)
05e6f5dc 6740 {
df7492f9
KH
6741 if (ASCII_BYTE_P (*p))
6742 p++;
6743 else
05e6f5dc 6744 {
df7492f9
KH
6745 c = STRING_CHAR_ADVANCE (p);
6746
6747 charset_map_loaded = 0;
6748 for (tail = list; CONSP (tail); tail = XCDR (tail))
6749 {
6750 elt = XCDR (XCAR (tail));
6751 if (! char_encodable_p (c, XCAR (elt)))
6752 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6753 }
6754 if (charset_map_loaded)
6755 {
6756 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6757
6758 if (STRINGP (start))
6759 pbeg = XSTRING (start)->data;
6760 else
6761 pbeg = BYTE_POS_ADDR (start_byte);
6762 p = pbeg + p_offset;
6763 pend = pbeg + pend_offset;
6764 }
05e6f5dc 6765 }
df7492f9 6766 pos++;
05e6f5dc
KH
6767 }
6768
df7492f9
KH
6769 tail = list;
6770 list = Qnil;
6771 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 6772 {
df7492f9
KH
6773 elt = XCAR (tail);
6774 if (CONSP (XCDR (XCDR (elt))))
6775 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6776 list);
05e6f5dc 6777 }
df7492f9
KH
6778
6779 return list;
05e6f5dc
KH
6780}
6781
6782
df7492f9 6783
4031e2bf 6784Lisp_Object
df7492f9
KH
6785code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6786 Lisp_Object start, end, coding_system, dst_object;
6787 int encodep, norecord;
3a73fa5d
RS
6788{
6789 struct coding_system coding;
df7492f9
KH
6790 EMACS_INT from, from_byte, to, to_byte;
6791 Lisp_Object src_object;
3a73fa5d 6792
b7826503
PJ
6793 CHECK_NUMBER_COERCE_MARKER (start);
6794 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
6795 if (NILP (coding_system))
6796 coding_system = Qno_conversion;
6797 else
6798 CHECK_CODING_SYSTEM (coding_system);
6799 src_object = Fcurrent_buffer ();
6800 if (NILP (dst_object))
6801 dst_object = src_object;
6802 else if (! EQ (dst_object, Qt))
6803 CHECK_BUFFER (dst_object);
3a73fa5d 6804
d46c5b12
KH
6805 validate_region (&start, &end);
6806 from = XFASTINT (start);
df7492f9 6807 from_byte = CHAR_TO_BYTE (from);
d46c5b12 6808 to = XFASTINT (end);
df7492f9 6809 to_byte = CHAR_TO_BYTE (to);
d46c5b12 6810
df7492f9
KH
6811 setup_coding_system (coding_system, &coding);
6812 coding.mode |= CODING_MODE_LAST_BLOCK;
6813
6814 if (encodep)
6815 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6816 dst_object);
6817 else
6818 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6819 dst_object);
6820 if (! norecord)
6821 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 6822
df7492f9
KH
6823 if (coding.result != CODING_RESULT_SUCCESS)
6824 error ("Code conversion error: %d", coding.result);
3a73fa5d 6825
df7492f9
KH
6826 return (BUFFERP (dst_object)
6827 ? make_number (coding.produced_char)
6828 : coding.dst_object);
4031e2bf
KH
6829}
6830
df7492f9 6831
4031e2bf 6832DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 6833 3, 4, "r\nzCoding system: ",
48b0f3ae 6834 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
6835When called from a program, takes four arguments:
6836 START, END, CODING-SYSTEM, and DESTINATION.
6837START and END are buffer positions.
6838
6839Optional 4th arguments DESTINATION specifies where the decoded text goes.
6840If nil, the region between START and END is replace by the decoded text.
6841If buffer, the decoded text is inserted in the buffer.
6842If t, the decoded text is returned.
6843
48b0f3ae
PJ
6844This function sets `last-coding-system-used' to the precise coding system
6845used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6846not fully specified.)
6847It returns the length of the decoded text. */)
df7492f9
KH
6848 (start, end, coding_system, destination)
6849 Lisp_Object start, end, coding_system, destination;
4031e2bf 6850{
df7492f9 6851 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
6852}
6853
6854DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
6855 3, 4, "r\nzCoding system: ",
6856 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
6857When called from a program, takes three arguments:
6858START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
6859
6860Optional 4th arguments DESTINATION specifies where the encoded text goes.
6861If nil, the region between START and END is replace by the encoded text.
6862If buffer, the encoded text is inserted in the buffer.
6863If t, the encoded text is returned.
6864
48b0f3ae
PJ
6865This function sets `last-coding-system-used' to the precise coding system
6866used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6867not fully specified.)
6868It returns the length of the encoded text. */)
df7492f9
KH
6869 (start, end, coding_system, destination)
6870 Lisp_Object start, end, coding_system, destination;
3a73fa5d 6871{
df7492f9 6872 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 6873}
3a73fa5d 6874
4031e2bf 6875Lisp_Object
df7492f9
KH
6876code_convert_string (string, coding_system, dst_object,
6877 encodep, nocopy, norecord)
6878 Lisp_Object string, coding_system, dst_object;
6879 int encodep, nocopy, norecord;
4031e2bf
KH
6880{
6881 struct coding_system coding;
df7492f9 6882 EMACS_INT chars, bytes;
3a73fa5d 6883
b7826503 6884 CHECK_STRING (string);
d46c5b12 6885 if (NILP (coding_system))
df7492f9
KH
6886 {
6887 if (! norecord)
6888 Vlast_coding_system_used = Qno_conversion;
6889 if (NILP (dst_object))
6890 return (nocopy ? Fcopy_sequence (string) : string);
6891 }
4ed46869 6892
df7492f9
KH
6893 if (NILP (coding_system))
6894 coding_system = Qno_conversion;
6895 else
6896 CHECK_CODING_SYSTEM (coding_system);
6897 if (NILP (dst_object))
6898 dst_object = Qt;
6899 else if (! EQ (dst_object, Qt))
6900 CHECK_BUFFER (dst_object);
5f1cd180 6901
df7492f9 6902 setup_coding_system (coding_system, &coding);
d46c5b12 6903 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
6904 chars = XSTRING (string)->size;
6905 bytes = STRING_BYTES (XSTRING (string));
6906 if (encodep)
6907 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6908 else
6909 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6910 if (! norecord)
6911 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 6912
df7492f9
KH
6913 if (coding.result != CODING_RESULT_SUCCESS)
6914 error ("Code conversion error: %d", coding.result);
4ed46869 6915
df7492f9
KH
6916 return (BUFFERP (dst_object)
6917 ? make_number (coding.produced_char)
6918 : coding.dst_object);
4ed46869
KH
6919}
6920
4031e2bf 6921
ecec61c1 6922/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
6923 Do not set Vlast_coding_system_used.
6924
6925 This function is called only from macros DECODE_FILE and
6926 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
6927
6928Lisp_Object
6929code_convert_string_norecord (string, coding_system, encodep)
6930 Lisp_Object string, coding_system;
6931 int encodep;
6932{
0be8721c 6933 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 6934}
ecec61c1 6935
ecec61c1 6936
df7492f9
KH
6937DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6938 2, 4, 0,
6939 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6940
6941Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6942if the decoding operation is trivial.
ecec61c1 6943
df7492f9 6944Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 6945inserted in BUFFER instead of returned as a string. In this case,
df7492f9 6946the return value is BUFFER.
ecec61c1 6947
df7492f9
KH
6948This function sets `last-coding-system-used' to the precise coding system
6949used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6950not fully specified. */)
6951 (string, coding_system, nocopy, buffer)
6952 Lisp_Object string, coding_system, nocopy, buffer;
6953{
6954 return code_convert_string (string, coding_system, buffer,
6955 0, ! NILP (nocopy), 0);
6956}
6957
6958DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6959 2, 4, 0,
6960 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6961
6962Optional third arg NOCOPY non-nil means it is OK to return STRING
6963itself if the encoding operation is trivial.
6964
6965Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 6966inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
6967the return value is BUFFER.
6968
6969This function sets `last-coding-system-used' to the precise coding system
6970used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6971not fully specified.) */)
6972 (string, coding_system, nocopy, buffer)
6973 Lisp_Object string, coding_system, nocopy, buffer;
6974{
6975 return code_convert_string (string, coding_system, buffer,
c197f191 6976 1, ! NILP (nocopy), 1);
ecec61c1 6977}
df7492f9 6978
3a73fa5d 6979\f
4ed46869 6980DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6981 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6982Return the corresponding character. */)
6983 (code)
4ed46869
KH
6984 Lisp_Object code;
6985{
df7492f9
KH
6986 Lisp_Object spec, attrs, val;
6987 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6988 int c;
6989
6990 CHECK_NATNUM (code);
6991 c = XFASTINT (code);
6992 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6993 attrs = AREF (spec, 0);
4ed46869 6994
df7492f9
KH
6995 if (ASCII_BYTE_P (c)
6996 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6997 return code;
6998
6999 val = CODING_ATTR_CHARSET_LIST (attrs);
7000 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7001 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7002 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
7003
7004 if (c <= 0x7F)
7005 charset = charset_roman;
7006 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7007 {
df7492f9
KH
7008 charset = charset_kana;
7009 c -= 0x80;
55ab7be3
KH
7010 }
7011 else
7012 {
004068e4 7013 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7014
7015 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7016 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7017 error ("Invalid code: %d", code);
7018 SJIS_TO_JIS (c);
7019 charset = charset_kanji;
55ab7be3 7020 }
df7492f9
KH
7021 c = DECODE_CHAR (charset, c);
7022 if (c < 0)
7023 error ("Invalid code: %d", code);
7024 return make_number (c);
4ed46869
KH
7025}
7026
df7492f9 7027
4ed46869 7028DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7029 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7030Return the corresponding code in SJIS. */)
7031 (ch)
df7492f9 7032 Lisp_Object ch;
4ed46869 7033{
df7492f9
KH
7034 Lisp_Object spec, attrs, charset_list;
7035 int c;
7036 struct charset *charset;
7037 unsigned code;
4ed46869 7038
df7492f9
KH
7039 CHECK_CHARACTER (ch);
7040 c = XFASTINT (ch);
7041 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7042 attrs = AREF (spec, 0);
7043
7044 if (ASCII_CHAR_P (c)
7045 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7046 return ch;
7047
7048 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7049 charset = char_charset (c, charset_list, &code);
7050 if (code == CHARSET_INVALID_CODE (charset))
7051 error ("Can't encode by shift_jis encoding: %d", c);
7052 JIS_TO_SJIS (code);
7053
7054 return make_number (code);
4ed46869
KH
7055}
7056
7057DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7058 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7059Return the corresponding character. */)
7060 (code)
4ed46869
KH
7061 Lisp_Object code;
7062{
df7492f9
KH
7063 Lisp_Object spec, attrs, val;
7064 struct charset *charset_roman, *charset_big5, *charset;
7065 int c;
4ed46869 7066
df7492f9
KH
7067 CHECK_NATNUM (code);
7068 c = XFASTINT (code);
7069 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7070 attrs = AREF (spec, 0);
7071
7072 if (ASCII_BYTE_P (c)
7073 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7074 return code;
7075
7076 val = CODING_ATTR_CHARSET_LIST (attrs);
7077 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7078 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7079
7080 if (c <= 0x7F)
7081 charset = charset_roman;
c28a9453
KH
7082 else
7083 {
df7492f9
KH
7084 int b1 = c >> 8, b2 = c & 0x7F;
7085 if (b1 < 0xA1 || b1 > 0xFE
7086 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7087 error ("Invalid code: %d", code);
7088 charset = charset_big5;
c28a9453 7089 }
df7492f9
KH
7090 c = DECODE_CHAR (charset, (unsigned )c);
7091 if (c < 0)
7092 error ("Invalid code: %d", code);
7093 return make_number (c);
4ed46869
KH
7094}
7095
7096DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7097 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7098Return the corresponding character code in Big5. */)
7099 (ch)
4ed46869
KH
7100 Lisp_Object ch;
7101{
df7492f9
KH
7102 Lisp_Object spec, attrs, charset_list;
7103 struct charset *charset;
7104 int c;
7105 unsigned code;
7106
7107 CHECK_CHARACTER (ch);
7108 c = XFASTINT (ch);
7109 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7110 attrs = AREF (spec, 0);
7111 if (ASCII_CHAR_P (c)
7112 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7113 return ch;
7114
7115 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7116 charset = char_charset (c, charset_list, &code);
7117 if (code == CHARSET_INVALID_CODE (charset))
7118 error ("Can't encode by Big5 encoding: %d", c);
7119
7120 return make_number (code);
4ed46869 7121}
df7492f9 7122
3a73fa5d 7123\f
1ba9e4ab
KH
7124DEFUN ("set-terminal-coding-system-internal",
7125 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7126 Sset_terminal_coding_system_internal, 1, 1, 0,
7127 doc: /* Internal use only. */)
7128 (coding_system)
b74e4686 7129 Lisp_Object coding_system;
4ed46869 7130{
b7826503 7131 CHECK_SYMBOL (coding_system);
df7492f9
KH
7132 setup_coding_system (Fcheck_coding_system (coding_system),
7133 &terminal_coding);
7134
70c22245 7135 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7136 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7137 /* Characer composition should be disabled. */
7138 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7139 terminal_coding.src_multibyte = 1;
7140 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7141 return Qnil;
7142}
7143
c4825358
KH
7144DEFUN ("set-safe-terminal-coding-system-internal",
7145 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7146 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7147 doc: /* Internal use only. */)
48b0f3ae 7148 (coding_system)
b74e4686 7149 Lisp_Object coding_system;
c4825358 7150{
b7826503 7151 CHECK_SYMBOL (coding_system);
c4825358
KH
7152 setup_coding_system (Fcheck_coding_system (coding_system),
7153 &safe_terminal_coding);
df7492f9
KH
7154 /* Characer composition should be disabled. */
7155 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7156 safe_terminal_coding.src_multibyte = 1;
7157 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7158 return Qnil;
7159}
7160
4ed46869
KH
7161DEFUN ("terminal-coding-system",
7162 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7163 doc: /* Return coding system specified for terminal output. */)
7164 ()
4ed46869 7165{
df7492f9 7166 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7167}
7168
1ba9e4ab
KH
7169DEFUN ("set-keyboard-coding-system-internal",
7170 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7171 Sset_keyboard_coding_system_internal, 1, 1, 0,
7172 doc: /* Internal use only. */)
7173 (coding_system)
4ed46869
KH
7174 Lisp_Object coding_system;
7175{
b7826503 7176 CHECK_SYMBOL (coding_system);
df7492f9
KH
7177 setup_coding_system (Fcheck_coding_system (coding_system),
7178 &keyboard_coding);
7179 /* Characer composition should be disabled. */
7180 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7181 return Qnil;
7182}
7183
7184DEFUN ("keyboard-coding-system",
7185 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7186 doc: /* Return coding system specified for decoding keyboard input. */)
7187 ()
4ed46869 7188{
df7492f9 7189 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7190}
7191
7192\f
a5d301df
KH
7193DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7194 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7195 doc: /* Choose a coding system for an operation based on the target name.
7196The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7197DECODING-SYSTEM is the coding system to use for decoding
7198\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7199for encoding (in case OPERATION does encoding).
7200
7201The first argument OPERATION specifies an I/O primitive:
7202 For file I/O, `insert-file-contents' or `write-region'.
7203 For process I/O, `call-process', `call-process-region', or `start-process'.
7204 For network I/O, `open-network-stream'.
7205
7206The remaining arguments should be the same arguments that were passed
7207to the primitive. Depending on which primitive, one of those arguments
7208is selected as the TARGET. For example, if OPERATION does file I/O,
7209whichever argument specifies the file name is TARGET.
7210
7211TARGET has a meaning which depends on OPERATION:
7212 For file I/O, TARGET is a file name.
7213 For process I/O, TARGET is a process name.
7214 For network I/O, TARGET is a service name or a port number
7215
7216This function looks up what specified for TARGET in,
7217`file-coding-system-alist', `process-coding-system-alist',
7218or `network-coding-system-alist' depending on OPERATION.
7219They may specify a coding system, a cons of coding systems,
7220or a function symbol to call.
7221In the last case, we call the function with one argument,
7222which is a list of all the arguments given to this function.
7223
7224usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7225 (nargs, args)
4ed46869
KH
7226 int nargs;
7227 Lisp_Object *args;
7228{
7229 Lisp_Object operation, target_idx, target, val;
7230 register Lisp_Object chain;
7231
7232 if (nargs < 2)
7233 error ("Too few arguments");
7234 operation = args[0];
7235 if (!SYMBOLP (operation)
7236 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7237 error ("Invalid first arguement");
4ed46869
KH
7238 if (nargs < 1 + XINT (target_idx))
7239 error ("Too few arguments for operation: %s",
7240 XSYMBOL (operation)->name->data);
7241 target = args[XINT (target_idx) + 1];
7242 if (!(STRINGP (target)
7243 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7244 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7245
2e34157c
RS
7246 chain = ((EQ (operation, Qinsert_file_contents)
7247 || EQ (operation, Qwrite_region))
02ba4723 7248 ? Vfile_coding_system_alist
2e34157c 7249 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7250 ? Vnetwork_coding_system_alist
7251 : Vprocess_coding_system_alist));
4ed46869
KH
7252 if (NILP (chain))
7253 return Qnil;
7254
03699b14 7255 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7256 {
f44d27ce 7257 Lisp_Object elt;
4ed46869 7258
df7492f9 7259 elt = XCAR (chain);
4ed46869
KH
7260 if (CONSP (elt)
7261 && ((STRINGP (target)
03699b14
KR
7262 && STRINGP (XCAR (elt))
7263 && fast_string_match (XCAR (elt), target) >= 0)
7264 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7265 {
03699b14 7266 val = XCDR (elt);
b19fd4c5
KH
7267 /* Here, if VAL is both a valid coding system and a valid
7268 function symbol, we return VAL as a coding system. */
02ba4723
KH
7269 if (CONSP (val))
7270 return val;
7271 if (! SYMBOLP (val))
7272 return Qnil;
7273 if (! NILP (Fcoding_system_p (val)))
7274 return Fcons (val, val);
b19fd4c5
KH
7275 if (! NILP (Ffboundp (val)))
7276 {
7277 val = call1 (val, Flist (nargs, args));
7278 if (CONSP (val))
7279 return val;
7280 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7281 return Fcons (val, val);
7282 }
02ba4723
KH
7283 return Qnil;
7284 }
4ed46869
KH
7285 }
7286 return Qnil;
7287}
7288
df7492f9 7289DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7290 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7291 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7292usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7293 (nargs, args)
7294 int nargs;
7295 Lisp_Object *args;
7296{
7297 int i, j;
7298 int changed[coding_category_max];
7299 enum coding_category priorities[coding_category_max];
7300
7301 bzero (changed, sizeof changed);
7302
7303 for (i = j = 0; i < nargs; i++)
7304 {
7305 enum coding_category category;
7306 Lisp_Object spec, attrs;
7307
7308 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7309 attrs = AREF (spec, 0);
7310 category = XINT (CODING_ATTR_CATEGORY (attrs));
7311 if (changed[category])
7312 /* Ignore this coding system because a coding system of the
7313 same category already had a higher priority. */
7314 continue;
7315 changed[category] = 1;
7316 priorities[j++] = category;
7317 if (coding_categories[category].id >= 0
7318 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7319 setup_coding_system (args[i], &coding_categories[category]);
7320 }
7321
7322 /* Now we have decided top J priorities. Reflect the order of the
7323 original priorities to the remaining priorities. */
7324
7325 for (i = j, j = 0; i < coding_category_max; i++, j++)
7326 {
7327 while (j < coding_category_max
7328 && changed[coding_priorities[j]])
7329 j++;
7330 if (j == coding_category_max)
7331 abort ();
7332 priorities[i] = coding_priorities[j];
7333 }
7334
7335 bcopy (priorities, coding_priorities, sizeof priorities);
7336 return Qnil;
7337}
7338
7339DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7340 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7341 doc: /* Return a list of coding systems ordered by their priorities.
7342HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7343 (highestp)
7344 Lisp_Object highestp;
d46c5b12
KH
7345{
7346 int i;
df7492f9 7347 Lisp_Object val;
d46c5b12 7348
df7492f9 7349 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7350 {
df7492f9
KH
7351 enum coding_category category = coding_priorities[i];
7352 int id = coding_categories[category].id;
7353 Lisp_Object attrs;
7354
7355 if (id < 0)
7356 continue;
7357 attrs = CODING_ID_ATTRS (id);
7358 if (! NILP (highestp))
7359 return CODING_ATTR_BASE_NAME (attrs);
7360 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7361 }
7362 return Fnreverse (val);
7363}
7364
f0064e1f
DL
7365static char *suffixes[] = { "-unix", "-dos", "-mac" };
7366
df7492f9
KH
7367static Lisp_Object
7368make_subsidiaries (base)
7369 Lisp_Object base;
7370{
7371 Lisp_Object subsidiaries;
df7492f9
KH
7372 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7373 char *buf = (char *) alloca (base_name_len + 6);
7374 int i;
7375
7376 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7377 subsidiaries = Fmake_vector (make_number (3), Qnil);
7378 for (i = 0; i < 3; i++)
7379 {
7380 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7381 ASET (subsidiaries, i, intern (buf));
7382 }
7383 return subsidiaries;
7384}
7385
7386
7387DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7388 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7389 doc: /* For internal use only.
7390usage: (define-coding-system-internal ...) */)
df7492f9
KH
7391 (nargs, args)
7392 int nargs;
7393 Lisp_Object *args;
7394{
7395 Lisp_Object name;
7396 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7397 Lisp_Object attrs; /* Vector of attributes. */
7398 Lisp_Object eol_type;
7399 Lisp_Object aliases;
7400 Lisp_Object coding_type, charset_list, safe_charsets;
7401 enum coding_category category;
7402 Lisp_Object tail, val;
7403 int max_charset_id = 0;
7404 int i;
7405
7406 if (nargs < coding_arg_max)
7407 goto short_args;
7408
7409 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7410
7411 name = args[coding_arg_name];
7412 CHECK_SYMBOL (name);
7413 CODING_ATTR_BASE_NAME (attrs) = name;
7414
7415 val = args[coding_arg_mnemonic];
7416 if (! STRINGP (val))
7417 CHECK_CHARACTER (val);
7418 CODING_ATTR_MNEMONIC (attrs) = val;
7419
7420 coding_type = args[coding_arg_coding_type];
7421 CHECK_SYMBOL (coding_type);
7422 CODING_ATTR_TYPE (attrs) = coding_type;
7423
7424 charset_list = args[coding_arg_charset_list];
7425 if (SYMBOLP (charset_list))
7426 {
7427 if (EQ (charset_list, Qiso_2022))
7428 {
7429 if (! EQ (coding_type, Qiso_2022))
7430 error ("Invalid charset-list");
7431 charset_list = Viso_2022_charset_list;
7432 }
7433 else if (EQ (charset_list, Qemacs_mule))
7434 {
7435 if (! EQ (coding_type, Qemacs_mule))
7436 error ("Invalid charset-list");
7437 charset_list = Vemacs_mule_charset_list;
7438 }
7439 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7440 if (max_charset_id < XFASTINT (XCAR (tail)))
7441 max_charset_id = XFASTINT (XCAR (tail));
7442 }
7443 else
7444 {
7445 charset_list = Fcopy_sequence (charset_list);
7446 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7447 {
7448 struct charset *charset;
7449
7450 val = Fcar (tail);
7451 CHECK_CHARSET_GET_CHARSET (val, charset);
7452 if (EQ (coding_type, Qiso_2022)
7453 ? CHARSET_ISO_FINAL (charset) < 0
7454 : EQ (coding_type, Qemacs_mule)
7455 ? CHARSET_EMACS_MULE_ID (charset) < 0
7456 : 0)
7457 error ("Can't handle charset `%s'",
7458 XSYMBOL (CHARSET_NAME (charset))->name->data);
7459
7460 XCAR (tail) = make_number (charset->id);
7461 if (max_charset_id < charset->id)
7462 max_charset_id = charset->id;
7463 }
7464 }
7465 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7466
7467 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7468 make_number (255));
7469 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7470 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7471 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7472
7473 val = args[coding_arg_decode_translation_table];
7474 if (! NILP (val))
7475 CHECK_CHAR_TABLE (val);
7476 CODING_ATTR_DECODE_TBL (attrs) = val;
7477
7478 val = args[coding_arg_encode_translation_table];
7479 if (! NILP (val))
7480 CHECK_CHAR_TABLE (val);
7481 CODING_ATTR_ENCODE_TBL (attrs) = val;
7482
7483 val = args[coding_arg_post_read_conversion];
7484 CHECK_SYMBOL (val);
7485 CODING_ATTR_POST_READ (attrs) = val;
7486
7487 val = args[coding_arg_pre_write_conversion];
7488 CHECK_SYMBOL (val);
7489 CODING_ATTR_PRE_WRITE (attrs) = val;
7490
7491 val = args[coding_arg_default_char];
7492 if (NILP (val))
7493 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7494 else
7495 {
7496 CHECK_CHARACTER (val);
7497 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7498 }
7499
7500 val = args[coding_arg_plist];
7501 CHECK_LIST (val);
7502 CODING_ATTR_PLIST (attrs) = val;
7503
7504 if (EQ (coding_type, Qcharset))
7505 {
c7c66a95
KH
7506 /* Generate a lisp vector of 256 elements. Each element is nil,
7507 integer, or a list of charset IDs.
7508
7509 If Nth element is nil, the byte code N is invalid in this
7510 coding system.
7511
7512 If Nth element is a number NUM, N is the first byte of a
7513 charset whose ID is NUM.
7514
7515 If Nth element is a list of charset IDs, N is the first byte
7516 of one of them. The list is sorted by dimensions of the
7517 charsets. A charset of smaller dimension comes firtst.
7518 */
df7492f9
KH
7519 val = Fmake_vector (make_number (256), Qnil);
7520
7521 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7522 {
c7c66a95
KH
7523 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7524 int dim = CHARSET_DIMENSION (charset);
7525 int idx = (dim - 1) * 4;
7526
15d143f7
KH
7527 for (i = charset->code_space[idx];
7528 i <= charset->code_space[idx + 1]; i++)
7529 {
c7c66a95
KH
7530 Lisp_Object tmp, tmp2;
7531 int dim2;
7532
7533 tmp = AREF (val, i);
7534 if (NILP (tmp))
7535 tmp = XCAR (tail);
7536 else if (NUMBERP (tmp))
7537 {
7538 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7539 if (dim < dim2)
c7c66a95 7540 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7541 else
7542 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7543 }
15d143f7 7544 else
c7c66a95
KH
7545 {
7546 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7547 {
7548 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7549 if (dim < dim2)
7550 break;
7551 }
7552 if (NILP (tmp2))
7553 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7554 else
7555 {
7556 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7557 XSETCAR (tmp2, XCAR (tail));
7558 }
7559 }
7560 ASET (val, i, tmp);
15d143f7 7561 }
df7492f9
KH
7562 }
7563 ASET (attrs, coding_attr_charset_valids, val);
7564 category = coding_category_charset;
7565 }
7566 else if (EQ (coding_type, Qccl))
7567 {
7568 Lisp_Object valids;
7569
7570 if (nargs < coding_arg_ccl_max)
7571 goto short_args;
7572
7573 val = args[coding_arg_ccl_decoder];
7574 CHECK_CCL_PROGRAM (val);
7575 if (VECTORP (val))
7576 val = Fcopy_sequence (val);
7577 ASET (attrs, coding_attr_ccl_decoder, val);
7578
7579 val = args[coding_arg_ccl_encoder];
7580 CHECK_CCL_PROGRAM (val);
7581 if (VECTORP (val))
7582 val = Fcopy_sequence (val);
7583 ASET (attrs, coding_attr_ccl_encoder, val);
7584
7585 val = args[coding_arg_ccl_valids];
7586 valids = Fmake_string (make_number (256), make_number (0));
7587 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7588 {
7589 val = Fcar (tail);
7590 if (INTEGERP (val))
c197f191 7591 ASET (valids, XINT (val), make_number (1));
df7492f9
KH
7592 else
7593 {
7594 int from, to;
7595
7596 CHECK_CONS (val);
7597 CHECK_NUMBER (XCAR (val));
7598 CHECK_NUMBER (XCDR (val));
7599 from = XINT (XCAR (val));
7600 to = XINT (XCDR (val));
7601 for (i = from; i <= to; i++)
c197f191 7602 ASET (valids, i, make_number (1));
df7492f9
KH
7603 }
7604 }
7605 ASET (attrs, coding_attr_ccl_valids, valids);
7606
7607 category = coding_category_ccl;
7608 }
7609 else if (EQ (coding_type, Qutf_16))
7610 {
7611 Lisp_Object bom, endian;
7612
7613 if (nargs < coding_arg_utf16_max)
7614 goto short_args;
7615
7616 bom = args[coding_arg_utf16_bom];
7617 if (! NILP (bom) && ! EQ (bom, Qt))
7618 {
7619 CHECK_CONS (bom);
7620 CHECK_CODING_SYSTEM (XCAR (bom));
7621 CHECK_CODING_SYSTEM (XCDR (bom));
7622 }
7623 ASET (attrs, coding_attr_utf_16_bom, bom);
7624
7625 endian = args[coding_arg_utf16_endian];
7626 ASET (attrs, coding_attr_utf_16_endian, endian);
7627
7628 category = (CONSP (bom)
7629 ? coding_category_utf_16_auto
7630 : NILP (bom)
7631 ? (NILP (endian)
7632 ? coding_category_utf_16_be_nosig
7633 : coding_category_utf_16_le_nosig)
7634 : (NILP (endian)
7635 ? coding_category_utf_16_be
7636 : coding_category_utf_16_le));
7637 }
7638 else if (EQ (coding_type, Qiso_2022))
7639 {
7640 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7641 int i, id;
1397dc18 7642
df7492f9
KH
7643 if (nargs < coding_arg_iso2022_max)
7644 goto short_args;
7645
7646 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7647 CHECK_VECTOR (initial);
7648 for (i = 0; i < 4; i++)
7649 {
7650 val = Faref (initial, make_number (i));
7651 if (! NILP (val))
7652 {
7653 CHECK_CHARSET_GET_ID (val, id);
7654 ASET (initial, i, make_number (id));
7655 }
7656 else
7657 ASET (initial, i, make_number (-1));
7658 }
7659
7660 reg_usage = args[coding_arg_iso2022_reg_usage];
7661 CHECK_CONS (reg_usage);
7662 CHECK_NATNUM (XCAR (reg_usage));
7663 CHECK_NATNUM (XCDR (reg_usage));
7664
7665 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7666 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 7667 {
df7492f9
KH
7668 int id;
7669
7670 val = Fcar (tail);
7671 CHECK_CONS (val);
7672 CHECK_CHARSET_GET_ID (XCAR (val), id);
7673 CHECK_NATNUM (XCDR (val));
7674 if (XINT (XCDR (val)) >= 4)
7675 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7676 XCAR (val) = make_number (id);
1397dc18 7677 }
df7492f9
KH
7678
7679 flags = args[coding_arg_iso2022_flags];
7680 CHECK_NATNUM (flags);
7681 i = XINT (flags);
7682 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7683 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7684
7685 ASET (attrs, coding_attr_iso_initial, initial);
7686 ASET (attrs, coding_attr_iso_usage, reg_usage);
7687 ASET (attrs, coding_attr_iso_request, request);
7688 ASET (attrs, coding_attr_iso_flags, flags);
7689 setup_iso_safe_charsets (attrs);
7690
7691 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7692 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7693 | CODING_ISO_FLAG_SINGLE_SHIFT))
7694 ? coding_category_iso_7_else
7695 : EQ (args[coding_arg_charset_list], Qiso_2022)
7696 ? coding_category_iso_7
7697 : coding_category_iso_7_tight);
7698 else
7699 {
7700 int id = XINT (AREF (initial, 1));
7701
c6fb6e98 7702 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
7703 || EQ (args[coding_arg_charset_list], Qiso_2022)
7704 || id < 0)
7705 ? coding_category_iso_8_else
7706 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7707 ? coding_category_iso_8_1
7708 : coding_category_iso_8_2);
7709 }
7710 }
7711 else if (EQ (coding_type, Qemacs_mule))
7712 {
7713 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7714 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7715
7716 category = coding_category_emacs_mule;
7717 }
7718 else if (EQ (coding_type, Qshift_jis))
7719 {
7720
7721 struct charset *charset;
7722
7723 if (XINT (Flength (charset_list)) != 3)
7724 error ("There should be just three charsets");
7725
7726 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7727 if (CHARSET_DIMENSION (charset) != 1)
7728 error ("Dimension of charset %s is not one",
7729 XSYMBOL (CHARSET_NAME (charset))->name->data);
7730
7731 charset_list = XCDR (charset_list);
7732 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7733 if (CHARSET_DIMENSION (charset) != 1)
7734 error ("Dimension of charset %s is not one",
7735 XSYMBOL (CHARSET_NAME (charset))->name->data);
7736
7737 charset_list = XCDR (charset_list);
7738 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7739 if (CHARSET_DIMENSION (charset) != 2)
7740 error ("Dimension of charset %s is not two",
7741 XSYMBOL (CHARSET_NAME (charset))->name->data);
7742
7743 category = coding_category_sjis;
7744 Vsjis_coding_system = name;
7745 }
7746 else if (EQ (coding_type, Qbig5))
7747 {
7748 struct charset *charset;
7749
7750 if (XINT (Flength (charset_list)) != 2)
7751 error ("There should be just two charsets");
7752
7753 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7754 if (CHARSET_DIMENSION (charset) != 1)
7755 error ("Dimension of charset %s is not one",
7756 XSYMBOL (CHARSET_NAME (charset))->name->data);
7757
7758 charset_list = XCDR (charset_list);
7759 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7760 if (CHARSET_DIMENSION (charset) != 2)
7761 error ("Dimension of charset %s is not two",
7762 XSYMBOL (CHARSET_NAME (charset))->name->data);
7763
7764 category = coding_category_big5;
7765 Vbig5_coding_system = name;
7766 }
7767 else if (EQ (coding_type, Qraw_text))
7768 category = coding_category_raw_text;
7769 else if (EQ (coding_type, Qutf_8))
7770 category = coding_category_utf_8;
7771 else if (EQ (coding_type, Qundecided))
7772 category = coding_category_undecided;
7773 else
7774 error ("Invalid coding system type: %s",
7775 XSYMBOL (coding_type)->name->data);
7776
7777 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7778
7779 eol_type = args[coding_arg_eol_type];
7780 if (! NILP (eol_type)
7781 && ! EQ (eol_type, Qunix)
7782 && ! EQ (eol_type, Qdos)
7783 && ! EQ (eol_type, Qmac))
7784 error ("Invalid eol-type");
7785
7786 aliases = Fcons (name, Qnil);
7787
7788 if (NILP (eol_type))
7789 {
7790 eol_type = make_subsidiaries (name);
7791 for (i = 0; i < 3; i++)
1397dc18 7792 {
df7492f9
KH
7793 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7794
7795 this_name = AREF (eol_type, i);
7796 this_aliases = Fcons (this_name, Qnil);
7797 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7798 this_spec = Fmake_vector (make_number (3), attrs);
7799 ASET (this_spec, 1, this_aliases);
7800 ASET (this_spec, 2, this_eol_type);
7801 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7802 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7803 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7804 Vcoding_system_alist);
1397dc18 7805 }
d46c5b12 7806 }
1397dc18 7807
df7492f9
KH
7808 spec_vec = Fmake_vector (make_number (3), attrs);
7809 ASET (spec_vec, 1, aliases);
7810 ASET (spec_vec, 2, eol_type);
7811
7812 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7813 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7814 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7815 Vcoding_system_alist);
7816
7817 {
7818 int id = coding_categories[category].id;
7819
7820 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7821 setup_coding_system (name, &coding_categories[category]);
7822 }
7823
d46c5b12 7824 return Qnil;
df7492f9
KH
7825
7826 short_args:
7827 return Fsignal (Qwrong_number_of_arguments,
7828 Fcons (intern ("define-coding-system-internal"),
7829 make_number (nargs)));
d46c5b12
KH
7830}
7831
da7db224
DL
7832/* Fixme: should this record the alias relationships for
7833 diagnostics? */
df7492f9
KH
7834DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7835 Sdefine_coding_system_alias, 2, 2, 0,
7836 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7837 (alias, coding_system)
7838 Lisp_Object alias, coding_system;
66cfb530 7839{
df7492f9 7840 Lisp_Object spec, aliases, eol_type;
84d60297 7841
df7492f9
KH
7842 CHECK_SYMBOL (alias);
7843 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7844 aliases = AREF (spec, 1);
7845 while (!NILP (XCDR (aliases)))
7846 aliases = XCDR (aliases);
7847 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 7848
df7492f9
KH
7849 eol_type = AREF (spec, 2);
7850 if (VECTORP (eol_type))
66cfb530 7851 {
df7492f9
KH
7852 Lisp_Object subsidiaries;
7853 int i;
7854
7855 subsidiaries = make_subsidiaries (alias);
7856 for (i = 0; i < 3; i++)
7857 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7858 AREF (eol_type, i));
7859
7860 ASET (spec, 2, subsidiaries);
66cfb530 7861 }
df7492f9
KH
7862
7863 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
7864 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7865 Vcoding_system_alist);
66cfb530
KH
7866
7867 return Qnil;
7868}
7869
df7492f9
KH
7870DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7871 1, 1, 0,
7872 doc: /* Return the base of CODING-SYSTEM.
da7db224 7873Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
7874 (coding_system)
7875 Lisp_Object coding_system;
7876{
7877 Lisp_Object spec, attrs;
7878
7879 if (NILP (coding_system))
7880 return (Qno_conversion);
7881 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7882 attrs = AREF (spec, 0);
7883 return CODING_ATTR_BASE_NAME (attrs);
7884}
7885
7886DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7887 1, 1, 0,
7888 doc: "Return the property list of CODING-SYSTEM.")
7889 (coding_system)
7890 Lisp_Object coding_system;
7891{
7892 Lisp_Object spec, attrs;
7893
7894 if (NILP (coding_system))
7895 coding_system = Qno_conversion;
7896 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7897 attrs = AREF (spec, 0);
7898 return CODING_ATTR_PLIST (attrs);
7899}
7900
7901
7902DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7903 1, 1, 0,
da7db224 7904 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
7905 (coding_system)
7906 Lisp_Object coding_system;
7907{
7908 Lisp_Object spec;
7909
7910 if (NILP (coding_system))
7911 coding_system = Qno_conversion;
7912 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 7913 return AREF (spec, 1);
df7492f9
KH
7914}
7915
7916DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7917 Scoding_system_eol_type, 1, 1, 0,
7918 doc: /* Return eol-type of CODING-SYSTEM.
7919An eol-type is integer 0, 1, 2, or a vector of coding systems.
7920
7921Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7922and CR respectively.
7923
7924A vector value indicates that a format of end-of-line should be
7925detected automatically. Nth element of the vector is the subsidiary
7926coding system whose eol-type is N. */)
7927 (coding_system)
7928 Lisp_Object coding_system;
7929{
7930 Lisp_Object spec, eol_type;
7931 int n;
7932
7933 if (NILP (coding_system))
7934 coding_system = Qno_conversion;
7935 if (! CODING_SYSTEM_P (coding_system))
7936 return Qnil;
7937 spec = CODING_SYSTEM_SPEC (coding_system);
7938 eol_type = AREF (spec, 2);
7939 if (VECTORP (eol_type))
7940 return Fcopy_sequence (eol_type);
7941 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7942 return make_number (n);
7943}
7944
4ed46869
KH
7945#endif /* emacs */
7946
7947\f
1397dc18 7948/*** 9. Post-amble ***/
4ed46869 7949
dfcf069d 7950void
4ed46869
KH
7951init_coding_once ()
7952{
7953 int i;
7954
df7492f9
KH
7955 for (i = 0; i < coding_category_max; i++)
7956 {
7957 coding_categories[i].id = -1;
7958 coding_priorities[i] = i;
7959 }
4ed46869
KH
7960
7961 /* ISO2022 specific initialize routine. */
7962 for (i = 0; i < 0x20; i++)
b73bfc1c 7963 iso_code_class[i] = ISO_control_0;
4ed46869
KH
7964 for (i = 0x21; i < 0x7F; i++)
7965 iso_code_class[i] = ISO_graphic_plane_0;
7966 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 7967 iso_code_class[i] = ISO_control_1;
4ed46869
KH
7968 for (i = 0xA1; i < 0xFF; i++)
7969 iso_code_class[i] = ISO_graphic_plane_1;
7970 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7971 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7972 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7973 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7974 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7975 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7976 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7977 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7978 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7979 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7980
b843d1ae 7981 inhibit_pre_post_conversion = 0;
df7492f9
KH
7982
7983 for (i = 0; i < 256; i++)
7984 {
7985 emacs_mule_bytes[i] = 1;
7986 }
7c78e542
KH
7987 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
7988 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
7989 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
7990 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
7991}
7992
7993#ifdef emacs
7994
dfcf069d 7995void
e0e989f6
KH
7996syms_of_coding ()
7997{
df7492f9
KH
7998 staticpro (&Vcoding_system_hash_table);
7999 Vcoding_system_hash_table = Fmakehash (Qeq);
8000
8001 staticpro (&Vsjis_coding_system);
8002 Vsjis_coding_system = Qnil;
8003
8004 staticpro (&Vbig5_coding_system);
8005 Vbig5_coding_system = Qnil;
8006
8007 staticpro (&Vcode_conversion_work_buf_list);
8008 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 8009
df7492f9
KH
8010 staticpro (&Vcode_conversion_reused_work_buf);
8011 Vcode_conversion_reused_work_buf = Qnil;
8012
8013 DEFSYM (Qcharset, "charset");
8014 DEFSYM (Qtarget_idx, "target-idx");
8015 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
8016 Fset (Qcoding_system_history, Qnil);
8017
9ce27fde 8018 /* Target FILENAME is the first argument. */
e0e989f6 8019 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 8020 /* Target FILENAME is the third argument. */
e0e989f6
KH
8021 Fput (Qwrite_region, Qtarget_idx, make_number (2));
8022
df7492f9 8023 DEFSYM (Qcall_process, "call-process");
9ce27fde 8024 /* Target PROGRAM is the first argument. */
e0e989f6
KH
8025 Fput (Qcall_process, Qtarget_idx, make_number (0));
8026
df7492f9 8027 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 8028 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8029 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8030
df7492f9 8031 DEFSYM (Qstart_process, "start-process");
9ce27fde 8032 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8033 Fput (Qstart_process, Qtarget_idx, make_number (2));
8034
df7492f9 8035 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 8036 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
8037 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8038
df7492f9
KH
8039 DEFSYM (Qcoding_system, "coding-system");
8040 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 8041
df7492f9
KH
8042 DEFSYM (Qeol_type, "eol-type");
8043 DEFSYM (Qunix, "unix");
8044 DEFSYM (Qdos, "dos");
4ed46869 8045
df7492f9
KH
8046 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8047 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8048 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8049 DEFSYM (Qdefault_char, "default-char");
8050 DEFSYM (Qundecided, "undecided");
8051 DEFSYM (Qno_conversion, "no-conversion");
8052 DEFSYM (Qraw_text, "raw-text");
4ed46869 8053
df7492f9 8054 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 8055
df7492f9 8056 DEFSYM (Qutf_8, "utf-8");
27901516 8057
df7492f9
KH
8058 DEFSYM (Qutf_16, "utf-16");
8059 DEFSYM (Qutf_16_be, "utf-16-be");
8060 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
8061 DEFSYM (Qutf_16_le, "utf-16-l3");
8062 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
8063 DEFSYM (Qsignature, "signature");
8064 DEFSYM (Qendian, "endian");
8065 DEFSYM (Qbig, "big");
8066 DEFSYM (Qlittle, "little");
27901516 8067
df7492f9
KH
8068 DEFSYM (Qshift_jis, "shift-jis");
8069 DEFSYM (Qbig5, "big5");
4ed46869 8070
df7492f9 8071 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 8072
df7492f9 8073 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
8074 Fput (Qcoding_system_error, Qerror_conditions,
8075 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8076 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 8077 build_string ("Invalid coding system"));
4ed46869 8078
df7492f9
KH
8079 /* Intern this now in case it isn't already done.
8080 Setting this variable twice is harmless.
8081 But don't staticpro it here--that is done in alloc.c. */
8082 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 8083
df7492f9 8084 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8085 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8086 DEFSYM (Qtranslation_table_id, "translation-table-id");
8087 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8088 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 8089
df7492f9 8090 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 8091
df7492f9 8092 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 8093
df7492f9
KH
8094 Vcoding_category_table
8095 = Fmake_vector (make_number (coding_category_max), Qnil);
8096 staticpro (&Vcoding_category_table);
8097 /* Followings are target of code detection. */
8098 ASET (Vcoding_category_table, coding_category_iso_7,
8099 intern ("coding-category-iso-7"));
8100 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8101 intern ("coding-category-iso-7-tight"));
8102 ASET (Vcoding_category_table, coding_category_iso_8_1,
8103 intern ("coding-category-iso-8-1"));
8104 ASET (Vcoding_category_table, coding_category_iso_8_2,
8105 intern ("coding-category-iso-8-2"));
8106 ASET (Vcoding_category_table, coding_category_iso_7_else,
8107 intern ("coding-category-iso-7-else"));
8108 ASET (Vcoding_category_table, coding_category_iso_8_else,
8109 intern ("coding-category-iso-8-else"));
8110 ASET (Vcoding_category_table, coding_category_utf_8,
8111 intern ("coding-category-utf-8"));
8112 ASET (Vcoding_category_table, coding_category_utf_16_be,
8113 intern ("coding-category-utf-16-be"));
8114 ASET (Vcoding_category_table, coding_category_utf_16_le,
8115 intern ("coding-category-utf-16-le"));
8116 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8117 intern ("coding-category-utf-16-be-nosig"));
8118 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8119 intern ("coding-category-utf-16-le-nosig"));
8120 ASET (Vcoding_category_table, coding_category_charset,
8121 intern ("coding-category-charset"));
8122 ASET (Vcoding_category_table, coding_category_sjis,
8123 intern ("coding-category-sjis"));
8124 ASET (Vcoding_category_table, coding_category_big5,
8125 intern ("coding-category-big5"));
8126 ASET (Vcoding_category_table, coding_category_ccl,
8127 intern ("coding-category-ccl"));
8128 ASET (Vcoding_category_table, coding_category_emacs_mule,
8129 intern ("coding-category-emacs-mule"));
8130 /* Followings are NOT target of code detection. */
8131 ASET (Vcoding_category_table, coding_category_raw_text,
8132 intern ("coding-category-raw-text"));
8133 ASET (Vcoding_category_table, coding_category_undecided,
8134 intern ("coding-category-undecided"));
70c22245 8135
4ed46869
KH
8136 defsubr (&Scoding_system_p);
8137 defsubr (&Sread_coding_system);
8138 defsubr (&Sread_non_nil_coding_system);
8139 defsubr (&Scheck_coding_system);
8140 defsubr (&Sdetect_coding_region);
d46c5b12 8141 defsubr (&Sdetect_coding_string);
05e6f5dc 8142 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8143 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8144 defsubr (&Sdecode_coding_region);
8145 defsubr (&Sencode_coding_region);
8146 defsubr (&Sdecode_coding_string);
8147 defsubr (&Sencode_coding_string);
8148 defsubr (&Sdecode_sjis_char);
8149 defsubr (&Sencode_sjis_char);
8150 defsubr (&Sdecode_big5_char);
8151 defsubr (&Sencode_big5_char);
1ba9e4ab 8152 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8153 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8154 defsubr (&Sterminal_coding_system);
1ba9e4ab 8155 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8156 defsubr (&Skeyboard_coding_system);
a5d301df 8157 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8158 defsubr (&Sset_coding_system_priority);
8159 defsubr (&Sdefine_coding_system_internal);
8160 defsubr (&Sdefine_coding_system_alias);
8161 defsubr (&Scoding_system_base);
8162 defsubr (&Scoding_system_plist);
8163 defsubr (&Scoding_system_aliases);
8164 defsubr (&Scoding_system_eol_type);
8165 defsubr (&Scoding_system_priority_list);
4ed46869 8166
4608c386 8167 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8168 doc: /* List of coding systems.
8169
8170Do not alter the value of this variable manually. This variable should be
df7492f9 8171updated by the functions `define-coding-system' and
48b0f3ae 8172`define-coding-system-alias'. */);
4608c386
KH
8173 Vcoding_system_list = Qnil;
8174
8175 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8176 doc: /* Alist of coding system names.
8177Each element is one element list of coding system name.
8178This variable is given to `completing-read' as TABLE argument.
8179
8180Do not alter the value of this variable manually. This variable should be
8181updated by the functions `make-coding-system' and
8182`define-coding-system-alias'. */);
4608c386
KH
8183 Vcoding_system_alist = Qnil;
8184
4ed46869 8185 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8186 doc: /* List of coding-categories (symbols) ordered by priority.
8187
8188On detecting a coding system, Emacs tries code detection algorithms
8189associated with each coding-category one by one in this order. When
8190one algorithm agrees with a byte sequence of source text, the coding
8191system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8192 {
8193 int i;
8194
8195 Vcoding_category_list = Qnil;
df7492f9 8196 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8197 Vcoding_category_list
d46c5b12
KH
8198 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8199 Vcoding_category_list);
4ed46869
KH
8200 }
8201
8202 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8203 doc: /* Specify the coding system for read operations.
8204It is useful to bind this variable with `let', but do not set it globally.
8205If the value is a coding system, it is used for decoding on read operation.
8206If not, an appropriate element is used from one of the coding system alists:
8207There are three such tables, `file-coding-system-alist',
8208`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8209 Vcoding_system_for_read = Qnil;
8210
8211 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8212 doc: /* Specify the coding system for write operations.
8213Programs bind this variable with `let', but you should not set it globally.
8214If the value is a coding system, it is used for encoding of output,
8215when writing it to a file and when sending it to a file or subprocess.
8216
8217If this does not specify a coding system, an appropriate element
8218is used from one of the coding system alists:
8219There are three such tables, `file-coding-system-alist',
8220`process-coding-system-alist', and `network-coding-system-alist'.
8221For output to files, if the above procedure does not specify a coding system,
8222the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8223 Vcoding_system_for_write = Qnil;
8224
8225 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8226 doc: /*
8227Coding system used in the latest file or process I/O. */);
4ed46869
KH
8228 Vlast_coding_system_used = Qnil;
8229
9ce27fde 8230 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8231 doc: /*
8232*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8233See info node `Coding Systems' and info node `Text and Binary' concerning
8234such conversion. */);
9ce27fde
KH
8235 inhibit_eol_conversion = 0;
8236
ed29121d 8237 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8238 doc: /*
8239Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8240Bind it to t if the process output is to be treated as if it were a file
8241read from some filesystem. */);
ed29121d
EZ
8242 inherit_process_coding_system = 0;
8243
02ba4723 8244 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8245 doc: /*
8246Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8247The format is ((PATTERN . VAL) ...),
8248where PATTERN is a regular expression matching a file name,
8249VAL is a coding system, a cons of coding systems, or a function symbol.
8250If VAL is a coding system, it is used for both decoding and encoding
8251the file contents.
8252If VAL is a cons of coding systems, the car part is used for decoding,
8253and the cdr part is used for encoding.
8254If VAL is a function symbol, the function must return a coding system
0192762c
DL
8255or a cons of coding systems which are used as above. The function gets
8256the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8257
8258See also the function `find-operation-coding-system'
8259and the variable `auto-coding-alist'. */);
02ba4723
KH
8260 Vfile_coding_system_alist = Qnil;
8261
8262 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8263 doc: /*
8264Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8265The format is ((PATTERN . VAL) ...),
8266where PATTERN is a regular expression matching a program name,
8267VAL is a coding system, a cons of coding systems, or a function symbol.
8268If VAL is a coding system, it is used for both decoding what received
8269from the program and encoding what sent to the program.
8270If VAL is a cons of coding systems, the car part is used for decoding,
8271and the cdr part is used for encoding.
8272If VAL is a function symbol, the function must return a coding system
8273or a cons of coding systems which are used as above.
8274
8275See also the function `find-operation-coding-system'. */);
02ba4723
KH
8276 Vprocess_coding_system_alist = Qnil;
8277
8278 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8279 doc: /*
8280Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8281The format is ((PATTERN . VAL) ...),
8282where PATTERN is a regular expression matching a network service name
8283or is a port number to connect to,
8284VAL is a coding system, a cons of coding systems, or a function symbol.
8285If VAL is a coding system, it is used for both decoding what received
8286from the network stream and encoding what sent to the network stream.
8287If VAL is a cons of coding systems, the car part is used for decoding,
8288and the cdr part is used for encoding.
8289If VAL is a function symbol, the function must return a coding system
8290or a cons of coding systems which are used as above.
8291
8292See also the function `find-operation-coding-system'. */);
02ba4723 8293 Vnetwork_coding_system_alist = Qnil;
4ed46869 8294
68c45bf0 8295 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8296 doc: /* Coding system to use with system messages.
8297Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8298 Vlocale_coding_system = Qnil;
8299
005f0d35 8300 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8301 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8302 doc: /*
8303*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8304 eol_mnemonic_unix = build_string (":");
4ed46869 8305
7722baf9 8306 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8307 doc: /*
8308*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8309 eol_mnemonic_dos = build_string ("\\");
4ed46869 8310
7722baf9 8311 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8312 doc: /*
8313*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8314 eol_mnemonic_mac = build_string ("/");
4ed46869 8315
7722baf9 8316 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8317 doc: /*
8318*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8319 eol_mnemonic_undecided = build_string (":");
4ed46869 8320
84fbb8a0 8321 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8322 doc: /*
8323*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8324 Venable_character_translation = Qt;
bdd9fb48 8325
f967223b 8326 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8327 &Vstandard_translation_table_for_decode,
8328 doc: /* Table for translating characters while decoding. */);
f967223b 8329 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8330
f967223b 8331 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8332 &Vstandard_translation_table_for_encode,
8333 doc: /* Table for translating characters while encoding. */);
f967223b 8334 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8335
df7492f9 8336 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8337 doc: /* Alist of charsets vs revision numbers.
8338While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8339designate it with the escape sequence identifying revision (cdr part
8340of the element). */);
8341 Vcharset_revision_table = Qnil;
02ba4723
KH
8342
8343 DEFVAR_LISP ("default-process-coding-system",
8344 &Vdefault_process_coding_system,
48b0f3ae
PJ
8345 doc: /* Cons of coding systems used for process I/O by default.
8346The car part is used for decoding a process output,
8347the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8348 Vdefault_process_coding_system = Qnil;
c4825358 8349
3f003981 8350 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8351 doc: /*
8352Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8353This is a vector of length 256.
8354If Nth element is non-nil, the existence of code N in a file
8355\(or output of subprocess) doesn't prevent it to be detected as
8356a coding system of ISO 2022 variant which has a flag
8357`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8358or reading output of a subprocess.
8359Only 128th through 159th elements has a meaning. */);
3f003981 8360 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8361
8362 DEFVAR_LISP ("select-safe-coding-system-function",
8363 &Vselect_safe_coding_system_function,
df7492f9
KH
8364 doc: /*
8365Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8366
8367If set, this function is called to force a user to select a proper
8368coding system which can encode the text in the case that a default
8369coding system used in each operation can't encode the text.
8370
8371The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8372 Vselect_safe_coding_system_function = Qnil;
8373
22ab2303 8374 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8375 &inhibit_iso_escape_detection,
df7492f9
KH
8376 doc: /*
8377If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8378
8379By default, on reading a file, Emacs tries to detect how the text is
8380encoded. This code detection is sensitive to escape sequences. If
8381the sequence is valid as ISO2022, the code is determined as one of
8382the ISO2022 encodings, and the file is decoded by the corresponding
8383coding system (e.g. `iso-2022-7bit').
8384
8385However, there may be a case that you want to read escape sequences in
8386a file as is. In such a case, you can set this variable to non-nil.
8387Then, as the code detection ignores any escape sequences, no file is
8388detected as encoded in some ISO2022 encoding. The result is that all
8389escape sequences become visible in a buffer.
8390
8391The default value is nil, and it is strongly recommended not to change
8392it. That is because many Emacs Lisp source files that contain
8393non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8394in Emacs's distribution, and they won't be decoded correctly on
8395reading if you suppress escape sequence detection.
8396
8397The other way to read escape sequences in a file without decoding is
8398to explicitly specify some coding system that doesn't use ISO2022's
8399escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8400 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8401
8402 {
8403 Lisp_Object args[coding_arg_max];
8404 Lisp_Object plist[14];
8405 int i;
8406
8407 for (i = 0; i < coding_arg_max; i++)
8408 args[i] = Qnil;
8409
8410 plist[0] = intern (":name");
8411 plist[1] = args[coding_arg_name] = Qno_conversion;
8412 plist[2] = intern (":mnemonic");
8413 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8414 plist[4] = intern (":coding-type");
8415 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8416 plist[6] = intern (":ascii-compatible-p");
8417 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8418 plist[8] = intern (":default-char");
8419 plist[9] = args[coding_arg_default_char] = make_number (0);
8420 plist[10] = intern (":docstring");
8421 plist[11] = build_string ("Do no conversion.\n\
8422\n\
8423When you visit a file with this coding, the file is read into a\n\
8424unibyte buffer as is, thus each byte of a file is treated as a\n\
8425character.");
8426 plist[12] = intern (":eol-type");
8427 plist[13] = args[coding_arg_eol_type] = Qunix;
8428 args[coding_arg_plist] = Flist (14, plist);
8429 Fdefine_coding_system_internal (coding_arg_max, args);
8430 }
8431
8432 setup_coding_system (Qno_conversion, &keyboard_coding);
8433 setup_coding_system (Qno_conversion, &terminal_coding);
8434 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8435}
8436
68c45bf0
PE
8437char *
8438emacs_strerror (error_number)
8439 int error_number;
8440{
8441 char *str;
8442
ca9c0567 8443 synchronize_system_messages_locale ();
68c45bf0
PE
8444 str = strerror (error_number);
8445
8446 if (! NILP (Vlocale_coding_system))
8447 {
8448 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8449 Vlocale_coding_system,
8450 0);
8451 str = (char *) XSTRING (dec)->data;
8452 }
8453
8454 return str;
8455}
8456
4ed46869 8457#endif /* emacs */