(DECODE_DESIGNATION): Set chars_96 to -1 instead of
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
8f924df7 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
8f924df7 5 Copyright (C) 2003
df7492f9
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 56 coding system.
4ed46869 57
e19c3639
KH
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. At
59 the C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion
e19c3639
KH
61 from a coding system symbol to its attributes vector is done by
62 looking up Vcoding_system_hash_table by the symbol.
4ed46869 63
e19c3639 64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
4ed46869 66
df7492f9
KH
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
4ed46869
KH
82
83 The most famous coding system for multiple character sets. X's
df7492f9
KH
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
4ed46869 87
df7492f9 88 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on the operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
cfb43547 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
4ed46869
KH
138
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
4ed46869 154#if 0
df7492f9 155static int
ff0dacd7 156detect_coding_XXX (coding, detect_info)
df7492f9 157 struct coding_system *coding;
ff0dacd7 158 struct coding_detection_info *detect_info;
4ed46869 159{
df7492f9
KH
160 unsigned char *src = coding->source;
161 unsigned char *src_end = coding->source + coding->src_bytes;
162 int multibytep = coding->src_multibyte;
ff0dacd7 163 int consumed_chars = 0;
df7492f9
KH
164 int found = 0;
165 ...;
166
167 while (1)
168 {
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
171 ONE_MORE_BYTE (c);
ff0dacd7
KH
172
173 if (! __C_conforms_to_XXX___ (c))
174 break;
/* NOTE(review): the negation below looks inverted -- one would expect
   FOUND to be set when C *does* strongly suggest XXX.  Confirm against
   the real detect_coding_* functions before copying this template. */
175 if (! __C_strongly_suggests_XXX__ (c))
176 found = CATEGORY_MASK_XXX;
df7492f9 177 }
ff0dacd7
KH
178 /* The byte sequence is invalid for XXX. */
179 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 180 return 0;
ff0dacd7 181
df7492f9 182 no_more_source:
ff0dacd7
KH
183 /* The source was exhausted successfully. */
184 detect_info->found |= found;
df7492f9 185 return 1;
4ed46869
KH
186}
187#endif
188
189/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
df7492f9
KH
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
d46c5b12 195
df7492f9
KH
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
d46c5b12 200
df7492f9 201 Below is the template of these functions. */
d46c5b12 202
#if 0
/* Template only (never compiled): skeleton of a decode_coding_XXX ()
   function.  It reads bytes from CODING->source starting at
   CODING->consumed and appends decoded characters to CODING->charbuf
   starting at CODING->charbuf_used.  */
static void
decode_coding_XXXX (coding)
     struct coding_system *coding;
{
  unsigned char *src = coding->source + coding->consumed;
  unsigned char *src_end = coding->source + coding->src_bytes;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source code, or
     when there's no room in CHARBUF for a decoded character.  */
  unsigned char *src_base;
  /* A buffer to produce decoded characters.  */
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  int multibytep = coding->src_multibyte;

  while (1)
    {
      src_base = src;
      /* Stop once CHARBUF is full.  (The test used to be
	 `charbuf < charbuf_end', which would leave the loop exactly
	 when there WAS room.)  */
      if (charbuf >= charbuf_end)
	/* No more room to produce a decoded character.  */
	break;
      ONE_MORE_BYTE (c);
      /* Decode it.  */
    }

 no_more_source:
  if (src_base < src_end
      && coding->mode & CODING_MODE_LAST_BLOCK)
    /* If the source ends by partial bytes to construct a character,
       treat them as eight-bit raw data.  */
    while (src_base < src_end && charbuf < charbuf_end)
      *charbuf++ = *src_base++;
  /* Remember how many bytes and characters we consumed.  If the
     source is multibyte, the bytes and chars are not identical.  */
  coding->consumed = coding->consumed_char = src_base - coding->source;
  /* Remember how many characters we produced.  */
  coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
243
244/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
df7492f9
KH
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
d46c5b12 250
df7492f9
KH
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 255
df7492f9
KH
256 DST_BYTES zero means that the source area and the destination area
257 overlap, which means that we can produce encoded text until it
258 reaches the head of the not-yet-encoded source text.
d46c5b12 259
df7492f9 260 Below is a template of these functions. */
#if 0
/* Template only (never compiled): skeleton of an encode_coding_XXX ()
   function.  It reads characters from CODING->charbuf and writes the
   encoded bytes to CODING->destination starting at CODING->produced.

   NOTE(review): the encode_coding_* prototypes declared later in this
   file return int, while this template is declared void -- confirm
   which one is intended.  */
static void
encode_coding_XXX (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  /* The end of the characters to encode.  (This used to read
     `charbuf->charbuf', but CHARBUF is a plain `int *'; the buffer
     belongs to CODING.)  */
  int *charbuf_end = coding->charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Stop early enough that one more loop iteration cannot write past
     DST_END.  */
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  int produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* Encode C into DST, and increment DST.  */
    }
 label_no_more_destination:
  /* How many chars and bytes we produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
}
#endif
285
4ed46869
KH
286\f
287/*** 1. Preamble ***/
288
68c45bf0 289#include <config.h>
4ed46869
KH
290#include <stdio.h>
291
4ed46869
KH
292#include "lisp.h"
293#include "buffer.h"
df7492f9 294#include "character.h"
4ed46869
KH
295#include "charset.h"
296#include "ccl.h"
df7492f9 297#include "composite.h"
4ed46869
KH
298#include "coding.h"
299#include "window.h"
4ed46869 300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
304Lisp_Object Qunix, Qdos;
305extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
306Lisp_Object Qbuffer_file_coding_system;
307Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 308Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
df7492f9 310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 311Lisp_Object Qbig, Qlittle;
bb0115a2 312Lisp_Object Qcoding_system_history;
1397dc18 313Lisp_Object Qvalid_codes;
a6f87d34
KH
314Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
315Lisp_Object QCdecode_translation_table, QCencode_translation_table;
316Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
4ed46869
KH
317
318extern Lisp_Object Qinsert_file_contents, Qwrite_region;
319Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
320Lisp_Object Qstart_process, Qopen_network_stream;
321Lisp_Object Qtarget_idx;
322
065e3595
KH
323Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
324Lisp_Object Qinterrupted, Qinsufficient_memory;
325
5d5bf4d8
KH
326int coding_system_require_warning;
327
d46c5b12
KH
328Lisp_Object Vselect_safe_coding_system_function;
329
7722baf9
EZ
330/* Mnemonic string for each format of end-of-line. */
331Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
332/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 333 decided. */
7722baf9 334Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
335
336#ifdef emacs
337
4608c386
KH
338Lisp_Object Vcoding_system_list, Vcoding_system_alist;
339
340Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 341
d46c5b12
KH
342/* Coding system emacs-mule and raw-text are for converting only
343 end-of-line format. */
344Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 345Lisp_Object Qutf_8_emacs;
ecf488bc 346
4ed46869
KH
347/* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */
349/* Coding-system for reading files and receiving data from process. */
350Lisp_Object Vcoding_system_for_read;
351/* Coding-system for writing files and sending data to process. */
352Lisp_Object Vcoding_system_for_write;
353/* Coding-system actually used in the latest I/O. */
354Lisp_Object Vlast_coding_system_used;
065e3595
KH
355/* Set to non-nil when an error is detected while code conversion. */
356Lisp_Object Vlast_code_conversion_error;
c4825358 357/* A vector of length 256 which contains information about special
94487c4e 358 Latin codes (especially for dealing with Microsoft codes). */
3f003981 359Lisp_Object Vlatin_extra_code_table;
c4825358 360
9ce27fde
KH
361/* Flag to inhibit code conversion of end-of-line format. */
362int inhibit_eol_conversion;
363
74383408
KH
364/* Flag to inhibit ISO2022 escape sequence detection. */
365int inhibit_iso_escape_detection;
366
ed29121d
EZ
367/* Flag to make buffer-file-coding-system inherit from process-coding. */
368int inherit_process_coding_system;
369
c4825358 370/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
371struct coding_system terminal_coding;
372
c4825358
KH
373/* Coding system to be used to encode text for terminal display when
374 terminal coding system is nil. */
375struct coding_system safe_terminal_coding;
376
377/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
378struct coding_system keyboard_coding;
379
02ba4723
KH
380Lisp_Object Vfile_coding_system_alist;
381Lisp_Object Vprocess_coding_system_alist;
382Lisp_Object Vnetwork_coding_system_alist;
4ed46869 383
68c45bf0
PE
384Lisp_Object Vlocale_coding_system;
385
4ed46869
KH
386#endif /* emacs */
387
f967223b
KH
388/* Flag to tell if we look up translation table on character code
389 conversion. */
84fbb8a0 390Lisp_Object Venable_character_translation;
f967223b
KH
391/* Standard translation table to look up on decoding (reading). */
392Lisp_Object Vstandard_translation_table_for_decode;
393/* Standard translation table to look up on encoding (writing). */
394Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 395
f967223b
KH
396Lisp_Object Qtranslation_table;
397Lisp_Object Qtranslation_table_id;
398Lisp_Object Qtranslation_table_for_decode;
399Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
400
401/* Alist of charsets vs revision number. */
df7492f9 402static Lisp_Object Vcharset_revision_table;
4ed46869 403
02ba4723
KH
404/* Default coding systems used for process I/O. */
405Lisp_Object Vdefault_process_coding_system;
406
002fdb44
DL
407/* Char table for translating Quail and self-inserting input. */
408Lisp_Object Vtranslation_table_for_input;
409
df7492f9
KH
410/* Two special coding systems. */
411Lisp_Object Vsjis_coding_system;
412Lisp_Object Vbig5_coding_system;
413
df7492f9
KH
414/* ISO2022 section */
415
416#define CODING_ISO_INITIAL(coding, reg) \
417 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
418 coding_attr_iso_initial), \
419 reg)))
420
421
/* Return the element of CODING->safe_charsets for CHARSET_ID, or -1
   if CHARSET_ID is greater than CODING->max_charset_id.  CHARSET_ID
   is evaluated twice, so it must not have side effects.  It is
   parenthesized in the comparison so that an argument containing a
   lower-precedence operator (e.g. `a ? b : c') is compared as a
   whole.  */
#define CODING_ISO_REQUEST(coding, charset_id)		\
  (((charset_id) <= (coding)->max_charset_id		\
    ? (coding)->safe_charsets[charset_id]		\
    : -1))
426
427
428#define CODING_ISO_FLAGS(coding) \
429 ((coding)->spec.iso_2022.flags)
430#define CODING_ISO_DESIGNATION(coding, reg) \
431 ((coding)->spec.iso_2022.current_designation[reg])
432#define CODING_ISO_INVOCATION(coding, plane) \
433 ((coding)->spec.iso_2022.current_invocation[plane])
434#define CODING_ISO_SINGLE_SHIFTING(coding) \
435 ((coding)->spec.iso_2022.single_shifting)
436#define CODING_ISO_BOL(coding) \
437 ((coding)->spec.iso_2022.bol)
438#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
439 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
440
441/* Control characters of ISO2022. */
442 /* code */ /* function */
443#define ISO_CODE_LF 0x0A /* line-feed */
444#define ISO_CODE_CR 0x0D /* carriage-return */
445#define ISO_CODE_SO 0x0E /* shift-out */
446#define ISO_CODE_SI 0x0F /* shift-in */
447#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
448#define ISO_CODE_ESC 0x1B /* escape */
449#define ISO_CODE_SS2 0x8E /* single-shift-2 */
450#define ISO_CODE_SS3 0x8F /* single-shift-3 */
451#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
452
453/* Every (1-byte) code of ISO2022 is classified into one of the
454 following classes. */
455enum iso_code_class_type
456 {
457 ISO_control_0, /* Control codes in the range
458 0x00..0x1F and 0x7F, except for the
459 following 4 codes. */
df7492f9
KH
460 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
461 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
462 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
463 ISO_escape, /* ISO_CODE_ESC (0x1B) */
464 ISO_control_1, /* Control codes in the range
465 0x80..0x9F, except for the
466 following 3 codes. */
467 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
468 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
469 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
470 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
471 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
472 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
473 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
474 };
05e6f5dc 475
df7492f9
KH
476/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
477 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 478
df7492f9
KH
479/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
480 instead of the correct short-form sequence (e.g. ESC $ A). */
481#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 482
df7492f9
KH
483/* If set, reset graphic planes and registers at end-of-line to the
484 initial state. */
485#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 486
df7492f9
KH
487/* If set, reset graphic planes and registers before any control
488 characters to the initial state. */
489#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 490
df7492f9
KH
491/* If set, encode by 7-bit environment. */
492#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 493
df7492f9
KH
494/* If set, use locking-shift function. */
495#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 496
df7492f9
KH
497/* If set, use single-shift function. Overwrite
498 CODING_ISO_FLAG_LOCKING_SHIFT. */
499#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 500
df7492f9
KH
501/* If set, use designation escape sequence. */
502#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 503
df7492f9
KH
504/* If set, produce revision number sequence. */
505#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 506
df7492f9
KH
507/* If set, produce ISO6429's direction specifying sequence. */
508#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 509
df7492f9
KH
510/* If set, assume designation states are reset at beginning of line on
511 output. */
512#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 513
df7492f9
KH
514/* If set, designation sequence should be placed at beginning of line
515 on output. */
516#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 517
df7492f9
KH
518/* If set, do not encode unsafe characters on output. */
519#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 520
df7492f9
KH
521/* If set, extra latin codes (128..159) are accepted as a valid code
522 on input. */
523#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 524
df7492f9 525#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 526
df7492f9 527#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 528
bf16eb23 529#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 530
bf16eb23 531#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 532
bf16eb23 533#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 534
df7492f9
KH
535/* A character to be produced on output if encoding of the original
536 character is prohibited by CODING_ISO_FLAG_SAFE. */
537#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 538
4ed46869 539
df7492f9
KH
540/* UTF-16 section */
541#define CODING_UTF_16_BOM(coding) \
542 ((coding)->spec.utf_16.bom)
4ed46869 543
df7492f9
KH
544#define CODING_UTF_16_ENDIAN(coding) \
545 ((coding)->spec.utf_16.endian)
4ed46869 546
df7492f9
KH
547#define CODING_UTF_16_SURROGATE(coding) \
548 ((coding)->spec.utf_16.surrogate)
4ed46869 549
4ed46869 550
df7492f9
KH
551/* CCL section */
552#define CODING_CCL_DECODER(coding) \
553 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
554#define CODING_CCL_ENCODER(coding) \
555 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
556#define CODING_CCL_VALIDS(coding) \
8f924df7 557 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 558
5a936b46 559/* Index for each coding category in `coding_categories' */
4ed46869 560
df7492f9
KH
561enum coding_category
562 {
563 coding_category_iso_7,
564 coding_category_iso_7_tight,
565 coding_category_iso_8_1,
566 coding_category_iso_8_2,
567 coding_category_iso_7_else,
568 coding_category_iso_8_else,
569 coding_category_utf_8,
570 coding_category_utf_16_auto,
571 coding_category_utf_16_be,
572 coding_category_utf_16_le,
573 coding_category_utf_16_be_nosig,
574 coding_category_utf_16_le_nosig,
575 coding_category_charset,
576 coding_category_sjis,
577 coding_category_big5,
578 coding_category_ccl,
579 coding_category_emacs_mule,
580 /* All above are targets of code detection. */
581 coding_category_raw_text,
582 coding_category_undecided,
583 coding_category_max
584 };
585
586/* Definitions of flag bits used in detect_coding_XXXX. */
587#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
588#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
589#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
590#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
591#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
592#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
593#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 594#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
595#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
596#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
597#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
598#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
599#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
600#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
601#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
602#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
603#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 604#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
605
606/* This value is returned if detect_coding_mask () finds nothing other
607 than ASCII characters. */
608#define CATEGORY_MASK_ANY \
609 (CATEGORY_MASK_ISO_7 \
610 | CATEGORY_MASK_ISO_7_TIGHT \
611 | CATEGORY_MASK_ISO_8_1 \
612 | CATEGORY_MASK_ISO_8_2 \
613 | CATEGORY_MASK_ISO_7_ELSE \
614 | CATEGORY_MASK_ISO_8_ELSE \
615 | CATEGORY_MASK_UTF_8 \
616 | CATEGORY_MASK_UTF_16_BE \
617 | CATEGORY_MASK_UTF_16_LE \
618 | CATEGORY_MASK_UTF_16_BE_NOSIG \
619 | CATEGORY_MASK_UTF_16_LE_NOSIG \
620 | CATEGORY_MASK_CHARSET \
621 | CATEGORY_MASK_SJIS \
622 | CATEGORY_MASK_BIG5 \
623 | CATEGORY_MASK_CCL \
624 | CATEGORY_MASK_EMACS_MULE)
625
626
627#define CATEGORY_MASK_ISO_7BIT \
628 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
629
630#define CATEGORY_MASK_ISO_8BIT \
631 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
632
633#define CATEGORY_MASK_ISO_ELSE \
634 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
635
636#define CATEGORY_MASK_ISO_ESCAPE \
637 (CATEGORY_MASK_ISO_7 \
638 | CATEGORY_MASK_ISO_7_TIGHT \
639 | CATEGORY_MASK_ISO_7_ELSE \
640 | CATEGORY_MASK_ISO_8_ELSE)
641
642#define CATEGORY_MASK_ISO \
643 ( CATEGORY_MASK_ISO_7BIT \
644 | CATEGORY_MASK_ISO_8BIT \
645 | CATEGORY_MASK_ISO_ELSE)
646
647#define CATEGORY_MASK_UTF_16 \
648 (CATEGORY_MASK_UTF_16_BE \
649 | CATEGORY_MASK_UTF_16_LE \
650 | CATEGORY_MASK_UTF_16_BE_NOSIG \
651 | CATEGORY_MASK_UTF_16_LE_NOSIG)
652
653
654/* List of symbols `coding-category-xxx' ordered by priority. This
655 variable is exposed to Emacs Lisp. */
656static Lisp_Object Vcoding_category_list;
657
658/* Table of coding categories (Lisp symbols). This variable is for
659 internal use only. */
660static Lisp_Object Vcoding_category_table;
661
662/* Table of coding-categories ordered by priority. */
663static enum coding_category coding_priorities[coding_category_max];
664
665/* Nth element is a coding context for the coding system bound to the
666 Nth coding category. */
667static struct coding_system coding_categories[coding_category_max];
668
df7492f9
KH
669/*** Commonly used macros and functions ***/
670
671#ifndef min
672#define min(a, b) ((a) < (b) ? (a) : (b))
673#endif
674#ifndef max
675#define max(a, b) ((a) > (b) ? (a) : (b))
676#endif
4ed46869 677
24a73b0a
KH
678#define CODING_GET_INFO(coding, attrs, charset_list) \
679 do { \
680 (attrs) = CODING_ID_ATTRS ((coding)->id); \
681 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 682 } while (0)
4ed46869 683
4ed46869 684
df7492f9
KH
685/* Safely get one byte from the source text pointed by SRC which ends
686 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
687 in the source, it jumps to `no_more_source' (after recording
 CODING_RESULT_INSUFFICIENT_SRC if SRC_BASE < SRC). If multibytep is
688 nonzero, and a multibyte character is found at SRC, set C to the
689 negative value of the character code and record
 CODING_RESULT_INVALID_SRC -- unless the leading byte is 0xC0 or 0xC1,
 in which case C is set to the 8-bit value combined from that byte and
 the next one. CONSUMED_CHARS is incremented whenever C is set.
 The caller should declare
690 and set these variables appropriately in advance:
691 src, src_end, src_base, multibytep, coding, consumed_chars
 and must provide the label `no_more_source'. */
aa72b389 692
065e3595
KH
693#define ONE_MORE_BYTE(c) \
694 do { \
695 if (src == src_end) \
696 { \
697 if (src_base < src) \
698 record_conversion_result \
699 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
700 goto no_more_source; \
701 } \
702 c = *src++; \
703 if (multibytep && (c & 0x80)) \
704 { \
705 if ((c & 0xFE) == 0xC0) \
706 c = ((c & 1) << 6) | *src++; \
707 else \
708 { \
709 c = - string_char (--src, &src, NULL); \
710 record_conversion_result \
711 (coding, CODING_RESULT_INVALID_SRC); \
712 } \
713 } \
714 consumed_chars++; \
aa72b389
KH
715 } while (0)
716
aa72b389 717
065e3595
KH
/* Like ONE_MORE_BYTE, but without the check for SRC having reached
   SRC_END, so it never jumps to `no_more_source'.  The caller should
   declare and set these variables appropriately in advance:
	src, multibytep, coding, consumed_chars  */
718#define ONE_MORE_BYTE_NO_CHECK(c) \
719 do { \
720 c = *src++; \
721 if (multibytep && (c & 0x80)) \
722 { \
723 if ((c & 0xFE) == 0xC0) \
724 c = ((c & 1) << 6) | *src++; \
725 else \
726 { \
727 c = - string_char (--src, &src, NULL); \
728 record_conversion_result \
729 (coding, CODING_RESULT_INVALID_SRC); \
730 } \
731 } \
732 consumed_chars++; \
aa72b389
KH
733 } while (0)
734
aa72b389 735
df7492f9
KH
736/* Store a byte C in the place pointed by DST and increment DST to the
737 next free point, and increment PRODUCED_CHARS. The caller should
738 assure that C is 0..127, and declare and set the variable `dst'
739 appropriately in advance.
740*/
aa72b389
KH
741
742
df7492f9
KH
743#define EMIT_ONE_ASCII_BYTE(c) \
744 do { \
745 produced_chars++; \
746 *dst++ = (c); \
b6871cc7 747 } while (0)
aa72b389
KH
748
749
df7492f9 750/* Like EMIT_ONE_ASCII_BYTE, but store two bytes: C1 and C2. */
aa72b389 751
df7492f9
KH
752#define EMIT_TWO_ASCII_BYTES(c1, c2) \
753 do { \
754 produced_chars += 2; \
755 *dst++ = (c1), *dst++ = (c2); \
756 } while (0)
aa72b389
KH
757
758
df7492f9
KH
759/* Store a byte C in the place pointed by DST and increment DST to the
760 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
761 nonzero, store in an appropriate multibyte form. The caller should
762 declare and set the variables `dst', `multibytep' and
763 `produced_chars' appropriately in advance. */
764
765#define EMIT_ONE_BYTE(c) \
766 do { \
767 produced_chars++; \
768 if (multibytep) \
769 { \
770 int ch = (c); \
771 if (ch >= 0x80) \
772 ch = BYTE8_TO_CHAR (ch); \
773 CHAR_STRING_ADVANCE (ch, dst); \
774 } \
775 else \
776 *dst++ = (c); \
aa72b389 777 } while (0)
aa72b389 778
aa72b389 779
df7492f9 780/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 781
e19c3639
KH
782#define EMIT_TWO_BYTES(c1, c2) \
783 do { \
784 produced_chars += 2; \
785 if (multibytep) \
786 { \
787 int ch; \
788 \
789 ch = (c1); \
790 if (ch >= 0x80) \
791 ch = BYTE8_TO_CHAR (ch); \
792 CHAR_STRING_ADVANCE (ch, dst); \
793 ch = (c2); \
794 if (ch >= 0x80) \
795 ch = BYTE8_TO_CHAR (ch); \
796 CHAR_STRING_ADVANCE (ch, dst); \
797 } \
798 else \
799 { \
800 *dst++ = (c1); \
801 *dst++ = (c2); \
802 } \
aa72b389
KH
803 } while (0)
804
805
df7492f9
KH
806#define EMIT_THREE_BYTES(c1, c2, c3) \
807 do { \
808 EMIT_ONE_BYTE (c1); \
809 EMIT_TWO_BYTES (c2, c3); \
810 } while (0)
aa72b389 811
aa72b389 812
df7492f9
KH
813#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
814 do { \
815 EMIT_TWO_BYTES (c1, c2); \
816 EMIT_TWO_BYTES (c3, c4); \
817 } while (0)
aa72b389 818
aa72b389 819
f6cbaf43
KH
820/* Prototypes for static functions. */
821static void record_conversion_result P_ ((struct coding_system *coding,
822 enum coding_result_code result));
823static int detect_coding_utf_8 P_ ((struct coding_system *,
824 struct coding_detection_info *info));
825static void decode_coding_utf_8 P_ ((struct coding_system *));
826static int encode_coding_utf_8 P_ ((struct coding_system *));
827
828static int detect_coding_utf_16 P_ ((struct coding_system *,
829 struct coding_detection_info *info));
830static void decode_coding_utf_16 P_ ((struct coding_system *));
831static int encode_coding_utf_16 P_ ((struct coding_system *));
832
833static int detect_coding_iso_2022 P_ ((struct coding_system *,
834 struct coding_detection_info *info));
835static void decode_coding_iso_2022 P_ ((struct coding_system *));
836static int encode_coding_iso_2022 P_ ((struct coding_system *));
837
838static int detect_coding_emacs_mule P_ ((struct coding_system *,
839 struct coding_detection_info *info));
840static void decode_coding_emacs_mule P_ ((struct coding_system *));
841static int encode_coding_emacs_mule P_ ((struct coding_system *));
842
843static int detect_coding_sjis P_ ((struct coding_system *,
844 struct coding_detection_info *info));
845static void decode_coding_sjis P_ ((struct coding_system *));
846static int encode_coding_sjis P_ ((struct coding_system *));
847
848static int detect_coding_big5 P_ ((struct coding_system *,
849 struct coding_detection_info *info));
850static void decode_coding_big5 P_ ((struct coding_system *));
851static int encode_coding_big5 P_ ((struct coding_system *));
852
853static int detect_coding_ccl P_ ((struct coding_system *,
854 struct coding_detection_info *info));
855static void decode_coding_ccl P_ ((struct coding_system *));
856static int encode_coding_ccl P_ ((struct coding_system *));
857
858static void decode_coding_raw_text P_ ((struct coding_system *));
859static int encode_coding_raw_text P_ ((struct coding_system *));
860
861static void coding_set_source P_ ((struct coding_system *));
862static void coding_set_destination P_ ((struct coding_system *));
863static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
864static void coding_alloc_by_making_gap P_ ((struct coding_system *,
865 EMACS_INT));
866static unsigned char *alloc_destination P_ ((struct coding_system *,
867 EMACS_INT, unsigned char *));
868static void setup_iso_safe_charsets P_ ((Lisp_Object));
869static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
870 int *, int *,
871 unsigned char *));
872static int detect_eol P_ ((const unsigned char *,
873 EMACS_INT, enum coding_category));
874static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
875static void decode_eol P_ ((struct coding_system *));
876static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
877static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
878 int, int *, int *));
879static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
880static INLINE void produce_composition P_ ((struct coding_system *, int *,
881 EMACS_INT));
882static INLINE void produce_charset P_ ((struct coding_system *, int *,
883 EMACS_INT));
884static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
885static int decode_coding P_ ((struct coding_system *));
886static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
887 struct coding_system *,
888 int *, EMACS_INT *));
889static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
890 struct coding_system *,
891 int *, EMACS_INT *));
892static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
893static int encode_coding P_ ((struct coding_system *));
894static Lisp_Object make_conversion_work_buffer P_ ((int));
895static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
896static INLINE int char_encodable_p P_ ((int, Lisp_Object));
897static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
898
065e3595
KH
899static void
900record_conversion_result (struct coding_system *coding,
901 enum coding_result_code result)
902{
903 coding->result = result;
904 switch (result)
905 {
906 case CODING_RESULT_INSUFFICIENT_SRC:
907 Vlast_code_conversion_error = Qinsufficient_source;
908 break;
909 case CODING_RESULT_INCONSISTENT_EOL:
910 Vlast_code_conversion_error = Qinconsistent_eol;
911 break;
912 case CODING_RESULT_INVALID_SRC:
913 Vlast_code_conversion_error = Qinvalid_source;
914 break;
915 case CODING_RESULT_INTERRUPT:
916 Vlast_code_conversion_error = Qinterrupted;
917 break;
918 case CODING_RESULT_INSUFFICIENT_MEM:
919 Vlast_code_conversion_error = Qinsufficient_memory;
920 break;
921 }
922}
923
df7492f9
KH
/* Set C to DECODE_CHAR (CHARSET, CODE).  If the flag
   `charset_map_loaded' became nonzero during that call, recompute
   CODING->source with coding_set_source and shift SRC, SRC_BASE and
   SRC_END by however far the source address moved.
   NOTE(review): presumably loading a charset map can relocate the
   source text -- confirm against the map loader.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *old_source = coding->source;                    \
        EMACS_INT shift;                                                     \
                                                                             \
        coding_set_source (coding);                                          \
        shift = coding->source - old_source;                                 \
        src += shift;                                                        \
        src_base += shift;                                                   \
        src_end += shift;                                                    \
      }                                                                      \
  } while (0)
940
941
df7492f9
KH
/* Make sure BYTES more bytes can be stored at `dst'.  When
   DST + BYTES reaches or passes `dst_end', ask alloc_destination for
   room for BYTES plus one byte per element still pending in
   `charbuf', then refresh `dst' and `dst_end' from CODING.  Relies on
   the caller's `coding', `dst', `dst_end', `charbuf' and
   `charbuf_end'.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (dst + (bytes) >= dst_end)                               \
      {                                                         \
        int wanted = charbuf_end - charbuf + (bytes);           \
                                                                \
        dst = alloc_destination (coding, wanted, dst);          \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
aa72b389 952
aa72b389 953
aa72b389 954
df7492f9
KH
955static void
956coding_set_source (coding)
aa72b389 957 struct coding_system *coding;
aa72b389 958{
df7492f9
KH
959 if (BUFFERP (coding->src_object))
960 {
2cb26057 961 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 962
df7492f9 963 if (coding->src_pos < 0)
2cb26057 964 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 965 else
2cb26057 966 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 967 }
df7492f9 968 else if (STRINGP (coding->src_object))
aa72b389 969 {
8f924df7 970 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 971 }
df7492f9
KH
972 else
973 /* Otherwise, the source is C string and is never relocated
974 automatically. Thus we don't have to update anything. */
975 ;
976}
aa72b389 977
df7492f9
KH
978static void
979coding_set_destination (coding)
980 struct coding_system *coding;
981{
982 if (BUFFERP (coding->dst_object))
aa72b389 983 {
df7492f9 984 if (coding->src_pos < 0)
aa72b389 985 {
28f67a95
KH
986 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
987 coding->dst_bytes = (GAP_END_ADDR
988 - (coding->src_bytes - coding->consumed)
989 - coding->destination);
aa72b389 990 }
df7492f9 991 else
28f67a95
KH
992 {
993 /* We are sure that coding->dst_pos_byte is before the gap
994 of the buffer. */
995 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
996 + coding->dst_pos_byte - 1);
997 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
998 - coding->destination);
999 }
df7492f9
KH
1000 }
1001 else
1002 /* Otherwise, the destination is C string and is never relocated
1003 automatically. Thus we don't have to update anything. */
1004 ;
1005}
1006
1007
1008static void
1009coding_alloc_by_realloc (coding, bytes)
1010 struct coding_system *coding;
1011 EMACS_INT bytes;
1012{
1013 coding->destination = (unsigned char *) xrealloc (coding->destination,
1014 coding->dst_bytes + bytes);
1015 coding->dst_bytes += bytes;
1016}
1017
1018static void
1019coding_alloc_by_making_gap (coding, bytes)
1020 struct coding_system *coding;
1021 EMACS_INT bytes;
1022{
2c78b7e1
KH
1023 if (BUFFERP (coding->dst_object)
1024 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
1025 {
1026 EMACS_INT add = coding->src_bytes - coding->consumed;
1027
1028 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1029 make_gap (bytes);
1030 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1031 }
1032 else
1033 {
2c78b7e1
KH
1034 Lisp_Object this_buffer;
1035
1036 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1037 set_buffer_internal (XBUFFER (coding->dst_object));
1038 make_gap (bytes);
1039 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1040 }
df7492f9 1041}
8f924df7 1042
df7492f9
KH
1043
1044static unsigned char *
1045alloc_destination (coding, nbytes, dst)
1046 struct coding_system *coding;
3e139625 1047 EMACS_INT nbytes;
df7492f9
KH
1048 unsigned char *dst;
1049{
1050 EMACS_INT offset = dst - coding->destination;
1051
1052 if (BUFFERP (coding->dst_object))
1053 coding_alloc_by_making_gap (coding, nbytes);
aa72b389 1054 else
df7492f9 1055 coding_alloc_by_realloc (coding, nbytes);
065e3595 1056 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1057 coding_set_destination (coding);
1058 dst = coding->destination + offset;
1059 return dst;
1060}
aa72b389 1061
ff0dacd7
KH
1062/** Macros for annotations. */
1063
/* Maximum length of annotation data: the sum of the longest
   composition annotation and one charset annotation.  */
#define MAX_ANNOTATION_LENGTH \
  (4 + (2 * MAX_COMPOSITION_COMPONENTS - 1) + 4)
ff0dacd7
KH
1067
/* Annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the annotated text.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.  */
1086
/* Append an annotation header to BUF, advancing BUF by three
   elements: -LEN, MASK and NCHARS.  Also set coding->annotated to 1;
   the macro relies on the caller's `coding' variable.

   The expansion deliberately has no trailing semicolon, so that
   `if (x) ADD_ANNOTATION_DATA (...); else ...' parses as a single
   statement.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1094
69a80ea3
KH
/* Append a composition annotation to BUF: the common annotation
   header (length 4, CODING_ANNOTATE_COMPOSITION_MASK, NCHARS)
   followed by METHOD.  BUF is advanced by four elements.  The
   arguments are parenthesized in the same way as in
   ADD_ANNOTATION_DATA.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1100
1101
69a80ea3
KH
/* Append a charset annotation to BUF: the common annotation header
   (length 4, CODING_ANNOTATE_CHARSET_MASK, NCHARS) followed by the
   charset ID.  BUF is advanced by four elements.  The arguments are
   parenthesized in the same way as in ADD_ANNOTATION_DATA.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1107
df7492f9
KH
1108\f
1109/*** 2. Emacs' internal format (emacs-utf-8) ***/
1110
1111
1112
1113\f
1114/*** 3. UTF-8 ***/
1115
1116/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1117 Check if a text is encoded in UTF-8. If it is, return 1, else
1118 return 0. */
df7492f9
KH
1119
/* Classification of one octet C of a UTF-8 byte sequence by its high
   bits: a single-octet code (0xxxxxxx), a trailing octet (10xxxxxx),
   or the leading octet of a 2-, 3-, 4- or 5-octet sequence.  */
#define UTF_8_1_OCTET_P(c)         (0x80 > (c))
#define UTF_8_EXTRA_OCTET_P(c)     (0x80 == ((c) & 0xC0))
#define UTF_8_2_OCTET_LEADING_P(c) (0xC0 == ((c) & 0xE0))
#define UTF_8_3_OCTET_LEADING_P(c) (0xE0 == ((c) & 0xF0))
#define UTF_8_4_OCTET_LEADING_P(c) (0xF0 == ((c) & 0xF8))
#define UTF_8_5_OCTET_LEADING_P(c) (0xF8 == ((c) & 0xFC))
1126
1127static int
ff0dacd7 1128detect_coding_utf_8 (coding, detect_info)
df7492f9 1129 struct coding_system *coding;
ff0dacd7 1130 struct coding_detection_info *detect_info;
df7492f9 1131{
065e3595 1132 const unsigned char *src = coding->source, *src_base;
8f924df7 1133 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1134 int multibytep = coding->src_multibyte;
1135 int consumed_chars = 0;
1136 int found = 0;
1137
ff0dacd7 1138 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1139 /* A coding system of this category is always ASCII compatible. */
1140 src += coding->head_ascii;
1141
1142 while (1)
aa72b389 1143 {
df7492f9 1144 int c, c1, c2, c3, c4;
aa72b389 1145
065e3595 1146 src_base = src;
df7492f9 1147 ONE_MORE_BYTE (c);
065e3595 1148 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1149 continue;
1150 ONE_MORE_BYTE (c1);
065e3595 1151 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1152 break;
1153 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1154 {
ff0dacd7 1155 found = CATEGORY_MASK_UTF_8;
df7492f9 1156 continue;
aa72b389 1157 }
df7492f9 1158 ONE_MORE_BYTE (c2);
065e3595 1159 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1160 break;
1161 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1162 {
ff0dacd7 1163 found = CATEGORY_MASK_UTF_8;
df7492f9 1164 continue;
aa72b389 1165 }
df7492f9 1166 ONE_MORE_BYTE (c3);
065e3595 1167 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1168 break;
1169 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1170 {
ff0dacd7 1171 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1172 continue;
1173 }
1174 ONE_MORE_BYTE (c4);
065e3595 1175 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1176 break;
1177 if (UTF_8_5_OCTET_LEADING_P (c))
1178 {
ff0dacd7 1179 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1180 continue;
1181 }
1182 break;
aa72b389 1183 }
ff0dacd7 1184 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1185 return 0;
aa72b389 1186
df7492f9 1187 no_more_source:
065e3595 1188 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1189 {
ff0dacd7 1190 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1191 return 0;
aa72b389 1192 }
ff0dacd7
KH
1193 detect_info->found |= found;
1194 return 1;
aa72b389
KH
1195}
1196
4ed46869 1197
b73bfc1c 1198static void
df7492f9 1199decode_coding_utf_8 (coding)
b73bfc1c 1200 struct coding_system *coding;
b73bfc1c 1201{
8f924df7
KH
1202 const unsigned char *src = coding->source + coding->consumed;
1203 const unsigned char *src_end = coding->source + coding->src_bytes;
1204 const unsigned char *src_base;
69a80ea3
KH
1205 int *charbuf = coding->charbuf + coding->charbuf_used;
1206 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1207 int consumed_chars = 0, consumed_chars_base;
1208 int multibytep = coding->src_multibyte;
24a73b0a 1209 Lisp_Object attr, charset_list;
4ed46869 1210
24a73b0a 1211 CODING_GET_INFO (coding, attr, charset_list);
df7492f9
KH
1212
1213 while (1)
b73bfc1c 1214 {
df7492f9 1215 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1216
df7492f9
KH
1217 src_base = src;
1218 consumed_chars_base = consumed_chars;
4af310db 1219
df7492f9
KH
1220 if (charbuf >= charbuf_end)
1221 break;
1222
1223 ONE_MORE_BYTE (c1);
065e3595
KH
1224 if (c1 < 0)
1225 {
1226 c = - c1;
1227 }
1228 else if (UTF_8_1_OCTET_P(c1))
df7492f9
KH
1229 {
1230 c = c1;
4af310db 1231 }
df7492f9 1232 else
4af310db 1233 {
df7492f9 1234 ONE_MORE_BYTE (c2);
065e3595 1235 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1236 goto invalid_code;
1237 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1238 {
b0edb2c5
DL
1239 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1240 /* Reject overlong sequences here and below. Encoders
1241 producing them are incorrect, they can be misleading,
1242 and they mess up read/write invariance. */
1243 if (c < 128)
1244 goto invalid_code;
4af310db 1245 }
df7492f9 1246 else
aa72b389 1247 {
df7492f9 1248 ONE_MORE_BYTE (c3);
065e3595 1249 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1250 goto invalid_code;
1251 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1252 {
1253 c = (((c1 & 0xF) << 12)
1254 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1255 if (c < 0x800
1256 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1257 goto invalid_code;
1258 }
df7492f9
KH
1259 else
1260 {
1261 ONE_MORE_BYTE (c4);
065e3595 1262 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1263 goto invalid_code;
1264 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1265 {
df7492f9
KH
1266 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1267 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1268 if (c < 0x10000)
1269 goto invalid_code;
1270 }
df7492f9
KH
1271 else
1272 {
1273 ONE_MORE_BYTE (c5);
065e3595 1274 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1275 goto invalid_code;
1276 if (UTF_8_5_OCTET_LEADING_P (c1))
1277 {
1278 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1279 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1280 | (c5 & 0x3F));
b0edb2c5 1281 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1282 goto invalid_code;
1283 }
1284 else
1285 goto invalid_code;
1286 }
1287 }
aa72b389 1288 }
b73bfc1c 1289 }
df7492f9
KH
1290
1291 *charbuf++ = c;
1292 continue;
1293
1294 invalid_code:
1295 src = src_base;
1296 consumed_chars = consumed_chars_base;
1297 ONE_MORE_BYTE (c);
1298 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1299 coding->errors++;
aa72b389
KH
1300 }
1301
df7492f9
KH
1302 no_more_source:
1303 coding->consumed_char += consumed_chars_base;
1304 coding->consumed = src_base - coding->source;
1305 coding->charbuf_used = charbuf - coding->charbuf;
1306}
1307
1308
1309static int
1310encode_coding_utf_8 (coding)
1311 struct coding_system *coding;
1312{
1313 int multibytep = coding->dst_multibyte;
1314 int *charbuf = coding->charbuf;
1315 int *charbuf_end = charbuf + coding->charbuf_used;
1316 unsigned char *dst = coding->destination + coding->produced;
1317 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1318 int produced_chars = 0;
df7492f9
KH
1319 int c;
1320
1321 if (multibytep)
aa72b389 1322 {
df7492f9
KH
1323 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1324
1325 while (charbuf < charbuf_end)
b73bfc1c 1326 {
df7492f9 1327 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1328
df7492f9
KH
1329 ASSURE_DESTINATION (safe_room);
1330 c = *charbuf++;
28f67a95
KH
1331 if (CHAR_BYTE8_P (c))
1332 {
1333 c = CHAR_TO_BYTE8 (c);
1334 EMIT_ONE_BYTE (c);
1335 }
1336 else
1337 {
1338 CHAR_STRING_ADVANCE (c, pend);
1339 for (p = str; p < pend; p++)
1340 EMIT_ONE_BYTE (*p);
1341 }
b73bfc1c 1342 }
aa72b389 1343 }
df7492f9
KH
1344 else
1345 {
1346 int safe_room = MAX_MULTIBYTE_LENGTH;
1347
1348 while (charbuf < charbuf_end)
b73bfc1c 1349 {
df7492f9
KH
1350 ASSURE_DESTINATION (safe_room);
1351 c = *charbuf++;
1352 dst += CHAR_STRING (c, dst);
1353 produced_chars++;
4ed46869
KH
1354 }
1355 }
065e3595 1356 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1357 coding->produced_char += produced_chars;
1358 coding->produced = dst - coding->destination;
1359 return 0;
4ed46869
KH
1360}
1361
b73bfc1c 1362
df7492f9 1363/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1364 Check if a text is encoded in one of UTF-16 based coding systems.
1365 If it is, return 1, else return 0. */
aa72b389 1366
df7492f9
KH
/* Nonzero if the 16-bit unit VAL is a high surrogate
   (0xD800..0xDBFF).  */
#define UTF_16_HIGH_SURROGATE_P(val) (0xD800 == ((val) & 0xFC00))

/* Nonzero if the 16-bit unit VAL is a low surrogate
   (0xDC00..0xDFFF).  */
#define UTF_16_LOW_SURROGATE_P(val) (0xDC00 == ((val) & 0xFC00))

/* Nonzero if VAL is 0xFFFE, 0xFFFF or a low surrogate.  VAL may be
   evaluated more than once.  */
#define UTF_16_INVALID_P(val)           \
  (0xFFFE == (val)                      \
   || 0xFFFF == (val)                   \
   || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1377
aa72b389 1378
df7492f9 1379static int
ff0dacd7 1380detect_coding_utf_16 (coding, detect_info)
aa72b389 1381 struct coding_system *coding;
ff0dacd7 1382 struct coding_detection_info *detect_info;
aa72b389 1383{
8f924df7
KH
1384 const unsigned char *src = coding->source, *src_base = src;
1385 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1386 int multibytep = coding->src_multibyte;
1387 int consumed_chars = 0;
1388 int c1, c2;
aa72b389 1389
ff0dacd7 1390 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1391 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1392 && (coding->src_chars & 1))
ff0dacd7
KH
1393 {
1394 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1395 return 0;
1396 }
24a73b0a 1397
df7492f9
KH
1398 ONE_MORE_BYTE (c1);
1399 ONE_MORE_BYTE (c2);
df7492f9 1400 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1401 {
b49a1807
KH
1402 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1403 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1404 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1405 | CATEGORY_MASK_UTF_16_BE_NOSIG
1406 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1407 }
df7492f9 1408 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1409 {
b49a1807
KH
1410 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1411 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1412 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1413 | CATEGORY_MASK_UTF_16_BE_NOSIG
1414 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1415 }
065e3595 1416 else if (c1 >= 0 && c2 >= 0)
24a73b0a
KH
1417 {
1418 unsigned char b1[256], b2[256];
1419 int b1_variants = 1, b2_variants = 1;
1420 int n;
1421
1422 bzero (b1, 256), bzero (b2, 256);
1423 b1[c1]++, b2[c2]++;
1424 for (n = 0; n < 256 && src < src_end; n++)
1425 {
065e3595 1426 src_base = src;
24a73b0a
KH
1427 ONE_MORE_BYTE (c1);
1428 ONE_MORE_BYTE (c2);
065e3595
KH
1429 if (c1 < 0 || c2 < 0)
1430 break;
24a73b0a
KH
1431 if (! b1[c1++]) b1_variants++;
1432 if (! b2[c2++]) b2_variants++;
1433 }
1434 if (b1_variants < b2_variants)
1435 detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1436 else
1437 detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1438 detect_info->rejected
1439 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
ff0dacd7 1440 }
df7492f9 1441 no_more_source:
ff0dacd7 1442 return 1;
df7492f9 1443}
aa72b389 1444
df7492f9
KH
1445static void
1446decode_coding_utf_16 (coding)
1447 struct coding_system *coding;
1448{
8f924df7
KH
1449 const unsigned char *src = coding->source + coding->consumed;
1450 const unsigned char *src_end = coding->source + coding->src_bytes;
1451 const unsigned char *src_base;
69a80ea3
KH
1452 int *charbuf = coding->charbuf + coding->charbuf_used;
1453 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1454 int consumed_chars = 0, consumed_chars_base;
1455 int multibytep = coding->src_multibyte;
1456 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1457 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1458 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1459 Lisp_Object attr, charset_list;
df7492f9 1460
24a73b0a 1461 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1462
b49a1807 1463 if (bom == utf_16_with_bom)
aa72b389 1464 {
df7492f9 1465 int c, c1, c2;
4af310db 1466
aa72b389 1467 src_base = src;
df7492f9
KH
1468 ONE_MORE_BYTE (c1);
1469 ONE_MORE_BYTE (c2);
e19c3639 1470 c = (c1 << 8) | c2;
aa72b389 1471
b49a1807
KH
1472 if (endian == utf_16_big_endian
1473 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1474 {
b49a1807
KH
1475 /* The first two bytes are not BOM. Treat them as bytes
1476 for a normal character. */
1477 src = src_base;
1478 coding->errors++;
aa72b389 1479 }
b49a1807
KH
1480 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1481 }
1482 else if (bom == utf_16_detect_bom)
1483 {
1484 /* We have already tried to detect BOM and failed in
1485 detect_coding. */
1486 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1487 }
aa72b389 1488
df7492f9
KH
1489 while (1)
1490 {
1491 int c, c1, c2;
1492
1493 src_base = src;
1494 consumed_chars_base = consumed_chars;
1495
1496 if (charbuf + 2 >= charbuf_end)
1497 break;
1498
1499 ONE_MORE_BYTE (c1);
065e3595
KH
1500 if (c1 < 0)
1501 {
1502 *charbuf++ = -c1;
1503 continue;
1504 }
df7492f9 1505 ONE_MORE_BYTE (c2);
065e3595
KH
1506 if (c2 < 0)
1507 {
1508 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1509 *charbuf++ = -c2;
1510 continue;
1511 }
df7492f9 1512 c = (endian == utf_16_big_endian
e19c3639 1513 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1514 if (surrogate)
fd3ae0b9 1515 {
df7492f9 1516 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1517 {
df7492f9
KH
1518 if (endian == utf_16_big_endian)
1519 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1520 else
1521 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1522 *charbuf++ = c1;
1523 *charbuf++ = c2;
1524 coding->errors++;
1525 if (UTF_16_HIGH_SURROGATE_P (c))
1526 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1527 else
df7492f9 1528 *charbuf++ = c;
fd3ae0b9
KH
1529 }
1530 else
df7492f9
KH
1531 {
1532 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1533 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1534 *charbuf++ = 0x10000 + c;
df7492f9 1535 }
fd3ae0b9 1536 }
aa72b389 1537 else
df7492f9
KH
1538 {
1539 if (UTF_16_HIGH_SURROGATE_P (c))
1540 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1541 else
1542 *charbuf++ = c;
8f924df7 1543 }
aa72b389 1544 }
df7492f9
KH
1545
1546 no_more_source:
1547 coding->consumed_char += consumed_chars_base;
1548 coding->consumed = src_base - coding->source;
1549 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1550}
b73bfc1c 1551
df7492f9
KH
1552static int
1553encode_coding_utf_16 (coding)
1554 struct coding_system *coding;
1555{
1556 int multibytep = coding->dst_multibyte;
1557 int *charbuf = coding->charbuf;
1558 int *charbuf_end = charbuf + coding->charbuf_used;
1559 unsigned char *dst = coding->destination + coding->produced;
1560 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1561 int safe_room = 8;
1562 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1563 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1564 int produced_chars = 0;
24a73b0a 1565 Lisp_Object attrs, charset_list;
df7492f9 1566 int c;
4ed46869 1567
24a73b0a 1568 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1569
b49a1807 1570 if (bom != utf_16_without_bom)
df7492f9
KH
1571 {
1572 ASSURE_DESTINATION (safe_room);
1573 if (big_endian)
df7492f9 1574 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1575 else
1576 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1577 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1578 }
1579
1580 while (charbuf < charbuf_end)
1581 {
1582 ASSURE_DESTINATION (safe_room);
1583 c = *charbuf++;
e19c3639
KH
1584 if (c >= MAX_UNICODE_CHAR)
1585 c = coding->default_char;
df7492f9
KH
1586
1587 if (c < 0x10000)
1588 {
1589 if (big_endian)
1590 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1591 else
1592 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1593 }
1594 else
1595 {
1596 int c1, c2;
1597
1598 c -= 0x10000;
1599 c1 = (c >> 10) + 0xD800;
1600 c2 = (c & 0x3FF) + 0xDC00;
1601 if (big_endian)
1602 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1603 else
1604 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1605 }
1606 }
065e3595 1607 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1608 coding->produced = dst - coding->destination;
1609 coding->produced_char += produced_chars;
1610 return 0;
1611}
1612
1613\f
1614/*** 6. Old Emacs' internal format (emacs-mule) ***/
1615
1616/* Emacs' internal format for representation of multiple character
1617 sets is a kind of multi-byte encoding, i.e. characters are
1618 represented by variable-length sequences of one-byte codes.
1619
1620 ASCII characters and control characters (e.g. `tab', `newline') are
1621 represented by one-byte sequences which are their ASCII codes, in
1622 the range 0x00 through 0x7F.
1623
1624 8-bit characters of the range 0x80..0x9F are represented by
1625 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1626 code + 0x20).
1627
1628 8-bit characters of the range 0xA0..0xFF are represented by
1629 one-byte sequences which are their 8-bit code.
1630
1631 The other characters are represented by a sequence of `base
1632 leading-code', optional `extended leading-code', and one or two
1633 `position-code's. The length of the sequence is determined by the
1634 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1635 whereas extended leading-code and position-code take the range 0xA0
1636 through 0xFF. See `charset.h' for more details about leading-code
1637 and position-code.
1638
   --- CODE RANGE of Emacs' internal format ---
   character set	range
   -------------	-----
   ascii		0x00..0x7F
   eight-bit-control	LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
   eight-bit-graphic	0xA0..0xFF
   ELSE			0x81..0x9D + [0xA0..0xFF]+
   ---------------------------------------------
1647
1648 As this is the internal character representation, the format is
1649 usually not used externally (i.e. in a file or in a data sent to a
1650 process). But, it is possible to have a text externally in this
1651 format (i.e. by encoding by the coding system `emacs-mule').
1652
1653 In that case, a sequence of one-byte codes has a slightly different
1654 form.
1655
1656 At first, all characters in eight-bit-control are represented by
1657 one-byte sequences which are their 8-bit code.
1658
   Next, character composition data are represented by a byte
   sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
   where,
	METHOD is 0xF0 plus one of the composition methods (enum
	composition_method),

	BYTES is 0xA0 plus the byte length of this composition data,

	CHARS is 0x20 plus the number of characters composed by this
	data,

	COMPONENTs are characters in multibyte form or composition
	rules encoded by two bytes of ASCII codes.
1672
1673 In addition, for backward compatibility, the following formats are
1674 also recognized as composition data on decoding.
1675
1676 0x80 MSEQ ...
1677 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1678
1679 Here,
1680 MSEQ is a multibyte form but in these special format:
1681 ASCII: 0xA0 ASCII_CODE+0x80,
1682 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1683 RULE is a one byte code of the range 0xA0..0xF0 that
1684 represents a composition rule.
1685 */
1686
1687char emacs_mule_bytes[256];
1688
df7492f9 1689int
ff0dacd7 1690emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1691 struct coding_system *coding;
065e3595 1692 const unsigned char *src;
ff0dacd7 1693 int *nbytes, *nchars, *id;
df7492f9 1694{
8f924df7
KH
1695 const unsigned char *src_end = coding->source + coding->src_bytes;
1696 const unsigned char *src_base = src;
df7492f9 1697 int multibytep = coding->src_multibyte;
df7492f9
KH
1698 struct charset *charset;
1699 unsigned code;
1700 int c;
1701 int consumed_chars = 0;
1702
1703 ONE_MORE_BYTE (c);
065e3595 1704 if (c < 0)
df7492f9 1705 {
065e3595
KH
1706 c = -c;
1707 charset = emacs_mule_charset[0];
1708 }
1709 else
1710 {
1711 switch (emacs_mule_bytes[c])
b73bfc1c 1712 {
065e3595 1713 case 2:
df7492f9
KH
1714 if (! (charset = emacs_mule_charset[c]))
1715 goto invalid_code;
1716 ONE_MORE_BYTE (c);
065e3595
KH
1717 if (c < 0)
1718 goto invalid_code;
df7492f9 1719 code = c & 0x7F;
065e3595
KH
1720 break;
1721
1722 case 3:
1723 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1724 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1725 {
1726 ONE_MORE_BYTE (c);
1727 if (c < 0 || ! (charset = emacs_mule_charset[c]))
1728 goto invalid_code;
1729 ONE_MORE_BYTE (c);
1730 if (c < 0)
1731 goto invalid_code;
1732 code = c & 0x7F;
1733 }
1734 else
1735 {
1736 if (! (charset = emacs_mule_charset[c]))
1737 goto invalid_code;
1738 ONE_MORE_BYTE (c);
1739 if (c < 0)
1740 goto invalid_code;
1741 code = (c & 0x7F) << 8;
1742 ONE_MORE_BYTE (c);
1743 if (c < 0)
1744 goto invalid_code;
1745 code |= c & 0x7F;
1746 }
1747 break;
1748
1749 case 4:
1750 ONE_MORE_BYTE (c);
1751 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
1752 goto invalid_code;
1753 ONE_MORE_BYTE (c);
065e3595
KH
1754 if (c < 0)
1755 goto invalid_code;
781d7a48 1756 code = (c & 0x7F) << 8;
df7492f9 1757 ONE_MORE_BYTE (c);
065e3595
KH
1758 if (c < 0)
1759 goto invalid_code;
df7492f9 1760 code |= c & 0x7F;
065e3595 1761 break;
df7492f9 1762
065e3595
KH
1763 case 1:
1764 code = c;
1765 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1766 ? charset_ascii : charset_eight_bit);
1767 break;
df7492f9 1768
065e3595
KH
1769 default:
1770 abort ();
1771 }
1772 c = DECODE_CHAR (charset, code);
1773 if (c < 0)
1774 goto invalid_code;
df7492f9 1775 }
df7492f9
KH
1776 *nbytes = src - src_base;
1777 *nchars = consumed_chars;
ff0dacd7
KH
1778 if (id)
1779 *id = charset->id;
df7492f9
KH
1780 return c;
1781
1782 no_more_source:
1783 return -2;
1784
1785 invalid_code:
1786 return -1;
1787}
1788
1789
1790/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1791 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1792 else return 0. */
df7492f9
KH
1793
1794static int
ff0dacd7 1795detect_coding_emacs_mule (coding, detect_info)
df7492f9 1796 struct coding_system *coding;
ff0dacd7 1797 struct coding_detection_info *detect_info;
df7492f9 1798{
065e3595 1799 const unsigned char *src = coding->source, *src_base;
8f924df7 1800 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1801 int multibytep = coding->src_multibyte;
1802 int consumed_chars = 0;
1803 int c;
1804 int found = 0;
1805
ff0dacd7 1806 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1807 /* A coding system of this category is always ASCII compatible. */
1808 src += coding->head_ascii;
1809
1810 while (1)
1811 {
065e3595 1812 src_base = src;
df7492f9 1813 ONE_MORE_BYTE (c);
065e3595
KH
1814 if (c < 0)
1815 continue;
df7492f9
KH
1816 if (c == 0x80)
1817 {
1818 /* Perhaps the start of composite character. We simple skip
1819 it because analyzing it is too heavy for detecting. But,
1820 at least, we check that the composite character
1821 constitues of more than 4 bytes. */
8f924df7 1822 const unsigned char *src_base;
df7492f9
KH
1823
1824 repeat:
1825 src_base = src;
1826 do
1827 {
1828 ONE_MORE_BYTE (c);
1829 }
1830 while (c >= 0xA0);
1831
1832 if (src - src_base <= 4)
1833 break;
ff0dacd7 1834 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1835 if (c == 0x80)
1836 goto repeat;
b73bfc1c 1837 }
df7492f9
KH
1838
1839 if (c < 0x80)
b73bfc1c 1840 {
df7492f9
KH
1841 if (c < 0x20
1842 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1843 break;
1844 }
1845 else
1846 {
0e219d54 1847 int more_bytes = emacs_mule_bytes[*src_base] - 1;
df7492f9 1848
0e219d54 1849 while (more_bytes > 0)
df7492f9
KH
1850 {
1851 ONE_MORE_BYTE (c);
0e219d54
KH
1852 if (c < 0xA0)
1853 {
1854 src--; /* Unread the last byte. */
1855 break;
1856 }
1857 more_bytes--;
df7492f9 1858 }
0e219d54 1859 if (more_bytes != 0)
df7492f9 1860 break;
ff0dacd7 1861 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1862 }
1863 }
ff0dacd7 1864 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1865 return 0;
1866
1867 no_more_source:
065e3595 1868 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 1869 {
ff0dacd7 1870 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1871 return 0;
1872 }
ff0dacd7
KH
1873 detect_info->found |= found;
1874 return 1;
4ed46869
KH
1875}
1876
b73bfc1c 1877
df7492f9
KH
1878/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1879
/* Decode one character that is a component of a composition sequence
   of Emacs 20/21 style at SRC, by calling emacs_mule_char.  On
   success, store the character in *BUF, increment BUF, advance SRC by
   the number of bytes the character occupied, and add the number of
   consumed characters to CONSUMED_CHARS.

   If SRC is already at SRC_END, or emacs_mule_char returns -2 (the
   source ended inside the character), execute `break'; this leaves
   the innermost loop (or `do ... while (0)') in which the macro is
   expanded.  On any other negative return value, jump to the label
   `invalid_code'.

   The macro is written as `if (1) { ... } else' so that the
   semicolon the caller writes after it becomes the empty `else'
   branch, and so that the `break' statements above apply to the
   caller's loop rather than to a loop inside the macro.

   It uses the variables `coding', `src', `src_end' and
   `consumed_chars' of the expansion site.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                     \
  if (1)                                                            \
    {                                                               \
      int c;                                                        \
      int nbytes, nchars;                                           \
                                                                    \
      if (src == src_end)                                           \
        break;                                                      \
      c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
      if (c < 0)                                                    \
        {                                                           \
          if (c == -2)                                              \
            break;                                                  \
          goto invalid_code;                                        \
        }                                                           \
      *buf++ = c;                                                   \
      src += nbytes;                                                \
      consumed_chars += nchars;                                     \
    }                                                               \
  else
1907
1908
/* Decode one composition rule of an Emacs 20 style composition
   sequence at SRC.  The rule occupies a single byte.  After 0x20 is
   subtracted from it the value must be in the range 0..80; it is then
   split into GREF (value / 9) and NREF (value % 9), and
   COMPOSITION_ENCODE_RULE (GREF, NREF) is stored in *BUF, which is
   incremented.  If no byte is left at SRC, or the value is out of
   range, jump to the label `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
  do {                                                  \
    int rule, gref, nref;                               \
                                                        \
    if (src >= src_end)                                 \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (rule);                      \
    rule -= 0x20;                                       \
    if (! (rule >= 0 && rule < 81))                     \
      goto invalid_code;                                \
                                                        \
    gref = rule / 9;                                    \
    nref = rule % 9;                                    \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1928
1929
781d7a48
KH
/* Decode one composition rule of an Emacs 21 style composition
   sequence at SRC.  The rule occupies two bytes, GREF + 0x20 followed
   by NREF + 0x20.  Both decoded values must be in the range 0..80;
   COMPOSITION_ENCODE_RULE (GREF, NREF) is then stored in *BUF, which
   is incremented.  If fewer than two bytes are left at SRC, or either
   value is out of range, jump to the label `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    if (src_end - src <= 1)                             \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (gref);                      \
    gref -= 0x20;                                       \
    ONE_MORE_BYTE_NO_CHECK (nref);                      \
    nref -= 0x20;                                       \
    if (! (gref >= 0 && gref < 81                       \
           && nref >= 0 && nref < 81))                  \
      goto invalid_code;                                \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1950
1951
/* Decode the header and, unless the method is COMPOSITION_RELATIVE,
   the components of an Emacs 21 style composition.  C is the method
   byte that the caller has already read; the macro reads the two
   following header bytes into C as well.

   A composition annotation is added to CHARBUF with
   ADD_COMPOSITION_DATA.  For the non-relative methods the components
   are then decoded directly into CHARBUF: characters and rules
   alternate, except for COMPOSITION_WITH_ALTCHARS where every
   component is a character.  Finally the number of decoded components
   is subtracted from the first element written by this macro
   (CHARBUF_BASE[0]).

   Jump to the label `invalid_code' if a header byte is negative, if
   the byte length is less than 3, or if decoding of the components
   stopped before CONSUMED_CHARS reached CONSUMED_CHARS_LIMIT.

   The macro uses the variables `charbuf', `consumed_chars' and
   `consumed_chars_base' of the expansion site, plus those needed by
   the macros it calls.

   NOTE(review): CONSUMED_CHARS_LIMIT is computed from the BYTES field
   but compared with a count of consumed characters -- confirm that
   these agree for the sources this macro is used on.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    /* Emacs 21 style format.  The first three bytes at SRC are         \
       (METHOD + 0xF2), (BYTES + 0xA0), (CHARS + 0xA0), where BYTES is  \
       the byte length of this composition information, CHARS is the    \
       number of characters composed by this composition.  */           \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int consumed_chars_limit;                                           \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nchars = c - 0xA0;                                                  \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    consumed_chars_limit = consumed_chars_base + nbytes;                \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int i = 0;                                                      \
        while (consumed_chars < consumed_chars_limit)                   \
          {                                                             \
            if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
            i++;                                                        \
          }                                                             \
        if (consumed_chars < consumed_chars_limit)                      \
          goto invalid_code;                                            \
        charbuf_base[0] -= i;                                           \
      }                                                                 \
  } while (0)
93dec019 1991
aa72b389 1992
df7492f9
KH
/* Decode an Emacs 20 style relative composition.  SRC is rewound to
   SRC_BASE and the 0x80 byte is read again into C; then up to
   MAX_COMPOSITION_COMPONENTS characters are decoded into the local
   array COMPONENTS.  The loop ends early when
   DECODE_EMACS_MULE_COMPOSITION_CHAR executes `break'.

   If fewer than two characters were decoded, jump to the label
   `invalid_code'.  Otherwise add a COMPOSITION_RELATIVE annotation
   for the decoded characters to CHARBUF and copy the characters
   after it.

   The macro uses the variables `src', `src_base' and `charbuf' of the
   expansion site, plus those needed by the macros it calls.

   NOTE(review): CONSUMED_CHARS is not rewound together with SRC
   before the 0x80 byte is read again -- confirm against the
   definition of ONE_MORE_BYTE that the byte is not counted twice.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for relative composition.  */      \
    /* Collect the characters to be composed.  */               \
    enum composition_method method = COMPOSITION_RELATIVE;      \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    src = src_base;                                             \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
    if (i < 2)                                                  \
      goto invalid_code;                                        \
    ADD_COMPOSITION_DATA (charbuf, i, method);                  \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
  } while (0)
2012
2013
/* Decode an Emacs 20 style rule-base composition.  One character is
   decoded into the local array COMPONENTS, followed by up to
   MAX_COMPOSITION_COMPONENTS (rule, character) pairs.

   Jump to the label `invalid_code' if no pair was decoded or if the
   number of collected components is even (a rule without the
   character that must follow it).  Jump to the label `no_more_source'
   if CHARBUF does not have room for the elements copied below.
   Otherwise add a COMPOSITION_WITH_RULE annotation to CHARBUF and
   copy the components after it.

   The macro uses the variables `charbuf' and `charbuf_end' of the
   expansion site, plus those needed by the macros it calls.

   Fixes relative to the previous definition:
   - the annotation is written to CHARBUF, as in the other composition
     macros, not to the local pointer BUF;
   - the capacity test bails out when there is NOT enough room (it was
     inverted);
   - COMPONENTS is large enough for the first character plus
     MAX_COMPOSITION_COMPONENTS pairs.

   NOTE(review): the component loop can only finish before
   MAX_COMPOSITION_COMPONENTS pairs through the exits of the two helper
   macros, and the copy loops below are driven by I (the number of
   pairs), not by the number of collected components.  Both look
   suspicious and should be checked against well-formed Emacs 20
   data.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for rule-base composition.  */     \
    enum composition_method method = COMPOSITION_WITH_RULE;     \
    int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
      {                                                         \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
      }                                                         \
    if (i < 1 || (buf - components) % 2 == 0)                   \
      goto invalid_code;                                        \
    /* The two copy loops below store I + (I + 1) / 2 elements. */ \
    if (charbuf + i + (i / 2) + 1 > charbuf_end)                \
      goto no_more_source;                                      \
    ADD_COMPOSITION_DATA (charbuf, i, method);                  \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
    for (j = 0; j < i; j += 2)                                  \
      *charbuf++ = components[j];                               \
  } while (0)
2039
aa72b389
KH
2040
2041static void
df7492f9 2042decode_coding_emacs_mule (coding)
aa72b389 2043 struct coding_system *coding;
aa72b389 2044{
8f924df7
KH
2045 const unsigned char *src = coding->source + coding->consumed;
2046 const unsigned char *src_end = coding->source + coding->src_bytes;
2047 const unsigned char *src_base;
69a80ea3
KH
2048 int *charbuf = coding->charbuf + coding->charbuf_used;
2049 int *charbuf_end
2050 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 2051 int consumed_chars = 0, consumed_chars_base;
df7492f9 2052 int multibytep = coding->src_multibyte;
24a73b0a 2053 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2054 int char_offset = coding->produced_char;
2055 int last_offset = char_offset;
2056 int last_id = charset_ascii;
aa72b389 2057
24a73b0a 2058 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2059
aa72b389
KH
2060 while (1)
2061 {
df7492f9
KH
2062 int c;
2063
aa72b389 2064 src_base = src;
df7492f9
KH
2065 consumed_chars_base = consumed_chars;
2066
2067 if (charbuf >= charbuf_end)
2068 break;
aa72b389 2069
df7492f9 2070 ONE_MORE_BYTE (c);
065e3595
KH
2071 if (c < 0)
2072 {
2073 *charbuf++ = -c;
2074 char_offset++;
2075 }
2076 else if (c < 0x80)
aa72b389 2077 {
df7492f9
KH
2078 *charbuf++ = c;
2079 char_offset++;
aa72b389 2080 }
df7492f9
KH
2081 else if (c == 0x80)
2082 {
df7492f9 2083 ONE_MORE_BYTE (c);
065e3595
KH
2084 if (c < 0)
2085 goto invalid_code;
781d7a48
KH
2086 if (c - 0xF2 >= COMPOSITION_RELATIVE
2087 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2088 DECODE_EMACS_MULE_21_COMPOSITION (c);
2089 else if (c < 0xC0)
2090 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2091 else if (c == 0xFF)
2092 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2093 else
2094 goto invalid_code;
2095 }
2096 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2097 {
2098 int nbytes, nchars;
ff0dacd7
KH
2099 int id;
2100
781d7a48
KH
2101 src = src_base;
2102 consumed_chars = consumed_chars_base;
ff0dacd7 2103 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2104 if (c < 0)
2105 {
2106 if (c == -2)
2107 break;
2108 goto invalid_code;
2109 }
ff0dacd7
KH
2110 if (last_id != id)
2111 {
2112 if (last_id != charset_ascii)
69a80ea3 2113 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
2114 last_id = id;
2115 last_offset = char_offset;
2116 }
df7492f9 2117 *charbuf++ = c;
781d7a48
KH
2118 src += nbytes;
2119 consumed_chars += nchars;
df7492f9
KH
2120 char_offset++;
2121 }
2122 continue;
2123
2124 invalid_code:
2125 src = src_base;
2126 consumed_chars = consumed_chars_base;
2127 ONE_MORE_BYTE (c);
2128 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2129 char_offset++;
df7492f9
KH
2130 coding->errors++;
2131 }
2132
2133 no_more_source:
ff0dacd7 2134 if (last_id != charset_ascii)
69a80ea3 2135 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2136 coding->consumed_char += consumed_chars_base;
2137 coding->consumed = src_base - coding->source;
2138 coding->charbuf_used = charbuf - coding->charbuf;
2139}
2140
2141
/* Store in CODES[0] and CODES[1] the leading code(s) of the
   emacs-mule form for the charset whose emacs-mule ID is ID:

     ID < 0xA0          CODES[0] = ID,   CODES[1] = 0
     0xA0 <= ID < 0xE0  CODES[0] = 0x9A, CODES[1] = ID
     0xE0 <= ID < 0xF0  CODES[0] = 0x9B, CODES[1] = ID
     0xF0 <= ID < 0xF5  CODES[0] = 0x9C, CODES[1] = ID
     otherwise          CODES[0] = 0x9D, CODES[1] = ID

   CODES[1] == 0 means there is only one leading code.  ID and CODES
   are evaluated more than once.  The expansion is a single statement
   that needs a terminating semicolon at the place of use.  */

#define EMACS_MULE_LEADING_CODES(id, codes)     \
  do {                                          \
    if ((id) < 0xA0)                            \
      (codes)[0] = (id), (codes)[1] = 0;        \
    else if ((id) < 0xE0)                       \
      (codes)[0] = 0x9A, (codes)[1] = (id);     \
    else if ((id) < 0xF0)                       \
      (codes)[0] = 0x9B, (codes)[1] = (id);     \
    else if ((id) < 0xF5)                       \
      (codes)[0] = 0x9C, (codes)[1] = (id);     \
    else                                        \
      (codes)[0] = 0x9D, (codes)[1] = (id);     \
  } while (0)
2155
aa72b389 2156
df7492f9
KH
2157static int
2158encode_coding_emacs_mule (coding)
2159 struct coding_system *coding;
2160{
2161 int multibytep = coding->dst_multibyte;
2162 int *charbuf = coding->charbuf;
2163 int *charbuf_end = charbuf + coding->charbuf_used;
2164 unsigned char *dst = coding->destination + coding->produced;
2165 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2166 int safe_room = 8;
df7492f9 2167 int produced_chars = 0;
24a73b0a 2168 Lisp_Object attrs, charset_list;
df7492f9 2169 int c;
ff0dacd7 2170 int preferred_charset_id = -1;
df7492f9 2171
24a73b0a 2172 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2173 if (! EQ (charset_list, Vemacs_mule_charset_list))
2174 {
2175 CODING_ATTR_CHARSET_LIST (attrs)
2176 = charset_list = Vemacs_mule_charset_list;
2177 }
df7492f9
KH
2178
2179 while (charbuf < charbuf_end)
2180 {
2181 ASSURE_DESTINATION (safe_room);
2182 c = *charbuf++;
ff0dacd7
KH
2183
2184 if (c < 0)
2185 {
2186 /* Handle an annotation. */
2187 switch (*charbuf)
2188 {
2189 case CODING_ANNOTATE_COMPOSITION_MASK:
2190 /* Not yet implemented. */
2191 break;
2192 case CODING_ANNOTATE_CHARSET_MASK:
2193 preferred_charset_id = charbuf[3];
2194 if (preferred_charset_id >= 0
2195 && NILP (Fmemq (make_number (preferred_charset_id),
2196 charset_list)))
2197 preferred_charset_id = -1;
2198 break;
2199 default:
2200 abort ();
2201 }
2202 charbuf += -c - 1;
2203 continue;
2204 }
2205
df7492f9
KH
2206 if (ASCII_CHAR_P (c))
2207 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2208 else if (CHAR_BYTE8_P (c))
2209 {
2210 c = CHAR_TO_BYTE8 (c);
2211 EMIT_ONE_BYTE (c);
2212 }
df7492f9 2213 else
aa72b389 2214 {
df7492f9
KH
2215 struct charset *charset;
2216 unsigned code;
2217 int dimension;
2218 int emacs_mule_id;
2219 unsigned char leading_codes[2];
2220
ff0dacd7
KH
2221 if (preferred_charset_id >= 0)
2222 {
2223 charset = CHARSET_FROM_ID (preferred_charset_id);
2224 if (! CHAR_CHARSET_P (c, charset))
2225 charset = char_charset (c, charset_list, NULL);
2226 }
2227 else
2228 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2229 if (! charset)
2230 {
2231 c = coding->default_char;
2232 if (ASCII_CHAR_P (c))
2233 {
2234 EMIT_ONE_ASCII_BYTE (c);
2235 continue;
2236 }
2237 charset = char_charset (c, charset_list, &code);
2238 }
2239 dimension = CHARSET_DIMENSION (charset);
2240 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2241 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2242 EMIT_ONE_BYTE (leading_codes[0]);
2243 if (leading_codes[1])
2244 EMIT_ONE_BYTE (leading_codes[1]);
2245 if (dimension == 1)
1fa663f9 2246 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2247 else
df7492f9 2248 {
1fa663f9 2249 code |= 0x8080;
df7492f9
KH
2250 EMIT_ONE_BYTE (code >> 8);
2251 EMIT_ONE_BYTE (code & 0xFF);
2252 }
aa72b389 2253 }
aa72b389 2254 }
065e3595 2255 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2256 coding->produced_char += produced_chars;
2257 coding->produced = dst - coding->destination;
2258 return 0;
aa72b389 2259}
b73bfc1c 2260
4ed46869 2261\f
df7492f9 2262/*** 7. ISO2022 handlers ***/
4ed46869
KH
2263
2264/* The following note describes the coding system ISO2022 briefly.
39787efd 2265 Since the intention of this note is to help understand the
5a936b46 2266 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2267 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2268 original document of ISO2022. This is equivalent to the standard
cfb43547 2269 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2270
2271 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2272 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2273 is encoded using bytes less than 128. This may make the encoded
2274 text a little bit longer, but the text passes more easily through
cfb43547 2275 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2276 Significant Bit).
b73bfc1c 2277
cfb43547
DL
2278 There are two kinds of character sets: control character sets and
2279 graphic character sets. The former contain control characters such
4ed46869 2280 as `newline' and `escape' to provide control functions (control
39787efd 2281 functions are also provided by escape sequences). The latter
cfb43547 2282 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2283 two control character sets and many graphic character sets.
2284
2285 Graphic character sets are classified into one of the following
39787efd
KH
2286 four classes, according to the number of bytes (DIMENSION) and
2287 number of characters in one dimension (CHARS) of the set:
2288 - DIMENSION1_CHARS94
2289 - DIMENSION1_CHARS96
2290 - DIMENSION2_CHARS94
2291 - DIMENSION2_CHARS96
2292
2293 In addition, each character set is assigned an identification tag,
cfb43547 2294 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2295 hereafter). The <F> of each character set is decided by ECMA(*)
2296 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2297 (0x30..0x3F are for private use only).
4ed46869
KH
2298
2299 Note (*): ECMA = European Computer Manufacturers Association
2300
cfb43547 2301 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2302 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2303 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2304 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2305 o DIMENSION2_CHARS96 -- none for the moment
2306
39787efd 2307 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2308 C0 [0x00..0x1F] -- control character plane 0
2309 GL [0x20..0x7F] -- graphic character plane 0
2310 C1 [0x80..0x9F] -- control character plane 1
2311 GR [0xA0..0xFF] -- graphic character plane 1
2312
2313 A control character set is directly designated and invoked to C0 or
39787efd
KH
2314 C1 by an escape sequence. The most common case is that:
2315 - ISO646's control character set is designated/invoked to C0, and
2316 - ISO6429's control character set is designated/invoked to C1,
2317 and usually these designations/invocations are omitted in encoded
2318 text. In a 7-bit environment, only C0 can be used, and a control
2319 character for C1 is encoded by an appropriate escape sequence to
2320 fit into the environment. All control characters for C1 are
2321 defined to have corresponding escape sequences.
4ed46869
KH
2322
2323 A graphic character set is at first designated to one of four
2324 graphic registers (G0 through G3), then these graphic registers are
2325 invoked to GL or GR. These designations and invocations can be
2326 done independently. The most common case is that G0 is invoked to
39787efd
KH
2327 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2328 these invocations and designations are omitted in encoded text.
2329 In a 7-bit environment, only GL can be used.
4ed46869 2330
39787efd
KH
2331 When a graphic character set of CHARS94 is invoked to GL, codes
2332 0x20 and 0x7F of the GL area work as control characters SPACE and
2333 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2334 be used.
4ed46869
KH
2335
2336 There are two ways of invocation: locking-shift and single-shift.
2337 With locking-shift, the invocation lasts until the next different
39787efd
KH
2338 invocation, whereas with single-shift, the invocation affects the
2339 following character only and doesn't affect the locking-shift
2340 state. Invocations are done by the following control characters or
2341 escape sequences:
4ed46869
KH
2342
2343 ----------------------------------------------------------------------
39787efd 2344 abbrev function cntrl escape seq description
4ed46869 2345 ----------------------------------------------------------------------
39787efd
KH
2346 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2347 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2348 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2349 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2350 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2351 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2352 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2353 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2354 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2355 ----------------------------------------------------------------------
39787efd
KH
2356 (*) These are not used by any known coding system.
2357
2358 Control characters for these functions are defined by macros
2359 ISO_CODE_XXX in `coding.h'.
4ed46869 2360
39787efd 2361 Designations are done by the following escape sequences:
4ed46869
KH
2362 ----------------------------------------------------------------------
2363 escape sequence description
2364 ----------------------------------------------------------------------
2365 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2366 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2367 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2368 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2369 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2370 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2371 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2372 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2373 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2374 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2375 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2376 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2377 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2378 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2379 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2380 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2381 ----------------------------------------------------------------------
2382
2383 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2384 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2385
2386 Note (*): Although these designations are not allowed in ISO2022,
2387 Emacs accepts them on decoding, and produces them on encoding
39787efd 2388 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2389 7-bit environment, non-locking-shift, and non-single-shift.
2390
2391 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2392 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2393
cfb43547 2394 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2395 same multilingual text in ISO2022. Actually, there exist many
2396 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
   communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2398 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2399 localized platforms), and all of these are variants of ISO2022.
2400
2401 In addition to the above, Emacs handles two more kinds of escape
2402 sequences: ISO6429's direction specification and Emacs' private
2403 sequence for specifying character composition.
2404
39787efd 2405 ISO6429's direction specification takes the following form:
4ed46869
KH
2406 o CSI ']' -- end of the current direction
2407 o CSI '0' ']' -- end of the current direction
2408 o CSI '1' ']' -- start of left-to-right text
2409 o CSI '2' ']' -- start of right-to-left text
2410 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2411 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2412
2413 Character composition specification takes the following form:
ec6d2bb8
KH
2414 o ESC '0' -- start relative composition
2415 o ESC '1' -- end composition
2416 o ESC '2' -- start rule-base composition (*)
2417 o ESC '3' -- start relative composition with alternate chars (**)
2418 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2419 Since these are not standard escape sequences of any ISO standard,
cfb43547 2420 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2421
5a936b46
DL
2422 (*) This form is used only in Emacs 20.7 and older versions,
2423 but newer versions can safely decode it.
cfb43547 2424 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2425 and older versions can't decode it.
ec6d2bb8 2426
cfb43547 2427 Here's a list of example usages of these composition escape
b73bfc1c 2428 sequences (categorized by `enum composition_method').
ec6d2bb8 2429
b73bfc1c 2430 COMPOSITION_RELATIVE:
ec6d2bb8 2431 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2432 COMPOSITION_WITH_RULE:
ec6d2bb8 2433 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2434 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2435 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2436 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2437 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2438
/* Table of 256 ISO code classes.  NOTE(review): the code that fills
   and reads this table is outside this part of the file; presumably
   it is indexed by a byte value -- confirm.  */
enum iso_code_class_type iso_code_class[256];
2440
df7492f9
KH
/* Return nonzero if the charset whose ID is ID is safe for the coding
   system CODING, i.e. ID is in the range 0..CODING->max_charset_id
   and the ID'th byte of CODING->safe_charsets does not have its high
   bit set.  (setup_iso_safe_charsets fills that string with 255 and
   then stores a small register number for each usable charset.)

   A negative ID is never safe.  The byte is tested with a mask
   rather than with `>= 0' because `safe_charsets' has type `char *'
   and the signedness of plain `char' differs between platforms.

   ID is evaluated more than once.  */

#define SAFE_CHARSET_P(coding, id)                      \
  ((id) >= 0                                            \
   && (id) <= (coding)->max_charset_id                  \
   && ((coding)->safe_charsets[id] & 0x80) == 0)
2444
2445
/* Nonzero if CODING_ISO_INITIAL, applied to the coding system of
   coding category CATEGORY with second argument 1, yields a
   non-negative value.  NOTE(review): presumably this means that some
   charset is initially designated to graphic register G1, so a
   shift-out is meaningful -- confirm against the definition of
   CODING_ISO_INITIAL.  */

#define SHIFT_OUT_OK(category) \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2448
2449static void
f0064e1f
DL
2450setup_iso_safe_charsets (attrs)
2451 Lisp_Object attrs;
df7492f9
KH
2452{
2453 Lisp_Object charset_list, safe_charsets;
2454 Lisp_Object request;
2455 Lisp_Object reg_usage;
2456 Lisp_Object tail;
2457 int reg94, reg96;
2458 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2459 int max_charset_id;
2460
2461 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2462 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2463 && ! EQ (charset_list, Viso_2022_charset_list))
2464 {
2465 CODING_ATTR_CHARSET_LIST (attrs)
2466 = charset_list = Viso_2022_charset_list;
2467 ASET (attrs, coding_attr_safe_charsets, Qnil);
2468 }
2469
2470 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2471 return;
2472
2473 max_charset_id = 0;
2474 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2475 {
2476 int id = XINT (XCAR (tail));
2477 if (max_charset_id < id)
2478 max_charset_id = id;
2479 }
d46c5b12 2480
df7492f9
KH
2481 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2482 make_number (255));
2483 request = AREF (attrs, coding_attr_iso_request);
2484 reg_usage = AREF (attrs, coding_attr_iso_usage);
2485 reg94 = XINT (XCAR (reg_usage));
2486 reg96 = XINT (XCDR (reg_usage));
2487
2488 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2489 {
2490 Lisp_Object id;
2491 Lisp_Object reg;
2492 struct charset *charset;
2493
2494 id = XCAR (tail);
2495 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2496 reg = Fcdr (Fassq (id, request));
df7492f9 2497 if (! NILP (reg))
8f924df7 2498 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2499 else if (charset->iso_chars_96)
2500 {
2501 if (reg96 < 4)
8f924df7 2502 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2503 }
2504 else
2505 {
2506 if (reg94 < 4)
8f924df7 2507 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2508 }
2509 }
2510 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2511}
d46c5b12 2512
b6871cc7 2513
4ed46869 2514/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2515 Check if a text is encoded in one of ISO-2022 based codig systems.
2516 If it is, return 1, else return 0. */
4ed46869 2517
0a28aafb 2518static int
ff0dacd7 2519detect_coding_iso_2022 (coding, detect_info)
df7492f9 2520 struct coding_system *coding;
ff0dacd7 2521 struct coding_detection_info *detect_info;
4ed46869 2522{
8f924df7
KH
2523 const unsigned char *src = coding->source, *src_base = src;
2524 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2525 int multibytep = coding->src_multibyte;
ff0dacd7 2526 int single_shifting = 0;
df7492f9
KH
2527 int id;
2528 int c, c1;
2529 int consumed_chars = 0;
2530 int i;
ff0dacd7
KH
2531 int rejected = 0;
2532 int found = 0;
2533
2534 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2535
2536 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2537 {
2538 struct coding_system *this = &(coding_categories[i]);
2539 Lisp_Object attrs, val;
2540
2541 attrs = CODING_ID_ATTRS (this->id);
2542 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2543 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2544 setup_iso_safe_charsets (attrs);
2545 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2546 this->max_charset_id = SCHARS (val) - 1;
2547 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2548 }
2549
2550 /* A coding system of this category is always ASCII compatible. */
2551 src += coding->head_ascii;
3f003981 2552
ff0dacd7 2553 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2554 {
065e3595 2555 src_base = src;
df7492f9 2556 ONE_MORE_BYTE (c);
4ed46869
KH
2557 switch (c)
2558 {
2559 case ISO_CODE_ESC:
74383408
KH
2560 if (inhibit_iso_escape_detection)
2561 break;
f46869e4 2562 single_shifting = 0;
df7492f9 2563 ONE_MORE_BYTE (c);
d46c5b12 2564 if (c >= '(' && c <= '/')
4ed46869 2565 {
bf9cdd4e 2566 /* Designation sequence for a charset of dimension 1. */
df7492f9 2567 ONE_MORE_BYTE (c1);
d46c5b12 2568 if (c1 < ' ' || c1 >= 0x80
df7492f9 2569 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2570 /* Invalid designation sequence. Just ignore. */
2571 break;
bf9cdd4e
KH
2572 }
2573 else if (c == '$')
2574 {
2575 /* Designation sequence for a charset of dimension 2. */
df7492f9 2576 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2577 if (c >= '@' && c <= 'B')
2578 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2579 id = iso_charset_table[1][0][c];
bf9cdd4e 2580 else if (c >= '(' && c <= '/')
bcf26d6a 2581 {
df7492f9 2582 ONE_MORE_BYTE (c1);
d46c5b12 2583 if (c1 < ' ' || c1 >= 0x80
df7492f9 2584 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2585 /* Invalid designation sequence. Just ignore. */
2586 break;
bcf26d6a 2587 }
bf9cdd4e 2588 else
ff0dacd7 2589 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2590 break;
2591 }
ae9ff118 2592 else if (c == 'N' || c == 'O')
d46c5b12 2593 {
ae9ff118 2594 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2595 single_shifting = 1;
2596 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2597 break;
4ed46869 2598 }
ec6d2bb8
KH
2599 else if (c >= '0' && c <= '4')
2600 {
2601 /* ESC <Fp> for start/end composition. */
ff0dacd7 2602 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2603 break;
2604 }
bf9cdd4e 2605 else
df7492f9 2606 {
ff0dacd7 2607 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2608 break;
2609 }
d46c5b12
KH
2610
2611 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2612 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2613 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2614 id))
ff0dacd7 2615 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2616 else
ff0dacd7 2617 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2618 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2619 id))
ff0dacd7 2620 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2621 else
ff0dacd7 2622 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2623 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2624 id))
ff0dacd7 2625 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2626 else
ff0dacd7 2627 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2628 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2629 id))
ff0dacd7 2630 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2631 else
ff0dacd7 2632 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2633 break;
2634
4ed46869 2635 case ISO_CODE_SO:
d46c5b12 2636 case ISO_CODE_SI:
ff0dacd7 2637 /* Locking shift out/in. */
74383408
KH
2638 if (inhibit_iso_escape_detection)
2639 break;
f46869e4 2640 single_shifting = 0;
ff0dacd7
KH
2641 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2642 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2643 break;
2644
4ed46869 2645 case ISO_CODE_CSI:
ff0dacd7 2646 /* Control sequence introducer. */
f46869e4 2647 single_shifting = 0;
ff0dacd7
KH
2648 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2649 found |= CATEGORY_MASK_ISO_8_ELSE;
2650 goto check_extra_latin;
2651
4ed46869
KH
2652 case ISO_CODE_SS2:
2653 case ISO_CODE_SS3:
ff0dacd7
KH
2654 /* Single shift. */
2655 if (inhibit_iso_escape_detection)
2656 break;
75e2a253 2657 single_shifting = 0;
ff0dacd7
KH
2658 rejected |= CATEGORY_MASK_ISO_7BIT;
2659 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2660 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 2661 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
2662 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2663 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
2664 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2665 if (single_shifting)
2666 break;
ff0dacd7 2667 goto check_extra_latin;
4ed46869
KH
2668
2669 default:
065e3595
KH
2670 if (c < 0)
2671 continue;
4ed46869 2672 if (c < 0x80)
f46869e4
KH
2673 {
2674 single_shifting = 0;
2675 break;
2676 }
ff0dacd7 2677 if (c >= 0xA0)
c4825358 2678 {
ff0dacd7
KH
2679 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2680 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2681 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2682 0xA0..0FF. If the byte length is even, we include
2683 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2684 only when we are not single shifting. */
2685 if (! single_shifting
2686 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2687 {
e17de821 2688 int i = 1;
b73bfc1c
KH
2689 while (src < src_end)
2690 {
df7492f9 2691 ONE_MORE_BYTE (c);
b73bfc1c
KH
2692 if (c < 0xA0)
2693 break;
2694 i++;
2695 }
2696
2697 if (i & 1 && src < src_end)
ff0dacd7 2698 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2699 else
ff0dacd7 2700 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2701 }
ff0dacd7 2702 break;
4ed46869 2703 }
ff0dacd7
KH
2704 check_extra_latin:
2705 single_shifting = 0;
2706 if (! VECTORP (Vlatin_extra_code_table)
2707 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2708 {
2709 rejected = CATEGORY_MASK_ISO;
2710 break;
2711 }
2712 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2713 & CODING_ISO_FLAG_LATIN_EXTRA)
2714 found |= CATEGORY_MASK_ISO_8_1;
2715 else
2716 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 2717 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2718 }
2719 }
ff0dacd7
KH
2720 detect_info->rejected |= CATEGORY_MASK_ISO;
2721 return 0;
4ed46869 2722
df7492f9 2723 no_more_source:
ff0dacd7
KH
2724 detect_info->rejected |= rejected;
2725 detect_info->found |= (found & ~rejected);
df7492f9 2726 return 1;
4ed46869 2727}
ec6d2bb8 2728
4ed46869 2729
134b9549
KH
/* Record in CODING that the charset selected by DIM, CHARS_96 and the
   final byte FINAL is now designated to graphic register REG.

   CHARS_96 must be an lvalue: it is overwritten with -1 when the
   caller should keep the escape sequence.  That happens when the
   designation is rejected (FINAL is outside '0'..127, the charset
   table lookup gives a negative id, or the charset fails
   SAFE_CHARSET_P for CODING), in which case REG is marked with -2,
   and also when ASCII is designated to a register that carried that
   -2 mark.

   The expansion refers to a variable named `coding'.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int new_id = -1, old_id;                                            \
    int acceptable                                                      \
      = ((final) >= '0' && (final) < 128                                \
         && (new_id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0    \
         && SAFE_CHARSET_P (coding, new_id));                           \
                                                                        \
    if (! acceptable)                                                   \
      {                                                                 \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
        break;                                                          \
      }                                                                 \
    old_id = CODING_ISO_DESIGNATION (coding, reg);                      \
    /* Apply the substitutions requested by the coding system's         \
       flags.  */                                                       \
    if (new_id == charset_jisx0201_roman)                               \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
          new_id = charset_ascii;                                       \
      }                                                                 \
    else if (new_id == charset_jisx0208_1978)                           \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
          new_id = charset_jisx0208;                                    \
      }                                                                 \
    CODING_ISO_DESIGNATION (coding, reg) = new_id;                      \
    /* An ASCII designation that follows an invalid designation to      \
       the same register must be kept as is.  */                        \
    if (old_id == -2 && new_id == charset_ascii)                        \
      chars_96 = -1;                                                    \
  } while (0)
2762
d46c5b12 2763
df7492f9
KH
/* If a composition is in progress, abandon it: reset
   `composition_state' to COMPOSING_NO and flush what was collected in
   `components' to CHARBUF, advancing `char_offset'.  For the relative
   and altchars methods every entry is flushed; for the other methods
   only every second entry is.  Jumps to `no_more_source' when
   CHARBUF cannot hold `component_idx' more elements.  Does nothing
   when no composition is in progress.  */
#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    if (composition_state != COMPOSING_NO)                              \
      {                                                                 \
        int idx, step;                                                  \
                                                                        \
        if (charbuf + component_idx > charbuf_end)                      \
          goto no_more_source;                                          \
        composition_state = COMPOSING_NO;                               \
        step = ((method == COMPOSITION_RELATIVE                         \
                 || method == COMPOSITION_WITH_ALTCHARS) ? 1 : 2);      \
        for (idx = 0; idx < component_idx; idx += step)                 \
          *charbuf++ = components[idx];                                 \
        char_offset += (step == 1                                       \
                        ? component_idx                                 \
                        : (component_idx / 2) + 1);                     \
      }                                                                 \
  } while (0)
2788
d46c5b12 2789
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC.  An ESC 0 seen while
   `composition_state' is COMPOSING_COMPONENT_RULE only marks the end
   of the component list.  Otherwise any composition in progress is
   finished and a new one is started, provided the terminating ESC 1
   is present in the remaining source.  If it is not, control jumps
   to `invalid_code' (last block) or `no_more_source'.

   NOTE(review): for ESC 3 the state is COMPOSING_COMPONENT_CHAR, so
   the first branch is never taken for altchar compositions; confirm
   whether that is intended.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if ((c1) == '0'                                                     \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        /* Look ahead for the two-byte terminator ESC 1.  */            \
        for (p = src; src_end - p > 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* Fewer than two bytes left at P means no terminator was       \
           found.  This also covers SRC == SRC_END, where the loop      \
           above does not run at all.  */                               \
        if (src_end - p <= 1)                                           \
          {                                                             \
            if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
              goto invalid_code;                                        \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = ((c1) == '0' ? COMPOSITION_RELATIVE                    \
                  : (c1) == '2' ? COMPOSITION_WITH_RULE                 \
                  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS             \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = ((c1) <= '2' ? COMPOSING_CHAR               \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
2832
ec6d2bb8 2833
df7492f9
KH
/* Handle composition end sequence ESC 1.

   Append a composition annotation for `method' to CHARBUF, followed
   by the contents of `components'.  For methods other than
   COMPOSITION_RELATIVE the component part is written first, and the
   first annotation element is then overwritten with the (negative)
   distance from the current CHARBUF position back to the annotation
   start.  Each composed character written advances `char_offset'.
   Finally CODING is marked as annotated and the composition state is
   reset to COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int nchars;                                                         \
    int i;                                                              \
    int *saved_charbuf = charbuf;                                       \
                                                                        \
    if (component_len > 0)                                              \
      nchars = component_idx - component_len;                           \
    else if (method == COMPOSITION_RELATIVE)                            \
      nchars = component_idx;                                           \
    else                                                                \
      nchars = (component_idx + 1) / 2;                                 \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int limit = (component_len == 0                                 \
                     ? component_idx : component_len);                  \
                                                                        \
        for (i = 0; i < limit; i++)                                     \
          *charbuf++ = components[i];                                   \
        *saved_charbuf = saved_charbuf - charbuf;                       \
      }                                                                 \
    /* With COMPOSITION_WITH_RULE only every second entry is copied     \
       here, starting from the first one.  */                           \
    i = (method == COMPOSITION_WITH_RULE ? 0 : component_len);          \
    while (i < component_idx)                                           \
      {                                                                 \
        *charbuf++ = components[i];                                     \
        char_offset++;                                                  \
        i += (method == COMPOSITION_WITH_RULE ? 2 : 1);                 \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
2864
df7492f9 2865
ec6d2bb8
KH
/* Decode a composition rule that starts with the byte C1 and leave
   the encoded rule in C1, which must be an lvalue.  After subtracting
   32, values below 81 are old-format single-byte rules, and values
   81..92 are new-format rules whose second byte is fetched with
   ONE_MORE_BYTE into the variable `c2' of the expansion context.
   Any other value yields 0.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) >= 93)                                                     \
      (c1) = 0;                                                         \
    else if ((c1) >= 81)        /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
                                                                        \
        if (gref == 4)                                                  \
          gref = 10;                                                    \
        if (nref == 4)                                                  \
          nref = 10;                                                    \
        (c1) = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
  } while (0)
88993dfd 2889
d46c5b12 2890
4ed46869
KH
2891/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2892
b73bfc1c 2893static void
df7492f9 2894decode_coding_iso_2022 (coding)
4ed46869 2895 struct coding_system *coding;
4ed46869 2896{
8f924df7
KH
2897 const unsigned char *src = coding->source + coding->consumed;
2898 const unsigned char *src_end = coding->source + coding->src_bytes;
2899 const unsigned char *src_base;
69a80ea3 2900 int *charbuf = coding->charbuf + coding->charbuf_used;
ff0dacd7 2901 int *charbuf_end
69a80ea3 2902 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2903 int consumed_chars = 0, consumed_chars_base;
df7492f9 2904 int multibytep = coding->src_multibyte;
4ed46869 2905 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
2906 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2907 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 2908 int charset_id_2, charset_id_3;
df7492f9
KH
2909 struct charset *charset;
2910 int c;
2911 /* For handling composition sequence. */
2912#define COMPOSING_NO 0
2913#define COMPOSING_CHAR 1
2914#define COMPOSING_RULE 2
2915#define COMPOSING_COMPONENT_CHAR 3
2916#define COMPOSING_COMPONENT_RULE 4
2917
2918 int composition_state = COMPOSING_NO;
2919 enum composition_method method;
2920 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2921 int component_idx;
2922 int component_len;
24a73b0a 2923 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2924 int char_offset = coding->produced_char;
2925 int last_offset = char_offset;
2926 int last_id = charset_ascii;
df7492f9 2927
24a73b0a 2928 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 2929 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2930
2931 while (1)
4ed46869 2932 {
463f5630 2933 int c1, c2;
b73bfc1c
KH
2934
2935 src_base = src;
df7492f9
KH
2936 consumed_chars_base = consumed_chars;
2937
2938 if (charbuf >= charbuf_end)
2939 break;
2940
b73bfc1c 2941 ONE_MORE_BYTE (c1);
065e3595
KH
2942 if (c1 < 0)
2943 goto invalid_code;
4ed46869 2944
98725083 2945 /* We produce at most one character. */
4ed46869
KH
2946 switch (iso_code_class [c1])
2947 {
2948 case ISO_0x20_or_0x7F:
df7492f9 2949 if (composition_state != COMPOSING_NO)
ec6d2bb8 2950 {
df7492f9
KH
2951 if (composition_state == COMPOSING_RULE
2952 || composition_state == COMPOSING_COMPONENT_RULE)
2953 {
2954 DECODE_COMPOSITION_RULE (c1);
2955 components[component_idx++] = c1;
2956 composition_state--;
2957 continue;
2958 }
4ed46869 2959 }
df7492f9
KH
2960 if (charset_id_0 < 0
2961 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2962 /* This is SPACE or DEL. */
2963 charset = CHARSET_FROM_ID (charset_ascii);
2964 else
2965 charset = CHARSET_FROM_ID (charset_id_0);
2966 break;
4ed46869
KH
2967
2968 case ISO_graphic_plane_0:
781d7a48 2969 if (composition_state != COMPOSING_NO)
b73bfc1c 2970 {
781d7a48
KH
2971 if (composition_state == COMPOSING_RULE
2972 || composition_state == COMPOSING_COMPONENT_RULE)
2973 {
2974 DECODE_COMPOSITION_RULE (c1);
2975 components[component_idx++] = c1;
2976 composition_state--;
2977 continue;
2978 }
b73bfc1c 2979 }
134b9549
KH
2980 if (charset_id_0 < 0)
2981 charset = CHARSET_FROM_ID (charset_ascii);
2982 else
2983 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2984 break;
2985
2986 case ISO_0xA0_or_0xFF:
df7492f9
KH
2987 if (charset_id_1 < 0
2988 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2989 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2990 goto invalid_code;
4ed46869
KH
2991 /* This is a graphic character, we fall down ... */
2992
2993 case ISO_graphic_plane_1:
df7492f9
KH
2994 if (charset_id_1 < 0)
2995 goto invalid_code;
2996 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2997 break;
2998
df7492f9
KH
2999 case ISO_control_0:
3000 MAYBE_FINISH_COMPOSITION ();
3001 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3002 break;
3003
df7492f9
KH
3004 case ISO_control_1:
3005 MAYBE_FINISH_COMPOSITION ();
3006 goto invalid_code;
3007
4ed46869 3008 case ISO_shift_out:
df7492f9
KH
3009 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3010 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3011 goto invalid_code;
3012 CODING_ISO_INVOCATION (coding, 0) = 1;
3013 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3014 continue;
4ed46869
KH
3015
3016 case ISO_shift_in:
df7492f9
KH
3017 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3018 goto invalid_code;
3019 CODING_ISO_INVOCATION (coding, 0) = 0;
3020 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3021 continue;
4ed46869
KH
3022
3023 case ISO_single_shift_2_7:
3024 case ISO_single_shift_2:
df7492f9
KH
3025 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3026 goto invalid_code;
4ed46869
KH
3027 /* SS2 is handled as an escape sequence of ESC 'N' */
3028 c1 = 'N';
3029 goto label_escape_sequence;
3030
3031 case ISO_single_shift_3:
df7492f9
KH
3032 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3033 goto invalid_code;
4ed46869
KH
3034 /* SS2 is handled as an escape sequence of ESC 'O' */
3035 c1 = 'O';
3036 goto label_escape_sequence;
3037
3038 case ISO_control_sequence_introducer:
3039 /* CSI is handled as an escape sequence of ESC '[' ... */
3040 c1 = '[';
3041 goto label_escape_sequence;
3042
3043 case ISO_escape:
3044 ONE_MORE_BYTE (c1);
3045 label_escape_sequence:
df7492f9 3046 /* Escape sequences handled here are invocation,
4ed46869
KH
3047 designation, direction specification, and character
3048 composition specification. */
3049 switch (c1)
3050 {
3051 case '&': /* revision of following character set */
3052 ONE_MORE_BYTE (c1);
3053 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3054 goto invalid_code;
4ed46869
KH
3055 ONE_MORE_BYTE (c1);
3056 if (c1 != ISO_CODE_ESC)
df7492f9 3057 goto invalid_code;
4ed46869
KH
3058 ONE_MORE_BYTE (c1);
3059 goto label_escape_sequence;
3060
3061 case '$': /* designation of 2-byte character set */
df7492f9
KH
3062 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3063 goto invalid_code;
134b9549
KH
3064 {
3065 int reg, chars96;
3066
3067 ONE_MORE_BYTE (c1);
3068 if (c1 >= '@' && c1 <= 'B')
3069 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3070 or JISX0208.1980 */
134b9549
KH
3071 reg = 0, chars96 = 0;
3072 }
3073 else if (c1 >= 0x28 && c1 <= 0x2B)
3074 { /* designation of DIMENSION2_CHARS94 character set */
3075 reg = c1 - 0x28, chars96 = 0;
3076 ONE_MORE_BYTE (c1);
3077 }
3078 else if (c1 >= 0x2C && c1 <= 0x2F)
3079 { /* designation of DIMENSION2_CHARS96 character set */
3080 reg = c1 - 0x2C, chars96 = 1;
3081 ONE_MORE_BYTE (c1);
3082 }
3083 else
3084 goto invalid_code;
3085 DECODE_DESIGNATION (reg, 2, chars96, c1);
3086 /* We must update these variables now. */
3087 if (reg == 0)
3088 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3089 else if (reg == 1)
3090 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3091 if (chars96 < 0)
3092 goto invalid_code;
3093 }
b73bfc1c 3094 continue;
4ed46869
KH
3095
3096 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3097 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3098 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3099 goto invalid_code;
3100 CODING_ISO_INVOCATION (coding, 0) = 2;
3101 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3102 continue;
4ed46869
KH
3103
3104 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3105 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3106 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3107 goto invalid_code;
3108 CODING_ISO_INVOCATION (coding, 0) = 3;
3109 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3110 continue;
4ed46869
KH
3111
3112 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3113 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3114 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3115 goto invalid_code;
134b9549
KH
3116 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3117 if (charset_id_2 < 0)
3118 charset = CHARSET_FROM_ID (charset_ascii);
3119 else
3120 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3121 ONE_MORE_BYTE (c1);
e7046a18 3122 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3123 goto invalid_code;
4ed46869
KH
3124 break;
3125
3126 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3127 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3128 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3129 goto invalid_code;
134b9549
KH
3130 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3131 if (charset_id_3 < 0)
3132 charset = CHARSET_FROM_ID (charset_ascii);
3133 else
3134 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3135 ONE_MORE_BYTE (c1);
e7046a18 3136 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3137 goto invalid_code;
4ed46869
KH
3138 break;
3139
ec6d2bb8 3140 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3141 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3142 goto invalid_code;
ec6d2bb8 3143 DECODE_COMPOSITION_START (c1);
b73bfc1c 3144 continue;
4ed46869 3145
ec6d2bb8 3146 case '1': /* end composition */
df7492f9
KH
3147 if (composition_state == COMPOSING_NO)
3148 goto invalid_code;
3149 DECODE_COMPOSITION_END ();
b73bfc1c 3150 continue;
4ed46869
KH
3151
3152 case '[': /* specification of direction */
df7492f9
KH
3153 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3154 goto invalid_code;
4ed46869 3155 /* For the moment, nested direction is not supported.
d46c5b12 3156 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3157 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3158 ONE_MORE_BYTE (c1);
3159 switch (c1)
3160 {
3161 case ']': /* end of the current direction */
d46c5b12 3162 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3163
3164 case '0': /* end of the current direction */
3165 case '1': /* start of left-to-right direction */
3166 ONE_MORE_BYTE (c1);
3167 if (c1 == ']')
d46c5b12 3168 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3169 else
df7492f9 3170 goto invalid_code;
4ed46869
KH
3171 break;
3172
3173 case '2': /* start of right-to-left direction */
3174 ONE_MORE_BYTE (c1);
3175 if (c1 == ']')
d46c5b12 3176 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3177 else
df7492f9 3178 goto invalid_code;
4ed46869
KH
3179 break;
3180
3181 default:
df7492f9 3182 goto invalid_code;
4ed46869 3183 }
b73bfc1c 3184 continue;
4ed46869 3185
103e0180 3186 case '%':
103e0180
KH
3187 ONE_MORE_BYTE (c1);
3188 if (c1 == '/')
3189 {
3190 /* CTEXT extended segment:
3191 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3192 We keep these bytes as is for the moment.
3193 They may be decoded by post-read-conversion. */
3194 int dim, M, L;
4776e638 3195 int size;
8f924df7 3196
103e0180
KH
3197 ONE_MORE_BYTE (dim);
3198 ONE_MORE_BYTE (M);
3199 ONE_MORE_BYTE (L);
3200 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3201 if (charbuf + 8 + size > charbuf_end)
3202 goto break_loop;
3203 *charbuf++ = ISO_CODE_ESC;
3204 *charbuf++ = '%';
3205 *charbuf++ = '/';
3206 *charbuf++ = dim;
3207 *charbuf++ = BYTE8_TO_CHAR (M);
3208 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3209 while (size-- > 0)
3210 {
3211 ONE_MORE_BYTE (c1);
4776e638 3212 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3213 }
103e0180
KH
3214 }
3215 else if (c1 == 'G')
3216 {
103e0180
KH
3217 /* XFree86 extension for embedding UTF-8 in CTEXT:
3218 ESC % G --UTF-8-BYTES-- ESC % @
3219 We keep these bytes as is for the moment.
3220 They may be decoded by post-read-conversion. */
4776e638
KH
3221 int *p = charbuf;
3222
3223 if (p + 6 > charbuf_end)
3224 goto break_loop;
3225 *p++ = ISO_CODE_ESC;
3226 *p++ = '%';
3227 *p++ = 'G';
3228 while (p < charbuf_end)
103e0180
KH
3229 {
3230 ONE_MORE_BYTE (c1);
3231 if (c1 == ISO_CODE_ESC
3232 && src + 1 < src_end
3233 && src[0] == '%'
3234 && src[1] == '@')
3235 break;
4776e638 3236 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3237 }
4776e638
KH
3238 if (p + 3 > charbuf_end)
3239 goto break_loop;
3240 *p++ = ISO_CODE_ESC;
3241 *p++ = '%';
3242 *p++ = '@';
3243 charbuf = p;
103e0180
KH
3244 }
3245 else
4776e638 3246 goto invalid_code;
103e0180 3247 continue;
4776e638 3248 break;
103e0180 3249
4ed46869 3250 default:
df7492f9
KH
3251 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3252 goto invalid_code;
134b9549
KH
3253 {
3254 int reg, chars96;
3255
3256 if (c1 >= 0x28 && c1 <= 0x2B)
3257 { /* designation of DIMENSION1_CHARS94 character set */
3258 reg = c1 - 0x28, chars96 = 0;
3259 ONE_MORE_BYTE (c1);
3260 }
3261 else if (c1 >= 0x2C && c1 <= 0x2F)
3262 { /* designation of DIMENSION1_CHARS96 character set */
3263 reg = c1 - 0x2C, chars96 = 1;
3264 ONE_MORE_BYTE (c1);
3265 }
3266 else
3267 goto invalid_code;
3268 DECODE_DESIGNATION (reg, 1, chars96, c1);
3269 /* We must update these variables now. */
3270 if (reg == 0)
3271 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3272 else if (reg == 1)
3273 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3274 if (chars96 < 0)
3275 goto invalid_code;
3276 }
b73bfc1c 3277 continue;
4ed46869 3278 }
b73bfc1c 3279 }
4ed46869 3280
ff0dacd7
KH
3281 if (charset->id != charset_ascii
3282 && last_id != charset->id)
3283 {
3284 if (last_id != charset_ascii)
69a80ea3 3285 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3286 last_id = charset->id;
3287 last_offset = char_offset;
3288 }
3289
b73bfc1c 3290 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3291 Produce a decoded character while getting 2nd position code
3292 C2 if necessary. */
3293 c1 &= 0x7F;
3294 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3295 {
3296 ONE_MORE_BYTE (c2);
df7492f9 3297 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3298 /* C2 is not in a valid range. */
df7492f9
KH
3299 goto invalid_code;
3300 c1 = (c1 << 8) | (c2 & 0x7F);
3301 if (CHARSET_DIMENSION (charset) > 2)
3302 {
3303 ONE_MORE_BYTE (c2);
3304 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3305 /* C2 is not in a valid range. */
3306 goto invalid_code;
3307 c1 = (c1 << 8) | (c2 & 0x7F);
3308 }
3309 }
3310
3311 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3312 if (c < 0)
3313 {
3314 MAYBE_FINISH_COMPOSITION ();
3315 for (; src_base < src; src_base++, char_offset++)
3316 {
3317 if (ASCII_BYTE_P (*src_base))
3318 *charbuf++ = *src_base;
3319 else
3320 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3321 }
3322 }
3323 else if (composition_state == COMPOSING_NO)
3324 {
3325 *charbuf++ = c;
3326 char_offset++;
4ed46869 3327 }
df7492f9 3328 else
781d7a48
KH
3329 {
3330 components[component_idx++] = c;
3331 if (method == COMPOSITION_WITH_RULE
3332 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3333 && composition_state == COMPOSING_COMPONENT_CHAR))
3334 composition_state++;
4ed46869
KH
3335 }
3336 continue;
3337
df7492f9
KH
3338 invalid_code:
3339 MAYBE_FINISH_COMPOSITION ();
4ed46869 3340 src = src_base;
df7492f9
KH
3341 consumed_chars = consumed_chars_base;
3342 ONE_MORE_BYTE (c);
065e3595 3343 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3344 char_offset++;
df7492f9 3345 coding->errors++;
4776e638
KH
3346 continue;
3347
3348 break_loop:
3349 break;
4ed46869 3350 }
fb88bf2d 3351
df7492f9 3352 no_more_source:
ff0dacd7 3353 if (last_id != charset_ascii)
69a80ea3 3354 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3355 coding->consumed_char += consumed_chars_base;
3356 coding->consumed = src_base - coding->source;
3357 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3358}
3359
b73bfc1c 3360
f4dee582 3361/* ISO2022 encoding stuff. */
4ed46869
KH
3362
3363/*
f4dee582 3364 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3365 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3366 variant has the following specifications:
df7492f9 3367 1. Initial designation to G0 thru G3.
4ed46869
KH
3368 2. Allows short-form designation?
3369 3. ASCII should be designated to G0 before control characters?
3370 4. ASCII should be designated to G0 at end of line?
3371 5. 7-bit environment or 8-bit environment?
3372 6. Use locking-shift?
3373 7. Use Single-shift?
3374 And the following two are only for Japanese:
3375 8. Use ASCII in place of JIS0201-1976-Roman?
3376 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3377 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3378 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3379 details.
4ed46869
KH
3380*/
3381
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.

   The sequence is: an optional revision prefix (ESC & <'@' + rev>)
   when CODING has CODING_ISO_FLAG_REVISION and CHARSET has a
   non-negative revision; ESC; `$' for charsets whose dimension is
   not 1; an intermediate byte selected by REG from "()*+" (94-char
   sets) or ",-./" (96-char sets); and the final byte of CHARSET.
   For a multi-byte 94-char set designated to register 0 whose final
   byte is '@', 'A', or 'B', the intermediate byte is omitted (short
   form) unless CODING has CODING_ISO_FLAG_LONG_FORM.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    const char *intermediates                                           \
      = (CHARSET_ISO_CHARS_96 (charset) ? ",-./" : "()*+");             \
    int revision = -1;                                                  \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      revision = CHARSET_ISO_REVISION (charset);                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      EMIT_ONE_ASCII_BYTE (intermediates[reg]);                         \
    else                                                                \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        /* Only a 94-char set may use the short form.  */               \
        if (CHARSET_ISO_CHARS_96 (charset)                              \
            || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM)  \
            || reg != 0                                                 \
            || final_char < '@' || final_char > 'B')                    \
          EMIT_ONE_ASCII_BYTE (intermediates[reg]);                     \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3429
df7492f9 3430
4ed46869
KH
/* Emit the ISO2022 single-shift-2 function and mark CODING as single
   shifting.  The control character SS2 is used unless CODING has
   CODING_ISO_FLAG_SEVEN_BITS, in which case the escape sequence
   ESC 'N' is used instead.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3443
df7492f9
KH
3444
/* Emit the ISO2022 single-shift-3 function and mark CODING as single
   shifting.  The control character SS3 is used unless CODING has
   CODING_ISO_FLAG_SEVEN_BITS, in which case the escape sequence
   ESC 'O' is used instead.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3453
df7492f9 3454
4ed46869
KH
/* Locking shift "shift-in": emit the control character SI and record
   in CODING that graphic register 0 is now invoked to graphic
   plane 0.  */

#define ENCODE_SHIFT_IN                                                 \
  do                                                                    \
    {                                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                                \
      CODING_ISO_INVOCATION (coding, 0) = 0;                            \
    }                                                                   \
  while (0)
3464
df7492f9
KH
3465
/* Locking shift "shift-out": emit the control character SO and record
   in CODING that graphic register 1 is now invoked to graphic
   plane 0.  */
#define ENCODE_SHIFT_OUT                                                \
  do                                                                    \
    {                                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                                \
      CODING_ISO_INVOCATION (coding, 0) = 1;                            \
    }                                                                   \
  while (0)
3471
df7492f9
KH
3472
/* Locking-shift-2: emit ESC 'n' and record in CODING that graphic
   register 2 is now invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                                          \
  do                                                                    \
    {                                                                   \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');                         \
      CODING_ISO_INVOCATION (coding, 0) = 2;                            \
    }                                                                   \
  while (0)
3478
df7492f9
KH
3479
/* Locking-shift-3: emit ESC 'o' and record in CODING that graphic
   register 3 is now invoked to graphic plane 0.  The final byte must
   be 'o'; ESC 'n' is locking-shift-2 and would make a decoder invoke
   graphic register 2 instead.  */
#define ENCODE_LOCKING_SHIFT_3                                          \
  do {                                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');                           \
    CODING_ISO_INVOCATION (coding, 0) = 3;                              \
  } while (0)
3485
df7492f9 3486
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has CODING_ISO_FLAG_USE_ROMAN
   and CHARSET is ASCII, it is replaced by the JISX0201-Roman charset.
   The expansion refers to the variables `coding', `dst' and
   `produced_chars'.

   The body loops: if CHARSET is neither single-shifted nor invoked to
   graphic plane 0 or 1, encode_invocation_designation is called and
   the tests are repeated.  C1 is parenthesized wherever it is used so
   that an expression argument is masked as a whole.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3529
df7492f9 3530
f4dee582
RS
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS and CHARSET is JISX0208, it is replaced
   by the JISX0208.1978 charset.  The expansion refers to the
   variables `coding', `dst' and `produced_chars'.

   Each pass of the loop ends as soon as the two bytes have been
   emitted; otherwise encode_invocation_designation is called and the
   tests are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
        && id == charset_jisx0208)                                      \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* A pending single shift applies to this character only.  */       \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    /* CHARSET is invoked to graphic plane 0.  */                       \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    /* CHARSET is invoked to graphic plane 1.  */                       \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* Since CHARSET is not yet invoked to any graphic planes, we       \
       must invoke it, or, at first, designate it to some graphic       \
       register.  Then repeat the loop to actually produce the          \
       character.  */                                                   \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3573
05e6f5dc 3574
df7492f9
KH
/* Encode character C of CHARSET: obtain its code with ENCODE_CHAR and
   hand it to the DIMENSION1 or DIMENSION2 producer.  For a charset
   whose dimension is not 1 the code is split into its upper part
   (CODE >> 8) and its low byte.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    int code = ENCODE_CHAR ((charset), (c));                            \
                                                                        \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8,            \
                                       code & 0xFF);                    \
    else                                                                \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                \
  } while (0)
bdd9fb48 3584
05e6f5dc 3585
4ed46869 3586/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3587 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3588 Return new DST. */
3589
3590unsigned char *
df7492f9
KH
3591encode_invocation_designation (charset, coding, dst, p_nchars)
3592 struct charset *charset;
4ed46869
KH
3593 struct coding_system *coding;
3594 unsigned char *dst;
df7492f9 3595 int *p_nchars;
4ed46869 3596{
df7492f9
KH
3597 int multibytep = coding->dst_multibyte;
3598 int produced_chars = *p_nchars;
4ed46869 3599 int reg; /* graphic register number */
df7492f9 3600 int id = CHARSET_ID (charset);
4ed46869
KH
3601
3602 /* At first, check designations. */
3603 for (reg = 0; reg < 4; reg++)
df7492f9 3604 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3605 break;
3606
3607 if (reg >= 4)
3608 {
3609 /* CHARSET is not yet designated to any graphic registers. */
3610 /* At first check the requested designation. */
df7492f9
KH
3611 reg = CODING_ISO_REQUEST (coding, id);
3612 if (reg < 0)
1ba9e4ab
KH
3613 /* Since CHARSET requests no special designation, designate it
3614 to graphic register 0. */
4ed46869
KH
3615 reg = 0;
3616
3617 ENCODE_DESIGNATION (charset, reg, coding);
3618 }
3619
df7492f9
KH
3620 if (CODING_ISO_INVOCATION (coding, 0) != reg
3621 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3622 {
3623 /* Since the graphic register REG is not invoked to any graphic
3624 planes, invoke it to graphic plane 0. */
3625 switch (reg)
3626 {
3627 case 0: /* graphic register 0 */
3628 ENCODE_SHIFT_IN;
3629 break;
3630
3631 case 1: /* graphic register 1 */
3632 ENCODE_SHIFT_OUT;
3633 break;
3634
3635 case 2: /* graphic register 2 */
df7492f9 3636 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3637 ENCODE_SINGLE_SHIFT_2;
3638 else
3639 ENCODE_LOCKING_SHIFT_2;
3640 break;
3641
3642 case 3: /* graphic register 3 */
df7492f9 3643 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3644 ENCODE_SINGLE_SHIFT_3;
3645 else
3646 ENCODE_LOCKING_SHIFT_3;
3647 break;
3648 }
3649 }
b73bfc1c 3650
df7492f9 3651 *p_nchars = produced_chars;
4ed46869
KH
3652 return dst;
3653}
3654
/* The following three macros produce codes for indicating direction
   of text.  */

/* Emit a Control Sequence Introducer: the two ASCII bytes ESC '['
   when the coding system has CODING_ISO_FLAG_SEVEN_BITS set,
   otherwise the single byte ISO_CODE_CSI.  The flag is tested with
   `&', as every other CODING_ISO_FLAGS test in this file does; an
   equality test would only match when no other flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)		\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)
3664
ec6d2bb8 3665
/* Emit the control sequence marking right-to-left text: a Control
   Sequence Introducer followed by "2]".
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   must be written without an argument list; a trailing `(dst)' would
   be left after its `while (0)' and fail to compile.  */
#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)
3671
ec6d2bb8 3672
/* Emit the control sequence marking left-to-right text: a Control
   Sequence Introducer followed by "0]".
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   must be written without an argument list; a trailing `(dst)' would
   be left after its `while (0)' and fail to compile.  */
#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
4ed46869 3678
4ed46869
KH
3679
/* Produce the shift and designation codes that bring the graphic
   planes and registers back to their initial state: shift in if
   graphic plane 0 does not currently invoke register 0, then
   re-designate every register that has an initial charset
   (CODING_ISO_INITIAL >= 0) different from its current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()				\
  do {									\
    int reg;								\
									\
    if (CODING_ISO_INVOCATION (coding, 0) != 0)				\
      ENCODE_SHIFT_IN;							\
    for (reg = 0; reg < 4; reg++)					\
      {									\
	struct charset *charset;					\
									\
	if (CODING_ISO_INITIAL (coding, reg) < 0			\
	    || (CODING_ISO_DESIGNATION (coding, reg)			\
		== CODING_ISO_INITIAL (coding, reg)))			\
	  continue;							\
	charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));	\
	ENCODE_DESIGNATION (charset, reg, coding);			\
      }									\
  } while (0)
3698
df7492f9 3699
bdd9fb48 3700/* Produce designation sequences of charsets in the line started from
b73bfc1c 3701 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3702
3703 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3704 find all the necessary designations. */
3705
b73bfc1c 3706static unsigned char *
df7492f9 3707encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3708 struct coding_system *coding;
df7492f9
KH
3709 int *charbuf, *charbuf_end;
3710 unsigned char *dst;
e0e989f6 3711{
df7492f9 3712 struct charset *charset;
bdd9fb48
KH
3713 /* Table of charsets to be designated to each graphic register. */
3714 int r[4];
df7492f9
KH
3715 int c, found = 0, reg;
3716 int produced_chars = 0;
3717 int multibytep = coding->dst_multibyte;
3718 Lisp_Object attrs;
3719 Lisp_Object charset_list;
3720
3721 attrs = CODING_ID_ATTRS (coding->id);
3722 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3723 if (EQ (charset_list, Qiso_2022))
3724 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3725
3726 for (reg = 0; reg < 4; reg++)
3727 r[reg] = -1;
3728
b73bfc1c 3729 while (found < 4)
e0e989f6 3730 {
df7492f9
KH
3731 int id;
3732
3733 c = *charbuf++;
b73bfc1c
KH
3734 if (c == '\n')
3735 break;
df7492f9
KH
3736 charset = char_charset (c, charset_list, NULL);
3737 id = CHARSET_ID (charset);
3738 reg = CODING_ISO_REQUEST (coding, id);
3739 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3740 {
3741 found++;
df7492f9 3742 r[reg] = id;
bdd9fb48 3743 }
bdd9fb48
KH
3744 }
3745
3746 if (found)
3747 {
3748 for (reg = 0; reg < 4; reg++)
3749 if (r[reg] >= 0
df7492f9
KH
3750 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3751 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3752 }
b73bfc1c
KH
3753
3754 return dst;
e0e989f6
KH
3755}
3756
4ed46869
KH
3757/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

   Encode the entries of CODING->charbuf (CODING->charbuf_used of
   them) in ISO-2022 form, appending the bytes at CODING->destination
   + CODING->produced.  A negative entry introduces an annotation
   rather than a character.  On return CODING->produced,
   CODING->produced_char and the beginning-of-line state
   (CODING_ISO_BOL) are updated, CODING_RESULT_SUCCESS is recorded,
   and 0 is returned.  */
3758
df7492f9
KH
3759static int
3760encode_coding_iso_2022 (coding)
4ed46869 3761 struct coding_system *coding;
4ed46869 3762{
df7492f9
KH
3763 int multibytep = coding->dst_multibyte;
3764 int *charbuf = coding->charbuf;
3765 int *charbuf_end = charbuf + coding->charbuf_used;
3766 unsigned char *dst = coding->destination + coding->produced;
3767 unsigned char *dst_end = coding->destination + coding->dst_bytes;
/* Room requested from ASSURE_DESTINATION before each step of the loop. */
3768 int safe_room = 16;
/* Nonzero when designation sequences must be produced before the next
   character: the coding system designates at beginning of line and
   CODING_ISO_BOL says we are at one. */
3769 int bol_designation
3770 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3771 && CODING_ISO_BOL (coding));
3772 int produced_chars = 0;
3773 Lisp_Object attrs, eol_type, charset_list;
3774 int ascii_compatible;
b73bfc1c 3775 int c;
/* Charset ID taken from the most recent charset annotation, or -1 when
   none is in effect. */
ff0dacd7 3776 int preferred_charset_id = -1;
05e6f5dc 3777
24a73b0a
KH
3778 CODING_GET_INFO (coding, attrs, charset_list);
3779 eol_type = CODING_ID_EOL_TYPE (coding->id);
/* NOTE(review): a vector EOL type presumably means the convention is
   still undecided; it is treated as Unix here -- confirm. */
3780 if (VECTORP (eol_type))
3781 eol_type = Qunix;
3782
004068e4 3783 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3784 /* Charset list may have been changed. */
3785 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
8f924df7 3786 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3787
df7492f9 3788 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3789
df7492f9 3790 while (charbuf < charbuf_end)
4ed46869 3791 {
df7492f9 3792 ASSURE_DESTINATION (safe_room);
b73bfc1c 3793
df7492f9 3794 if (bol_designation)
b73bfc1c 3795 {
df7492f9 3796 unsigned char *dst_prev = dst;
4ed46869 3797
bdd9fb48 3798 /* We have to produce designation sequences if any now. */
df7492f9
KH
3799 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3800 bol_designation = 0;
3801 /* We are sure that designation sequences are all ASCII bytes. */
3802 produced_chars += dst - dst_prev;
e0e989f6
KH
3803 }
3804
df7492f9 3805 c = *charbuf++;
ec6d2bb8 3806
ff0dacd7
KH
3807 if (c < 0)
3808 {
3809 /* Handle an annotation. CHARBUF now points just past the
   negative entry; the entry it points at selects the annotation
   kind. */
3810 switch (*charbuf)
ec6d2bb8 3811 {
ff0dacd7
KH
3812 case CODING_ANNOTATE_COMPOSITION_MASK:
3813 /* Not yet implemented. */
3814 break;
3815 case CODING_ANNOTATE_CHARSET_MASK:
/* Remember the annotated charset, unless it is not a member of this
   coding system's charset list. */
3816 preferred_charset_id = charbuf[3];
3817 if (preferred_charset_id >= 0
3818 && NILP (Fmemq (make_number (preferred_charset_id),
3819 charset_list)))
3820 preferred_charset_id = -1;
3821 break;
3822 default:
3823 abort ();
4ed46869 3824 }
ff0dacd7
KH
/* Skip the rest of the annotation: apparently -C is its length
   counting the entry already consumed, hence -C - 1 more. */
3825 charbuf += -c - 1;
3826 continue;
4ed46869 3827 }
ec6d2bb8 3828
b73bfc1c
KH
3829 /* Now encode the character C. */
3830 if (c < 0x20 || c == 0x7F)
3831 {
df7492f9
KH
/* An end of line is a newline, or a carriage return when the EOL
   type is Mac. */
3832 if (c == '\n'
3833 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3834 {
df7492f9
KH
3835 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3836 ENCODE_RESET_PLANE_AND_REGISTER ();
3837 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3838 {
df7492f9
KH
3839 int i;
3840
/* Record the initial designations as current without emitting
   anything. */
3841 for (i = 0; i < 4; i++)
3842 CODING_ISO_DESIGNATION (coding, i)
3843 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3844 }
df7492f9
KH
/* The next character starts a line. */
3845 bol_designation
3846 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3847 }
df7492f9
KH
3848 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3849 ENCODE_RESET_PLANE_AND_REGISTER ();
3850 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3851 }
df7492f9 3852 else if (ASCII_CHAR_P (c))
88993dfd 3853 {
df7492f9
KH
3854 if (ascii_compatible)
3855 EMIT_ONE_ASCII_BYTE (c);
93dec019 3856 else
19a8d9e0 3857 {
bf16eb23
KH
3858 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3859 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 3860 }
4ed46869 3861 }
16eafb5d 3862 else if (CHAR_BYTE8_P (c))
88993dfd 3863 {
16eafb5d
KH
3864 c = CHAR_TO_BYTE8 (c);
3865 EMIT_ONE_BYTE (c);
88993dfd 3866 }
b73bfc1c 3867 else
df7492f9 3868 {
ff0dacd7 3869 struct charset *charset;
b73bfc1c 3870
ff0dacd7
KH
/* Prefer the annotated charset when it contains C; otherwise search
   the charset list. */
3871 if (preferred_charset_id >= 0)
3872 {
3873 charset = CHARSET_FROM_ID (preferred_charset_id);
3874 if (! CHAR_CHARSET_P (c, charset))
3875 charset = char_charset (c, charset_list, NULL);
3876 }
3877 else
3878 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
/* No charset can encode C: substitute
   CODING_INHIBIT_CHARACTER_SUBSTITUTION in safe-encoding mode,
   otherwise the coding system's default character.
   NOTE(review): the second char_charset result is not checked for
   NULL before use -- confirm default_char is always encodable. */
3879 if (!charset)
3880 {
41cbe562
KH
3881 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3882 {
3883 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3884 charset = CHARSET_FROM_ID (charset_ascii);
3885 }
3886 else
3887 {
3888 c = coding->default_char;
3889 charset = char_charset (c, charset_list, NULL);
3890 }
df7492f9
KH
3891 }
3892 ENCODE_ISO_CHARACTER (charset, c);
3893 }
84fbb8a0 3894 }
b73bfc1c 3895
df7492f9
KH
/* At the end of the last block, reset planes and registers if the
   coding system resets at end of line. */
3896 if (coding->mode & CODING_MODE_LAST_BLOCK
3897 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3898 {
3899 ASSURE_DESTINATION (safe_room);
3900 ENCODE_RESET_PLANE_AND_REGISTER ();
3901 }
065e3595 3902 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
3903 CODING_ISO_BOL (coding) = bol_designation;
3904 coding->produced_char += produced_chars;
3905 coding->produced = dst - coding->destination;
3906 return 0;
4ed46869
KH
3907}
3908
3909\f
df7492f9 3910/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3911
df7492f9 3912/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3913 quite widely. So, for the moment, Emacs supports them in the bare
3914 C code. But, in the future, they may be supported only by CCL. */
3915
3916/* SJIS is a coding system encoding three character sets: ASCII, right
3917 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3918 as is. A character of charset katakana-jisx0201 is encoded by
3919 "position-code + 0x80". A character of charset japanese-jisx0208
3920 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3921 so that it fits in the range below.
4ed46869
KH
3922
3923 --- CODE RANGE of SJIS ---
3924 (character set) (range)
3925 ASCII 0x00 .. 0x7F
df7492f9 3926 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3927 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3928 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3929 -------------------------------
3930
3931*/
3932
3933/* BIG5 is a coding system encoding two character sets: ASCII and
3934 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3935 character set and is encoded in two-byte.
4ed46869
KH
3936
3937 --- CODE RANGE of BIG5 ---
3938 (character set) (range)
3939 ASCII 0x00 .. 0x7F
3940 Big5 (1st byte) 0xA1 .. 0xFE
3941 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3942 --------------------------
3943
df7492f9 3944 */
4ed46869
KH
3945
3946/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3947 Check if a text is encoded in SJIS. If it is, return
df7492f9 3948 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3949
0a28aafb 3950static int
ff0dacd7 3951detect_coding_sjis (coding, detect_info)
df7492f9 3952 struct coding_system *coding;
ff0dacd7 3953 struct coding_detection_info *detect_info;
4ed46869 3954{
065e3595 3955 const unsigned char *src = coding->source, *src_base;
8f924df7 3956 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
3957 int multibytep = coding->src_multibyte;
3958 int consumed_chars = 0;
3959 int found = 0;
b73bfc1c 3960 int c;
df7492f9 3961
ff0dacd7 3962 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3963 /* A coding system of this category is always ASCII compatible. */
3964 src += coding->head_ascii;
4ed46869 3965
b73bfc1c 3966 while (1)
4ed46869 3967 {
065e3595 3968 src_base = src;
df7492f9 3969 ONE_MORE_BYTE (c);
682169fe
KH
3970 if (c < 0x80)
3971 continue;
df7492f9 3972 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3973 {
df7492f9 3974 ONE_MORE_BYTE (c);
682169fe 3975 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3976 break;
ff0dacd7 3977 found = CATEGORY_MASK_SJIS;
4ed46869 3978 }
df7492f9 3979 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3980 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3981 else
3982 break;
4ed46869 3983 }
ff0dacd7 3984 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3985 return 0;
3986
3987 no_more_source:
065e3595 3988 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 3989 {
ff0dacd7 3990 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 3991 return 0;
4ed46869 3992 }
ff0dacd7
KH
3993 detect_info->found |= found;
3994 return 1;
4ed46869
KH
3995}
3996
3997/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3998 Check if a text is encoded in BIG5. If it is, return
df7492f9 3999 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4000
0a28aafb 4001static int
ff0dacd7 4002detect_coding_big5 (coding, detect_info)
df7492f9 4003 struct coding_system *coding;
ff0dacd7 4004 struct coding_detection_info *detect_info;
4ed46869 4005{
065e3595 4006 const unsigned char *src = coding->source, *src_base;
8f924df7 4007 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4008 int multibytep = coding->src_multibyte;
4009 int consumed_chars = 0;
4010 int found = 0;
b73bfc1c 4011 int c;
fa42c37f 4012
ff0dacd7 4013 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4014 /* A coding system of this category is always ASCII compatible. */
4015 src += coding->head_ascii;
fa42c37f 4016
b73bfc1c 4017 while (1)
fa42c37f 4018 {
065e3595 4019 src_base = src;
df7492f9
KH
4020 ONE_MORE_BYTE (c);
4021 if (c < 0x80)
fa42c37f 4022 continue;
df7492f9 4023 if (c >= 0xA1)
fa42c37f 4024 {
df7492f9
KH
4025 ONE_MORE_BYTE (c);
4026 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4027 return 0;
ff0dacd7 4028 found = CATEGORY_MASK_BIG5;
fa42c37f 4029 }
df7492f9
KH
4030 else
4031 break;
fa42c37f 4032 }
ff0dacd7 4033 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4034 return 0;
fa42c37f 4035
df7492f9 4036 no_more_source:
065e3595 4037 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4038 {
ff0dacd7 4039 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4040 return 0;
4041 }
ff0dacd7
KH
4042 detect_info->found |= found;
4043 return 1;
fa42c37f
KH
4044}
4045
4ed46869
KH
4046/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

   Decode SJIS text from CODING->source, starting at
   CODING->consumed, into character codes appended to
   CODING->charbuf.  Bytes that do not form a valid SJIS code are
   stored one at a time as raw bytes and counted in CODING->errors.
   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated.  */
fa42c37f 4048
b73bfc1c 4049static void
df7492f9 4050decode_coding_sjis (coding)
4ed46869 4051 struct coding_system *coding;
4ed46869 4052{
8f924df7
KH
4053 const unsigned char *src = coding->source + coding->consumed;
4054 const unsigned char *src_end = coding->source + coding->src_bytes;
/* Start of the code currently being decoded; decoding is rolled back
   to here on an invalid code or when the source runs out. */
4055 const unsigned char *src_base;
69a80ea3
KH
4056 int *charbuf = coding->charbuf + coding->charbuf_used;
/* Stop MAX_ANNOTATION_LENGTH entries short of the real end;
   presumably this keeps room for the charset annotation added after
   the loop -- confirm against ADD_CHARSET_DATA. */
4057 int *charbuf_end
4058 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4059 int consumed_chars = 0, consumed_chars_base;
4060 int multibytep = coding->src_multibyte;
4061 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4062 struct charset *charset_kanji2;
24a73b0a 4063 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
/* CHAR_OFFSET counts decoded characters; LAST_ID and LAST_OFFSET
   describe the current run of characters of one non-ASCII charset. */
4064 int char_offset = coding->produced_char;
4065 int last_offset = char_offset;
4066 int last_id = charset_ascii;
a5d301df 4067
24a73b0a 4068 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4069
/* The charset list holds, in order, the roman, kana and kanji
   charsets, optionally followed by a second kanji charset. */
4070 val = charset_list;
4071 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4072 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4073 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4074 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4075
b73bfc1c 4076 while (1)
4ed46869 4077 {
df7492f9 4078 int c, c1;
24a73b0a 4079 struct charset *charset;
fa42c37f 4080
b73bfc1c 4081 src_base = src;
df7492f9 4082 consumed_chars_base = consumed_chars;
fa42c37f 4083
df7492f9
KH
4084 if (charbuf >= charbuf_end)
4085 break;
4086
4087 ONE_MORE_BYTE (c);
065e3595
KH
4088 if (c < 0)
4089 goto invalid_code;
24a73b0a
KH
/* Classify the first byte: below 0x80 is roman, 0xA1..0xDF is a
   single-byte kana code, 0x81..0x9F and 0xE0..0xEF lead a two-byte
   kanji code, and 0xF0..0xFC leads a code of the second kanji charset
   when one is configured. Anything else is invalid. */
4090 if (c < 0x80)
4091 charset = charset_roman;
57a47f8a 4092 else if (c == 0x80 || c == 0xA0)
8e921c4b 4093 goto invalid_code;
57a47f8a
KH
4094 else if (c >= 0xA1 && c <= 0xDF)
4095 {
4096 /* SJIS -> JISX0201-Kana */
4097 c &= 0x7F;
4098 charset = charset_kana;
4099 }
4100 else if (c <= 0xEF)
df7492f9 4101 {
57a47f8a
KH
4102 /* SJIS -> JISX0208 */
4103 ONE_MORE_BYTE (c1);
4104 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4105 goto invalid_code;
57a47f8a
KH
4106 c = (c << 8) | c1;
4107 SJIS_TO_JIS (c);
4108 charset = charset_kanji;
4109 }
4110 else if (c <= 0xFC && charset_kanji2)
4111 {
c6876370 4112 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4113 ONE_MORE_BYTE (c1);
4114 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4115 goto invalid_code;
57a47f8a
KH
4116 c = (c << 8) | c1;
4117 SJIS_TO_JIS2 (c);
4118 charset = charset_kanji2;
df7492f9 4119 }
57a47f8a
KH
4120 else
4121 goto invalid_code;
24a73b0a
KH
/* On a switch to a different non-ASCII charset, record charset data
   for the run that just ended (if any) and start a new run. */
4122 if (charset->id != charset_ascii
4123 && last_id != charset->id)
4124 {
4125 if (last_id != charset_ascii)
69a80ea3 4126 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4127 last_id = charset->id;
4128 last_offset = char_offset;
4129 }
4130 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4131 *charbuf++ = c;
ff0dacd7 4132 char_offset++;
df7492f9 4133 continue;
b73bfc1c 4134
df7492f9
KH
/* Go back to the start of the offending code, re-read only its first
   byte, store that as a raw byte, and count an error. */
4135 invalid_code:
4136 src = src_base;
4137 consumed_chars = consumed_chars_base;
4138 ONE_MORE_BYTE (c);
065e3595 4139 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4140 char_offset++;
df7492f9
KH
4141 coding->errors++;
4142 }
fa42c37f 4143
/* Reached by falling out of the loop when CHARBUF is full, or by jump
   from ONE_MORE_BYTE when the source is exhausted. Only input up to
   SRC_BASE is reported as consumed. */
df7492f9 4144 no_more_source:
ff0dacd7 4145 if (last_id != charset_ascii)
69a80ea3 4146 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4147 coding->consumed_char += consumed_chars_base;
4148 coding->consumed = src_base - coding->source;
4149 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4150}
4151
/* Decode BIG5 text from CODING->source, starting at CODING->consumed,
   into character codes appended to CODING->charbuf.  Bytes that do
   not form a valid BIG5 code are stored one at a time as raw bytes
   and counted in CODING->errors.  On return CODING->consumed,
   CODING->consumed_char and CODING->charbuf_used are updated.  */
b73bfc1c 4152static void
df7492f9 4153decode_coding_big5 (coding)
4ed46869 4154 struct coding_system *coding;
4ed46869 4155{
8f924df7
KH
4156 const unsigned char *src = coding->source + coding->consumed;
4157 const unsigned char *src_end = coding->source + coding->src_bytes;
/* Start of the code currently being decoded; decoding is rolled back
   to here on an invalid code or when the source runs out. */
4158 const unsigned char *src_base;
69a80ea3
KH
4159 int *charbuf = coding->charbuf + coding->charbuf_used;
/* Stop MAX_ANNOTATION_LENGTH entries short of the real end;
   presumably this keeps room for the charset annotation added after
   the loop -- confirm against ADD_CHARSET_DATA. */
4160 int *charbuf_end
4161 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4162 int consumed_chars = 0, consumed_chars_base;
4163 int multibytep = coding->src_multibyte;
4164 struct charset *charset_roman, *charset_big5;
24a73b0a 4165 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
/* CHAR_OFFSET counts decoded characters; LAST_ID and LAST_OFFSET
   describe the current run of characters of one non-ASCII charset. */
4166 int char_offset = coding->produced_char;
4167 int last_offset = char_offset;
4168 int last_id = charset_ascii;
df7492f9 4169
24a73b0a 4170 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
/* The charset list holds the roman charset followed by the Big5
   charset. */
4171 val = charset_list;
4172 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4173 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4174
b73bfc1c 4175 while (1)
4ed46869 4176 {
df7492f9 4177 int c, c1;
24a73b0a 4178 struct charset *charset;
b73bfc1c
KH
4179
4180 src_base = src;
df7492f9
KH
4181 consumed_chars_base = consumed_chars;
4182
4183 if (charbuf >= charbuf_end)
4184 break;
4185
4186 ONE_MORE_BYTE (c);
b73bfc1c 4187
065e3595
KH
4188 if (c < 0)
4189 goto invalid_code;
24a73b0a
KH
4190 if (c < 0x80)
4191 charset = charset_roman;
4192 else
4ed46869 4193 {
24a73b0a
KH
4194 /* BIG5 -> Big5 */
/* The lead byte must be 0xA1..0xFE and the trail byte 0x40..0x7E or
   0xA1..0xFE. */
4195 if (c < 0xA1 || c > 0xFE)
4196 goto invalid_code;
4197 ONE_MORE_BYTE (c1);
4198 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4199 goto invalid_code;
4200 c = c << 8 | c1;
4201 charset = charset_big5;
4ed46869 4202 }
24a73b0a
KH
/* On a switch to a different non-ASCII charset, record charset data
   for the run that just ended (if any) and start a new run. */
4203 if (charset->id != charset_ascii
4204 && last_id != charset->id)
df7492f9 4205 {
24a73b0a 4206 if (last_id != charset_ascii)
69a80ea3 4207 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4208 last_id = charset->id;
4209 last_offset = char_offset;
4ed46869 4210 }
24a73b0a 4211 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4212 *charbuf++ = c;
ff0dacd7 4213 char_offset++;
fb88bf2d
KH
4214 continue;
4215
/* Go back to the start of the offending code, re-read only its first
   byte, store that as a raw byte, and count an error. */
df7492f9 4216 invalid_code:
4ed46869 4217 src = src_base;
df7492f9
KH
4218 consumed_chars = consumed_chars_base;
4219 ONE_MORE_BYTE (c);
065e3595 4220 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4221 char_offset++;
df7492f9 4222 coding->errors++;
fb88bf2d 4223 }
d46c5b12 4224
/* Reached by falling out of the loop when CHARBUF is full, or by jump
   from ONE_MORE_BYTE when the source is exhausted. Only input up to
   SRC_BASE is reported as consumed. */
df7492f9 4225 no_more_source:
ff0dacd7 4226 if (last_id != charset_ascii)
69a80ea3 4227 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4228 coding->consumed_char += consumed_chars_base;
4229 coding->consumed = src_base - coding->source;
4230 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4231}
4232
4233/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4234 This function can encode charsets `ascii', `katakana-jisx0201',
4235 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4236 are sure that all these charsets are registered as official charset
4ed46869
KH
4237 (i.e. do not have extended leading-codes). Characters of other
4238 charsets are produced without any encoding. If SJIS_P is 1, encode
4239 SJIS text, else encode BIG5 text. */
4240
df7492f9
KH
4241static int
4242encode_coding_sjis (coding)
4ed46869 4243 struct coding_system *coding;
4ed46869 4244{
df7492f9
KH
4245 int multibytep = coding->dst_multibyte;
4246 int *charbuf = coding->charbuf;
4247 int *charbuf_end = charbuf + coding->charbuf_used;
4248 unsigned char *dst = coding->destination + coding->produced;
4249 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4250 int safe_room = 4;
4251 int produced_chars = 0;
24a73b0a 4252 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4253 int ascii_compatible;
4254 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4255 struct charset *charset_kanji2;
df7492f9 4256 int c;
a5d301df 4257
24a73b0a 4258 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4259 val = charset_list;
4260 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4261 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4262 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4263 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4264
df7492f9 4265 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4266
df7492f9
KH
4267 while (charbuf < charbuf_end)
4268 {
4269 ASSURE_DESTINATION (safe_room);
4270 c = *charbuf++;
b73bfc1c 4271 /* Now encode the character C. */
df7492f9
KH
4272 if (ASCII_CHAR_P (c) && ascii_compatible)
4273 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4274 else if (CHAR_BYTE8_P (c))
4275 {
4276 c = CHAR_TO_BYTE8 (c);
4277 EMIT_ONE_BYTE (c);
4278 }
df7492f9 4279 else
b73bfc1c 4280 {
df7492f9
KH
4281 unsigned code;
4282 struct charset *charset = char_charset (c, charset_list, &code);
4283
4284 if (!charset)
4ed46869 4285 {
41cbe562 4286 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4287 {
41cbe562
KH
4288 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4289 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4290 }
41cbe562 4291 else
b73bfc1c 4292 {
41cbe562
KH
4293 c = coding->default_char;
4294 charset = char_charset (c, charset_list, &code);
b73bfc1c 4295 }
b73bfc1c 4296 }
df7492f9
KH
4297 if (code == CHARSET_INVALID_CODE (charset))
4298 abort ();
4299 if (charset == charset_kanji)
4300 {
4301 int c1, c2;
4302 JIS_TO_SJIS (code);
4303 c1 = code >> 8, c2 = code & 0xFF;
4304 EMIT_TWO_BYTES (c1, c2);
4305 }
4306 else if (charset == charset_kana)
4307 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4308 else if (charset_kanji2 && charset == charset_kanji2)
4309 {
4310 int c1, c2;
4311
4312 c1 = code >> 8;
4313 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4314 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4315 {
4316 JIS_TO_SJIS2 (code);
4317 c1 = code >> 8, c2 = code & 0xFF;
4318 EMIT_TWO_BYTES (c1, c2);
4319 }
4320 else
4321 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4322 }
df7492f9
KH
4323 else
4324 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4325 }
4326 }
065e3595 4327 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4328 coding->produced_char += produced_chars;
4329 coding->produced = dst - coding->destination;
4330 return 0;
4331}
4332
4333static int
4334encode_coding_big5 (coding)
4335 struct coding_system *coding;
4336{
4337 int multibytep = coding->dst_multibyte;
4338 int *charbuf = coding->charbuf;
4339 int *charbuf_end = charbuf + coding->charbuf_used;
4340 unsigned char *dst = coding->destination + coding->produced;
4341 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4342 int safe_room = 4;
4343 int produced_chars = 0;
24a73b0a 4344 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4345 int ascii_compatible;
4346 struct charset *charset_roman, *charset_big5;
4347 int c;
4348
24a73b0a 4349 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4350 val = charset_list;
4351 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4352 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4353 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4354
4355 while (charbuf < charbuf_end)
4356 {
4357 ASSURE_DESTINATION (safe_room);
4358 c = *charbuf++;
4359 /* Now encode the character C. */
4360 if (ASCII_CHAR_P (c) && ascii_compatible)
4361 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4362 else if (CHAR_BYTE8_P (c))
4363 {
4364 c = CHAR_TO_BYTE8 (c);
4365 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4366 }
4367 else
4368 {
df7492f9
KH
4369 unsigned code;
4370 struct charset *charset = char_charset (c, charset_list, &code);
4371
4372 if (! charset)
b73bfc1c 4373 {
41cbe562 4374 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4375 {
41cbe562
KH
4376 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4377 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4378 }
41cbe562 4379 else
0eecad43 4380 {
41cbe562
KH
4381 c = coding->default_char;
4382 charset = char_charset (c, charset_list, &code);
0eecad43 4383 }
4ed46869 4384 }
df7492f9
KH
4385 if (code == CHARSET_INVALID_CODE (charset))
4386 abort ();
4387 if (charset == charset_big5)
b73bfc1c 4388 {
df7492f9
KH
4389 int c1, c2;
4390
4391 c1 = code >> 8, c2 = code & 0xFF;
4392 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4393 }
df7492f9
KH
4394 else
4395 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4396 }
4ed46869 4397 }
065e3595 4398 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4399 coding->produced_char += produced_chars;
4400 coding->produced = dst - coding->destination;
4401 return 0;
4ed46869
KH
4402}
4403
4404\f
df7492f9 4405/*** 10. CCL handlers ***/
1397dc18
KH
4406
4407/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4408 Check if a text is encoded in a coding system of which
4409 encoder/decoder are written in CCL program. If it is, return
df7492f9 4410 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4411
0a28aafb 4412static int
ff0dacd7 4413detect_coding_ccl (coding, detect_info)
df7492f9 4414 struct coding_system *coding;
ff0dacd7 4415 struct coding_detection_info *detect_info;
1397dc18 4416{
065e3595 4417 const unsigned char *src = coding->source, *src_base;
8f924df7 4418 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4419 int multibytep = coding->src_multibyte;
4420 int consumed_chars = 0;
4421 int found = 0;
0e219d54 4422 unsigned char *valids;
df7492f9
KH
4423 int head_ascii = coding->head_ascii;
4424 Lisp_Object attrs;
4425
ff0dacd7
KH
4426 detect_info->checked |= CATEGORY_MASK_CCL;
4427
df7492f9 4428 coding = &coding_categories[coding_category_ccl];
0e219d54 4429 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
4430 attrs = CODING_ID_ATTRS (coding->id);
4431 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4432 src += head_ascii;
1397dc18 4433
b73bfc1c 4434 while (1)
1397dc18 4435 {
df7492f9 4436 int c;
065e3595
KH
4437
4438 src_base = src;
df7492f9 4439 ONE_MORE_BYTE (c);
065e3595 4440 if (c < 0 || ! valids[c])
df7492f9 4441 break;
ff0dacd7
KH
4442 if ((valids[c] > 1))
4443 found = CATEGORY_MASK_CCL;
df7492f9 4444 }
ff0dacd7 4445 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4446 return 0;
4447
4448 no_more_source:
ff0dacd7
KH
4449 detect_info->found |= found;
4450 return 1;
df7492f9
KH
4451}
4452
4453static void
4454decode_coding_ccl (coding)
4455 struct coding_system *coding;
4456{
7c78e542 4457 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4458 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
4459 int *charbuf = coding->charbuf + coding->charbuf_used;
4460 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
4461 int consumed_chars = 0;
4462 int multibytep = coding->src_multibyte;
4463 struct ccl_program ccl;
4464 int source_charbuf[1024];
4465 int source_byteidx[1024];
24a73b0a 4466 Lisp_Object attrs, charset_list;
df7492f9 4467
24a73b0a 4468 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4469 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4470
4471 while (src < src_end)
4472 {
7c78e542 4473 const unsigned char *p = src;
df7492f9
KH
4474 int *source, *source_end;
4475 int i = 0;
4476
4477 if (multibytep)
4478 while (i < 1024 && p < src_end)
4479 {
4480 source_byteidx[i] = p - src;
4481 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4482 }
4483 else
4484 while (i < 1024 && p < src_end)
4485 source_charbuf[i++] = *p++;
8f924df7 4486
df7492f9
KH
4487 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4488 ccl.last_block = 1;
4489
4490 source = source_charbuf;
4491 source_end = source + i;
4492 while (source < source_end)
4493 {
4494 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4495 source_end - source, charbuf_end - charbuf,
4496 charset_list);
df7492f9
KH
4497 source += ccl.consumed;
4498 charbuf += ccl.produced;
4499 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4500 break;
4501 }
4502 if (source < source_end)
4503 src += source_byteidx[source - source_charbuf];
4504 else
4505 src = p;
4506 consumed_chars += source - source_charbuf;
4507
4508 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4509 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4510 break;
4511 }
4512
4513 switch (ccl.status)
4514 {
4515 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4516 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4517 break;
4518 case CCL_STAT_SUSPEND_BY_DST:
4519 break;
4520 case CCL_STAT_QUIT:
4521 case CCL_STAT_INVALID_CMD:
065e3595 4522 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4523 break;
4524 default:
065e3595 4525 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4526 break;
4527 }
4528 coding->consumed_char += consumed_chars;
4529 coding->consumed = src - coding->source;
4530 coding->charbuf_used = charbuf - coding->charbuf;
4531}
4532
4533static int
4534encode_coding_ccl (coding)
4535 struct coding_system *coding;
4536{
4537 struct ccl_program ccl;
4538 int multibytep = coding->dst_multibyte;
4539 int *charbuf = coding->charbuf;
4540 int *charbuf_end = charbuf + coding->charbuf_used;
4541 unsigned char *dst = coding->destination + coding->produced;
4542 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4543 unsigned char *adjusted_dst_end = dst_end - 1;
4544 int destination_charbuf[1024];
4545 int i, produced_chars = 0;
24a73b0a 4546 Lisp_Object attrs, charset_list;
df7492f9 4547
24a73b0a 4548 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4549 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4550
4551 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4552 ccl.dst_multibyte = coding->dst_multibyte;
4553
4554 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4555 {
4556 int dst_bytes = dst_end - dst;
4557 if (dst_bytes > 1024)
4558 dst_bytes = 1024;
4559
4560 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4561 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4562 charbuf += ccl.consumed;
4563 if (multibytep)
4564 for (i = 0; i < ccl.produced; i++)
4565 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4566 else
4567 {
4568 for (i = 0; i < ccl.produced; i++)
4569 *dst++ = destination_charbuf[i] & 0xFF;
4570 produced_chars += ccl.produced;
4571 }
4572 }
4573
4574 switch (ccl.status)
4575 {
4576 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4577 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4578 break;
4579 case CCL_STAT_SUSPEND_BY_DST:
065e3595 4580 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
4581 break;
4582 case CCL_STAT_QUIT:
4583 case CCL_STAT_INVALID_CMD:
065e3595 4584 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4585 break;
4586 default:
065e3595 4587 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4588 break;
1397dc18 4589 }
df7492f9
KH
4590
4591 coding->produced_char += produced_chars;
4592 coding->produced = dst - coding->destination;
4593 return 0;
1397dc18
KH
4594}
4595
df7492f9 4596
1397dc18 4597\f
df7492f9 4598/*** 10, 11. no-conversion handlers ***/
4ed46869 4599
b73bfc1c 4600/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4601
b73bfc1c 4602static void
df7492f9 4603decode_coding_raw_text (coding)
4ed46869 4604 struct coding_system *coding;
4ed46869 4605{
df7492f9 4606 coding->chars_at_source = 1;
2c78b7e1
KH
4607 coding->consumed_char = 0;
4608 coding->consumed = 0;
065e3595 4609 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4610}
4ed46869 4611
df7492f9
KH
4612static int
4613encode_coding_raw_text (coding)
4614 struct coding_system *coding;
4615{
4616 int multibytep = coding->dst_multibyte;
4617 int *charbuf = coding->charbuf;
4618 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4619 unsigned char *dst = coding->destination + coding->produced;
4620 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4621 int produced_chars = 0;
b73bfc1c
KH
4622 int c;
4623
df7492f9 4624 if (multibytep)
b73bfc1c 4625 {
df7492f9 4626 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4627
df7492f9
KH
4628 if (coding->src_multibyte)
4629 while (charbuf < charbuf_end)
4630 {
4631 ASSURE_DESTINATION (safe_room);
4632 c = *charbuf++;
4633 if (ASCII_CHAR_P (c))
4634 EMIT_ONE_ASCII_BYTE (c);
4635 else if (CHAR_BYTE8_P (c))
4636 {
4637 c = CHAR_TO_BYTE8 (c);
4638 EMIT_ONE_BYTE (c);
4639 }
4640 else
4641 {
4642 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4643
df7492f9
KH
4644 CHAR_STRING_ADVANCE (c, p1);
4645 while (p0 < p1)
9d123124
KH
4646 {
4647 EMIT_ONE_BYTE (*p0);
4648 p0++;
4649 }
df7492f9
KH
4650 }
4651 }
b73bfc1c 4652 else
df7492f9
KH
4653 while (charbuf < charbuf_end)
4654 {
4655 ASSURE_DESTINATION (safe_room);
4656 c = *charbuf++;
4657 EMIT_ONE_BYTE (c);
4658 }
4659 }
4660 else
4ed46869 4661 {
df7492f9 4662 if (coding->src_multibyte)
d46c5b12 4663 {
df7492f9
KH
4664 int safe_room = MAX_MULTIBYTE_LENGTH;
4665
4666 while (charbuf < charbuf_end)
d46c5b12 4667 {
df7492f9
KH
4668 ASSURE_DESTINATION (safe_room);
4669 c = *charbuf++;
4670 if (ASCII_CHAR_P (c))
4671 *dst++ = c;
4672 else if (CHAR_BYTE8_P (c))
4673 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4674 else
df7492f9
KH
4675 CHAR_STRING_ADVANCE (c, dst);
4676 produced_chars++;
d46c5b12
KH
4677 }
4678 }
df7492f9
KH
4679 else
4680 {
4681 ASSURE_DESTINATION (charbuf_end - charbuf);
4682 while (charbuf < charbuf_end && dst < dst_end)
4683 *dst++ = *charbuf++;
4684 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4685 }
4ed46869 4686 }
065e3595 4687 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4688 coding->produced_char += produced_chars;
4689 coding->produced = dst - coding->destination;
4690 return 0;
4ed46869
KH
4691}
4692
ff0dacd7
KH
4693/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4694 Check if a text is encoded in a charset-based coding system. If it
4695 is, return 1, else return 0. */
4696
0a28aafb 4697static int
ff0dacd7 4698detect_coding_charset (coding, detect_info)
df7492f9 4699 struct coding_system *coding;
ff0dacd7 4700 struct coding_detection_info *detect_info;
1397dc18 4701{
065e3595 4702 const unsigned char *src = coding->source, *src_base;
8f924df7 4703 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4704 int multibytep = coding->src_multibyte;
4705 int consumed_chars = 0;
4706 Lisp_Object attrs, valids;
584948ac 4707 int found = 0;
1397dc18 4708
ff0dacd7
KH
4709 detect_info->checked |= CATEGORY_MASK_CHARSET;
4710
df7492f9
KH
4711 coding = &coding_categories[coding_category_charset];
4712 attrs = CODING_ID_ATTRS (coding->id);
4713 valids = AREF (attrs, coding_attr_charset_valids);
4714
4715 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4716 src += coding->head_ascii;
1397dc18 4717
b73bfc1c 4718 while (1)
1397dc18 4719 {
df7492f9 4720 int c;
1397dc18 4721
065e3595 4722 src_base = src;
df7492f9 4723 ONE_MORE_BYTE (c);
065e3595
KH
4724 if (c < 0)
4725 continue;
df7492f9
KH
4726 if (NILP (AREF (valids, c)))
4727 break;
584948ac 4728 if (c >= 0x80)
ff0dacd7 4729 found = CATEGORY_MASK_CHARSET;
df7492f9 4730 }
ff0dacd7 4731 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4732 return 0;
4ed46869 4733
df7492f9 4734 no_more_source:
ff0dacd7
KH
4735 detect_info->found |= found;
4736 return 1;
df7492f9 4737}
b73bfc1c 4738
b73bfc1c 4739static void
df7492f9 4740decode_coding_charset (coding)
4ed46869 4741 struct coding_system *coding;
4ed46869 4742{
8f924df7
KH
4743 const unsigned char *src = coding->source + coding->consumed;
4744 const unsigned char *src_end = coding->source + coding->src_bytes;
4745 const unsigned char *src_base;
69a80ea3
KH
4746 int *charbuf = coding->charbuf + coding->charbuf_used;
4747 int *charbuf_end
4748 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4749 int consumed_chars = 0, consumed_chars_base;
4750 int multibytep = coding->src_multibyte;
24a73b0a 4751 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
4752 int char_offset = coding->produced_char;
4753 int last_offset = char_offset;
4754 int last_id = charset_ascii;
df7492f9 4755
24a73b0a 4756 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 4757 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4758
df7492f9 4759 while (1)
4ed46869 4760 {
4eb6d3f1 4761 int c;
24a73b0a
KH
4762 Lisp_Object val;
4763 struct charset *charset;
4764 int dim;
4765 int len = 1;
4766 unsigned code;
df7492f9
KH
4767
4768 src_base = src;
4769 consumed_chars_base = consumed_chars;
b73bfc1c 4770
df7492f9
KH
4771 if (charbuf >= charbuf_end)
4772 break;
4773
4eb6d3f1 4774 ONE_MORE_BYTE (c);
065e3595
KH
4775 if (c < 0)
4776 goto invalid_code;
24a73b0a
KH
4777 code = c;
4778
4779 val = AREF (valids, c);
4780 if (NILP (val))
4781 goto invalid_code;
4782 if (INTEGERP (val))
d46c5b12 4783 {
24a73b0a
KH
4784 charset = CHARSET_FROM_ID (XFASTINT (val));
4785 dim = CHARSET_DIMENSION (charset);
4786 while (len < dim)
b73bfc1c 4787 {
24a73b0a
KH
4788 ONE_MORE_BYTE (c);
4789 code = (code << 8) | c;
4790 len++;
b73bfc1c 4791 }
24a73b0a
KH
4792 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4793 charset, code, c);
d46c5b12 4794 }
df7492f9 4795 else
d46c5b12 4796 {
24a73b0a
KH
4797 /* VAL is a list of charset IDs. It is assured that the
4798 list is sorted by charset dimensions (smaller one
4799 comes first). */
4800 while (CONSP (val))
4eb6d3f1 4801 {
24a73b0a 4802 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 4803 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4804 while (len < dim)
4eb6d3f1 4805 {
acb2a965
KH
4806 ONE_MORE_BYTE (c);
4807 code = (code << 8) | c;
f9d71dcd 4808 len++;
4eb6d3f1 4809 }
24a73b0a
KH
4810 CODING_DECODE_CHAR (coding, src, src_base,
4811 src_end, charset, code, c);
4812 if (c >= 0)
4813 break;
4814 val = XCDR (val);
ff0dacd7 4815 }
d46c5b12 4816 }
24a73b0a
KH
4817 if (c < 0)
4818 goto invalid_code;
4819 if (charset->id != charset_ascii
4820 && last_id != charset->id)
4821 {
4822 if (last_id != charset_ascii)
69a80ea3 4823 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4824 last_id = charset->id;
4825 last_offset = char_offset;
4826 }
4827
df7492f9 4828 *charbuf++ = c;
ff0dacd7 4829 char_offset++;
df7492f9
KH
4830 continue;
4831
4832 invalid_code:
4833 src = src_base;
4834 consumed_chars = consumed_chars_base;
4835 ONE_MORE_BYTE (c);
065e3595 4836 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4837 char_offset++;
df7492f9 4838 coding->errors++;
4ed46869
KH
4839 }
4840
df7492f9 4841 no_more_source:
ff0dacd7 4842 if (last_id != charset_ascii)
69a80ea3 4843 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4844 coding->consumed_char += consumed_chars_base;
4845 coding->consumed = src_base - coding->source;
4846 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4847}
4848
df7492f9
KH
4849static int
4850encode_coding_charset (coding)
4ed46869 4851 struct coding_system *coding;
4ed46869 4852{
df7492f9
KH
4853 int multibytep = coding->dst_multibyte;
4854 int *charbuf = coding->charbuf;
4855 int *charbuf_end = charbuf + coding->charbuf_used;
4856 unsigned char *dst = coding->destination + coding->produced;
4857 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4858 int safe_room = MAX_MULTIBYTE_LENGTH;
4859 int produced_chars = 0;
24a73b0a 4860 Lisp_Object attrs, charset_list;
df7492f9 4861 int ascii_compatible;
b73bfc1c 4862 int c;
b73bfc1c 4863
24a73b0a 4864 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 4865 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4866
df7492f9 4867 while (charbuf < charbuf_end)
4ed46869 4868 {
4eb6d3f1 4869 struct charset *charset;
df7492f9 4870 unsigned code;
8f924df7 4871
df7492f9
KH
4872 ASSURE_DESTINATION (safe_room);
4873 c = *charbuf++;
4874 if (ascii_compatible && ASCII_CHAR_P (c))
4875 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 4876 else if (CHAR_BYTE8_P (c))
4ed46869 4877 {
16eafb5d
KH
4878 c = CHAR_TO_BYTE8 (c);
4879 EMIT_ONE_BYTE (c);
d46c5b12 4880 }
d46c5b12 4881 else
b73bfc1c 4882 {
4eb6d3f1
KH
4883 charset = char_charset (c, charset_list, &code);
4884 if (charset)
4885 {
4886 if (CHARSET_DIMENSION (charset) == 1)
4887 EMIT_ONE_BYTE (code);
4888 else if (CHARSET_DIMENSION (charset) == 2)
4889 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4890 else if (CHARSET_DIMENSION (charset) == 3)
4891 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4892 else
4893 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4894 (code >> 8) & 0xFF, code & 0xFF);
4895 }
4896 else
41cbe562
KH
4897 {
4898 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4899 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4900 else
4901 c = coding->default_char;
4902 EMIT_ONE_BYTE (c);
4903 }
4ed46869 4904 }
4ed46869
KH
4905 }
4906
065e3595 4907 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4908 coding->produced_char += produced_chars;
4909 coding->produced = dst - coding->destination;
4910 return 0;
4ed46869
KH
4911}
4912
4913\f
1397dc18 4914/*** 10. C library functions ***/
4ed46869 4915
df7492f9
KH
4916/* Setup coding context CODING from information about CODING_SYSTEM.
4917 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4918 CODING_SYSTEM is invalid, signal an error. */
4ed46869 4919
ec6d2bb8 4920void
e0e989f6
KH
4921setup_coding_system (coding_system, coding)
4922 Lisp_Object coding_system;
4ed46869
KH
4923 struct coding_system *coding;
4924{
df7492f9
KH
4925 Lisp_Object attrs;
4926 Lisp_Object eol_type;
4927 Lisp_Object coding_type;
4608c386 4928 Lisp_Object val;
4ed46869 4929
df7492f9
KH
4930 if (NILP (coding_system))
4931 coding_system = Qno_conversion;
c07c8e12 4932
df7492f9 4933 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 4934
df7492f9
KH
4935 attrs = CODING_ID_ATTRS (coding->id);
4936 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 4937
df7492f9
KH
4938 coding->mode = 0;
4939 coding->head_ascii = -1;
4940 coding->common_flags
4941 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
5e5c78be
KH
4942 if (! NILP (CODING_ATTR_POST_READ (attrs)))
4943 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4944 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4945 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
4946 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4947 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 4948
df7492f9 4949 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4950 coding->max_charset_id = SCHARS (val) - 1;
4951 coding->safe_charsets = (char *) SDATA (val);
df7492f9 4952 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 4953
df7492f9
KH
4954 coding_type = CODING_ATTR_TYPE (attrs);
4955 if (EQ (coding_type, Qundecided))
d46c5b12 4956 {
df7492f9
KH
4957 coding->detector = NULL;
4958 coding->decoder = decode_coding_raw_text;
4959 coding->encoder = encode_coding_raw_text;
4960 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 4961 }
df7492f9 4962 else if (EQ (coding_type, Qiso_2022))
d46c5b12 4963 {
df7492f9
KH
4964 int i;
4965 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4966
4967 /* Invoke graphic register 0 to plane 0. */
4968 CODING_ISO_INVOCATION (coding, 0) = 0;
4969 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4970 CODING_ISO_INVOCATION (coding, 1)
4971 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4972 /* Setup the initial status of designation. */
4973 for (i = 0; i < 4; i++)
4974 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4975 /* Not single shifting initially. */
4976 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4977 /* Beginning of buffer should also be regarded as bol. */
4978 CODING_ISO_BOL (coding) = 1;
4979 coding->detector = detect_coding_iso_2022;
4980 coding->decoder = decode_coding_iso_2022;
4981 coding->encoder = encode_coding_iso_2022;
4982 if (flags & CODING_ISO_FLAG_SAFE)
4983 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 4984 coding->common_flags
df7492f9
KH
4985 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4986 | CODING_REQUIRE_FLUSHING_MASK);
4987 if (flags & CODING_ISO_FLAG_COMPOSITION)
4988 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4989 if (flags & CODING_ISO_FLAG_DESIGNATION)
4990 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4991 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4992 {
4993 setup_iso_safe_charsets (attrs);
4994 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4995 coding->max_charset_id = SCHARS (val) - 1;
4996 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
4997 }
4998 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 4999 }
df7492f9 5000 else if (EQ (coding_type, Qcharset))
d46c5b12 5001 {
df7492f9
KH
5002 coding->detector = detect_coding_charset;
5003 coding->decoder = decode_coding_charset;
5004 coding->encoder = encode_coding_charset;
d46c5b12 5005 coding->common_flags
df7492f9 5006 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5007 }
df7492f9 5008 else if (EQ (coding_type, Qutf_8))
d46c5b12 5009 {
df7492f9
KH
5010 coding->detector = detect_coding_utf_8;
5011 coding->decoder = decode_coding_utf_8;
5012 coding->encoder = encode_coding_utf_8;
5013 coding->common_flags
5014 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5015 }
5016 else if (EQ (coding_type, Qutf_16))
5017 {
5018 val = AREF (attrs, coding_attr_utf_16_bom);
5019 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5020 : EQ (val, Qt) ? utf_16_with_bom
5021 : utf_16_without_bom);
5022 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5023 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5024 : utf_16_little_endian);
e19c3639 5025 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5026 coding->detector = detect_coding_utf_16;
5027 coding->decoder = decode_coding_utf_16;
5028 coding->encoder = encode_coding_utf_16;
5029 coding->common_flags
5030 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
5031 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5032 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5033 }
df7492f9 5034 else if (EQ (coding_type, Qccl))
4ed46869 5035 {
df7492f9
KH
5036 coding->detector = detect_coding_ccl;
5037 coding->decoder = decode_coding_ccl;
5038 coding->encoder = encode_coding_ccl;
c952af22 5039 coding->common_flags
df7492f9
KH
5040 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5041 | CODING_REQUIRE_FLUSHING_MASK);
5042 }
5043 else if (EQ (coding_type, Qemacs_mule))
5044 {
5045 coding->detector = detect_coding_emacs_mule;
5046 coding->decoder = decode_coding_emacs_mule;
5047 coding->encoder = encode_coding_emacs_mule;
c952af22 5048 coding->common_flags
df7492f9
KH
5049 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5050 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5051 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5052 {
5053 Lisp_Object tail, safe_charsets;
5054 int max_charset_id = 0;
5055
5056 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5057 tail = XCDR (tail))
5058 if (max_charset_id < XFASTINT (XCAR (tail)))
5059 max_charset_id = XFASTINT (XCAR (tail));
5060 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5061 make_number (255));
5062 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5063 tail = XCDR (tail))
8f924df7 5064 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5065 coding->max_charset_id = max_charset_id;
8f924df7 5066 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
5067 }
5068 }
5069 else if (EQ (coding_type, Qshift_jis))
5070 {
5071 coding->detector = detect_coding_sjis;
5072 coding->decoder = decode_coding_sjis;
5073 coding->encoder = encode_coding_sjis;
c952af22 5074 coding->common_flags
df7492f9
KH
5075 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5076 }
5077 else if (EQ (coding_type, Qbig5))
5078 {
5079 coding->detector = detect_coding_big5;
5080 coding->decoder = decode_coding_big5;
5081 coding->encoder = encode_coding_big5;
c952af22 5082 coding->common_flags
df7492f9
KH
5083 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5084 }
5085 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5086 {
df7492f9
KH
5087 coding->detector = NULL;
5088 coding->decoder = decode_coding_raw_text;
5089 coding->encoder = encode_coding_raw_text;
4ed46869 5090 }
4ed46869 5091
df7492f9 5092 return;
4ed46869
KH
5093}
5094
df7492f9
KH
5095/* Return raw-text or one of its subsidiaries that has the same
5096 eol_type as CODING-SYSTEM. */
ec6d2bb8 5097
df7492f9
KH
5098Lisp_Object
5099raw_text_coding_system (coding_system)
5100 Lisp_Object coding_system;
ec6d2bb8 5101{
0be8721c 5102 Lisp_Object spec, attrs;
df7492f9 5103 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5104
d3e4cb56
KH
5105 if (NILP (coding_system))
5106 return Qraw_text;
df7492f9
KH
5107 spec = CODING_SYSTEM_SPEC (coding_system);
5108 attrs = AREF (spec, 0);
ec6d2bb8 5109
df7492f9
KH
5110 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5111 return coding_system;
ec6d2bb8 5112
df7492f9
KH
5113 eol_type = AREF (spec, 2);
5114 if (VECTORP (eol_type))
5115 return Qraw_text;
5116 spec = CODING_SYSTEM_SPEC (Qraw_text);
5117 raw_text_eol_type = AREF (spec, 2);
5118 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5119 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5120 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5121}
5122
54f78171 5123
df7492f9
KH
5124/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5125 does, return one of the subsidiary that has the same eol-spec as
5126 PARENT. Otherwise, return CODING_SYSTEM. */
5127
5128Lisp_Object
5129coding_inherit_eol_type (coding_system, parent)
b74e4686 5130 Lisp_Object coding_system, parent;
54f78171 5131{
3e139625 5132 Lisp_Object spec, eol_type;
54f78171 5133
d3e4cb56
KH
5134 if (NILP (coding_system))
5135 coding_system = Qraw_text;
df7492f9 5136 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5137 eol_type = AREF (spec, 2);
d3e4cb56
KH
5138 if (VECTORP (eol_type)
5139 && ! NILP (parent))
df7492f9
KH
5140 {
5141 Lisp_Object parent_spec;
df7492f9
KH
5142 Lisp_Object parent_eol_type;
5143
5144 parent_spec
5145 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5146 parent_eol_type = AREF (parent_spec, 2);
5147 if (EQ (parent_eol_type, Qunix))
5148 coding_system = AREF (eol_type, 0);
5149 else if (EQ (parent_eol_type, Qdos))
5150 coding_system = AREF (eol_type, 1);
5151 else if (EQ (parent_eol_type, Qmac))
5152 coding_system = AREF (eol_type, 2);
54f78171 5153 }
df7492f9 5154 return coding_system;
54f78171
KH
5155}
5156
4ed46869
KH
5157/* Emacs has a mechanism to automatically detect a coding system if it
5158 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5159 it's impossible to distinguish some coding systems accurately
5160 because they use the same range of codes. So, at first, coding
5161 systems are categorized into 7, those are:
5162
0ef69138 5163 o coding-category-emacs-mule
4ed46869
KH
5164
5165 The category for a coding system which has the same code range
5166 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5167 symbol) `emacs-mule' by default.
4ed46869
KH
5168
5169 o coding-category-sjis
5170
5171 The category for a coding system which has the same code range
5172 as SJIS. Assigned the coding-system (Lisp
7717c392 5173 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5174
5175 o coding-category-iso-7
5176
5177 The category for a coding system which has the same code range
7717c392 5178 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5179 shift and single shift functions. This can encode/decode all
5180 charsets. Assigned the coding-system (Lisp symbol)
5181 `iso-2022-7bit' by default.
5182
5183 o coding-category-iso-7-tight
5184
5185 Same as coding-category-iso-7 except that this can
5186 encode/decode only the specified charsets.
4ed46869
KH
5187
5188 o coding-category-iso-8-1
5189
5190 The category for a coding system which has the same code range
5191 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5192 for DIMENSION1 charset. This doesn't use any locking shift
5193 and single shift functions. Assigned the coding-system (Lisp
5194 symbol) `iso-latin-1' by default.
4ed46869
KH
5195
5196 o coding-category-iso-8-2
5197
5198 The category for a coding system which has the same code range
5199 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5200 for DIMENSION2 charset. This doesn't use any locking shift
5201 and single shift functions. Assigned the coding-system (Lisp
5202 symbol) `japanese-iso-8bit' by default.
4ed46869 5203
7717c392 5204 o coding-category-iso-7-else
4ed46869
KH
5205
5206 The category for a coding system which has the same code range
df7492f9 5207 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5208 single shift functions. Assigned the coding-system (Lisp
5209 symbol) `iso-2022-7bit-lock' by default.
5210
5211 o coding-category-iso-8-else
5212
5213 The category for a coding system which has the same code range
df7492f9 5214 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5215 single shift functions. Assigned the coding-system (Lisp
5216 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5217
5218 o coding-category-big5
5219
5220 The category for a coding system which has the same code range
5221 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5222 `cn-big5' by default.
4ed46869 5223
fa42c37f
KH
5224 o coding-category-utf-8
5225
5226 The category for a coding system which has the same code range
6e76ae91 5227 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
5228 symbol) `utf-8' by default.
5229
5230 o coding-category-utf-16-be
5231
5232 The category for a coding system in which a text has an
5233 Unicode signature (cf. Unicode Standard) in the order of BIG
5234 endian at the head. Assigned the coding-system (Lisp symbol)
5235 `utf-16-be' by default.
5236
5237 o coding-category-utf-16-le
5238
5239 The category for a coding system in which a text has an
5240 Unicode signature (cf. Unicode Standard) in the order of
5241 LITTLE endian at the head. Assigned the coding-system (Lisp
5242 symbol) `utf-16-le' by default.
5243
1397dc18
KH
5244 o coding-category-ccl
5245
5246 The category for a coding system of which encoder/decoder is
5247 written in CCL programs. The default value is nil, i.e., no
5248 coding system is assigned.
5249
4ed46869
KH
5250 o coding-category-binary
5251
5252 The category for a coding system not categorized in any of the
5253 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5254 `no-conversion' by default.
4ed46869
KH
5255
5256 Each of them is a Lisp symbol and the value is an actual
df7492f9 5257 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5258 What Emacs does actually is to detect a category of coding system.
5259 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5260 decide only one possible category, it selects a category of the
4ed46869
KH
5261 highest priority. Priorities of categories are also specified by a
5262 user in a Lisp variable `coding-category-list'.
5263
5264*/
5265
df7492f9
KH
5266#define EOL_SEEN_NONE 0
5267#define EOL_SEEN_LF 1
5268#define EOL_SEEN_CR 2
5269#define EOL_SEEN_CRLF 4
66cfb530 5270
ff0dacd7
KH
5271/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5272 SOURCE is encoded. If CATEGORY is one of
5273 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5274 two-byte, else they are encoded by one-byte.
5275
5276 Return one of EOL_SEEN_XXX. */
4ed46869 5277
bc4bc72a 5278#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5279
5280static int
89528eb3 5281detect_eol (source, src_bytes, category)
f6cbaf43 5282 const unsigned char *source;
df7492f9 5283 EMACS_INT src_bytes;
89528eb3 5284 enum coding_category category;
4ed46869 5285{
f6cbaf43 5286 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5287 unsigned char c;
df7492f9
KH
5288 int total = 0;
5289 int eol_seen = EOL_SEEN_NONE;
4ed46869 5290
89528eb3 5291 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5292 {
df7492f9 5293 int msb, lsb;
fa42c37f 5294
89528eb3
KH
5295 msb = category == (coding_category_utf_16_le
5296 | coding_category_utf_16_le_nosig);
df7492f9 5297 lsb = 1 - msb;
fa42c37f 5298
df7492f9 5299 while (src + 1 < src_end)
fa42c37f 5300 {
df7492f9
KH
5301 c = src[lsb];
5302 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5303 {
df7492f9
KH
5304 int this_eol;
5305
5306 if (c == '\n')
5307 this_eol = EOL_SEEN_LF;
5308 else if (src + 3 >= src_end
5309 || src[msb + 2] != 0
5310 || src[lsb + 2] != '\n')
5311 this_eol = EOL_SEEN_CR;
fa42c37f 5312 else
8f924df7 5313 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5314
5315 if (eol_seen == EOL_SEEN_NONE)
5316 /* This is the first end-of-line. */
5317 eol_seen = this_eol;
5318 else if (eol_seen != this_eol)
fa42c37f 5319 {
df7492f9
KH
5320 /* The found type is different from what found before. */
5321 eol_seen = EOL_SEEN_LF;
5322 break;
fa42c37f 5323 }
df7492f9
KH
5324 if (++total == MAX_EOL_CHECK_COUNT)
5325 break;
fa42c37f 5326 }
df7492f9 5327 src += 2;
fa42c37f 5328 }
bcf26d6a 5329 }
d46c5b12 5330 else
c4825358 5331 {
df7492f9 5332 while (src < src_end)
27901516 5333 {
df7492f9
KH
5334 c = *src++;
5335 if (c == '\n' || c == '\r')
5336 {
5337 int this_eol;
d46c5b12 5338
df7492f9
KH
5339 if (c == '\n')
5340 this_eol = EOL_SEEN_LF;
5341 else if (src >= src_end || *src != '\n')
5342 this_eol = EOL_SEEN_CR;
5343 else
5344 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5345
df7492f9
KH
5346 if (eol_seen == EOL_SEEN_NONE)
5347 /* This is the first end-of-line. */
5348 eol_seen = this_eol;
5349 else if (eol_seen != this_eol)
5350 {
5351 /* The found type is different from what found before. */
5352 eol_seen = EOL_SEEN_LF;
5353 break;
5354 }
5355 if (++total == MAX_EOL_CHECK_COUNT)
5356 break;
5357 }
5358 }
73be902c 5359 }
df7492f9 5360 return eol_seen;
73be902c
KH
5361}
5362
df7492f9 5363
24a73b0a 5364static Lisp_Object
df7492f9
KH
5365adjust_coding_eol_type (coding, eol_seen)
5366 struct coding_system *coding;
5367 int eol_seen;
73be902c 5368{
0be8721c 5369 Lisp_Object eol_type;
8f924df7 5370
df7492f9
KH
5371 eol_type = CODING_ID_EOL_TYPE (coding->id);
5372 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
5373 {
5374 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5375 eol_type = Qunix;
5376 }
6f197c07 5377 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
5378 {
5379 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5380 eol_type = Qdos;
5381 }
6f197c07 5382 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
5383 {
5384 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5385 eol_type = Qmac;
5386 }
5387 return eol_type;
d46c5b12 5388}
4ed46869 5389
df7492f9
KH
5390/* Detect how a text specified in CODING is encoded. If a coding
5391 system is detected, update fields of CODING by the detected coding
5392 system. */
0a28aafb 5393
df7492f9
KH
5394void
5395detect_coding (coding)
d46c5b12 5396 struct coding_system *coding;
d46c5b12 5397{
8f924df7 5398 const unsigned char *src, *src_end;
df7492f9 5399 Lisp_Object attrs, coding_type;
d46c5b12 5400
df7492f9
KH
5401 coding->consumed = coding->consumed_char = 0;
5402 coding->produced = coding->produced_char = 0;
5403 coding_set_source (coding);
1c3478b0 5404
df7492f9 5405 src_end = coding->source + coding->src_bytes;
1c3478b0 5406
df7492f9
KH
5407 /* If we have not yet decided the text encoding type, detect it
5408 now. */
5409 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5410 {
df7492f9
KH
5411 int c, i;
5412
24a73b0a 5413 for (i = 0, src = coding->source; src < src_end; i++, src++)
d46c5b12 5414 {
df7492f9 5415 c = *src;
75e2a253 5416 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
df7492f9
KH
5417 || c == ISO_CODE_SI
5418 || c == ISO_CODE_SO)))
5419 break;
d46c5b12 5420 }
df7492f9
KH
5421 coding->head_ascii = src - (coding->source + coding->consumed);
5422
5423 if (coding->head_ascii < coding->src_bytes)
d46c5b12 5424 {
ff0dacd7
KH
5425 struct coding_detection_info detect_info;
5426 enum coding_category category;
5427 struct coding_system *this;
df7492f9 5428
ff0dacd7 5429 detect_info.checked = detect_info.found = detect_info.rejected = 0;
df7492f9 5430 for (i = 0; i < coding_category_raw_text; i++)
d46c5b12 5431 {
ff0dacd7
KH
5432 category = coding_priorities[i];
5433 this = coding_categories + category;
df7492f9 5434 if (this->id < 0)
fa42c37f 5435 {
df7492f9 5436 /* No coding system of this category is defined. */
ff0dacd7 5437 detect_info.rejected |= (1 << category);
fa42c37f 5438 }
ff0dacd7 5439 else if (category >= coding_category_raw_text)
89528eb3 5440 continue;
ff0dacd7 5441 else if (detect_info.checked & (1 << category))
fa42c37f 5442 {
ff0dacd7
KH
5443 if (detect_info.found & (1 << category))
5444 break;
fa42c37f 5445 }
ff0dacd7
KH
5446 else if ((*(this->detector)) (coding, &detect_info)
5447 && detect_info.found & (1 << category))
24a73b0a
KH
5448 {
5449 if (category == coding_category_utf_16_auto)
5450 {
5451 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5452 category = coding_category_utf_16_le;
5453 else
5454 category = coding_category_utf_16_be;
5455 }
5456 break;
5457 }
d46c5b12 5458 }
ff0dacd7
KH
5459 if (i < coding_category_raw_text)
5460 setup_coding_system (CODING_ID_NAME (this->id), coding);
5461 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5462 setup_coding_system (Qraw_text, coding);
ff0dacd7 5463 else if (detect_info.rejected)
df7492f9 5464 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5465 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5466 {
5467 this = coding_categories + coding_priorities[i];
5468 setup_coding_system (CODING_ID_NAME (this->id), coding);
5469 break;
5470 }
d46c5b12 5471 }
b73bfc1c 5472 }
24a73b0a
KH
5473 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5474 == coding_category_utf_16_auto)
b49a1807
KH
5475 {
5476 Lisp_Object coding_systems;
5477 struct coding_detection_info detect_info;
5478
5479 coding_systems
5480 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5481 detect_info.found = detect_info.rejected = 0;
5482 if (CONSP (coding_systems)
24a73b0a 5483 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
5484 {
5485 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5486 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 5487 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
5488 setup_coding_system (XCDR (coding_systems), coding);
5489 }
5490 }
4ed46869 5491}
4ed46869 5492
d46c5b12 5493
aaaf0b1e 5494static void
df7492f9 5495decode_eol (coding)
aaaf0b1e 5496 struct coding_system *coding;
aaaf0b1e 5497{
24a73b0a
KH
5498 Lisp_Object eol_type;
5499 unsigned char *p, *pbeg, *pend;
5500
5501 eol_type = CODING_ID_EOL_TYPE (coding->id);
5502 if (EQ (eol_type, Qunix))
5503 return;
5504
5505 if (NILP (coding->dst_object))
5506 pbeg = coding->destination;
5507 else
5508 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5509 pend = pbeg + coding->produced;
5510
5511 if (VECTORP (eol_type))
aaaf0b1e 5512 {
df7492f9 5513 int eol_seen = EOL_SEEN_NONE;
4ed46869 5514
24a73b0a 5515 for (p = pbeg; p < pend; p++)
aaaf0b1e 5516 {
df7492f9
KH
5517 if (*p == '\n')
5518 eol_seen |= EOL_SEEN_LF;
5519 else if (*p == '\r')
aaaf0b1e 5520 {
df7492f9 5521 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5522 {
df7492f9
KH
5523 eol_seen |= EOL_SEEN_CRLF;
5524 p++;
aaaf0b1e 5525 }
aaaf0b1e 5526 else
df7492f9 5527 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5528 }
aaaf0b1e 5529 }
24a73b0a
KH
5530 if (eol_seen != EOL_SEEN_NONE
5531 && eol_seen != EOL_SEEN_LF
5532 && eol_seen != EOL_SEEN_CRLF
5533 && eol_seen != EOL_SEEN_CR)
5534 eol_seen = EOL_SEEN_LF;
df7492f9 5535 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 5536 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5537 }
d46c5b12 5538
24a73b0a 5539 if (EQ (eol_type, Qmac))
27901516 5540 {
24a73b0a 5541 for (p = pbeg; p < pend; p++)
df7492f9
KH
5542 if (*p == '\r')
5543 *p = '\n';
4ed46869 5544 }
24a73b0a 5545 else if (EQ (eol_type, Qdos))
df7492f9 5546 {
24a73b0a 5547 int n = 0;
b73bfc1c 5548
24a73b0a
KH
5549 if (NILP (coding->dst_object))
5550 {
5551 for (p = pend - 2; p >= pbeg; p--)
5552 if (*p == '\r')
5553 {
5554 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5555 n++;
5556 }
5557 }
5558 else
5559 {
5560 for (p = pend - 2; p >= pbeg; p--)
5561 if (*p == '\r')
5562 {
5563 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5564 int pos = BYTE_TO_CHAR (pos_byte);
5565
5566 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5567 n++;
5568 }
5569 }
5570 coding->produced -= n;
5571 coding->produced_char -= n;
aaaf0b1e 5572 }
4ed46869
KH
5573}
5574
7d64c6ad 5575
a6f87d34
KH
5576/* Return a translation table (or list of them) from coding system
5577 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5578 decoding (ENCODEP is zero). */
7d64c6ad 5579
e6a54062 5580static Lisp_Object
09ee6fdd
KH
5581get_translation_table (attrs, encodep, max_lookup)
5582 Lisp_Object attrs;
5583 int encodep, *max_lookup;
7d64c6ad
KH
5584{
5585 Lisp_Object standard, translation_table;
09ee6fdd 5586 Lisp_Object val;
7d64c6ad
KH
5587
5588 if (encodep)
5589 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5590 standard = Vstandard_translation_table_for_encode;
5591 else
5592 translation_table = CODING_ATTR_DECODE_TBL (attrs),
5593 standard = Vstandard_translation_table_for_decode;
7d64c6ad 5594 if (NILP (translation_table))
09ee6fdd
KH
5595 translation_table = standard;
5596 else
a6f87d34 5597 {
09ee6fdd
KH
5598 if (SYMBOLP (translation_table))
5599 translation_table = Fget (translation_table, Qtranslation_table);
5600 else if (CONSP (translation_table))
5601 {
5602 translation_table = Fcopy_sequence (translation_table);
5603 for (val = translation_table; CONSP (val); val = XCDR (val))
5604 if (SYMBOLP (XCAR (val)))
5605 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5606 }
5607 if (CHAR_TABLE_P (standard))
5608 {
5609 if (CONSP (translation_table))
5610 translation_table = nconc2 (translation_table,
5611 Fcons (standard, Qnil));
5612 else
5613 translation_table = Fcons (translation_table,
5614 Fcons (standard, Qnil));
5615 }
a6f87d34 5616 }
2170c8f0
KH
5617
5618 if (max_lookup)
09ee6fdd 5619 {
2170c8f0
KH
5620 *max_lookup = 1;
5621 if (CHAR_TABLE_P (translation_table)
5622 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5623 {
5624 val = XCHAR_TABLE (translation_table)->extras[1];
5625 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5626 *max_lookup = XFASTINT (val);
5627 }
5628 else if (CONSP (translation_table))
5629 {
5630 Lisp_Object tail, val;
09ee6fdd 5631
2170c8f0
KH
5632 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5633 if (CHAR_TABLE_P (XCAR (tail))
5634 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5635 {
5636 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5637 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5638 *max_lookup = XFASTINT (val);
5639 }
5640 }
a6f87d34 5641 }
7d64c6ad
KH
5642 return translation_table;
5643}
5644
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables (anything else leaves C alone and sets TRANS to nil).

   C and TRANS must be lvalues.  When a table maps C to a character,
   C is replaced by that character and, for a list, the lookup goes
   on with the next table using the new C.  When a table yields a
   non-nil value that is not a character, the lookup stops and TRANS
   is set to that value; otherwise TRANS ends up nil.

   TABLE is evaluated exactly once.  The internal variables carry a
   trailing underscore so that they cannot capture the caller's
   arguments.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    Lisp_Object lookup_table_ = (table);                                \
                                                                        \
    (trans) = Qnil;                                                     \
    if (CHAR_TABLE_P (lookup_table_))                                   \
      {                                                                 \
        (trans) = CHAR_TABLE_REF (lookup_table_, (c));                  \
        if (CHARACTERP (trans))                                         \
          (c) = XFASTINT (trans), (trans) = Qnil;                       \
      }                                                                 \
    else if (CONSP (lookup_table_))                                     \
      {                                                                 \
        Lisp_Object lookup_tail_;                                       \
                                                                        \
        for (lookup_tail_ = lookup_table_; CONSP (lookup_tail_);        \
             lookup_tail_ = XCDR (lookup_tail_))                        \
          if (CHAR_TABLE_P (XCAR (lookup_tail_)))                       \
            {                                                           \
              (trans) = CHAR_TABLE_REF (XCAR (lookup_tail_), (c));      \
              if (CHARACTERP (trans))                                   \
                (c) = XFASTINT (trans), (trans) = Qnil;                 \
              else if (! NILP (trans))                                  \
                break;                                                  \
            }                                                           \
      }                                                                 \
  } while (0)
5669
7d64c6ad 5670
69a80ea3
KH
5671static Lisp_Object
5672get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5673 Lisp_Object val;
5674 int *buf, *buf_end;
5675 int last_block;
5676 int *from_nchars, *to_nchars;
5677{
433f7f87
KH
5678 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5679 [TO-CHAR ...]. */
69a80ea3
KH
5680 if (CONSP (val))
5681 {
433f7f87 5682 Lisp_Object from, tail;
69a80ea3
KH
5683 int i, len;
5684
433f7f87 5685 for (tail = val; CONSP (tail); tail = XCDR (tail))
69a80ea3 5686 {
433f7f87
KH
5687 val = XCAR (tail);
5688 from = XCAR (val);
5689 len = ASIZE (from);
5690 for (i = 0; i < len; i++)
5691 {
5692 if (buf + i == buf_end)
5693 {
5694 if (! last_block)
5695 return Qt;
5696 break;
5697 }
5698 if (XINT (AREF (from, i)) != buf[i])
5699 break;
5700 }
5701 if (i == len)
5702 {
5703 val = XCDR (val);
5704 *from_nchars = len;
5705 break;
5706 }
69a80ea3 5707 }
433f7f87
KH
5708 if (! CONSP (tail))
5709 return Qnil;
69a80ea3
KH
5710 }
5711 if (VECTORP (val))
5712 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5713 else
5714 *buf = XINT (val);
5715 return val;
5716}
5717
5718
d46c5b12 5719static int
69a80ea3 5720produce_chars (coding, translation_table, last_block)
df7492f9 5721 struct coding_system *coding;
69a80ea3
KH
5722 Lisp_Object translation_table;
5723 int last_block;
4ed46869 5724{
df7492f9
KH
5725 unsigned char *dst = coding->destination + coding->produced;
5726 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5727 int produced;
5728 int produced_chars = 0;
69a80ea3 5729 int carryover = 0;
4ed46869 5730
df7492f9 5731 if (! coding->chars_at_source)
4ed46869 5732 {
df7492f9 5733 /* Characters are in coding->charbuf. */
fba4576f
AS
5734 int *buf = coding->charbuf;
5735 int *buf_end = buf + coding->charbuf_used;
4ed46869 5736
df7492f9
KH
5737 if (BUFFERP (coding->src_object)
5738 && EQ (coding->src_object, coding->dst_object))
8f924df7 5739 dst_end = ((unsigned char *) coding->source) + coding->consumed;
4ed46869 5740
df7492f9 5741 while (buf < buf_end)
4ed46869 5742 {
69a80ea3 5743 int c = *buf, i;
bc4bc72a 5744
df7492f9
KH
5745 if (c >= 0)
5746 {
69a80ea3
KH
5747 int from_nchars = 1, to_nchars = 1;
5748 Lisp_Object trans = Qnil;
5749
09ee6fdd 5750 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 5751 if (! NILP (trans))
69a80ea3
KH
5752 {
5753 trans = get_translation (trans, buf, buf_end, last_block,
5754 &from_nchars, &to_nchars);
5755 if (EQ (trans, Qt))
5756 break;
5757 c = *buf;
5758 }
5759
5760 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5761 {
5762 dst = alloc_destination (coding,
5763 buf_end - buf
5764 + MAX_MULTIBYTE_LENGTH * to_nchars,
5765 dst);
5766 dst_end = coding->destination + coding->dst_bytes;
5767 }
5768
433f7f87 5769 for (i = 0; i < to_nchars; i++)
69a80ea3 5770 {
433f7f87
KH
5771 if (i > 0)
5772 c = XINT (AREF (trans, i));
69a80ea3
KH
5773 if (coding->dst_multibyte
5774 || ! CHAR_BYTE8_P (c))
5775 CHAR_STRING_ADVANCE (c, dst);
5776 else
5777 *dst++ = CHAR_TO_BYTE8 (c);
5778 }
5779 produced_chars += to_nchars;
5780 *buf++ = to_nchars;
5781 while (--from_nchars > 0)
5782 *buf++ = 0;
d46c5b12 5783 }
df7492f9 5784 else
69a80ea3
KH
5785 /* This is an annotation datum. (-C) is the length. */
5786 buf += -c;
4ed46869 5787 }
69a80ea3 5788 carryover = buf_end - buf;
4ed46869 5789 }
fa42c37f 5790 else
fa42c37f 5791 {
8f924df7
KH
5792 const unsigned char *src = coding->source;
5793 const unsigned char *src_end = src + coding->src_bytes;
df7492f9 5794 Lisp_Object eol_type;
fa42c37f 5795
df7492f9 5796 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5797
df7492f9 5798 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 5799 {
df7492f9 5800 if (coding->src_multibyte)
fa42c37f 5801 {
71c81426 5802 int multibytep = 1;
df7492f9 5803 int consumed_chars;
d46c5b12 5804
df7492f9
KH
5805 while (1)
5806 {
8f924df7 5807 const unsigned char *src_base = src;
df7492f9 5808 int c;
b73bfc1c 5809
df7492f9
KH
5810 ONE_MORE_BYTE (c);
5811 if (c == '\r')
5812 {
5813 if (EQ (eol_type, Qdos))
5814 {
98725083
KH
5815 if (src == src_end)
5816 {
065e3595
KH
5817 record_conversion_result
5818 (coding, CODING_RESULT_INSUFFICIENT_SRC);
98725083
KH
5819 goto no_more_source;
5820 }
5821 if (*src == '\n')
df7492f9
KH
5822 c = *src++;
5823 }
5824 else if (EQ (eol_type, Qmac))
5825 c = '\n';
5826 }
5827 if (dst == dst_end)
5828 {
2c78b7e1 5829 coding->consumed = src - coding->source;
b73bfc1c 5830
2c78b7e1 5831 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5832 dst_end = (unsigned char *) src;
2c78b7e1
KH
5833 if (dst == dst_end)
5834 {
5835 dst = alloc_destination (coding, src_end - src + 1,
5836 dst);
5837 dst_end = coding->destination + coding->dst_bytes;
5838 coding_set_source (coding);
5839 src = coding->source + coding->consumed;
5840 src_end = coding->source + coding->src_bytes;
5841 }
df7492f9
KH
5842 }
5843 *dst++ = c;
5844 produced_chars++;
5845 }
5846 no_more_source:
5847 ;
fa42c37f
KH
5848 }
5849 else
df7492f9
KH
5850 while (src < src_end)
5851 {
71c81426 5852 int multibytep = 1;
df7492f9 5853 int c = *src++;
b73bfc1c 5854
df7492f9
KH
5855 if (c == '\r')
5856 {
5857 if (EQ (eol_type, Qdos))
5858 {
5859 if (src < src_end
5860 && *src == '\n')
5861 c = *src++;
5862 }
5863 else if (EQ (eol_type, Qmac))
5864 c = '\n';
5865 }
5866 if (dst >= dst_end - 1)
5867 {
2c78b7e1 5868 coding->consumed = src - coding->source;
df7492f9 5869
2c78b7e1 5870 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5871 dst_end = (unsigned char *) src;
2c78b7e1
KH
5872 if (dst >= dst_end - 1)
5873 {
5874 dst = alloc_destination (coding, src_end - src + 2,
5875 dst);
5876 dst_end = coding->destination + coding->dst_bytes;
5877 coding_set_source (coding);
5878 src = coding->source + coding->consumed;
5879 src_end = coding->source + coding->src_bytes;
5880 }
df7492f9
KH
5881 }
5882 EMIT_ONE_BYTE (c);
5883 }
d46c5b12 5884 }
df7492f9
KH
5885 else
5886 {
5887 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 5888 {
df7492f9 5889 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5890
df7492f9 5891 if (require > 0)
fa42c37f 5892 {
df7492f9
KH
5893 EMACS_INT offset = src - coding->source;
5894
5895 dst = alloc_destination (coding, require, dst);
5896 coding_set_source (coding);
5897 src = coding->source + offset;
5898 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
5899 }
5900 }
df7492f9
KH
5901 produced_chars = coding->src_chars;
5902 while (src < src_end)
fa42c37f 5903 {
df7492f9
KH
5904 int c = *src++;
5905
5906 if (c == '\r')
5907 {
5908 if (EQ (eol_type, Qdos))
5909 {
5910 if (src < src_end
5911 && *src == '\n')
5912 c = *src++;
5913 produced_chars--;
5914 }
5915 else if (EQ (eol_type, Qmac))
5916 c = '\n';
5917 }
5918 *dst++ = c;
fa42c37f
KH
5919 }
5920 }
2c78b7e1
KH
5921 coding->consumed = coding->src_bytes;
5922 coding->consumed_char = coding->src_chars;
fa42c37f
KH
5923 }
5924
df7492f9
KH
5925 produced = dst - (coding->destination + coding->produced);
5926 if (BUFFERP (coding->dst_object))
5927 insert_from_gap (produced_chars, produced);
5928 coding->produced += produced;
5929 coding->produced_char += produced_chars;
69a80ea3 5930 return carryover;
fa42c37f
KH
5931}
5932
ff0dacd7
KH
5933/* Compose text in CODING->object according to the annotation data at
5934 CHARBUF. CHARBUF is an array:
5935 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5936 */
4ed46869 5937
df7492f9 5938static INLINE void
69a80ea3 5939produce_composition (coding, charbuf, pos)
4ed46869 5940 struct coding_system *coding;
df7492f9 5941 int *charbuf;
69a80ea3 5942 EMACS_INT pos;
4ed46869 5943{
df7492f9 5944 int len;
69a80ea3 5945 EMACS_INT to;
df7492f9 5946 enum composition_method method;
df7492f9 5947 Lisp_Object components;
fa42c37f 5948
df7492f9 5949 len = -charbuf[0];
69a80ea3
KH
5950 to = pos + charbuf[2];
5951 method = (enum composition_method) (charbuf[3]);
d46c5b12 5952
df7492f9
KH
5953 if (method == COMPOSITION_RELATIVE)
5954 components = Qnil;
d46c5b12 5955 else
d46c5b12 5956 {
df7492f9
KH
5957 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5958 int i;
b73bfc1c 5959
69a80ea3
KH
5960 len -= 4;
5961 charbuf += 4;
df7492f9
KH
5962 for (i = 0; i < len; i++)
5963 args[i] = make_number (charbuf[i]);
5964 components = (method == COMPOSITION_WITH_ALTCHARS
5965 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 5966 }
69a80ea3 5967 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
5968}
5969
d46c5b12 5970
ff0dacd7
KH
5971/* Put `charset' property on text in CODING->object according to
5972 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 5973 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 5974 */
d46c5b12 5975
ff0dacd7 5976static INLINE void
69a80ea3 5977produce_charset (coding, charbuf, pos)
d46c5b12 5978 struct coding_system *coding;
ff0dacd7 5979 int *charbuf;
69a80ea3 5980 EMACS_INT pos;
d46c5b12 5981{
69a80ea3
KH
5982 EMACS_INT from = pos - charbuf[2];
5983 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 5984
69a80ea3 5985 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
5986 Qcharset, CHARSET_NAME (charset),
5987 coding->dst_object);
d46c5b12
KH
5988}
5989
d46c5b12 5990
/* Number of elements first tried for the work area CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record its size in
   CODING->charbuf_size.  The size starts at CHARBUF_SIZE elements
   and is halved after each failed attempt while it is larger than
   1024.

   This must be a macro: the memory comes from the stack frame of the
   calling function and stays valid only until that function returns.

   CAUTION: if no memory could be obtained, the macro records
   CODING_RESULT_INSUFFICIENT_MEM and executes `return
   CODING->result' in the calling function, so it can be used only in
   functions that return int, and before they acquire anything that
   needs to be undone.  */

#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
4ed46869 6012
d46c5b12
KH
6013
6014static void
69a80ea3 6015produce_annotation (coding, pos)
d46c5b12 6016 struct coding_system *coding;
69a80ea3 6017 EMACS_INT pos;
d46c5b12 6018{
df7492f9
KH
6019 int *charbuf = coding->charbuf;
6020 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6021
ff0dacd7
KH
6022 if (NILP (coding->dst_object))
6023 return;
d46c5b12 6024
df7492f9 6025 while (charbuf < charbuf_end)
a84f1519 6026 {
df7492f9 6027 if (*charbuf >= 0)
69a80ea3 6028 pos += *charbuf++;
d46c5b12 6029 else
d46c5b12 6030 {
df7492f9 6031 int len = -*charbuf;
ff0dacd7 6032 switch (charbuf[1])
df7492f9
KH
6033 {
6034 case CODING_ANNOTATE_COMPOSITION_MASK:
69a80ea3 6035 produce_composition (coding, charbuf, pos);
df7492f9 6036 break;
ff0dacd7 6037 case CODING_ANNOTATE_CHARSET_MASK:
69a80ea3 6038 produce_charset (coding, charbuf, pos);
ff0dacd7 6039 break;
df7492f9
KH
6040 default:
6041 abort ();
6042 }
6043 charbuf += len;
d46c5b12 6044 }
a84f1519 6045 }
d46c5b12
KH
6046}
6047
df7492f9
KH
6048/* Decode the data at CODING->src_object into CODING->dst_object.
6049 CODING->src_object is a buffer, a string, or nil.
6050 CODING->dst_object is a buffer.
d46c5b12 6051
df7492f9
KH
6052 If CODING->src_object is a buffer, it must be the current buffer.
6053 In this case, if CODING->src_pos is positive, it is a position of
6054 the source text in the buffer, otherwise, the source text is in the
6055 gap area of the buffer, and CODING->src_pos specifies the offset of
6056 the text from GPT (which must be the same as PT). If this is the
6057 same buffer as CODING->dst_object, CODING->src_pos must be
6058 negative.
d46c5b12 6059
df7492f9
KH
6060 If CODING->src_object is a string, CODING->src_pos in an index to
6061 that string.
d46c5b12 6062
df7492f9
KH
6063 If CODING->src_object is nil, CODING->source must already point to
6064 the non-relocatable memory area. In this case, CODING->src_pos is
6065 an offset from CODING->source.
73be902c 6066
df7492f9
KH
6067 The decoded data is inserted at the current point of the buffer
6068 CODING->dst_object.
6069*/
d46c5b12 6070
df7492f9
KH
6071static int
6072decode_coding (coding)
d46c5b12 6073 struct coding_system *coding;
d46c5b12 6074{
df7492f9 6075 Lisp_Object attrs;
24a73b0a 6076 Lisp_Object undo_list;
7d64c6ad 6077 Lisp_Object translation_table;
69a80ea3
KH
6078 int carryover;
6079 int i;
d46c5b12 6080
df7492f9
KH
6081 if (BUFFERP (coding->src_object)
6082 && coding->src_pos > 0
6083 && coding->src_pos < GPT
6084 && coding->src_pos + coding->src_chars > GPT)
6085 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 6086
24a73b0a 6087 undo_list = Qt;
df7492f9 6088 if (BUFFERP (coding->dst_object))
1c3478b0 6089 {
df7492f9
KH
6090 if (current_buffer != XBUFFER (coding->dst_object))
6091 set_buffer_internal (XBUFFER (coding->dst_object));
6092 if (GPT != PT)
6093 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
6094 undo_list = current_buffer->undo_list;
6095 current_buffer->undo_list = Qt;
1c3478b0
KH
6096 }
6097
df7492f9
KH
6098 coding->consumed = coding->consumed_char = 0;
6099 coding->produced = coding->produced_char = 0;
6100 coding->chars_at_source = 0;
065e3595 6101 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 6102 coding->errors = 0;
1c3478b0 6103
df7492f9
KH
6104 ALLOC_CONVERSION_WORK_AREA (coding);
6105
6106 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 6107 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 6108
69a80ea3 6109 carryover = 0;
df7492f9 6110 do
b73bfc1c 6111 {
69a80ea3
KH
6112 EMACS_INT pos = coding->dst_pos + coding->produced_char;
6113
df7492f9
KH
6114 coding_set_source (coding);
6115 coding->annotated = 0;
69a80ea3 6116 coding->charbuf_used = carryover;
df7492f9 6117 (*(coding->decoder)) (coding);
df7492f9 6118 coding_set_destination (coding);
69a80ea3 6119 carryover = produce_chars (coding, translation_table, 0);
df7492f9 6120 if (coding->annotated)
69a80ea3
KH
6121 produce_annotation (coding, pos);
6122 for (i = 0; i < carryover; i++)
6123 coding->charbuf[i]
6124 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 6125 }
df7492f9
KH
6126 while (coding->consumed < coding->src_bytes
6127 && ! coding->result);
d46c5b12 6128
69a80ea3
KH
6129 if (carryover > 0)
6130 {
6131 coding_set_destination (coding);
6132 coding->charbuf_used = carryover;
6133 produce_chars (coding, translation_table, 1);
6134 }
6135
df7492f9
KH
6136 coding->carryover_bytes = 0;
6137 if (coding->consumed < coding->src_bytes)
d46c5b12 6138 {
df7492f9 6139 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 6140 const unsigned char *src;
df7492f9
KH
6141
6142 coding_set_source (coding);
6143 coding_set_destination (coding);
6144 src = coding->source + coding->consumed;
6145
6146 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 6147 {
df7492f9
KH
6148 /* Flush out unprocessed data as binary chars. We are sure
6149 that the number of data is less than the size of
6150 coding->charbuf. */
065e3595 6151 coding->charbuf_used = 0;
df7492f9 6152 while (nbytes-- > 0)
1c3478b0 6153 {
df7492f9 6154 int c = *src++;
98725083
KH
6155
6156 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
1c3478b0 6157 }
f6cbaf43 6158 produce_chars (coding, Qnil, 1);
d46c5b12 6159 }
d46c5b12 6160 else
df7492f9
KH
6161 {
6162 /* Record unprocessed bytes in coding->carryover. We are
6163 sure that the number of data is less than the size of
6164 coding->carryover. */
6165 unsigned char *p = coding->carryover;
6166
6167 coding->carryover_bytes = nbytes;
6168 while (nbytes-- > 0)
6169 *p++ = *src++;
1c3478b0 6170 }
df7492f9 6171 coding->consumed = coding->src_bytes;
b73bfc1c 6172 }
69f76525 6173
24a73b0a
KH
6174 if (BUFFERP (coding->dst_object))
6175 {
6176 current_buffer->undo_list = undo_list;
6177 record_insert (coding->dst_pos, coding->produced_char);
6178 }
6179 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6180 decode_eol (coding);
73be902c 6181 return coding->result;
4ed46869
KH
6182}
6183
aaaf0b1e 6184
e1c23804 6185/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
6186 ending before LIMIT of CODING->src_object (buffer or string), store
6187 the data in BUF, set *STOP to a starting position of the next
6188 composition (if any) or to LIMIT, and return the address of the
6189 next element of BUF.
6190
6191 If such an annotation is not found, set *STOP to a starting
6192 position of a composition after POS (if any) or to LIMIT, and
6193 return BUF. */
6194
6195static INLINE int *
6196handle_composition_annotation (pos, limit, coding, buf, stop)
6197 EMACS_INT pos, limit;
aaaf0b1e 6198 struct coding_system *coding;
ff0dacd7
KH
6199 int *buf;
6200 EMACS_INT *stop;
aaaf0b1e 6201{
ff0dacd7
KH
6202 EMACS_INT start, end;
6203 Lisp_Object prop;
aaaf0b1e 6204
ff0dacd7
KH
6205 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6206 || end > limit)
6207 *stop = limit;
6208 else if (start > pos)
6209 *stop = start;
6210 else
aaaf0b1e 6211 {
ff0dacd7 6212 if (start == pos)
aaaf0b1e 6213 {
ff0dacd7
KH
6214 /* We found a composition. Store the corresponding
6215 annotation data in BUF. */
6216 int *head = buf;
6217 enum composition_method method = COMPOSITION_METHOD (prop);
6218 int nchars = COMPOSITION_LENGTH (prop);
6219
69a80ea3 6220 ADD_COMPOSITION_DATA (buf, nchars, method);
ff0dacd7 6221 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 6222 {
ff0dacd7
KH
6223 Lisp_Object components;
6224 int len, i, i_byte;
6225
6226 components = COMPOSITION_COMPONENTS (prop);
6227 if (VECTORP (components))
aaaf0b1e 6228 {
ff0dacd7
KH
6229 len = XVECTOR (components)->size;
6230 for (i = 0; i < len; i++)
6231 *buf++ = XINT (AREF (components, i));
aaaf0b1e 6232 }
ff0dacd7 6233 else if (STRINGP (components))
aaaf0b1e 6234 {
8f924df7 6235 len = SCHARS (components);
ff0dacd7
KH
6236 i = i_byte = 0;
6237 while (i < len)
6238 {
6239 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6240 buf++;
6241 }
6242 }
6243 else if (INTEGERP (components))
6244 {
6245 len = 1;
6246 *buf++ = XINT (components);
6247 }
6248 else if (CONSP (components))
6249 {
6250 for (len = 0; CONSP (components);
6251 len++, components = XCDR (components))
6252 *buf++ = XINT (XCAR (components));
aaaf0b1e 6253 }
aaaf0b1e 6254 else
ff0dacd7
KH
6255 abort ();
6256 *head -= len;
aaaf0b1e 6257 }
aaaf0b1e 6258 }
ff0dacd7
KH
6259
6260 if (find_composition (end, limit, &start, &end, &prop,
6261 coding->src_object)
6262 && end <= limit)
6263 *stop = start;
6264 else
6265 *stop = limit;
aaaf0b1e 6266 }
ff0dacd7
KH
6267 return buf;
6268}
6269
6270
e1c23804 6271/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
6272 CODING->src_object (buffer of string), store the data in BUF, set
6273 *STOP to the position where the value of `charset' property changes
6274 (limiting by LIMIT), and return the address of the next element of
6275 BUF.
6276
6277 If the property value is nil, set *STOP to the position where the
6278 property value is non-nil (limiting by LIMIT), and return BUF. */
6279
6280static INLINE int *
6281handle_charset_annotation (pos, limit, coding, buf, stop)
6282 EMACS_INT pos, limit;
6283 struct coding_system *coding;
6284 int *buf;
6285 EMACS_INT *stop;
6286{
6287 Lisp_Object val, next;
6288 int id;
6289
6290 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6291 if (! NILP (val) && CHARSETP (val))
6292 id = XINT (CHARSET_SYMBOL_ID (val));
6293 else
6294 id = -1;
69a80ea3 6295 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
6296 next = Fnext_single_property_change (make_number (pos), Qcharset,
6297 coding->src_object,
6298 make_number (limit));
6299 *stop = XINT (next);
6300 return buf;
6301}
6302
6303
df7492f9 6304static void
09ee6fdd 6305consume_chars (coding, translation_table, max_lookup)
df7492f9 6306 struct coding_system *coding;
433f7f87 6307 Lisp_Object translation_table;
09ee6fdd 6308 int max_lookup;
df7492f9
KH
6309{
6310 int *buf = coding->charbuf;
ff0dacd7 6311 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6312 const unsigned char *src = coding->source + coding->consumed;
4776e638 6313 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6314 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6315 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6316 int multibytep = coding->src_multibyte;
6317 Lisp_Object eol_type;
6318 int c;
ff0dacd7 6319 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 6320 int *lookup_buf = NULL;
433f7f87
KH
6321
6322 if (! NILP (translation_table))
09ee6fdd 6323 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 6324
df7492f9
KH
6325 eol_type = CODING_ID_EOL_TYPE (coding->id);
6326 if (VECTORP (eol_type))
6327 eol_type = Qunix;
88993dfd 6328
df7492f9
KH
6329 /* Note: composition handling is not yet implemented. */
6330 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6331
0b5670c9
KH
6332 if (NILP (coding->src_object))
6333 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6334 else
0b5670c9
KH
6335 {
6336 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6337 stop = stop_composition = pos;
6338 else
6339 stop = stop_composition = end_pos;
6340 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6341 stop = stop_charset = pos;
6342 else
6343 stop_charset = end_pos;
6344 }
ec6d2bb8 6345
24a73b0a 6346 /* Compensate for CRLF and conversion. */
ff0dacd7 6347 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6348 while (buf < buf_end)
aaaf0b1e 6349 {
433f7f87
KH
6350 Lisp_Object trans;
6351
df7492f9 6352 if (pos == stop)
ec6d2bb8 6353 {
df7492f9
KH
6354 if (pos == end_pos)
6355 break;
ff0dacd7
KH
6356 if (pos == stop_composition)
6357 buf = handle_composition_annotation (pos, end_pos, coding,
6358 buf, &stop_composition);
6359 if (pos == stop_charset)
6360 buf = handle_charset_annotation (pos, end_pos, coding,
6361 buf, &stop_charset);
6362 stop = (stop_composition < stop_charset
6363 ? stop_composition : stop_charset);
df7492f9
KH
6364 }
6365
6366 if (! multibytep)
4776e638 6367 {
d3e4cb56 6368 EMACS_INT bytes;
aaaf0b1e 6369
d3e4cb56
KH
6370 if (! CODING_FOR_UNIBYTE (coding)
6371 && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
4776e638
KH
6372 c = STRING_CHAR_ADVANCE (src), pos += bytes;
6373 else
6374 c = *src++, pos++;
6375 }
df7492f9 6376 else
4776e638 6377 c = STRING_CHAR_ADVANCE (src), pos++;
df7492f9
KH
6378 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6379 c = '\n';
6380 if (! EQ (eol_type, Qunix))
aaaf0b1e 6381 {
df7492f9 6382 if (c == '\n')
aaaf0b1e 6383 {
df7492f9
KH
6384 if (EQ (eol_type, Qdos))
6385 *buf++ = '\r';
6386 else
6387 c = '\r';
aaaf0b1e
KH
6388 }
6389 }
433f7f87 6390
e6a54062 6391 trans = Qnil;
09ee6fdd 6392 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6393 if (NILP (trans))
433f7f87
KH
6394 *buf++ = c;
6395 else
6396 {
6397 int from_nchars = 1, to_nchars = 1;
6398 int *lookup_buf_end;
6399 const unsigned char *p = src;
6400 int i;
6401
6402 lookup_buf[0] = c;
6403 for (i = 1; i < max_lookup && p < src_end; i++)
6404 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6405 lookup_buf_end = lookup_buf + i;
6406 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6407 &from_nchars, &to_nchars);
6408 if (EQ (trans, Qt)
6409 || buf + to_nchars > buf_end)
6410 break;
6411 *buf++ = *lookup_buf;
6412 for (i = 1; i < to_nchars; i++)
6413 *buf++ = XINT (AREF (trans, i));
6414 for (i = 1; i < from_nchars; i++, pos++)
6415 src += MULTIBYTE_LENGTH_NO_CHECK (src);
6416 }
aaaf0b1e 6417 }
ec6d2bb8 6418
df7492f9
KH
6419 coding->consumed = src - coding->source;
6420 coding->consumed_char = pos - coding->src_pos;
6421 coding->charbuf_used = buf - coding->charbuf;
6422 coding->chars_at_source = 0;
aaaf0b1e
KH
6423}
6424
4ed46869 6425
df7492f9
KH
6426/* Encode the text at CODING->src_object into CODING->dst_object.
6427 CODING->src_object is a buffer or a string.
6428 CODING->dst_object is a buffer or nil.
6429
6430 If CODING->src_object is a buffer, it must be the current buffer.
6431 In this case, if CODING->src_pos is positive, it is a position of
6432 the source text in the buffer, otherwise. the source text is in the
6433 gap area of the buffer, and coding->src_pos specifies the offset of
6434 the text from GPT (which must be the same as PT). If this is the
6435 same buffer as CODING->dst_object, CODING->src_pos must be
6436 negative and CODING should not have `pre-write-conversion'.
6437
6438 If CODING->src_object is a string, CODING should not have
6439 `pre-write-conversion'.
6440
6441 If CODING->dst_object is a buffer, the encoded data is inserted at
6442 the current point of that buffer.
6443
6444 If CODING->dst_object is nil, the encoded data is placed at the
6445 memory area specified by CODING->destination. */
6446
6447static int
6448encode_coding (coding)
4ed46869 6449 struct coding_system *coding;
4ed46869 6450{
df7492f9 6451 Lisp_Object attrs;
7d64c6ad 6452 Lisp_Object translation_table;
09ee6fdd 6453 int max_lookup;
9861e777 6454
df7492f9 6455 attrs = CODING_ID_ATTRS (coding->id);
09ee6fdd 6456 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 6457
df7492f9 6458 if (BUFFERP (coding->dst_object))
8844fa83 6459 {
df7492f9
KH
6460 set_buffer_internal (XBUFFER (coding->dst_object));
6461 coding->dst_multibyte
6462 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6463 }
4ed46869 6464
b73bfc1c 6465 coding->consumed = coding->consumed_char = 0;
df7492f9 6466 coding->produced = coding->produced_char = 0;
065e3595 6467 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 6468 coding->errors = 0;
b73bfc1c 6469
df7492f9 6470 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6471
df7492f9
KH
6472 do {
6473 coding_set_source (coding);
09ee6fdd 6474 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
6475 coding_set_destination (coding);
6476 (*(coding->encoder)) (coding);
6477 } while (coding->consumed_char < coding->src_chars);
6478
6479 if (BUFFERP (coding->dst_object))
6480 insert_from_gap (coding->produced_char, coding->produced);
6481
6482 return (coding->result);
ec6d2bb8
KH
6483}
6484
fb88bf2d 6485
24a73b0a
KH
/* Name (or base name) of work buffer for code conversion.  */
static Lisp_Object Vcode_conversion_workbuf_name;

/* A working buffer used by the top level conversion.  Once it is
   created, it is never destroyed.  It has the name
   Vcode_conversion_workbuf_name.  The other working buffers are
   destroyed after the use is finished, and their names are modified
   versions of Vcode_conversion_workbuf_name.  */
static Lisp_Object Vcode_conversion_reused_workbuf;

/* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
   make_conversion_work_buffer post-increments this on every call, so
   the value may grow beyond 1 while conversions are nested;
   code_conversion_restore sets it back to 0 when the reused buffer
   is released.  */
static int reused_workbuf_in_use;
4ed46869 6498
24a73b0a
KH
6499
6500/* Return a working buffer of code convesion. MULTIBYTE specifies the
6501 multibyteness of returning buffer. */
b73bfc1c 6502
f6cbaf43 6503static Lisp_Object
24a73b0a 6504make_conversion_work_buffer (multibyte)
f6cbaf43 6505 int multibyte;
df7492f9 6506{
24a73b0a
KH
6507 Lisp_Object name, workbuf;
6508 struct buffer *current;
4ed46869 6509
24a73b0a 6510 if (reused_workbuf_in_use++)
065e3595
KH
6511 {
6512 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6513 workbuf = Fget_buffer_create (name);
6514 }
df7492f9 6515 else
065e3595
KH
6516 {
6517 name = Vcode_conversion_workbuf_name;
6518 workbuf = Fget_buffer_create (name);
6519 if (NILP (Vcode_conversion_reused_workbuf))
6520 Vcode_conversion_reused_workbuf = workbuf;
6521 }
24a73b0a
KH
6522 current = current_buffer;
6523 set_buffer_internal (XBUFFER (workbuf));
6524 Ferase_buffer ();
df7492f9 6525 current_buffer->undo_list = Qt;
24a73b0a 6526 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 6527 set_buffer_internal (current);
24a73b0a 6528 return workbuf;
df7492f9 6529}
d46c5b12 6530
24a73b0a 6531
4776e638 6532static Lisp_Object
24a73b0a
KH
6533code_conversion_restore (arg)
6534 Lisp_Object arg;
4776e638 6535{
24a73b0a
KH
6536 Lisp_Object current, workbuf;
6537
6538 current = XCAR (arg);
6539 workbuf = XCDR (arg);
6540 if (! NILP (workbuf))
6541 {
6542 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6543 reused_workbuf_in_use = 0;
6544 else if (! NILP (Fbuffer_live_p (workbuf)))
6545 Fkill_buffer (workbuf);
6546 }
6547 set_buffer_internal (XBUFFER (current));
4776e638
KH
6548 return Qnil;
6549}
b73bfc1c 6550
24a73b0a
KH
6551Lisp_Object
6552code_conversion_save (with_work_buf, multibyte)
4776e638 6553 int with_work_buf, multibyte;
df7492f9 6554{
24a73b0a 6555 Lisp_Object workbuf = Qnil;
b73bfc1c 6556
4776e638 6557 if (with_work_buf)
24a73b0a
KH
6558 workbuf = make_conversion_work_buffer (multibyte);
6559 record_unwind_protect (code_conversion_restore,
6560 Fcons (Fcurrent_buffer (), workbuf));
4776e638 6561 return workbuf;
df7492f9 6562}
d46c5b12 6563
df7492f9
KH
6564int
6565decode_coding_gap (coding, chars, bytes)
6566 struct coding_system *coding;
6567 EMACS_INT chars, bytes;
6568{
6569 int count = specpdl_ptr - specpdl;
5e5c78be 6570 Lisp_Object attrs;
fb88bf2d 6571
24a73b0a 6572 code_conversion_save (0, 0);
ec6d2bb8 6573
24a73b0a 6574 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6575 coding->src_chars = chars;
6576 coding->src_bytes = bytes;
6577 coding->src_pos = -chars;
6578 coding->src_pos_byte = -bytes;
6579 coding->src_multibyte = chars < bytes;
24a73b0a 6580 coding->dst_object = coding->src_object;
df7492f9
KH
6581 coding->dst_pos = PT;
6582 coding->dst_pos_byte = PT_BYTE;
71c81426 6583 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6584 coding->mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6585
df7492f9
KH
6586 if (CODING_REQUIRE_DETECTION (coding))
6587 detect_coding (coding);
8f924df7 6588
df7492f9 6589 decode_coding (coding);
d46c5b12 6590
5e5c78be
KH
6591 attrs = CODING_ID_ATTRS (coding->id);
6592 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6593 {
5e5c78be
KH
6594 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6595 Lisp_Object val;
6596
6597 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6598 val = call1 (CODING_ATTR_POST_READ (attrs),
6599 make_number (coding->produced_char));
5e5c78be
KH
6600 CHECK_NATNUM (val);
6601 coding->produced_char += Z - prev_Z;
6602 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6603 }
4ed46869 6604
df7492f9 6605 unbind_to (count, Qnil);
b73bfc1c
KH
6606 return coding->result;
6607}
52d41803 6608
4ed46869 6609int
df7492f9 6610encode_coding_gap (coding, chars, bytes)
4ed46869 6611 struct coding_system *coding;
df7492f9 6612 EMACS_INT chars, bytes;
4ed46869 6613{
df7492f9 6614 int count = specpdl_ptr - specpdl;
4ed46869 6615
24a73b0a 6616 code_conversion_save (0, 0);
4ed46869 6617
24a73b0a 6618 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6619 coding->src_chars = chars;
6620 coding->src_bytes = bytes;
6621 coding->src_pos = -chars;
6622 coding->src_pos_byte = -bytes;
6623 coding->src_multibyte = chars < bytes;
6624 coding->dst_object = coding->src_object;
6625 coding->dst_pos = PT;
6626 coding->dst_pos_byte = PT_BYTE;
4ed46869 6627
df7492f9 6628 encode_coding (coding);
b73bfc1c 6629
df7492f9
KH
6630 unbind_to (count, Qnil);
6631 return coding->result;
6632}
4ed46869 6633
d46c5b12 6634
df7492f9
KH
6635/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6636 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6637
df7492f9 6638 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 6639
df7492f9
KH
6640 If it is a buffer, the text is at point of the buffer. FROM and TO
6641 are positions in the buffer.
b73bfc1c 6642
df7492f9
KH
6643 If it is a string, the text is at the beginning of the string.
6644 FROM and TO are indices to the string.
4ed46869 6645
df7492f9
KH
6646 If it is nil, the text is at coding->source. FROM and TO are
6647 indices to coding->source.
bb10be8b 6648
df7492f9 6649 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 6650
df7492f9
KH
6651 If it is a buffer, the decoded text is inserted at point of the
6652 buffer. If the buffer is the same as SRC_OBJECT, the source text
6653 is deleted.
4ed46869 6654
df7492f9
KH
6655 If it is Qt, a string is made from the decoded text, and
6656 set in CODING->dst_object.
d46c5b12 6657
df7492f9 6658 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6659 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6660 CODING->destination by xmalloc. If the decoded text is longer than
6661 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6662 */
d46c5b12 6663
df7492f9
KH
6664void
6665decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6666 dst_object)
d46c5b12 6667 struct coding_system *coding;
df7492f9
KH
6668 Lisp_Object src_object;
6669 EMACS_INT from, from_byte, to, to_byte;
6670 Lisp_Object dst_object;
d46c5b12 6671{
df7492f9
KH
6672 int count = specpdl_ptr - specpdl;
6673 unsigned char *destination;
6674 EMACS_INT dst_bytes;
6675 EMACS_INT chars = to - from;
6676 EMACS_INT bytes = to_byte - from_byte;
6677 Lisp_Object attrs;
4776e638
KH
6678 Lisp_Object buffer;
6679 int saved_pt = -1, saved_pt_byte;
d46c5b12 6680
4776e638 6681 buffer = Fcurrent_buffer ();
93dec019 6682
df7492f9 6683 if (NILP (dst_object))
d46c5b12 6684 {
df7492f9
KH
6685 destination = coding->destination;
6686 dst_bytes = coding->dst_bytes;
d46c5b12 6687 }
93dec019 6688
df7492f9
KH
6689 coding->src_object = src_object;
6690 coding->src_chars = chars;
6691 coding->src_bytes = bytes;
6692 coding->src_multibyte = chars < bytes;
70ad9fc4 6693
df7492f9 6694 if (STRINGP (src_object))
d46c5b12 6695 {
df7492f9
KH
6696 coding->src_pos = from;
6697 coding->src_pos_byte = from_byte;
d46c5b12 6698 }
df7492f9 6699 else if (BUFFERP (src_object))
88993dfd 6700 {
df7492f9
KH
6701 set_buffer_internal (XBUFFER (src_object));
6702 if (from != GPT)
6703 move_gap_both (from, from_byte);
6704 if (EQ (src_object, dst_object))
fb88bf2d 6705 {
4776e638 6706 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6707 TEMP_SET_PT_BOTH (from, from_byte);
6708 del_range_both (from, from_byte, to, to_byte, 1);
6709 coding->src_pos = -chars;
6710 coding->src_pos_byte = -bytes;
fb88bf2d 6711 }
df7492f9 6712 else
fb88bf2d 6713 {
df7492f9
KH
6714 coding->src_pos = from;
6715 coding->src_pos_byte = from_byte;
fb88bf2d 6716 }
88993dfd
KH
6717 }
6718
df7492f9
KH
6719 if (CODING_REQUIRE_DETECTION (coding))
6720 detect_coding (coding);
6721 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 6722
2cb26057
KH
6723 if (EQ (dst_object, Qt)
6724 || (! NILP (CODING_ATTR_POST_READ (attrs))
6725 && NILP (dst_object)))
b73bfc1c 6726 {
24a73b0a 6727 coding->dst_object = code_conversion_save (1, 1);
df7492f9
KH
6728 coding->dst_pos = BEG;
6729 coding->dst_pos_byte = BEG_BYTE;
6730 coding->dst_multibyte = 1;
b73bfc1c 6731 }
df7492f9 6732 else if (BUFFERP (dst_object))
d46c5b12 6733 {
24a73b0a 6734 code_conversion_save (0, 0);
df7492f9
KH
6735 coding->dst_object = dst_object;
6736 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6737 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6738 coding->dst_multibyte
6739 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
6740 }
6741 else
6742 {
24a73b0a 6743 code_conversion_save (0, 0);
df7492f9
KH
6744 coding->dst_object = Qnil;
6745 coding->dst_multibyte = 1;
d46c5b12
KH
6746 }
6747
df7492f9 6748 decode_coding (coding);
fa46990e 6749
df7492f9
KH
6750 if (BUFFERP (coding->dst_object))
6751 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 6752
df7492f9 6753 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6754 {
df7492f9
KH
6755 struct gcpro gcpro1, gcpro2;
6756 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6757 Lisp_Object val;
d46c5b12 6758
c0cc7f7f 6759 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6760 GCPRO2 (coding->src_object, coding->dst_object);
6761 val = call1 (CODING_ATTR_POST_READ (attrs),
6762 make_number (coding->produced_char));
6763 UNGCPRO;
6764 CHECK_NATNUM (val);
6765 coding->produced_char += Z - prev_Z;
6766 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6767 }
de79a6a5 6768
df7492f9 6769 if (EQ (dst_object, Qt))
ec6d2bb8 6770 {
df7492f9
KH
6771 coding->dst_object = Fbuffer_string ();
6772 }
6773 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6774 {
6775 set_buffer_internal (XBUFFER (coding->dst_object));
6776 if (dst_bytes < coding->produced)
6777 {
6778 destination
6779 = (unsigned char *) xrealloc (destination, coding->produced);
6780 if (! destination)
6781 {
065e3595
KH
6782 record_conversion_result (coding,
6783 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
6784 unbind_to (count, Qnil);
6785 return;
6786 }
6787 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6788 move_gap_both (BEGV, BEGV_BYTE);
6789 bcopy (BEGV_ADDR, destination, coding->produced);
6790 coding->destination = destination;
d46c5b12 6791 }
ec6d2bb8 6792 }
b73bfc1c 6793
4776e638
KH
6794 if (saved_pt >= 0)
6795 {
6796 /* This is the case of:
6797 (BUFFERP (src_object) && EQ (src_object, dst_object))
6798 As we have moved PT while replacing the original buffer
6799 contents, we must recover it now. */
6800 set_buffer_internal (XBUFFER (src_object));
6801 if (saved_pt < from)
6802 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6803 else if (saved_pt < from + chars)
6804 TEMP_SET_PT_BOTH (from, from_byte);
6805 else if (! NILP (current_buffer->enable_multibyte_characters))
6806 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6807 saved_pt_byte + (coding->produced - bytes));
6808 else
6809 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6810 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6811 }
4776e638 6812
065e3595 6813 unbind_to (count, coding->dst_object);
d46c5b12
KH
6814}
6815
d46c5b12 6816
df7492f9
KH
6817void
6818encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6819 dst_object)
d46c5b12 6820 struct coding_system *coding;
df7492f9
KH
6821 Lisp_Object src_object;
6822 EMACS_INT from, from_byte, to, to_byte;
6823 Lisp_Object dst_object;
d46c5b12 6824{
b73bfc1c 6825 int count = specpdl_ptr - specpdl;
df7492f9
KH
6826 EMACS_INT chars = to - from;
6827 EMACS_INT bytes = to_byte - from_byte;
6828 Lisp_Object attrs;
4776e638
KH
6829 Lisp_Object buffer;
6830 int saved_pt = -1, saved_pt_byte;
df7492f9 6831
4776e638 6832 buffer = Fcurrent_buffer ();
df7492f9
KH
6833
6834 coding->src_object = src_object;
6835 coding->src_chars = chars;
6836 coding->src_bytes = bytes;
6837 coding->src_multibyte = chars < bytes;
6838
6839 attrs = CODING_ID_ATTRS (coding->id);
6840
6841 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6842 {
24a73b0a 6843 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
6844 set_buffer_internal (XBUFFER (coding->src_object));
6845 if (STRINGP (src_object))
6846 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6847 else if (BUFFERP (src_object))
6848 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6849 else
6850 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 6851
df7492f9
KH
6852 if (EQ (src_object, dst_object))
6853 {
6854 set_buffer_internal (XBUFFER (src_object));
4776e638 6855 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6856 del_range_both (from, from_byte, to, to_byte, 1);
6857 set_buffer_internal (XBUFFER (coding->src_object));
6858 }
6859
ac87bbef
KH
6860 call2 (CODING_ATTR_PRE_WRITE (attrs),
6861 make_number (BEG), make_number (Z));
6862 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6863 if (BEG != GPT)
6864 move_gap_both (BEG, BEG_BYTE);
6865 coding->src_chars = Z - BEG;
6866 coding->src_bytes = Z_BYTE - BEG_BYTE;
6867 coding->src_pos = BEG;
6868 coding->src_pos_byte = BEG_BYTE;
6869 coding->src_multibyte = Z < Z_BYTE;
6870 }
6871 else if (STRINGP (src_object))
d46c5b12 6872 {
24a73b0a 6873 code_conversion_save (0, 0);
df7492f9
KH
6874 coding->src_pos = from;
6875 coding->src_pos_byte = from_byte;
b73bfc1c 6876 }
df7492f9 6877 else if (BUFFERP (src_object))
b73bfc1c 6878 {
24a73b0a 6879 code_conversion_save (0, 0);
df7492f9 6880 set_buffer_internal (XBUFFER (src_object));
df7492f9 6881 if (EQ (src_object, dst_object))
d46c5b12 6882 {
4776e638 6883 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
6884 coding->src_object = del_range_1 (from, to, 1, 1);
6885 coding->src_pos = 0;
6886 coding->src_pos_byte = 0;
d46c5b12 6887 }
df7492f9 6888 else
d46c5b12 6889 {
ff0dacd7
KH
6890 if (from < GPT && to >= GPT)
6891 move_gap_both (from, from_byte);
df7492f9
KH
6892 coding->src_pos = from;
6893 coding->src_pos_byte = from_byte;
d46c5b12 6894 }
d46c5b12 6895 }
4776e638 6896 else
24a73b0a 6897 code_conversion_save (0, 0);
d46c5b12 6898
df7492f9 6899 if (BUFFERP (dst_object))
88993dfd 6900 {
df7492f9 6901 coding->dst_object = dst_object;
28f67a95
KH
6902 if (EQ (src_object, dst_object))
6903 {
6904 coding->dst_pos = from;
6905 coding->dst_pos_byte = from_byte;
6906 }
6907 else
6908 {
6909 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6910 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6911 }
df7492f9
KH
6912 coding->dst_multibyte
6913 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 6914 }
df7492f9 6915 else if (EQ (dst_object, Qt))
d46c5b12 6916 {
df7492f9 6917 coding->dst_object = Qnil;
df7492f9 6918 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6919 if (coding->dst_bytes == 0)
6920 coding->dst_bytes = 1;
6921 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6922 coding->dst_multibyte = 0;
d46c5b12
KH
6923 }
6924 else
6925 {
df7492f9
KH
6926 coding->dst_object = Qnil;
6927 coding->dst_multibyte = 0;
d46c5b12
KH
6928 }
6929
df7492f9 6930 encode_coding (coding);
d46c5b12 6931
df7492f9 6932 if (EQ (dst_object, Qt))
d46c5b12 6933 {
df7492f9
KH
6934 if (BUFFERP (coding->dst_object))
6935 coding->dst_object = Fbuffer_string ();
6936 else
d46c5b12 6937 {
df7492f9
KH
6938 coding->dst_object
6939 = make_unibyte_string ((char *) coding->destination,
6940 coding->produced);
6941 xfree (coding->destination);
d46c5b12 6942 }
4ed46869 6943 }
d46c5b12 6944
4776e638
KH
6945 if (saved_pt >= 0)
6946 {
6947 /* This is the case of:
6948 (BUFFERP (src_object) && EQ (src_object, dst_object))
6949 As we have moved PT while replacing the original buffer
6950 contents, we must recover it now. */
6951 set_buffer_internal (XBUFFER (src_object));
6952 if (saved_pt < from)
6953 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6954 else if (saved_pt < from + chars)
6955 TEMP_SET_PT_BOTH (from, from_byte);
6956 else if (! NILP (current_buffer->enable_multibyte_characters))
6957 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6958 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6959 else
4776e638
KH
6960 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6961 saved_pt_byte + (coding->produced - bytes));
6962 }
6963
df7492f9 6964 unbind_to (count, Qnil);
b73bfc1c
KH
6965}
6966
df7492f9 6967
b73bfc1c 6968Lisp_Object
df7492f9 6969preferred_coding_system ()
b73bfc1c 6970{
df7492f9 6971 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6972
df7492f9 6973 return CODING_ID_NAME (id);
4ed46869
KH
6974}
6975
6976\f
6977#ifdef emacs
1397dc18 6978/*** 11. Emacs Lisp library functions ***/
4ed46869 6979
4ed46869 6980DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6981 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6982See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6983about coding-system objects. */)
6984 (obj)
4ed46869
KH
6985 Lisp_Object obj;
6986{
df7492f9 6987 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6988}
6989
9d991de8
RS
6990DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6991 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6992 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6993 (prompt)
4ed46869
KH
6994 Lisp_Object prompt;
6995{
e0e989f6 6996 Lisp_Object val;
9d991de8
RS
6997 do
6998 {
4608c386
KH
6999 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7000 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 7001 }
8f924df7 7002 while (SCHARS (val) == 0);
e0e989f6 7003 return (Fintern (val, Qnil));
4ed46869
KH
7004}
7005
9b787f3e 7006DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
7007 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7008If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
7009 (prompt, default_coding_system)
9b787f3e 7010 Lisp_Object prompt, default_coding_system;
4ed46869 7011{
f44d27ce 7012 Lisp_Object val;
9b787f3e 7013 if (SYMBOLP (default_coding_system))
a3181084 7014 XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
4608c386 7015 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
7016 Qt, Qnil, Qcoding_system_history,
7017 default_coding_system, Qnil);
8f924df7 7018 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
7019}
7020
7021DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7022 1, 1, 0,
48b0f3ae 7023 doc: /* Check validity of CODING-SYSTEM.
b054002f 7024If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
df7492f9 7025 (coding_system)
4ed46869
KH
7026 Lisp_Object coding_system;
7027{
b7826503 7028 CHECK_SYMBOL (coding_system);
4ed46869
KH
7029 if (!NILP (Fcoding_system_p (coding_system)))
7030 return coding_system;
7031 while (1)
02ba4723 7032 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 7033}
df7492f9 7034
3a73fa5d 7035\f
89528eb3
KH
7036/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7037 HIGHEST is nonzero, return the coding system of the highest
7038 priority among the detected coding systems. Otherwize return a
7039 list of detected coding systems sorted by their priorities. If
7040 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7041 multibyte form but contains only ASCII and eight-bit chars.
7042 Otherwise, the bytes are raw bytes.
7043
7044 CODING-SYSTEM controls the detection as below:
7045
7046 If it is nil, detect both text-format and eol-format. If the
7047 text-format part of CODING-SYSTEM is already specified
7048 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7049 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7050 detect only text-format. */
7051
d46c5b12 7052Lisp_Object
24a73b0a
KH
7053detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7054 coding_system)
8f924df7 7055 const unsigned char *src;
24a73b0a 7056 int src_chars, src_bytes, highest;
0a28aafb 7057 int multibytep;
df7492f9 7058 Lisp_Object coding_system;
4ed46869 7059{
8f924df7 7060 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
7061 Lisp_Object attrs, eol_type;
7062 Lisp_Object val;
7063 struct coding_system coding;
89528eb3 7064 int id;
ff0dacd7 7065 struct coding_detection_info detect_info;
24a73b0a 7066 enum coding_category base_category;
b73bfc1c 7067
df7492f9
KH
7068 if (NILP (coding_system))
7069 coding_system = Qundecided;
7070 setup_coding_system (coding_system, &coding);
7071 attrs = CODING_ID_ATTRS (coding.id);
7072 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 7073 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 7074
df7492f9 7075 coding.source = src;
24a73b0a 7076 coding.src_chars = src_chars;
df7492f9
KH
7077 coding.src_bytes = src_bytes;
7078 coding.src_multibyte = multibytep;
7079 coding.consumed = 0;
89528eb3 7080 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 7081
ff0dacd7 7082 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 7083
89528eb3 7084 /* At first, detect text-format if necessary. */
24a73b0a
KH
7085 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7086 if (base_category == coding_category_undecided)
4ed46869 7087 {
ff0dacd7
KH
7088 enum coding_category category;
7089 struct coding_system *this;
7090 int c, i;
88993dfd 7091
24a73b0a
KH
7092 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7093 for (i = 0; src < src_end; i++, src++)
4ed46869 7094 {
df7492f9 7095 c = *src;
75e2a253 7096 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
24a73b0a
KH
7097 || c == ISO_CODE_SI
7098 || c == ISO_CODE_SO)))
d46c5b12 7099 break;
4ed46869 7100 }
df7492f9 7101 coding.head_ascii = src - coding.source;
88993dfd 7102
df7492f9
KH
7103 if (src < src_end)
7104 for (i = 0; i < coding_category_raw_text; i++)
7105 {
ff0dacd7
KH
7106 category = coding_priorities[i];
7107 this = coding_categories + category;
b843d1ae 7108
df7492f9
KH
7109 if (this->id < 0)
7110 {
7111 /* No coding system of this category is defined. */
ff0dacd7 7112 detect_info.rejected |= (1 << category);
df7492f9 7113 }
ff0dacd7 7114 else if (category >= coding_category_raw_text)
89528eb3 7115 continue;
ff0dacd7
KH
7116 else if (detect_info.checked & (1 << category))
7117 {
7118 if (highest
7119 && (detect_info.found & (1 << category)))
7120 break;
7121 }
df7492f9
KH
7122 else
7123 {
ff0dacd7 7124 if ((*(this->detector)) (&coding, &detect_info)
89528eb3 7125 && highest
ff0dacd7 7126 && (detect_info.found & (1 << category)))
24a73b0a
KH
7127 {
7128 if (category == coding_category_utf_16_auto)
7129 {
7130 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7131 category = coding_category_utf_16_le;
7132 else
7133 category = coding_category_utf_16_be;
7134 }
7135 break;
7136 }
df7492f9
KH
7137 }
7138 }
ec6d2bb8 7139
ff0dacd7 7140 if (detect_info.rejected == CATEGORY_MASK_ANY)
ec6d2bb8 7141 {
ff0dacd7 7142 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
7143 id = coding_categories[coding_category_raw_text].id;
7144 val = Fcons (make_number (id), Qnil);
7145 }
ff0dacd7 7146 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 7147 {
ff0dacd7 7148 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
7149 id = coding_categories[coding_category_undecided].id;
7150 val = Fcons (make_number (id), Qnil);
7151 }
7152 else if (highest)
7153 {
ff0dacd7 7154 if (detect_info.found)
ec6d2bb8 7155 {
ff0dacd7
KH
7156 detect_info.found = 1 << category;
7157 val = Fcons (make_number (this->id), Qnil);
7158 }
7159 else
7160 for (i = 0; i < coding_category_raw_text; i++)
7161 if (! (detect_info.rejected & (1 << coding_priorities[i])))
7162 {
7163 detect_info.found = 1 << coding_priorities[i];
7164 id = coding_categories[coding_priorities[i]].id;
7165 val = Fcons (make_number (id), Qnil);
7166 break;
7167 }
7168 }
89528eb3
KH
7169 else
7170 {
ff0dacd7
KH
7171 int mask = detect_info.rejected | detect_info.found;
7172 int found = 0;
89528eb3 7173 val = Qnil;
ec6d2bb8 7174
89528eb3 7175 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
7176 {
7177 category = coding_priorities[i];
7178 if (! (mask & (1 << category)))
ec6d2bb8 7179 {
ff0dacd7
KH
7180 found |= 1 << category;
7181 id = coding_categories[category].id;
7182 val = Fcons (make_number (id), val);
7183 }
7184 }
7185 for (i = coding_category_raw_text - 1; i >= 0; i--)
7186 {
7187 category = coding_priorities[i];
7188 if (detect_info.found & (1 << category))
7189 {
7190 id = coding_categories[category].id;
7191 val = Fcons (make_number (id), val);
ec6d2bb8 7192 }
ec6d2bb8 7193 }
ff0dacd7 7194 detect_info.found |= found;
ec6d2bb8 7195 }
ec6d2bb8 7196 }
24a73b0a
KH
7197 else if (base_category == coding_category_utf_16_auto)
7198 {
7199 if (detect_coding_utf_16 (&coding, &detect_info))
7200 {
7201 enum coding_category category;
7202 struct coding_system *this;
7203
7204 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7205 this = coding_categories + coding_category_utf_16_le;
7206 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7207 this = coding_categories + coding_category_utf_16_be;
7208 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7209 this = coding_categories + coding_category_utf_16_be_nosig;
7210 else
7211 this = coding_categories + coding_category_utf_16_le_nosig;
7212 val = Fcons (make_number (this->id), Qnil);
7213 }
7214 }
df7492f9
KH
7215 else
7216 {
ff0dacd7 7217 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 7218 val = Fcons (make_number (coding.id), Qnil);
4ed46869 7219 }
df7492f9 7220
89528eb3 7221 /* Then, detect eol-format if necessary. */
df7492f9 7222 {
89528eb3 7223 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
7224 Lisp_Object tail;
7225
89528eb3
KH
7226 if (VECTORP (eol_type))
7227 {
ff0dacd7 7228 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
7229 normal_eol = detect_eol (coding.source, src_bytes,
7230 coding_category_raw_text);
ff0dacd7
KH
7231 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7232 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
7233 utf_16_be_eol = detect_eol (coding.source, src_bytes,
7234 coding_category_utf_16_be);
ff0dacd7
KH
7235 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7236 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
7237 utf_16_le_eol = detect_eol (coding.source, src_bytes,
7238 coding_category_utf_16_le);
7239 }
7240 else
7241 {
7242 if (EQ (eol_type, Qunix))
7243 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7244 else if (EQ (eol_type, Qdos))
7245 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7246 else
7247 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7248 }
7249
df7492f9
KH
7250 for (tail = val; CONSP (tail); tail = XCDR (tail))
7251 {
89528eb3 7252 enum coding_category category;
df7492f9 7253 int this_eol;
89528eb3
KH
7254
7255 id = XINT (XCAR (tail));
7256 attrs = CODING_ID_ATTRS (id);
7257 category = XINT (CODING_ATTR_CATEGORY (attrs));
7258 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
7259 if (VECTORP (eol_type))
7260 {
89528eb3
KH
7261 if (category == coding_category_utf_16_be
7262 || category == coding_category_utf_16_be_nosig)
7263 this_eol = utf_16_be_eol;
7264 else if (category == coding_category_utf_16_le
7265 || category == coding_category_utf_16_le_nosig)
7266 this_eol = utf_16_le_eol;
df7492f9 7267 else
89528eb3
KH
7268 this_eol = normal_eol;
7269
df7492f9
KH
7270 if (this_eol == EOL_SEEN_LF)
7271 XSETCAR (tail, AREF (eol_type, 0));
7272 else if (this_eol == EOL_SEEN_CRLF)
7273 XSETCAR (tail, AREF (eol_type, 1));
7274 else if (this_eol == EOL_SEEN_CR)
7275 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
7276 else
7277 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 7278 }
89528eb3
KH
7279 else
7280 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
7281 }
7282 }
ec6d2bb8 7283
03699b14 7284 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
7285}
7286
ec6d2bb8 7287
d46c5b12
KH
7288DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7289 2, 3, 0,
48b0f3ae
PJ
7290 doc: /* Detect coding system of the text in the region between START and END.
7291Return a list of possible coding systems ordered by priority.
ec6d2bb8 7292
48b0f3ae
PJ
7293If only ASCII characters are found, it returns a list of single element
7294`undecided' or its subsidiary coding system according to a detected
7295end-of-line format.
ec6d2bb8 7296
48b0f3ae
PJ
7297If optional argument HIGHEST is non-nil, return the coding system of
7298highest priority. */)
7299 (start, end, highest)
d46c5b12
KH
7300 Lisp_Object start, end, highest;
7301{
7302 int from, to;
7303 int from_byte, to_byte;
ec6d2bb8 7304
b7826503
PJ
7305 CHECK_NUMBER_COERCE_MARKER (start);
7306 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 7307
d46c5b12
KH
7308 validate_region (&start, &end);
7309 from = XINT (start), to = XINT (end);
7310 from_byte = CHAR_TO_BYTE (from);
7311 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 7312
d46c5b12
KH
7313 if (from < GPT && to >= GPT)
7314 move_gap_both (to, to_byte);
c210f766 7315
d46c5b12 7316 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 7317 to - from, to_byte - from_byte,
0a28aafb
KH
7318 !NILP (highest),
7319 !NILP (current_buffer
df7492f9
KH
7320 ->enable_multibyte_characters),
7321 Qnil);
ec6d2bb8
KH
7322}
7323
d46c5b12
KH
7324DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7325 1, 2, 0,
48b0f3ae
PJ
7326 doc: /* Detect coding system of the text in STRING.
7327Return a list of possible coding systems ordered by priority.
fb88bf2d 7328
48b0f3ae
PJ
7329If only ASCII characters are found, it returns a list of single element
7330`undecided' or its subsidiary coding system according to a detected
7331end-of-line format.
d46c5b12 7332
48b0f3ae
PJ
7333If optional argument HIGHEST is non-nil, return the coding system of
7334highest priority. */)
7335 (string, highest)
d46c5b12
KH
7336 Lisp_Object string, highest;
7337{
b7826503 7338 CHECK_STRING (string);
b73bfc1c 7339
24a73b0a
KH
7340 return detect_coding_system (SDATA (string),
7341 SCHARS (string), SBYTES (string),
8f924df7 7342 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 7343 Qnil);
4ed46869 7344}
4ed46869 7345
b73bfc1c 7346
df7492f9
KH
7347static INLINE int
7348char_encodable_p (c, attrs)
7349 int c;
7350 Lisp_Object attrs;
05e6f5dc 7351{
df7492f9 7352 Lisp_Object tail;
df7492f9 7353 struct charset *charset;
7d64c6ad 7354 Lisp_Object translation_table;
d46c5b12 7355
7d64c6ad 7356 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 7357 if (! NILP (translation_table))
7d64c6ad 7358 c = translate_char (translation_table, c);
df7492f9
KH
7359 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7360 CONSP (tail); tail = XCDR (tail))
e133c8fa 7361 {
df7492f9
KH
7362 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7363 if (CHAR_CHARSET_P (c, charset))
7364 break;
e133c8fa 7365 }
df7492f9 7366 return (! NILP (tail));
05e6f5dc 7367}
83fa074f 7368
fb88bf2d 7369
df7492f9
KH
7370/* Return a list of coding systems that safely encode the text between
7371 START and END. If EXCLUDE is non-nil, it is a list of coding
7372 systems not to check. The returned list doesn't contain any such
48468dac 7373 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7374 unibyte, return t. */
e077cc80 7375
df7492f9
KH
7376DEFUN ("find-coding-systems-region-internal",
7377 Ffind_coding_systems_region_internal,
7378 Sfind_coding_systems_region_internal, 2, 3, 0,
7379 doc: /* Internal use only. */)
7380 (start, end, exclude)
7381 Lisp_Object start, end, exclude;
7382{
7383 Lisp_Object coding_attrs_list, safe_codings;
7384 EMACS_INT start_byte, end_byte;
7c78e542 7385 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7386 int c;
7387 Lisp_Object tail, elt;
d46c5b12 7388
df7492f9
KH
7389 if (STRINGP (start))
7390 {
7391 if (!STRING_MULTIBYTE (start)
8f924df7 7392 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7393 return Qt;
7394 start_byte = 0;
8f924df7 7395 end_byte = SBYTES (start);
df7492f9
KH
7396 }
7397 else
d46c5b12 7398 {
df7492f9
KH
7399 CHECK_NUMBER_COERCE_MARKER (start);
7400 CHECK_NUMBER_COERCE_MARKER (end);
7401 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7402 args_out_of_range (start, end);
7403 if (NILP (current_buffer->enable_multibyte_characters))
7404 return Qt;
7405 start_byte = CHAR_TO_BYTE (XINT (start));
7406 end_byte = CHAR_TO_BYTE (XINT (end));
7407 if (XINT (end) - XINT (start) == end_byte - start_byte)
7408 return Qt;
d46c5b12 7409
e1c23804 7410 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7411 {
e1c23804
DL
7412 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7413 move_gap_both (XINT (start), start_byte);
df7492f9 7414 else
e1c23804 7415 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7416 }
7417 }
7418
df7492f9
KH
7419 coding_attrs_list = Qnil;
7420 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7421 if (NILP (exclude)
7422 || NILP (Fmemq (XCAR (tail), exclude)))
7423 {
7424 Lisp_Object attrs;
d46c5b12 7425
df7492f9
KH
7426 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7427 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7428 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
7429 {
7430 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 7431 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
7432 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7433 }
df7492f9 7434 }
d46c5b12 7435
df7492f9 7436 if (STRINGP (start))
8f924df7 7437 p = pbeg = SDATA (start);
df7492f9
KH
7438 else
7439 p = pbeg = BYTE_POS_ADDR (start_byte);
7440 pend = p + (end_byte - start_byte);
b843d1ae 7441
df7492f9
KH
7442 while (p < pend && ASCII_BYTE_P (*p)) p++;
7443 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7444
05e6f5dc 7445 while (p < pend)
72d1a715 7446 {
df7492f9
KH
7447 if (ASCII_BYTE_P (*p))
7448 p++;
72d1a715
RS
7449 else
7450 {
df7492f9 7451 c = STRING_CHAR_ADVANCE (p);
12410ef1 7452
df7492f9
KH
7453 charset_map_loaded = 0;
7454 for (tail = coding_attrs_list; CONSP (tail);)
7455 {
7456 elt = XCAR (tail);
7457 if (NILP (elt))
7458 tail = XCDR (tail);
7459 else if (char_encodable_p (c, elt))
7460 tail = XCDR (tail);
7461 else if (CONSP (XCDR (tail)))
7462 {
7463 XSETCAR (tail, XCAR (XCDR (tail)));
7464 XSETCDR (tail, XCDR (XCDR (tail)));
7465 }
7466 else
7467 {
7468 XSETCAR (tail, Qnil);
7469 tail = XCDR (tail);
7470 }
7471 }
7472 if (charset_map_loaded)
7473 {
7474 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7475
df7492f9 7476 if (STRINGP (start))
8f924df7 7477 pbeg = SDATA (start);
df7492f9
KH
7478 else
7479 pbeg = BYTE_POS_ADDR (start_byte);
7480 p = pbeg + p_offset;
7481 pend = pbeg + pend_offset;
7482 }
7483 }
ec6d2bb8 7484 }
fb88bf2d 7485
df7492f9
KH
7486 safe_codings = Qnil;
7487 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7488 if (! NILP (XCAR (tail)))
7489 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 7490
05e6f5dc
KH
7491 return safe_codings;
7492}
4956c225 7493
d46c5b12 7494
8f924df7
KH
7495DEFUN ("unencodable-char-position", Funencodable_char_position,
7496 Sunencodable_char_position, 3, 5, 0,
7497 doc: /*
7498Return position of first un-encodable character in a region.
7499START and END specfiy the region and CODING-SYSTEM specifies the
7500encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 7501
8f924df7
KH
7502If optional 4th argument COUNT is non-nil, it specifies at most how
7503many un-encodable characters to search. In this case, the value is a
7504list of positions.
d46c5b12 7505
8f924df7
KH
7506If optional 5th argument STRING is non-nil, it is a string to search
7507for un-encodable characters. In that case, START and END are indexes
7508to the string. */)
7509 (start, end, coding_system, count, string)
7510 Lisp_Object start, end, coding_system, count, string;
7511{
7512 int n;
7513 struct coding_system coding;
7d64c6ad 7514 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
7515 Lisp_Object positions;
7516 int from, to;
7517 const unsigned char *p, *stop, *pend;
7518 int ascii_compatible;
fb88bf2d 7519
8f924df7
KH
7520 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7521 attrs = CODING_ID_ATTRS (coding.id);
7522 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7523 return Qnil;
7524 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7525 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 7526 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 7527
8f924df7
KH
7528 if (NILP (string))
7529 {
7530 validate_region (&start, &end);
7531 from = XINT (start);
7532 to = XINT (end);
7533 if (NILP (current_buffer->enable_multibyte_characters)
7534 || (ascii_compatible
7535 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7536 return Qnil;
7537 p = CHAR_POS_ADDR (from);
7538 pend = CHAR_POS_ADDR (to);
7539 if (from < GPT && to >= GPT)
7540 stop = GPT_ADDR;
7541 else
7542 stop = pend;
7543 }
7544 else
7545 {
7546 CHECK_STRING (string);
7547 CHECK_NATNUM (start);
7548 CHECK_NATNUM (end);
7549 from = XINT (start);
7550 to = XINT (end);
7551 if (from > to
7552 || to > SCHARS (string))
7553 args_out_of_range_3 (string, start, end);
7554 if (! STRING_MULTIBYTE (string))
7555 return Qnil;
7556 p = SDATA (string) + string_char_to_byte (string, from);
7557 stop = pend = SDATA (string) + string_char_to_byte (string, to);
7558 if (ascii_compatible && (to - from) == (pend - p))
7559 return Qnil;
7560 }
f2558efd 7561
8f924df7
KH
7562 if (NILP (count))
7563 n = 1;
7564 else
b73bfc1c 7565 {
8f924df7
KH
7566 CHECK_NATNUM (count);
7567 n = XINT (count);
b73bfc1c
KH
7568 }
7569
8f924df7
KH
7570 positions = Qnil;
7571 while (1)
d46c5b12 7572 {
8f924df7 7573 int c;
ec6d2bb8 7574
8f924df7
KH
7575 if (ascii_compatible)
7576 while (p < stop && ASCII_BYTE_P (*p))
7577 p++, from++;
7578 if (p >= stop)
0e79d667 7579 {
8f924df7
KH
7580 if (p >= pend)
7581 break;
7582 stop = pend;
7583 p = GAP_END_ADDR;
0e79d667 7584 }
ec6d2bb8 7585
8f924df7
KH
7586 c = STRING_CHAR_ADVANCE (p);
7587 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
7588 && ! char_charset (translate_char (translation_table, c),
7589 charset_list, NULL))
ec6d2bb8 7590 {
8f924df7
KH
7591 positions = Fcons (make_number (from), positions);
7592 n--;
7593 if (n == 0)
7594 break;
ec6d2bb8
KH
7595 }
7596
8f924df7
KH
7597 from++;
7598 }
d46c5b12 7599
8f924df7
KH
7600 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7601}
d46c5b12 7602
d46c5b12 7603
df7492f9
KH
7604DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7605 Scheck_coding_systems_region, 3, 3, 0,
7606 doc: /* Check if the region is encodable by coding systems.
d46c5b12 7607
df7492f9
KH
7608START and END are buffer positions specifying the region.
7609CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 7610
df7492f9
KH
7611The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7612CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7613whole region, POS0, POS1, ... are buffer positions where non-encodable
7614characters are found.
93dec019 7615
df7492f9
KH
7616If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7617value is nil.
93dec019 7618
df7492f9
KH
7619START may be a string. In that case, check if the string is
7620encodable, and the value contains indices to the string instead of
7621buffer positions. END is ignored. */)
7622 (start, end, coding_system_list)
7623 Lisp_Object start, end, coding_system_list;
05e6f5dc 7624{
df7492f9
KH
7625 Lisp_Object list;
7626 EMACS_INT start_byte, end_byte;
7627 int pos;
7c78e542 7628 const unsigned char *p, *pbeg, *pend;
df7492f9 7629 int c;
7d64c6ad 7630 Lisp_Object tail, elt, attrs;
70ad9fc4 7631
05e6f5dc
KH
7632 if (STRINGP (start))
7633 {
df7492f9 7634 if (!STRING_MULTIBYTE (start)
8f924df7 7635 && SCHARS (start) != SBYTES (start))
df7492f9
KH
7636 return Qnil;
7637 start_byte = 0;
8f924df7 7638 end_byte = SBYTES (start);
df7492f9 7639 pos = 0;
d46c5b12 7640 }
05e6f5dc 7641 else
b73bfc1c 7642 {
b7826503
PJ
7643 CHECK_NUMBER_COERCE_MARKER (start);
7644 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7645 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7646 args_out_of_range (start, end);
7647 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7648 return Qnil;
7649 start_byte = CHAR_TO_BYTE (XINT (start));
7650 end_byte = CHAR_TO_BYTE (XINT (end));
7651 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7652 return Qt;
df7492f9 7653
e1c23804 7654 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 7655 {
e1c23804
DL
7656 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7657 move_gap_both (XINT (start), start_byte);
df7492f9 7658 else
e1c23804 7659 move_gap_both (XINT (end), end_byte);
b73bfc1c 7660 }
e1c23804 7661 pos = XINT (start);
b73bfc1c 7662 }
7553d0e1 7663
df7492f9
KH
7664 list = Qnil;
7665 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 7666 {
df7492f9 7667 elt = XCAR (tail);
7d64c6ad 7668 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
7669 ASET (attrs, coding_attr_trans_tbl,
7670 get_translation_table (attrs, 1, NULL));
7d64c6ad 7671 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
7672 }
7673
df7492f9 7674 if (STRINGP (start))
8f924df7 7675 p = pbeg = SDATA (start);
72d1a715 7676 else
df7492f9
KH
7677 p = pbeg = BYTE_POS_ADDR (start_byte);
7678 pend = p + (end_byte - start_byte);
4ed46869 7679
df7492f9
KH
7680 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7681 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 7682
df7492f9 7683 while (p < pend)
d46c5b12 7684 {
df7492f9
KH
7685 if (ASCII_BYTE_P (*p))
7686 p++;
e133c8fa 7687 else
05e6f5dc 7688 {
df7492f9
KH
7689 c = STRING_CHAR_ADVANCE (p);
7690
7691 charset_map_loaded = 0;
7692 for (tail = list; CONSP (tail); tail = XCDR (tail))
7693 {
7694 elt = XCDR (XCAR (tail));
7695 if (! char_encodable_p (c, XCAR (elt)))
7696 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7697 }
7698 if (charset_map_loaded)
7699 {
7700 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7701
7702 if (STRINGP (start))
8f924df7 7703 pbeg = SDATA (start);
df7492f9
KH
7704 else
7705 pbeg = BYTE_POS_ADDR (start_byte);
7706 p = pbeg + p_offset;
7707 pend = pbeg + pend_offset;
7708 }
05e6f5dc 7709 }
df7492f9 7710 pos++;
d46c5b12 7711 }
4ed46869 7712
df7492f9
KH
7713 tail = list;
7714 list = Qnil;
7715 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 7716 {
df7492f9
KH
7717 elt = XCAR (tail);
7718 if (CONSP (XCDR (XCDR (elt))))
7719 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7720 list);
ec6d2bb8 7721 }
2b4f9037 7722
df7492f9 7723 return list;
d46c5b12
KH
7724}
7725
3fd9494b 7726
b73bfc1c 7727Lisp_Object
df7492f9
KH
7728code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7729 Lisp_Object start, end, coding_system, dst_object;
7730 int encodep, norecord;
4ed46869 7731{
3a73fa5d 7732 struct coding_system coding;
df7492f9
KH
7733 EMACS_INT from, from_byte, to, to_byte;
7734 Lisp_Object src_object;
4ed46869 7735
b7826503
PJ
7736 CHECK_NUMBER_COERCE_MARKER (start);
7737 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7738 if (NILP (coding_system))
7739 coding_system = Qno_conversion;
7740 else
7741 CHECK_CODING_SYSTEM (coding_system);
7742 src_object = Fcurrent_buffer ();
7743 if (NILP (dst_object))
7744 dst_object = src_object;
7745 else if (! EQ (dst_object, Qt))
7746 CHECK_BUFFER (dst_object);
3a73fa5d 7747
d46c5b12
KH
7748 validate_region (&start, &end);
7749 from = XFASTINT (start);
df7492f9 7750 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7751 to = XFASTINT (end);
df7492f9 7752 to_byte = CHAR_TO_BYTE (to);
764ca8da 7753
df7492f9
KH
7754 setup_coding_system (coding_system, &coding);
7755 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 7756
df7492f9
KH
7757 if (encodep)
7758 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7759 dst_object);
7760 else
7761 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7762 dst_object);
7763 if (! norecord)
7764 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7765
df7492f9
KH
7766 return (BUFFERP (dst_object)
7767 ? make_number (coding.produced_char)
7768 : coding.dst_object);
4031e2bf 7769}
78108bcd 7770
4ed46869 7771
4031e2bf 7772DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7773 3, 4, "r\nzCoding system: ",
48b0f3ae 7774 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7775When called from a program, takes four arguments:
7776 START, END, CODING-SYSTEM, and DESTINATION.
7777START and END are buffer positions.
8844fa83 7778
df7492f9
KH
7779Optional 4th arguments DESTINATION specifies where the decoded text goes.
7780If nil, the region between START and END is replace by the decoded text.
7781If buffer, the decoded text is inserted in the buffer.
7782If t, the decoded text is returned.
8844fa83 7783
48b0f3ae
PJ
7784This function sets `last-coding-system-used' to the precise coding system
7785used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7786not fully specified.)
7787It returns the length of the decoded text. */)
df7492f9
KH
7788 (start, end, coding_system, destination)
7789 Lisp_Object start, end, coding_system, destination;
4031e2bf 7790{
df7492f9 7791 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 7792}
8844fa83 7793
3a73fa5d 7794DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7795 3, 4, "r\nzCoding system: ",
7796 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7797When called from a program, takes three arguments:
7798START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 7799
df7492f9
KH
7800Optional 4th arguments DESTINATION specifies where the encoded text goes.
7801If nil, the region between START and END is replace by the encoded text.
7802If buffer, the encoded text is inserted in the buffer.
7803If t, the encoded text is returned.
2391eaa4 7804
48b0f3ae
PJ
7805This function sets `last-coding-system-used' to the precise coding system
7806used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7807not fully specified.)
7808It returns the length of the encoded text. */)
df7492f9
KH
7809 (start, end, coding_system, destination)
7810 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7811{
df7492f9 7812 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
7813}
7814
7815Lisp_Object
df7492f9
KH
7816code_convert_string (string, coding_system, dst_object,
7817 encodep, nocopy, norecord)
7818 Lisp_Object string, coding_system, dst_object;
7819 int encodep, nocopy, norecord;
b73bfc1c 7820{
4031e2bf 7821 struct coding_system coding;
df7492f9 7822 EMACS_INT chars, bytes;
ec6d2bb8 7823
b7826503 7824 CHECK_STRING (string);
d46c5b12 7825 if (NILP (coding_system))
4956c225 7826 {
df7492f9
KH
7827 if (! norecord)
7828 Vlast_coding_system_used = Qno_conversion;
7829 if (NILP (dst_object))
7830 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 7831 }
b73bfc1c 7832
df7492f9
KH
7833 if (NILP (coding_system))
7834 coding_system = Qno_conversion;
7835 else
7836 CHECK_CODING_SYSTEM (coding_system);
7837 if (NILP (dst_object))
7838 dst_object = Qt;
7839 else if (! EQ (dst_object, Qt))
7840 CHECK_BUFFER (dst_object);
73be902c 7841
df7492f9 7842 setup_coding_system (coding_system, &coding);
d46c5b12 7843 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
7844 chars = SCHARS (string);
7845 bytes = SBYTES (string);
df7492f9
KH
7846 if (encodep)
7847 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7848 else
7849 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7850 if (! norecord)
7851 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 7852
df7492f9
KH
7853 return (BUFFERP (dst_object)
7854 ? make_number (coding.produced_char)
7855 : coding.dst_object);
4ed46869 7856}
73be902c 7857
b73bfc1c 7858
ecec61c1 7859/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 7860 Do not set Vlast_coding_system_used.
4ed46869 7861
ec6d2bb8
KH
7862 This function is called only from macros DECODE_FILE and
7863 ENCODE_FILE, thus we ignore character composition. */
4ed46869 7864
ecec61c1
KH
7865Lisp_Object
7866code_convert_string_norecord (string, coding_system, encodep)
7867 Lisp_Object string, coding_system;
7868 int encodep;
4ed46869 7869{
0be8721c 7870 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
7871}
7872
4ed46869 7873
df7492f9
KH
7874DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7875 2, 4, 0,
7876 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7877
7878Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7879if the decoding operation is trivial.
ecec61c1 7880
df7492f9 7881Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 7882inserted in BUFFER instead of returned as a string. In this case,
df7492f9 7883the return value is BUFFER.
ecec61c1 7884
df7492f9
KH
7885This function sets `last-coding-system-used' to the precise coding system
7886used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7887not fully specified. */)
7888 (string, coding_system, nocopy, buffer)
7889 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 7890{
df7492f9
KH
7891 return code_convert_string (string, coding_system, buffer,
7892 0, ! NILP (nocopy), 0);
4ed46869
KH
7893}
7894
df7492f9
KH
7895DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7896 2, 4, 0,
7897 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7898
7899Optional third arg NOCOPY non-nil means it is OK to return STRING
7900itself if the encoding operation is trivial.
7901
7902Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 7903inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
7904the return value is BUFFER.
7905
7906This function sets `last-coding-system-used' to the precise coding system
7907used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7908not fully specified.) */)
7909 (string, coding_system, nocopy, buffer)
7910 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 7911{
df7492f9 7912 return code_convert_string (string, coding_system, buffer,
c197f191 7913 1, ! NILP (nocopy), 1);
4ed46869 7914}
df7492f9 7915
3a73fa5d 7916\f
4ed46869 7917DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7918 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7919Return the corresponding character. */)
7920 (code)
4ed46869 7921 Lisp_Object code;
4ed46869 7922{
df7492f9
KH
7923 Lisp_Object spec, attrs, val;
7924 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7925 int c;
4ed46869 7926
df7492f9
KH
7927 CHECK_NATNUM (code);
7928 c = XFASTINT (code);
7929 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7930 attrs = AREF (spec, 0);
4ed46869 7931
df7492f9
KH
7932 if (ASCII_BYTE_P (c)
7933 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7934 return code;
4ed46869 7935
df7492f9
KH
7936 val = CODING_ATTR_CHARSET_LIST (attrs);
7937 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7938 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7939 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 7940
df7492f9
KH
7941 if (c <= 0x7F)
7942 charset = charset_roman;
7943 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7944 {
df7492f9
KH
7945 charset = charset_kana;
7946 c -= 0x80;
4ed46869 7947 }
55ab7be3 7948 else
4ed46869 7949 {
004068e4 7950 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7951
7952 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7953 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7954 error ("Invalid code: %d", code);
7955 SJIS_TO_JIS (c);
7956 charset = charset_kanji;
4ed46869 7957 }
df7492f9
KH
7958 c = DECODE_CHAR (charset, c);
7959 if (c < 0)
7960 error ("Invalid code: %d", code);
7961 return make_number (c);
93dec019 7962}
4ed46869 7963
48b0f3ae 7964
4ed46869 7965DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7966 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7967Return the corresponding code in SJIS. */)
7968 (ch)
df7492f9 7969 Lisp_Object ch;
4ed46869 7970{
df7492f9
KH
7971 Lisp_Object spec, attrs, charset_list;
7972 int c;
7973 struct charset *charset;
7974 unsigned code;
48b0f3ae 7975
df7492f9
KH
7976 CHECK_CHARACTER (ch);
7977 c = XFASTINT (ch);
7978 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7979 attrs = AREF (spec, 0);
7980
7981 if (ASCII_CHAR_P (c)
7982 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7983 return ch;
7984
7985 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7986 charset = char_charset (c, charset_list, &code);
7987 if (code == CHARSET_INVALID_CODE (charset))
7988 error ("Can't encode by shift_jis encoding: %d", c);
7989 JIS_TO_SJIS (code);
7990
7991 return make_number (code);
4ed46869
KH
7992}
7993
7994DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7995 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7996Return the corresponding character. */)
7997 (code)
4ed46869 7998 Lisp_Object code;
d46c5b12 7999{
df7492f9
KH
8000 Lisp_Object spec, attrs, val;
8001 struct charset *charset_roman, *charset_big5, *charset;
8002 int c;
6289dd10 8003
df7492f9
KH
8004 CHECK_NATNUM (code);
8005 c = XFASTINT (code);
8006 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8007 attrs = AREF (spec, 0);
4ed46869 8008
df7492f9
KH
8009 if (ASCII_BYTE_P (c)
8010 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8011 return code;
6289dd10 8012
df7492f9
KH
8013 val = CODING_ATTR_CHARSET_LIST (attrs);
8014 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8015 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 8016
df7492f9
KH
8017 if (c <= 0x7F)
8018 charset = charset_roman;
c28a9453
KH
8019 else
8020 {
df7492f9
KH
8021 int b1 = c >> 8, b2 = c & 0x7F;
8022 if (b1 < 0xA1 || b1 > 0xFE
8023 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8024 error ("Invalid code: %d", code);
8025 charset = charset_big5;
c28a9453 8026 }
df7492f9
KH
8027 c = DECODE_CHAR (charset, (unsigned )c);
8028 if (c < 0)
8029 error ("Invalid code: %d", code);
8030 return make_number (c);
d46c5b12 8031}
6289dd10 8032
4ed46869 8033DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8034 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
8035Return the corresponding character code in Big5. */)
8036 (ch)
4ed46869
KH
8037 Lisp_Object ch;
8038{
df7492f9
KH
8039 Lisp_Object spec, attrs, charset_list;
8040 struct charset *charset;
8041 int c;
8042 unsigned code;
8043
8044 CHECK_CHARACTER (ch);
8045 c = XFASTINT (ch);
8046 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8047 attrs = AREF (spec, 0);
8048 if (ASCII_CHAR_P (c)
8049 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8050 return ch;
8051
8052 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8053 charset = char_charset (c, charset_list, &code);
8054 if (code == CHARSET_INVALID_CODE (charset))
8055 error ("Can't encode by Big5 encoding: %d", c);
8056
8057 return make_number (code);
4ed46869 8058}
48b0f3ae 8059
3a73fa5d 8060\f
1ba9e4ab
KH
8061DEFUN ("set-terminal-coding-system-internal",
8062 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
8063 Sset_terminal_coding_system_internal, 1, 1, 0,
8064 doc: /* Internal use only. */)
8065 (coding_system)
b74e4686 8066 Lisp_Object coding_system;
4ed46869 8067{
b7826503 8068 CHECK_SYMBOL (coding_system);
df7492f9
KH
8069 setup_coding_system (Fcheck_coding_system (coding_system),
8070 &terminal_coding);
48b0f3ae 8071
70c22245 8072 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
8073 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
8074 /* Characer composition should be disabled. */
8075 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8076 terminal_coding.src_multibyte = 1;
8077 terminal_coding.dst_multibyte = 0;
4ed46869
KH
8078 return Qnil;
8079}
8080
c4825358
KH
8081DEFUN ("set-safe-terminal-coding-system-internal",
8082 Fset_safe_terminal_coding_system_internal,
48b0f3ae 8083 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 8084 doc: /* Internal use only. */)
48b0f3ae 8085 (coding_system)
b74e4686 8086 Lisp_Object coding_system;
d46c5b12 8087{
b7826503 8088 CHECK_SYMBOL (coding_system);
c4825358
KH
8089 setup_coding_system (Fcheck_coding_system (coding_system),
8090 &safe_terminal_coding);
df7492f9
KH
8091 /* Characer composition should be disabled. */
8092 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8093 safe_terminal_coding.src_multibyte = 1;
8094 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
8095 return Qnil;
8096}
4ed46869 8097
4ed46869
KH
8098DEFUN ("terminal-coding-system",
8099 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
8100 doc: /* Return coding system specified for terminal output. */)
8101 ()
4ed46869 8102{
df7492f9 8103 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
8104}
8105
1ba9e4ab
KH
8106DEFUN ("set-keyboard-coding-system-internal",
8107 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
8108 Sset_keyboard_coding_system_internal, 1, 1, 0,
8109 doc: /* Internal use only. */)
8110 (coding_system)
4ed46869
KH
8111 Lisp_Object coding_system;
8112{
b7826503 8113 CHECK_SYMBOL (coding_system);
df7492f9
KH
8114 setup_coding_system (Fcheck_coding_system (coding_system),
8115 &keyboard_coding);
8116 /* Characer composition should be disabled. */
8117 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
8118 return Qnil;
8119}
8120
8121DEFUN ("keyboard-coding-system",
8122 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
8123 doc: /* Return coding system specified for decoding keyboard input. */)
8124 ()
4ed46869 8125{
df7492f9 8126 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
8127}
8128
4ed46869 8129\f
a5d301df
KH
8130DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8131 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
8132 doc: /* Choose a coding system for an operation based on the target name.
8133The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8134DECODING-SYSTEM is the coding system to use for decoding
8135\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8136for encoding (in case OPERATION does encoding).
05e6f5dc 8137
48b0f3ae
PJ
8138The first argument OPERATION specifies an I/O primitive:
8139 For file I/O, `insert-file-contents' or `write-region'.
8140 For process I/O, `call-process', `call-process-region', or `start-process'.
8141 For network I/O, `open-network-stream'.
05e6f5dc 8142
48b0f3ae
PJ
8143The remaining arguments should be the same arguments that were passed
8144to the primitive. Depending on which primitive, one of those arguments
8145is selected as the TARGET. For example, if OPERATION does file I/O,
8146whichever argument specifies the file name is TARGET.
05e6f5dc 8147
48b0f3ae
PJ
8148TARGET has a meaning which depends on OPERATION:
8149 For file I/O, TARGET is a file name.
8150 For process I/O, TARGET is a process name.
8151 For network I/O, TARGET is a service name or a port number
05e6f5dc 8152
48b0f3ae
PJ
8153This function looks up what specified for TARGET in,
8154`file-coding-system-alist', `process-coding-system-alist',
8155or `network-coding-system-alist' depending on OPERATION.
8156They may specify a coding system, a cons of coding systems,
8157or a function symbol to call.
8158In the last case, we call the function with one argument,
8159which is a list of all the arguments given to this function.
8160
8161usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
8162 (nargs, args)
4ed46869
KH
8163 int nargs;
8164 Lisp_Object *args;
6b89e3aa 8165{
4ed46869
KH
8166 Lisp_Object operation, target_idx, target, val;
8167 register Lisp_Object chain;
177c0ea7 8168
4ed46869
KH
8169 if (nargs < 2)
8170 error ("Too few arguments");
8171 operation = args[0];
8172 if (!SYMBOLP (operation)
8173 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 8174 error ("Invalid first arguement");
4ed46869
KH
8175 if (nargs < 1 + XINT (target_idx))
8176 error ("Too few arguments for operation: %s",
8f924df7 8177 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
8178 target = args[XINT (target_idx) + 1];
8179 if (!(STRINGP (target)
8180 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 8181 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 8182
2e34157c
RS
8183 chain = ((EQ (operation, Qinsert_file_contents)
8184 || EQ (operation, Qwrite_region))
02ba4723 8185 ? Vfile_coding_system_alist
2e34157c 8186 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
8187 ? Vnetwork_coding_system_alist
8188 : Vprocess_coding_system_alist));
4ed46869
KH
8189 if (NILP (chain))
8190 return Qnil;
8191
03699b14 8192 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 8193 {
f44d27ce 8194 Lisp_Object elt;
6b89e3aa 8195
df7492f9 8196 elt = XCAR (chain);
4ed46869
KH
8197 if (CONSP (elt)
8198 && ((STRINGP (target)
03699b14
KR
8199 && STRINGP (XCAR (elt))
8200 && fast_string_match (XCAR (elt), target) >= 0)
8201 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 8202 {
03699b14 8203 val = XCDR (elt);
b19fd4c5
KH
8204 /* Here, if VAL is both a valid coding system and a valid
8205 function symbol, we return VAL as a coding system. */
02ba4723
KH
8206 if (CONSP (val))
8207 return val;
8208 if (! SYMBOLP (val))
8209 return Qnil;
8210 if (! NILP (Fcoding_system_p (val)))
8211 return Fcons (val, val);
b19fd4c5 8212 if (! NILP (Ffboundp (val)))
6b89e3aa 8213 {
b19fd4c5
KH
8214 val = call1 (val, Flist (nargs, args));
8215 if (CONSP (val))
8216 return val;
8217 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8218 return Fcons (val, val);
6b89e3aa 8219 }
02ba4723 8220 return Qnil;
6b89e3aa
KH
8221 }
8222 }
4ed46869 8223 return Qnil;
6b89e3aa
KH
8224}
8225
df7492f9 8226DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 8227 Sset_coding_system_priority, 0, MANY, 0,
da7db224 8228 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 8229If multiple coding systems belongs to the same category,
a3181084
DL
8230all but the first one are ignored.
8231
8232usage: (set-coding-system-priority ...) */)
df7492f9
KH
8233 (nargs, args)
8234 int nargs;
8235 Lisp_Object *args;
8236{
8237 int i, j;
8238 int changed[coding_category_max];
8239 enum coding_category priorities[coding_category_max];
8240
8241 bzero (changed, sizeof changed);
6b89e3aa 8242
df7492f9 8243 for (i = j = 0; i < nargs; i++)
6b89e3aa 8244 {
df7492f9
KH
8245 enum coding_category category;
8246 Lisp_Object spec, attrs;
6b89e3aa 8247
df7492f9
KH
8248 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8249 attrs = AREF (spec, 0);
8250 category = XINT (CODING_ATTR_CATEGORY (attrs));
8251 if (changed[category])
8252 /* Ignore this coding system because a coding system of the
8253 same category already had a higher priority. */
8254 continue;
8255 changed[category] = 1;
8256 priorities[j++] = category;
8257 if (coding_categories[category].id >= 0
8258 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8259 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 8260 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 8261 }
6b89e3aa 8262
df7492f9
KH
8263 /* Now we have decided top J priorities. Reflect the order of the
8264 original priorities to the remaining priorities. */
6b89e3aa 8265
df7492f9 8266 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 8267 {
df7492f9
KH
8268 while (j < coding_category_max
8269 && changed[coding_priorities[j]])
8270 j++;
8271 if (j == coding_category_max)
8272 abort ();
8273 priorities[i] = coding_priorities[j];
8274 }
6b89e3aa 8275
df7492f9 8276 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 8277
ff563fce
KH
8278 /* Update `coding-category-list'. */
8279 Vcoding_category_list = Qnil;
8280 for (i = coding_category_max - 1; i >= 0; i--)
8281 Vcoding_category_list
8282 = Fcons (AREF (Vcoding_category_table, priorities[i]),
8283 Vcoding_category_list);
6b89e3aa 8284
df7492f9 8285 return Qnil;
6b89e3aa
KH
8286}
8287
df7492f9
KH
8288DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8289 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
8290 doc: /* Return a list of coding systems ordered by their priorities.
8291HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
8292 (highestp)
8293 Lisp_Object highestp;
d46c5b12
KH
8294{
8295 int i;
df7492f9 8296 Lisp_Object val;
6b89e3aa 8297
df7492f9 8298 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 8299 {
df7492f9
KH
8300 enum coding_category category = coding_priorities[i];
8301 int id = coding_categories[category].id;
8302 Lisp_Object attrs;
068a9dbd 8303
df7492f9
KH
8304 if (id < 0)
8305 continue;
8306 attrs = CODING_ID_ATTRS (id);
8307 if (! NILP (highestp))
8308 return CODING_ATTR_BASE_NAME (attrs);
8309 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8310 }
8311 return Fnreverse (val);
8312}
068a9dbd 8313
f0064e1f 8314static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
8315
8316static Lisp_Object
df7492f9
KH
8317make_subsidiaries (base)
8318 Lisp_Object base;
068a9dbd 8319{
df7492f9 8320 Lisp_Object subsidiaries;
8f924df7 8321 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
8322 char *buf = (char *) alloca (base_name_len + 6);
8323 int i;
068a9dbd 8324
8f924df7 8325 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
8326 subsidiaries = Fmake_vector (make_number (3), Qnil);
8327 for (i = 0; i < 3; i++)
068a9dbd 8328 {
df7492f9
KH
8329 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8330 ASET (subsidiaries, i, intern (buf));
068a9dbd 8331 }
df7492f9 8332 return subsidiaries;
068a9dbd
KH
8333}
8334
8335
/* Lisp primitive `define-coding-system-internal'.

   ARGS holds the definition positionally, indexed by the coding_arg_*
   constants.  The function builds the attribute vector ATTRS,
   validates the common arguments and then the arguments specific to
   the coding type (charset, ccl, utf-16, iso-2022, emacs-mule,
   shift-jis, big5, raw-text, utf-8, undecided), and finally registers
   the coding system under NAME in Vcoding_system_hash_table,
   Vcoding_system_list and Vcoding_system_alist.  When the eol-type
   argument is nil, three subsidiary coding systems sharing ATTRS are
   registered as well.

   Returns nil.  If NARGS is too small for the coding type, control
   jumps to `short_args', which signals wrong-number-of-arguments.  */
df7492f9
KH
8336DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8337 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
8338 doc: /* For internal use only.
8339usage: (define-coding-system-internal ...) */)
df7492f9
KH
8340 (nargs, args)
8341 int nargs;
8342 Lisp_Object *args;
068a9dbd 8343{
df7492f9
KH
8344 Lisp_Object name;
8345 Lisp_Object spec_vec; /* [ ATTRS ALIASES EOL_TYPE ] */
8346 Lisp_Object attrs; /* Vector of attributes. */
8347 Lisp_Object eol_type;
8348 Lisp_Object aliases;
8349 Lisp_Object coding_type, charset_list, safe_charsets;
8350 enum coding_category category;
8351 Lisp_Object tail, val;
8352 int max_charset_id = 0;
8353 int i;
068a9dbd 8354
df7492f9
KH
8355 if (nargs < coding_arg_max)
8356 goto short_args;
068a9dbd 8357
df7492f9 8358 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 8359
df7492f9
KH
8360 name = args[coding_arg_name];
8361 CHECK_SYMBOL (name);
8362 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8363
df7492f9
KH
8364 val = args[coding_arg_mnemonic];
8365 if (! STRINGP (val))
8366 CHECK_CHARACTER (val);
8367 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8368
df7492f9
KH
8369 coding_type = args[coding_arg_coding_type];
8370 CHECK_SYMBOL (coding_type);
8371 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8372
df7492f9
KH
/* Turn CHARSET_LIST into a list of charset IDs and record the largest
   ID in MAX_CHARSET_ID.  The symbols `iso-2022' and `emacs-mule' stand
   for the corresponding predefined lists and are only accepted for
   the coding type of the same name.  */
8373 charset_list = args[coding_arg_charset_list];
8374 if (SYMBOLP (charset_list))
8375 {
8376 if (EQ (charset_list, Qiso_2022))
8377 {
8378 if (! EQ (coding_type, Qiso_2022))
8379 error ("Invalid charset-list");
8380 charset_list = Viso_2022_charset_list;
8381 }
8382 else if (EQ (charset_list, Qemacs_mule))
8383 {
8384 if (! EQ (coding_type, Qemacs_mule))
8385 error ("Invalid charset-list");
8386 charset_list = Vemacs_mule_charset_list;
8387 }
8388 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8389 if (max_charset_id < XFASTINT (XCAR (tail)))
8390 max_charset_id = XFASTINT (XCAR (tail));
8391 }
068a9dbd
KH
8392 else
8393 {
df7492f9
KH
8394 charset_list = Fcopy_sequence (charset_list);
8395 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
068a9dbd 8396 {
df7492f9
KH
8397 struct charset *charset;
8398
8399 val = Fcar (tail);
8400 CHECK_CHARSET_GET_CHARSET (val, charset);
8401 if (EQ (coding_type, Qiso_2022)
8402 ? CHARSET_ISO_FINAL (charset) < 0
8403 : EQ (coding_type, Qemacs_mule)
8404 ? CHARSET_EMACS_MULE_ID (charset) < 0
8405 : 0)
8406 error ("Can't handle charset `%s'",
8f924df7 8407 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8408
8f924df7 8409 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8410 if (max_charset_id < charset->id)
8411 max_charset_id = charset->id;
068a9dbd
KH
8412 }
8413 }
df7492f9 8414 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8415
df7492f9
KH
/* SAFE_CHARSETS is a string indexed by charset ID: every byte starts
   as 255 and is set to 0 for each charset in CHARSET_LIST.  */
8416 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8417 make_number (255));
8418 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8419 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8420 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8421
584948ac 8422 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8423
df7492f9 8424 val = args[coding_arg_decode_translation_table];
a6f87d34 8425 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8426 CHECK_SYMBOL (val);
df7492f9 8427 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8428
df7492f9 8429 val = args[coding_arg_encode_translation_table];
a6f87d34 8430 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8431 CHECK_SYMBOL (val);
df7492f9 8432 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8433
df7492f9
KH
8434 val = args[coding_arg_post_read_conversion];
8435 CHECK_SYMBOL (val);
8436 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8437
df7492f9
KH
8438 val = args[coding_arg_pre_write_conversion];
8439 CHECK_SYMBOL (val);
8440 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 8441
df7492f9
KH
8442 val = args[coding_arg_default_char];
8443 if (NILP (val))
8444 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8445 else
8446 {
8f924df7 8447 CHECK_CHARACTER (val);
df7492f9
KH
8448 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8449 }
4031e2bf 8450
8f924df7
KH
8451 val = args[coding_arg_for_unibyte];
8452 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 8453
df7492f9
KH
8454 val = args[coding_arg_plist];
8455 CHECK_LIST (val);
8456 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 8457
df7492f9
KH
8458 if (EQ (coding_type, Qcharset))
8459 {
c7c66a95
KH
8460 /* Generate a lisp vector of 256 elements. Each element is nil,
8461 integer, or a list of charset IDs.
3a73fa5d 8462
c7c66a95
KH
8463 If Nth element is nil, the byte code N is invalid in this
8464 coding system.
4ed46869 8465
c7c66a95
KH
8466 If Nth element is a number NUM, N is the first byte of a
8467 charset whose ID is NUM.
4ed46869 8468
c7c66a95
KH
8469 If Nth element is a list of charset IDs, N is the first byte
8470 of one of them. The list is sorted by dimensions of the
2bc515e4 8471 charsets. A charset of smaller dimension comes first. */
df7492f9 8472 val = Fmake_vector (make_number (256), Qnil);
4ed46869 8473
5c99c2e6 8474 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 8475 {
c7c66a95
KH
8476 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8477 int dim = CHARSET_DIMENSION (charset);
8478 int idx = (dim - 1) * 4;
4ed46869 8479
5c99c2e6 8480 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 8481 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 8482
15d143f7
KH
8483 for (i = charset->code_space[idx];
8484 i <= charset->code_space[idx + 1]; i++)
8485 {
c7c66a95
KH
8486 Lisp_Object tmp, tmp2;
8487 int dim2;
ec6d2bb8 8488
c7c66a95
KH
8489 tmp = AREF (val, i);
8490 if (NILP (tmp))
8491 tmp = XCAR (tail);
8492 else if (NUMBERP (tmp))
8493 {
8494 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8495 if (dim < dim2)
c7c66a95 8496 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8497 else
8498 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8499 }
15d143f7 8500 else
c7c66a95
KH
8501 {
8502 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8503 {
8504 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8505 if (dim < dim2)
8506 break;
8507 }
8508 if (NILP (tmp2))
8509 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8510 else
8511 {
8512 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8513 XSETCAR (tmp2, XCAR (tail));
8514 }
8515 }
8516 ASET (val, i, tmp);
15d143f7 8517 }
df7492f9
KH
8518 }
8519 ASET (attrs, coding_attr_charset_valids, val);
8520 category = coding_category_charset;
8521 }
8522 else if (EQ (coding_type, Qccl))
8523 {
8524 Lisp_Object valids;
ecec61c1 8525
df7492f9
KH
8526 if (nargs < coding_arg_ccl_max)
8527 goto short_args;
ecec61c1 8528
df7492f9
KH
8529 val = args[coding_arg_ccl_decoder];
8530 CHECK_CCL_PROGRAM (val);
8531 if (VECTORP (val))
8532 val = Fcopy_sequence (val);
8533 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 8534
df7492f9
KH
8535 val = args[coding_arg_ccl_encoder];
8536 CHECK_CCL_PROGRAM (val);
8537 if (VECTORP (val))
8538 val = Fcopy_sequence (val);
8539 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 8540
df7492f9
KH
8541 val = args[coding_arg_ccl_valids];
8542 valids = Fmake_string (make_number (256), make_number (0));
8543 for (tail = val; !NILP (tail); tail = Fcdr (tail))
8544 {
8dcbea82 8545 int from, to;
ecec61c1 8546
df7492f9
KH
8547 val = Fcar (tail);
8548 if (INTEGERP (val))
8dcbea82
KH
8549 {
8550 from = to = XINT (val);
8551 if (from < 0 || from > 255)
8552 args_out_of_range_3 (val, make_number (0), make_number (255));
8553 }
df7492f9
KH
8554 else
8555 {
df7492f9 8556 CHECK_CONS (val);
8f924df7
KH
8557 CHECK_NATNUM_CAR (val);
8558 CHECK_NATNUM_CDR (val);
df7492f9 8559 from = XINT (XCAR (val));
8f924df7 8560 if (from > 255)
8dcbea82
KH
8561 args_out_of_range_3 (XCAR (val),
8562 make_number (0), make_number (255));
df7492f9 8563 to = XINT (XCDR (val));
8dcbea82
KH
8564 if (to < from || to > 255)
8565 args_out_of_range_3 (XCDR (val),
8566 XCAR (val), make_number (255));
df7492f9 8567 }
8dcbea82 8568 for (i = from; i <= to; i++)
8f924df7 8569 SSET (valids, i, 1);
df7492f9
KH
8570 }
8571 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 8572
df7492f9 8573 category = coding_category_ccl;
55ab7be3 8574 }
df7492f9 8575 else if (EQ (coding_type, Qutf_16))
55ab7be3 8576 {
df7492f9 8577 Lisp_Object bom, endian;
4ed46869 8578
584948ac 8579 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 8580
df7492f9
KH
8581 if (nargs < coding_arg_utf16_max)
8582 goto short_args;
4ed46869 8583
df7492f9
KH
8584 bom = args[coding_arg_utf16_bom];
8585 if (! NILP (bom) && ! EQ (bom, Qt))
8586 {
8587 CHECK_CONS (bom);
8f924df7
KH
8588 val = XCAR (bom);
8589 CHECK_CODING_SYSTEM (val);
8590 val = XCDR (bom);
8591 CHECK_CODING_SYSTEM (val);
df7492f9
KH
8592 }
8593 ASET (attrs, coding_attr_utf_16_bom, bom);
8594
8595 endian = args[coding_arg_utf16_endian];
b49a1807
KH
8596 CHECK_SYMBOL (endian);
8597 if (NILP (endian))
8598 endian = Qbig;
8599 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 8600 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
8601 ASET (attrs, coding_attr_utf_16_endian, endian);
8602
8603 category = (CONSP (bom)
8604 ? coding_category_utf_16_auto
8605 : NILP (bom)
b49a1807 8606 ? (EQ (endian, Qbig)
df7492f9
KH
8607 ? coding_category_utf_16_be_nosig
8608 : coding_category_utf_16_le_nosig)
b49a1807 8609 : (EQ (endian, Qbig)
df7492f9
KH
8610 ? coding_category_utf_16_be
8611 : coding_category_utf_16_le));
8612 }
8613 else if (EQ (coding_type, Qiso_2022))
8614 {
8615 Lisp_Object initial, reg_usage, request, flags;
4776e638 8616 int i;
1397dc18 8617
df7492f9
KH
8618 if (nargs < coding_arg_iso2022_max)
8619 goto short_args;
8620
8621 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8622 CHECK_VECTOR (initial);
8623 for (i = 0; i < 4; i++)
8624 {
8625 val = Faref (initial, make_number (i));
8626 if (! NILP (val))
8627 {
584948ac
KH
8628 struct charset *charset;
8629
8630 CHECK_CHARSET_GET_CHARSET (val, charset);
8631 ASET (initial, i, make_number (CHARSET_ID (charset)));
8632 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8633 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8634 }
8635 else
8636 ASET (initial, i, make_number (-1));
8637 }
8638
8639 reg_usage = args[coding_arg_iso2022_reg_usage];
8640 CHECK_CONS (reg_usage);
8f924df7
KH
8641 CHECK_NUMBER_CAR (reg_usage);
8642 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
8643
8644 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8645 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8646 {
df7492f9 8647 int id;
8f924df7 8648 Lisp_Object tmp;
df7492f9
KH
8649
8650 val = Fcar (tail);
8651 CHECK_CONS (val);
8f924df7
KH
8652 tmp = XCAR (val);
8653 CHECK_CHARSET_GET_ID (tmp, id);
8654 CHECK_NATNUM_CDR (val);
df7492f9
KH
8655 if (XINT (XCDR (val)) >= 4)
8656 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 8657 XSETCAR (val, make_number (id));
1397dc18 8658 }
4ed46869 8659
df7492f9
KH
8660 flags = args[coding_arg_iso2022_flags];
8661 CHECK_NATNUM (flags);
8662 i = XINT (flags);
8663 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8664 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8665
8666 ASET (attrs, coding_attr_iso_initial, initial);
8667 ASET (attrs, coding_attr_iso_usage, reg_usage);
8668 ASET (attrs, coding_attr_iso_request, request);
8669 ASET (attrs, coding_attr_iso_flags, flags);
8670 setup_iso_safe_charsets (attrs);
8671
8672 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8673 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8674 | CODING_ISO_FLAG_SINGLE_SHIFT))
8675 ? coding_category_iso_7_else
8676 : EQ (args[coding_arg_charset_list], Qiso_2022)
8677 ? coding_category_iso_7
8678 : coding_category_iso_7_tight);
8679 else
8680 {
8681 int id = XINT (AREF (initial, 1));
8682
c6fb6e98 8683 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8684 || EQ (args[coding_arg_charset_list], Qiso_2022)
8685 || id < 0)
8686 ? coding_category_iso_8_else
8687 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8688 ? coding_category_iso_8_1
8689 : coding_category_iso_8_2);
8690 }
0ce7886f
KH
8691 if (category != coding_category_iso_8_1
8692 && category != coding_category_iso_8_2)
8693 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8694 }
8695 else if (EQ (coding_type, Qemacs_mule))
c28a9453 8696 {
df7492f9
KH
8697 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8698 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8699 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 8700 category = coding_category_emacs_mule;
c28a9453 8701 }
df7492f9 8702 else if (EQ (coding_type, Qshift_jis))
c28a9453 8703 {
df7492f9
KH
8704
8705 struct charset *charset;
8706
7d64c6ad 8707 if (XINT (Flength (charset_list)) != 3
6e07c25f 8708 && XINT (Flength (charset_list)) != 4)
7d64c6ad 8709 error ("There should be three or four charsets");
df7492f9
KH
8710
8711 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8712 if (CHARSET_DIMENSION (charset) != 1)
8713 error ("Dimension of charset %s is not one",
8f924df7 8714 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8715 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8716 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8717
8718 charset_list = XCDR (charset_list);
8719 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8720 if (CHARSET_DIMENSION (charset) != 1)
8721 error ("Dimension of charset %s is not one",
8f924df7 8722 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
8723
8724 charset_list = XCDR (charset_list);
8725 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8726 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
8727 error ("Dimension of charset %s is not two",
8728 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8729
8730 charset_list = XCDR (charset_list);
2b917a06
KH
8731 if (! NILP (charset_list))
8732 {
8733 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8734 if (CHARSET_DIMENSION (charset) != 2)
8735 error ("Dimension of charset %s is not two",
8736 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8737 }
df7492f9
KH
8738
8739 category = coding_category_sjis;
8740 Vsjis_coding_system = name;
c28a9453 8741 }
df7492f9
KH
8742 else if (EQ (coding_type, Qbig5))
8743 {
8744 struct charset *charset;
4ed46869 8745
df7492f9
KH
8746 if (XINT (Flength (charset_list)) != 2)
8747 error ("There should be just two charsets");
8748
8749 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8750 if (CHARSET_DIMENSION (charset) != 1)
8751 error ("Dimension of charset %s is not one",
8f924df7 8752 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8753 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8754 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8755
8756 charset_list = XCDR (charset_list);
8757 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8758 if (CHARSET_DIMENSION (charset) != 2)
8759 error ("Dimension of charset %s is not two",
8f924df7 8760 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 8761
df7492f9
KH
8762 category = coding_category_big5;
8763 Vbig5_coding_system = name;
8764 }
8765 else if (EQ (coding_type, Qraw_text))
c28a9453 8766 {
584948ac
KH
8767 category = coding_category_raw_text;
8768 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 8769 }
df7492f9 8770 else if (EQ (coding_type, Qutf_8))
4ed46869 8771 {
584948ac
KH
8772 category = coding_category_utf_8;
8773 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 8774 }
df7492f9
KH
8775 else if (EQ (coding_type, Qundecided))
8776 category = coding_category_undecided;
4ed46869 8777 else
df7492f9 8778 error ("Invalid coding system type: %s",
8f924df7 8779 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 8780
df7492f9 8781 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
8782 CODING_ATTR_PLIST (attrs)
8783 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8784 CODING_ATTR_PLIST (attrs)));
c4825358 8785
df7492f9
KH
8786 eol_type = args[coding_arg_eol_type];
8787 if (! NILP (eol_type)
8788 && ! EQ (eol_type, Qunix)
8789 && ! EQ (eol_type, Qdos)
8790 && ! EQ (eol_type, Qmac))
8791 error ("Invalid eol-type");
4ed46869 8792
df7492f9 8793 aliases = Fcons (name, Qnil);
4ed46869 8794
df7492f9
KH
/* No eol-type was given: EOL_TYPE becomes the vector of subsidiary
   names returned by make_subsidiaries, and each subsidiary is
   registered with its own spec vector whose slot 0 is the shared
   ATTRS and whose eol-type is unix, dos and mac respectively.  */
8795 if (NILP (eol_type))
8796 {
8797 eol_type = make_subsidiaries (name);
8798 for (i = 0; i < 3; i++)
1397dc18 8799 {
df7492f9
KH
8800 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8801
8802 this_name = AREF (eol_type, i);
8803 this_aliases = Fcons (this_name, Qnil);
8804 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8805 this_spec = Fmake_vector (make_number (3), attrs);
8806 ASET (this_spec, 1, this_aliases);
8807 ASET (this_spec, 2, this_eol_type);
8808 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8809 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8810 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8811 Vcoding_system_alist);
1397dc18 8812 }
d46c5b12 8813 }
4ed46869 8814
df7492f9
KH
8815 spec_vec = Fmake_vector (make_number (3), attrs);
8816 ASET (spec_vec, 1, aliases);
8817 ASET (spec_vec, 2, eol_type);
48b0f3ae 8818
df7492f9
KH
8819 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8820 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8821 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8822 Vcoding_system_alist);
48b0f3ae 8823
df7492f9
KH
8824 {
8825 int id = coding_categories[category].id;
48b0f3ae 8826
df7492f9
KH
/* (Re)initialize the category's entry from NAME when the category has
   no coding system yet (negative id) or NAME is the one it already
   uses.  */
8827 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8828 setup_coding_system (name, &coding_categories[category]);
8829 }
48b0f3ae 8830
d46c5b12 8831 return Qnil;
48b0f3ae 8832
df7492f9
KH
8833 short_args:
8834 return Fsignal (Qwrong_number_of_arguments,
8835 Fcons (intern ("define-coding-system-internal"),
8836 make_number (nargs)));
d46c5b12 8837}
4ed46869 8838
d6925f38 8839
a6f87d34
KH
8840DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
8841 3, 3, 0,
8842 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
8843 (coding_system, prop, val)
8844 Lisp_Object coding_system, prop, val;
8845{
8846 Lisp_Object spec, attrs, plist;
8847
8848 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8849 attrs = AREF (spec, 0);
8850 if (EQ (prop, QCmnemonic))
8851 {
8852 if (! STRINGP (val))
8853 CHECK_CHARACTER (val);
8854 CODING_ATTR_MNEMONIC (attrs) = val;
8855 }
8856 else if (EQ (prop, QCdefalut_char))
8857 {
8858 if (NILP (val))
8859 val = make_number (' ');
8860 else
8861 CHECK_CHARACTER (val);
8862 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8863 }
8864 else if (EQ (prop, QCdecode_translation_table))
8865 {
8866 if (! CHAR_TABLE_P (val) && ! CONSP (val))
8867 CHECK_SYMBOL (val);
8868 CODING_ATTR_DECODE_TBL (attrs) = val;
8869 }
8870 else if (EQ (prop, QCencode_translation_table))
8871 {
8872 if (! CHAR_TABLE_P (val) && ! CONSP (val))
8873 CHECK_SYMBOL (val);
8874 CODING_ATTR_ENCODE_TBL (attrs) = val;
8875 }
8876 else if (EQ (prop, QCpost_read_conversion))
8877 {
8878 CHECK_SYMBOL (val);
8879 CODING_ATTR_POST_READ (attrs) = val;
8880 }
8881 else if (EQ (prop, QCpre_write_conversion))
8882 {
8883 CHECK_SYMBOL (val);
8884 CODING_ATTR_PRE_WRITE (attrs) = val;
8885 }
8886
8887 CODING_ATTR_PLIST (attrs)
8888 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
8889 return val;
8890}
8891
8892
df7492f9
KH
8893DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8894 Sdefine_coding_system_alias, 2, 2, 0,
8895 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8896 (alias, coding_system)
8897 Lisp_Object alias, coding_system;
66cfb530 8898{
df7492f9 8899 Lisp_Object spec, aliases, eol_type;
4ed46869 8900
df7492f9
KH
8901 CHECK_SYMBOL (alias);
8902 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8903 aliases = AREF (spec, 1);
d6925f38
KH
8904 /* ALISES should be a list of length more than zero, and the first
8905 element is a base coding system. Append ALIAS at the tail of the
8906 list. */
df7492f9
KH
8907 while (!NILP (XCDR (aliases)))
8908 aliases = XCDR (aliases);
8f924df7 8909 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 8910
df7492f9
KH
8911 eol_type = AREF (spec, 2);
8912 if (VECTORP (eol_type))
4ed46869 8913 {
df7492f9
KH
8914 Lisp_Object subsidiaries;
8915 int i;
4ed46869 8916
df7492f9
KH
8917 subsidiaries = make_subsidiaries (alias);
8918 for (i = 0; i < 3; i++)
8919 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8920 AREF (eol_type, i));
4ed46869 8921 }
df7492f9
KH
8922
8923 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 8924 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
5bad0796
DL
8925 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8926 Vcoding_system_alist);
66cfb530 8927
4ed46869
KH
8928 return Qnil;
8929}
8930
df7492f9
KH
8931DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8932 1, 1, 0,
8933 doc: /* Return the base of CODING-SYSTEM.
da7db224 8934Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
8935 (coding_system)
8936 Lisp_Object coding_system;
d46c5b12 8937{
df7492f9 8938 Lisp_Object spec, attrs;
d46c5b12 8939
df7492f9
KH
8940 if (NILP (coding_system))
8941 return (Qno_conversion);
8942 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8943 attrs = AREF (spec, 0);
8944 return CODING_ATTR_BASE_NAME (attrs);
8945}
1397dc18 8946
df7492f9
KH
8947DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8948 1, 1, 0,
8949 doc: "Return the property list of CODING-SYSTEM.")
8950 (coding_system)
8951 Lisp_Object coding_system;
8952{
8953 Lisp_Object spec, attrs;
1397dc18 8954
df7492f9
KH
8955 if (NILP (coding_system))
8956 coding_system = Qno_conversion;
8957 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8958 attrs = AREF (spec, 0);
8959 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
8960}
8961
df7492f9
KH
8962
8963DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8964 1, 1, 0,
da7db224 8965 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
8966 (coding_system)
8967 Lisp_Object coding_system;
66cfb530 8968{
df7492f9 8969 Lisp_Object spec;
84d60297 8970
df7492f9
KH
8971 if (NILP (coding_system))
8972 coding_system = Qno_conversion;
8973 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 8974 return AREF (spec, 1);
df7492f9 8975}
66cfb530 8976
df7492f9
KH
8977DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8978 Scoding_system_eol_type, 1, 1, 0,
8979 doc: /* Return eol-type of CODING-SYSTEM.
8980An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 8981
df7492f9
KH
8982Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8983and CR respectively.
66cfb530 8984
df7492f9
KH
8985A vector value indicates that a format of end-of-line should be
8986detected automatically. Nth element of the vector is the subsidiary
8987coding system whose eol-type is N. */)
6b89e3aa
KH
8988 (coding_system)
8989 Lisp_Object coding_system;
8990{
df7492f9
KH
8991 Lisp_Object spec, eol_type;
8992 int n;
6b89e3aa 8993
df7492f9
KH
8994 if (NILP (coding_system))
8995 coding_system = Qno_conversion;
8996 if (! CODING_SYSTEM_P (coding_system))
8997 return Qnil;
8998 spec = CODING_SYSTEM_SPEC (coding_system);
8999 eol_type = AREF (spec, 2);
9000 if (VECTORP (eol_type))
9001 return Fcopy_sequence (eol_type);
9002 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9003 return make_number (n);
6b89e3aa
KH
9004}
9005
4ed46869
KH
9006#endif /* emacs */
9007
9008\f
1397dc18 9009/*** 12. Postamble ***/
4ed46869 9010
dfcf069d 9011void
4ed46869
KH
9012init_coding_once ()
9013{
9014 int i;
9015
df7492f9
KH
9016 for (i = 0; i < coding_category_max; i++)
9017 {
9018 coding_categories[i].id = -1;
9019 coding_priorities[i] = i;
9020 }
4ed46869
KH
9021
9022 /* ISO2022 specific initialize routine. */
9023 for (i = 0; i < 0x20; i++)
b73bfc1c 9024 iso_code_class[i] = ISO_control_0;
4ed46869
KH
9025 for (i = 0x21; i < 0x7F; i++)
9026 iso_code_class[i] = ISO_graphic_plane_0;
9027 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 9028 iso_code_class[i] = ISO_control_1;
4ed46869
KH
9029 for (i = 0xA1; i < 0xFF; i++)
9030 iso_code_class[i] = ISO_graphic_plane_1;
9031 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9032 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
9033 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9034 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9035 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9036 iso_code_class[ISO_CODE_ESC] = ISO_escape;
9037 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9038 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9039 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9040
df7492f9
KH
9041 for (i = 0; i < 256; i++)
9042 {
9043 emacs_mule_bytes[i] = 1;
9044 }
7c78e542
KH
9045 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9046 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9047 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9048 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
9049}
9050
9051#ifdef emacs
9052
dfcf069d 9053void
e0e989f6
KH
9054syms_of_coding ()
9055{
df7492f9 9056 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
9057 {
9058 Lisp_Object args[2];
9059 args[0] = QCtest;
9060 args[1] = Qeq;
9061 Vcoding_system_hash_table = Fmake_hash_table (2, args);
9062 }
df7492f9
KH
9063
9064 staticpro (&Vsjis_coding_system);
9065 Vsjis_coding_system = Qnil;
e0e989f6 9066
df7492f9
KH
9067 staticpro (&Vbig5_coding_system);
9068 Vbig5_coding_system = Qnil;
9069
24a73b0a
KH
9070 staticpro (&Vcode_conversion_reused_workbuf);
9071 Vcode_conversion_reused_workbuf = Qnil;
9072
9073 staticpro (&Vcode_conversion_workbuf_name);
9074 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
e0e989f6 9075
24a73b0a 9076 reused_workbuf_in_use = 0;
df7492f9
KH
9077
9078 DEFSYM (Qcharset, "charset");
9079 DEFSYM (Qtarget_idx, "target-idx");
9080 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
9081 Fset (Qcoding_system_history, Qnil);
9082
9ce27fde 9083 /* Target FILENAME is the first argument. */
e0e989f6 9084 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 9085 /* Target FILENAME is the third argument. */
e0e989f6
KH
9086 Fput (Qwrite_region, Qtarget_idx, make_number (2));
9087
df7492f9 9088 DEFSYM (Qcall_process, "call-process");
9ce27fde 9089 /* Target PROGRAM is the first argument. */
e0e989f6
KH
9090 Fput (Qcall_process, Qtarget_idx, make_number (0));
9091
df7492f9 9092 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 9093 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9094 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9095
df7492f9 9096 DEFSYM (Qstart_process, "start-process");
9ce27fde 9097 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9098 Fput (Qstart_process, Qtarget_idx, make_number (2));
9099
df7492f9 9100 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 9101 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
9102 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9103
df7492f9
KH
9104 DEFSYM (Qcoding_system, "coding-system");
9105 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 9106
df7492f9
KH
9107 DEFSYM (Qeol_type, "eol-type");
9108 DEFSYM (Qunix, "unix");
9109 DEFSYM (Qdos, "dos");
4ed46869 9110
df7492f9
KH
9111 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9112 DEFSYM (Qpost_read_conversion, "post-read-conversion");
9113 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9114 DEFSYM (Qdefault_char, "default-char");
9115 DEFSYM (Qundecided, "undecided");
9116 DEFSYM (Qno_conversion, "no-conversion");
9117 DEFSYM (Qraw_text, "raw-text");
4ed46869 9118
df7492f9 9119 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 9120
df7492f9 9121 DEFSYM (Qutf_8, "utf-8");
8f924df7 9122 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 9123
df7492f9 9124 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
9125 DEFSYM (Qbig, "big");
9126 DEFSYM (Qlittle, "little");
27901516 9127
df7492f9
KH
9128 DEFSYM (Qshift_jis, "shift-jis");
9129 DEFSYM (Qbig5, "big5");
4ed46869 9130
df7492f9 9131 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 9132
df7492f9 9133 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
9134 Fput (Qcoding_system_error, Qerror_conditions,
9135 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9136 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 9137 build_string ("Invalid coding system"));
4ed46869 9138
05e6f5dc
KH
9139 /* Intern this now in case it isn't already done.
9140 Setting this variable twice is harmless.
9141 But don't staticpro it here--that is done in alloc.c. */
9142 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 9143
df7492f9 9144 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 9145 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
9146 DEFSYM (Qtranslation_table_id, "translation-table-id");
9147 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9148 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 9149
df7492f9 9150 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 9151
df7492f9 9152 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 9153
01378f49 9154 DEFSYM (QCcategory, ":category");
a6f87d34
KH
9155 DEFSYM (QCmnemonic, ":mnemonic");
9156 DEFSYM (QCdefalut_char, ":default-char");
9157 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9158 DEFSYM (QCencode_translation_table, ":encode-translation-table");
9159 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9160 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
01378f49 9161
df7492f9
KH
9162 Vcoding_category_table
9163 = Fmake_vector (make_number (coding_category_max), Qnil);
9164 staticpro (&Vcoding_category_table);
9165 /* The following are targets of code detection. */
9166 ASET (Vcoding_category_table, coding_category_iso_7,
9167 intern ("coding-category-iso-7"));
9168 ASET (Vcoding_category_table, coding_category_iso_7_tight,
9169 intern ("coding-category-iso-7-tight"));
9170 ASET (Vcoding_category_table, coding_category_iso_8_1,
9171 intern ("coding-category-iso-8-1"));
9172 ASET (Vcoding_category_table, coding_category_iso_8_2,
9173 intern ("coding-category-iso-8-2"));
9174 ASET (Vcoding_category_table, coding_category_iso_7_else,
9175 intern ("coding-category-iso-7-else"));
9176 ASET (Vcoding_category_table, coding_category_iso_8_else,
9177 intern ("coding-category-iso-8-else"));
9178 ASET (Vcoding_category_table, coding_category_utf_8,
9179 intern ("coding-category-utf-8"));
9180 ASET (Vcoding_category_table, coding_category_utf_16_be,
9181 intern ("coding-category-utf-16-be"));
ff563fce
KH
9182 ASET (Vcoding_category_table, coding_category_utf_16_auto,
9183 intern ("coding-category-utf-16-auto"));
df7492f9
KH
9184 ASET (Vcoding_category_table, coding_category_utf_16_le,
9185 intern ("coding-category-utf-16-le"));
9186 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9187 intern ("coding-category-utf-16-be-nosig"));
9188 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9189 intern ("coding-category-utf-16-le-nosig"));
9190 ASET (Vcoding_category_table, coding_category_charset,
9191 intern ("coding-category-charset"));
9192 ASET (Vcoding_category_table, coding_category_sjis,
9193 intern ("coding-category-sjis"));
9194 ASET (Vcoding_category_table, coding_category_big5,
9195 intern ("coding-category-big5"));
9196 ASET (Vcoding_category_table, coding_category_ccl,
9197 intern ("coding-category-ccl"));
9198 ASET (Vcoding_category_table, coding_category_emacs_mule,
9199 intern ("coding-category-emacs-mule"));
9200 /* The following are NOT targets of code detection. */
9201 ASET (Vcoding_category_table, coding_category_raw_text,
9202 intern ("coding-category-raw-text"));
9203 ASET (Vcoding_category_table, coding_category_undecided,
9204 intern ("coding-category-undecided"));
ecf488bc 9205
065e3595
KH
9206 DEFSYM (Qinsufficient_source, "insufficient-source");
9207 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9208 DEFSYM (Qinvalid_source, "invalid-source");
9209 DEFSYM (Qinterrupted, "interrupted");
9210 DEFSYM (Qinsufficient_memory, "insufficient-memory");
9211
4ed46869
KH
9212 defsubr (&Scoding_system_p);
9213 defsubr (&Sread_coding_system);
9214 defsubr (&Sread_non_nil_coding_system);
9215 defsubr (&Scheck_coding_system);
9216 defsubr (&Sdetect_coding_region);
d46c5b12 9217 defsubr (&Sdetect_coding_string);
05e6f5dc 9218 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 9219 defsubr (&Sunencodable_char_position);
df7492f9 9220 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
9221 defsubr (&Sdecode_coding_region);
9222 defsubr (&Sencode_coding_region);
9223 defsubr (&Sdecode_coding_string);
9224 defsubr (&Sencode_coding_string);
9225 defsubr (&Sdecode_sjis_char);
9226 defsubr (&Sencode_sjis_char);
9227 defsubr (&Sdecode_big5_char);
9228 defsubr (&Sencode_big5_char);
1ba9e4ab 9229 defsubr (&Sset_terminal_coding_system_internal);
c4825358 9230 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 9231 defsubr (&Sterminal_coding_system);
1ba9e4ab 9232 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 9233 defsubr (&Skeyboard_coding_system);
a5d301df 9234 defsubr (&Sfind_operation_coding_system);
df7492f9 9235 defsubr (&Sset_coding_system_priority);
6b89e3aa 9236 defsubr (&Sdefine_coding_system_internal);
df7492f9 9237 defsubr (&Sdefine_coding_system_alias);
a6f87d34 9238 defsubr (&Scoding_system_put);
df7492f9
KH
9239 defsubr (&Scoding_system_base);
9240 defsubr (&Scoding_system_plist);
9241 defsubr (&Scoding_system_aliases);
9242 defsubr (&Scoding_system_eol_type);
9243 defsubr (&Scoding_system_priority_list);
4ed46869 9244
4608c386 9245 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
9246 doc: /* List of coding systems.
9247
9248Do not alter the value of this variable manually. This variable should be
df7492f9 9249updated by the functions `define-coding-system' and
48b0f3ae 9250`define-coding-system-alias'. */);
4608c386
KH
9251 Vcoding_system_list = Qnil;
9252
9253 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
9254 doc: /* Alist of coding system names.
9255Each element is one element list of coding system name.
9256This variable is given to `completing-read' as TABLE argument.
9257
9258Do not alter the value of this variable manually. This variable should be
9259updated by the functions `make-coding-system' and
9260`define-coding-system-alias'. */);
4608c386
KH
9261 Vcoding_system_alist = Qnil;
9262
4ed46869 9263 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
9264 doc: /* List of coding-categories (symbols) ordered by priority.
9265
9266On detecting a coding system, Emacs tries code detection algorithms
9267associated with each coding-category one by one in this order. When
9268one algorithm agrees with a byte sequence of source text, the coding
9269system bound to the corresponding coding-category is selected. */);
4ed46869
KH
9270 {
9271 int i;
9272
9273 Vcoding_category_list = Qnil;
df7492f9 9274 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 9275 Vcoding_category_list
d46c5b12
KH
9276 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9277 Vcoding_category_list);
4ed46869
KH
9278 }
9279
9280 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
9281 doc: /* Specify the coding system for read operations.
9282It is useful to bind this variable with `let', but do not set it globally.
9283If the value is a coding system, it is used for decoding on read operation.
9284If not, an appropriate element is used from one of the coding system alists:
9285There are three such tables, `file-coding-system-alist',
9286`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
9287 Vcoding_system_for_read = Qnil;
9288
9289 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
9290 doc: /* Specify the coding system for write operations.
9291Programs bind this variable with `let', but you should not set it globally.
9292If the value is a coding system, it is used for encoding of output,
9293when writing it to a file and when sending it to a file or subprocess.
9294
9295If this does not specify a coding system, an appropriate element
9296is used from one of the coding system alists:
9297There are three such tables, `file-coding-system-alist',
9298`process-coding-system-alist', and `network-coding-system-alist'.
9299For output to files, if the above procedure does not specify a coding system,
9300the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
9301 Vcoding_system_for_write = Qnil;
9302
9303 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
9304 doc: /*
9305Coding system used in the latest file or process I/O. */);
4ed46869
KH
9306 Vlast_coding_system_used = Qnil;
9307
065e3595
KH
9308 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9309 doc: /*
9310Error status of the last code conversion.
9311
9312When an error was detected in the last code conversion, this variable
9313is set to one of the following symbols.
9314 `insufficient-source'
9315 `inconsistent-eol'
9316 `invalid-source'
9317 `interrupted'
9318 `insufficient-memory'
9319When no error was detected, the value doesn't change. So, to check
9320the error status of a code conversion by this variable, you must
9321explicitly set this variable to nil before performing code
9322conversion. */);
9323 Vlast_code_conversion_error = Qnil;
9324
9ce27fde 9325 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
9326 doc: /*
9327*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
9328See info node `Coding Systems' and info node `Text and Binary' concerning
9329such conversion. */);
9ce27fde
KH
9330 inhibit_eol_conversion = 0;
9331
ed29121d 9332 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
9333 doc: /*
9334Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
9335Bind it to t if the process output is to be treated as if it were a file
9336read from some filesystem. */);
ed29121d
EZ
9337 inherit_process_coding_system = 0;
9338
02ba4723 9339 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
9340 doc: /*
9341Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
9342The format is ((PATTERN . VAL) ...),
9343where PATTERN is a regular expression matching a file name,
9344VAL is a coding system, a cons of coding systems, or a function symbol.
9345If VAL is a coding system, it is used for both decoding and encoding
9346the file contents.
9347If VAL is a cons of coding systems, the car part is used for decoding,
9348and the cdr part is used for encoding.
9349If VAL is a function symbol, the function must return a coding system
0192762c
DL
9350or a cons of coding systems which are used as above. The function gets
9351the arguments with which `find-operation-coding-system' was called.
48b0f3ae
PJ
9352
9353See also the function `find-operation-coding-system'
9354and the variable `auto-coding-alist'. */);
02ba4723
KH
9355 Vfile_coding_system_alist = Qnil;
9356
9357 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
9358 doc: /*
9359Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
9360The format is ((PATTERN . VAL) ...),
9361where PATTERN is a regular expression matching a program name,
9362VAL is a coding system, a cons of coding systems, or a function symbol.
9363If VAL is a coding system, it is used for both decoding what received
9364from the program and encoding what sent to the program.
9365If VAL is a cons of coding systems, the car part is used for decoding,
9366and the cdr part is used for encoding.
9367If VAL is a function symbol, the function must return a coding system
9368or a cons of coding systems which are used as above.
9369
9370See also the function `find-operation-coding-system'. */);
02ba4723
KH
9371 Vprocess_coding_system_alist = Qnil;
9372
9373 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
9374 doc: /*
9375Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
9376The format is ((PATTERN . VAL) ...),
9377where PATTERN is a regular expression matching a network service name
9378or is a port number to connect to,
9379VAL is a coding system, a cons of coding systems, or a function symbol.
9380If VAL is a coding system, it is used for both decoding what received
9381from the network stream and encoding what sent to the network stream.
9382If VAL is a cons of coding systems, the car part is used for decoding,
9383and the cdr part is used for encoding.
9384If VAL is a function symbol, the function must return a coding system
9385or a cons of coding systems which are used as above.
9386
9387See also the function `find-operation-coding-system'. */);
02ba4723 9388 Vnetwork_coding_system_alist = Qnil;
4ed46869 9389
68c45bf0 9390 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
9391 doc: /* Coding system to use with system messages.
9392Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
9393 Vlocale_coding_system = Qnil;
9394
005f0d35 9395 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 9396 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
9397 doc: /*
9398*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 9399 eol_mnemonic_unix = build_string (":");
4ed46869 9400
7722baf9 9401 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
9402 doc: /*
9403*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 9404 eol_mnemonic_dos = build_string ("\\");
4ed46869 9405
7722baf9 9406 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
9407 doc: /*
9408*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 9409 eol_mnemonic_mac = build_string ("/");
4ed46869 9410
7722baf9 9411 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
9412 doc: /*
9413*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 9414 eol_mnemonic_undecided = build_string (":");
4ed46869 9415
84fbb8a0 9416 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
9417 doc: /*
9418*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 9419 Venable_character_translation = Qt;
bdd9fb48 9420
f967223b 9421 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
9422 &Vstandard_translation_table_for_decode,
9423 doc: /* Table for translating characters while decoding. */);
f967223b 9424 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 9425
f967223b 9426 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
9427 &Vstandard_translation_table_for_encode,
9428 doc: /* Table for translating characters while encoding. */);
f967223b 9429 Vstandard_translation_table_for_encode = Qnil;
4ed46869 9430
df7492f9 9431 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
9432 doc: /* Alist of charsets vs revision numbers.
9433While encoding, if a charset (car part of an element) is found,
df7492f9
KH
9434designate it with the escape sequence identifying revision (cdr part
9435of the element). */);
9436 Vcharset_revision_table = Qnil;
02ba4723
KH
9437
9438 DEFVAR_LISP ("default-process-coding-system",
9439 &Vdefault_process_coding_system,
48b0f3ae
PJ
9440 doc: /* Cons of coding systems used for process I/O by default.
9441The car part is used for decoding a process output,
9442the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 9443 Vdefault_process_coding_system = Qnil;
c4825358 9444
3f003981 9445 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
9446 doc: /*
9447Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
9448This is a vector of length 256.
9449If Nth element is non-nil, the existence of code N in a file
9450\(or output of subprocess) doesn't prevent it to be detected as
9451a coding system of ISO 2022 variant which has a flag
9452`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9453or reading output of a subprocess.
9454Only the 128th through 159th elements have a meaning. */);
3f003981 9455 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
9456
9457 DEFVAR_LISP ("select-safe-coding-system-function",
9458 &Vselect_safe_coding_system_function,
df7492f9
KH
9459 doc: /*
9460Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
9461
9462If set, this function is called to force a user to select a proper
9463coding system which can encode the text in the case that a default
9464coding system used in each operation can't encode the text.
9465
9466The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
9467 Vselect_safe_coding_system_function = Qnil;
9468
5d5bf4d8
KH
9469 DEFVAR_BOOL ("coding-system-require-warning",
9470 &coding_system_require_warning,
9471 doc: /* Internal use only.
6b89e3aa
KH
9472If non-nil, on writing a file, `select-safe-coding-system-function' is
9473called even if `coding-system-for-write' is non-nil. The command
9474`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
9475 coding_system_require_warning = 0;
9476
9477
22ab2303 9478 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 9479 &inhibit_iso_escape_detection,
df7492f9
KH
9480 doc: /*
9481If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
9482
9483By default, on reading a file, Emacs tries to detect how the text is
9484encoded. This code detection is sensitive to escape sequences. If
9485the sequence is valid as ISO2022, the code is determined as one of
9486the ISO2022 encodings, and the file is decoded by the corresponding
9487coding system (e.g. `iso-2022-7bit').
9488
9489However, there may be a case that you want to read escape sequences in
9490a file as is. In such a case, you can set this variable to non-nil.
9491Then, as the code detection ignores any escape sequences, no file is
9492detected as encoded in some ISO2022 encoding. The result is that all
9493escape sequences become visible in a buffer.
9494
9495The default value is nil, and it is strongly recommended not to change
9496it. That is because many Emacs Lisp source files that contain
9497non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9498in Emacs's distribution, and they won't be decoded correctly on
9499reading if you suppress escape sequence detection.
9500
9501The other way to read escape sequences in a file without decoding is
9502to explicitly specify some coding system that doesn't use ISO2022's
9503escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 9504 inhibit_iso_escape_detection = 0;
002fdb44
DL
9505
9506 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
9507 doc: /* Char table for translating self-inserting characters.
9508This is applied to the result of input methods, not their input. See also
9509`keyboard-translate-table'. */);
002fdb44 9510 Vtranslation_table_for_input = Qnil;
8f924df7 9511
2c78b7e1
KH
9512 {
9513 Lisp_Object args[coding_arg_max];
8f924df7 9514 Lisp_Object plist[16];
2c78b7e1
KH
9515 int i;
9516
9517 for (i = 0; i < coding_arg_max; i++)
9518 args[i] = Qnil;
9519
9520 plist[0] = intern (":name");
9521 plist[1] = args[coding_arg_name] = Qno_conversion;
9522 plist[2] = intern (":mnemonic");
9523 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9524 plist[4] = intern (":coding-type");
9525 plist[5] = args[coding_arg_coding_type] = Qraw_text;
9526 plist[6] = intern (":ascii-compatible-p");
9527 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9528 plist[8] = intern (":default-char");
9529 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
9530 plist[10] = intern (":for-unibyte");
9531 plist[11] = args[coding_arg_for_unibyte] = Qt;
9532 plist[12] = intern (":docstring");
9533 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
9534\n\
9535When you visit a file with this coding, the file is read into a\n\
9536unibyte buffer as is, thus each byte of a file is treated as a\n\
9537character.");
8f924df7
KH
9538 plist[14] = intern (":eol-type");
9539 plist[15] = args[coding_arg_eol_type] = Qunix;
9540 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1
KH
9541 Fdefine_coding_system_internal (coding_arg_max, args);
9542 }
9543
9544 setup_coding_system (Qno_conversion, &keyboard_coding);
9545 setup_coding_system (Qno_conversion, &terminal_coding);
9546 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
9547
9548 {
9549 int i;
9550
9551 for (i = 0; i < coding_category_max; i++)
9552 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9553 }
4ed46869
KH
9554}
9555
68c45bf0
PE
9556char *
9557emacs_strerror (error_number)
9558 int error_number;
9559{
9560 char *str;
9561
ca9c0567 9562 synchronize_system_messages_locale ();
68c45bf0
PE
9563 str = strerror (error_number);
9564
9565 if (! NILP (Vlocale_coding_system))
9566 {
9567 Lisp_Object dec = code_convert_string_norecord (build_string (str),
9568 Vlocale_coding_system,
9569 0);
d5db4077 9570 str = (char *) SDATA (dec);
68c45bf0
PE
9571 }
9572
9573 return str;
9574}
9575
4ed46869 9576#endif /* emacs */