(detect_coding): Delete unused variables.
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
8f924df7 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
8f924df7 5 Copyright (C) 2003
df7492f9
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 56 coding system.
4ed46869 57
e19c3639
KH
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
4ed46869 63
e19c3639 64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
4ed46869 66
df7492f9
KH
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
4ed46869
KH
82
83 The most famous coding system for multiple character sets. X's
df7492f9
KH
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
4ed46869 87
df7492f9 88 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How a text end-of-line is encoded depends on the operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
cfb43547 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we set up a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
4ed46869
KH
138
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
4ed46869 154#if 0
df7492f9 155static int
ff0dacd7 156detect_coding_XXX (coding, detect_info)
df7492f9 157 struct coding_system *coding;
ff0dacd7 158 struct coding_detection_info *detect_info;
4ed46869 159{
df7492f9
KH
160 unsigned char *src = coding->source;
161 unsigned char *src_end = coding->source + coding->src_bytes;
162 int multibytep = coding->src_multibyte;
ff0dacd7 163 int consumed_chars = 0;
df7492f9
KH
164 int found = 0;
165 ...;
166
167 while (1)
168 {
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
171 ONE_MORE_BYTE (c);
ff0dacd7
KH
172
173 if (! __C_conforms_to_XXX___ (c))
174 break;
175 if (__C_strongly_suggests_XXX__ (c))
176 found = CATEGORY_MASK_XXX;
df7492f9 177 }
ff0dacd7
KH
178 /* The byte sequence is invalid for XXX. */
179 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 180 return 0;
ff0dacd7 181
df7492f9 182 no_more_source:
ff0dacd7
KH
183 /* The source was exhausted successfully. */
184 detect_info->found |= found;
df7492f9 185 return 1;
4ed46869
KH
186}
187#endif
188
189/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
df7492f9
KH
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
d46c5b12 195
df7492f9
KH
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
d46c5b12 200
df7492f9 201 Below is the template of these functions. */
d46c5b12 202
4ed46869 203#if 0
b73bfc1c 204static void
df7492f9 205decode_coding_XXXX (coding)
4ed46869 206 struct coding_system *coding;
4ed46869 207{
df7492f9
KH
208 unsigned char *src = coding->source + coding->consumed;
209 unsigned char *src_end = coding->source + coding->src_bytes;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base;
214 /* A buffer to produce decoded characters. */
69a80ea3
KH
215 int *charbuf = coding->charbuf + coding->charbuf_used;
216 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
217 int multibytep = coding->src_multibyte;
218
219 while (1)
220 {
221 src_base = src;
222 if (charbuf >= charbuf_end)
223 /* No more room to produce a decoded character. */
224 break;
225 ONE_MORE_BYTE (c);
226 /* Decode it. */
227 }
228
229 no_more_source:
230 if (src_base < src_end
231 && coding->mode & CODING_MODE_LAST_BLOCK)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base < src_end && charbuf < charbuf_end)
235 *charbuf++ = *src_base++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding->consumed = coding->consumed_char = src_base - coding->source;
239 /* Remember how many characters we produced. */
240 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
241}
242#endif
243
244/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
df7492f9
KH
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
d46c5b12 250
df7492f9
KH
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 255
df7492f9
KH
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of the not-yet-encoded source text.
d46c5b12 259
df7492f9 260 Below is a template of these functions. */
4ed46869 261#if 0
b73bfc1c 262static void
df7492f9 263encode_coding_XXX (coding)
4ed46869 264 struct coding_system *coding;
4ed46869 265{
df7492f9
KH
266 int multibytep = coding->dst_multibyte;
267 int *charbuf = coding->charbuf;
268 int *charbuf_end = coding->charbuf + coding->charbuf_used;
269 unsigned char *dst = coding->destination + coding->produced;
270 unsigned char *dst_end = coding->destination + coding->dst_bytes;
271 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
272 int produced_chars = 0;
273
274 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
275 {
276 int c = *charbuf;
277 /* Encode C into DST, and increment DST. */
278 }
279 label_no_more_destination:
280 /* How many chars and bytes we produced. */
281 coding->produced_char += produced_chars;
282 coding->produced = dst - coding->destination;
4ed46869
KH
283}
284#endif
285
4ed46869
KH
286\f
287/*** 1. Preamble ***/
288
68c45bf0 289#include <config.h>
4ed46869
KH
290#include <stdio.h>
291
4ed46869
KH
292#include "lisp.h"
293#include "buffer.h"
df7492f9 294#include "character.h"
4ed46869
KH
295#include "charset.h"
296#include "ccl.h"
df7492f9 297#include "composite.h"
4ed46869
KH
298#include "coding.h"
299#include "window.h"
4ed46869 300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
304Lisp_Object Qunix, Qdos;
305extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
306Lisp_Object Qbuffer_file_coding_system;
307Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 308Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
df7492f9 310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 311Lisp_Object Qbig, Qlittle;
bb0115a2 312Lisp_Object Qcoding_system_history;
1397dc18 313Lisp_Object Qvalid_codes;
a6f87d34
KH
314Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
315Lisp_Object QCdecode_translation_table, QCencode_translation_table;
316Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
4ed46869
KH
317
318extern Lisp_Object Qinsert_file_contents, Qwrite_region;
319Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
320Lisp_Object Qstart_process, Qopen_network_stream;
321Lisp_Object Qtarget_idx;
322
065e3595
KH
323Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
324Lisp_Object Qinterrupted, Qinsufficient_memory;
325
5d5bf4d8
KH
326int coding_system_require_warning;
327
d46c5b12
KH
328Lisp_Object Vselect_safe_coding_system_function;
329
7722baf9
EZ
330/* Mnemonic string for each format of end-of-line. */
331Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
332/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 333 decided. */
7722baf9 334Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
335
336#ifdef emacs
337
4608c386
KH
338Lisp_Object Vcoding_system_list, Vcoding_system_alist;
339
340Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 341
d46c5b12
KH
342/* Coding system emacs-mule and raw-text are for converting only
343 end-of-line format. */
344Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 345Lisp_Object Qutf_8_emacs;
ecf488bc 346
4ed46869
KH
347/* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */
349/* Coding-system for reading files and receiving data from process. */
350Lisp_Object Vcoding_system_for_read;
351/* Coding-system for writing files and sending data to process. */
352Lisp_Object Vcoding_system_for_write;
353/* Coding-system actually used in the latest I/O. */
354Lisp_Object Vlast_coding_system_used;
065e3595
KH
355/* Set to non-nil when an error is detected while code conversion. */
356Lisp_Object Vlast_code_conversion_error;
c4825358 357/* A vector of length 256 which contains information about special
94487c4e 358 Latin codes (especially for dealing with Microsoft codes). */
3f003981 359Lisp_Object Vlatin_extra_code_table;
c4825358 360
9ce27fde
KH
361/* Flag to inhibit code conversion of end-of-line format. */
362int inhibit_eol_conversion;
363
74383408
KH
364/* Flag to inhibit ISO2022 escape sequence detection. */
365int inhibit_iso_escape_detection;
366
ed29121d
EZ
367/* Flag to make buffer-file-coding-system inherit from process-coding. */
368int inherit_process_coding_system;
369
c4825358 370/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
371struct coding_system terminal_coding;
372
c4825358
KH
373/* Coding system to be used to encode text for terminal display when
374 terminal coding system is nil. */
375struct coding_system safe_terminal_coding;
376
377/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
378struct coding_system keyboard_coding;
379
02ba4723
KH
380Lisp_Object Vfile_coding_system_alist;
381Lisp_Object Vprocess_coding_system_alist;
382Lisp_Object Vnetwork_coding_system_alist;
4ed46869 383
68c45bf0
PE
384Lisp_Object Vlocale_coding_system;
385
4ed46869
KH
386#endif /* emacs */
387
f967223b
KH
388/* Flag to tell if we look up translation table on character code
389 conversion. */
84fbb8a0 390Lisp_Object Venable_character_translation;
f967223b
KH
391/* Standard translation table to look up on decoding (reading). */
392Lisp_Object Vstandard_translation_table_for_decode;
393/* Standard translation table to look up on encoding (writing). */
394Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 395
f967223b
KH
396Lisp_Object Qtranslation_table;
397Lisp_Object Qtranslation_table_id;
398Lisp_Object Qtranslation_table_for_decode;
399Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
400
401/* Alist of charsets vs revision number. */
df7492f9 402static Lisp_Object Vcharset_revision_table;
4ed46869 403
02ba4723
KH
404/* Default coding systems used for process I/O. */
405Lisp_Object Vdefault_process_coding_system;
406
002fdb44
DL
407/* Char table for translating Quail and self-inserting input. */
408Lisp_Object Vtranslation_table_for_input;
409
df7492f9
KH
410/* Two special coding systems. */
411Lisp_Object Vsjis_coding_system;
412Lisp_Object Vbig5_coding_system;
413
df7492f9
KH
414/* ISO2022 section */
415
416#define CODING_ISO_INITIAL(coding, reg) \
417 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
418 coding_attr_iso_initial), \
419 reg)))
420
421
422#define CODING_ISO_REQUEST(coding, charset_id) \
423 ((charset_id <= (coding)->max_charset_id \
424 ? (coding)->safe_charsets[charset_id] \
425 : -1))
426
427
428#define CODING_ISO_FLAGS(coding) \
429 ((coding)->spec.iso_2022.flags)
430#define CODING_ISO_DESIGNATION(coding, reg) \
431 ((coding)->spec.iso_2022.current_designation[reg])
432#define CODING_ISO_INVOCATION(coding, plane) \
433 ((coding)->spec.iso_2022.current_invocation[plane])
434#define CODING_ISO_SINGLE_SHIFTING(coding) \
435 ((coding)->spec.iso_2022.single_shifting)
436#define CODING_ISO_BOL(coding) \
437 ((coding)->spec.iso_2022.bol)
438#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
439 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
440
441/* Control characters of ISO2022. */
442 /* code */ /* function */
443#define ISO_CODE_LF 0x0A /* line-feed */
444#define ISO_CODE_CR 0x0D /* carriage-return */
445#define ISO_CODE_SO 0x0E /* shift-out */
446#define ISO_CODE_SI 0x0F /* shift-in */
447#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
448#define ISO_CODE_ESC 0x1B /* escape */
449#define ISO_CODE_SS2 0x8E /* single-shift-2 */
450#define ISO_CODE_SS3 0x8F /* single-shift-3 */
451#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
452
453/* All code (1-byte) of ISO2022 is classified into one of the
454 followings. */
455enum iso_code_class_type
456 {
457 ISO_control_0, /* Control codes in the range
458 0x00..0x1F and 0x7F, except for the
459 following 4 codes. */
df7492f9
KH
460 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
461 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
462 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
463 ISO_escape, /* ISO_CODE_ESC (0x1B) */
464 ISO_control_1, /* Control codes in the range
465 0x80..0x9F, except for the
466 following 3 codes. */
467 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
468 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
469 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
470 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
471 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
472 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
473 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
474 };
05e6f5dc 475
df7492f9
KH
476/** Each CODING_ISO_FLAG_XXX macro defines a flag bit of the
477 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 478
df7492f9
KH
479/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
480 instead of the correct short-form sequence (e.g. ESC $ A). */
481#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 482
df7492f9
KH
483/* If set, reset graphic planes and registers at end-of-line to the
484 initial state. */
485#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 486
df7492f9
KH
487/* If set, reset graphic planes and registers before any control
488 characters to the initial state. */
489#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 490
df7492f9
KH
491/* If set, encode by 7-bit environment. */
492#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 493
df7492f9
KH
494/* If set, use locking-shift function. */
495#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 496
df7492f9
KH
497/* If set, use single-shift function. Overwrite
498 CODING_ISO_FLAG_LOCKING_SHIFT. */
499#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 500
df7492f9
KH
501/* If set, use designation escape sequence. */
502#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 503
df7492f9
KH
504/* If set, produce revision number sequence. */
505#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 506
df7492f9
KH
507/* If set, produce ISO6429's direction specifying sequence. */
508#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 509
df7492f9
KH
510/* If set, assume designation states are reset at beginning of line on
511 output. */
512#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 513
df7492f9
KH
514/* If set, designation sequence should be placed at beginning of line
515 on output. */
516#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 517
df7492f9
KH
518/* If set, do not encode unsafe characters on output. */
519#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 520
df7492f9
KH
521/* If set, extra latin codes (128..159) are accepted as a valid code
522 on input. */
523#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 524
df7492f9 525#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 526
df7492f9 527#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 528
bf16eb23 529#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 530
bf16eb23 531#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 532
bf16eb23 533#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 534
df7492f9
KH
535/* A character to be produced on output if encoding of the original
536 character is prohibited by CODING_ISO_FLAG_SAFE. */
537#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 538
4ed46869 539
df7492f9
KH
540/* UTF-16 section */
541#define CODING_UTF_16_BOM(coding) \
542 ((coding)->spec.utf_16.bom)
4ed46869 543
df7492f9
KH
544#define CODING_UTF_16_ENDIAN(coding) \
545 ((coding)->spec.utf_16.endian)
4ed46869 546
df7492f9
KH
547#define CODING_UTF_16_SURROGATE(coding) \
548 ((coding)->spec.utf_16.surrogate)
4ed46869 549
4ed46869 550
df7492f9
KH
551/* CCL section */
552#define CODING_CCL_DECODER(coding) \
553 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
554#define CODING_CCL_ENCODER(coding) \
555 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
556#define CODING_CCL_VALIDS(coding) \
8f924df7 557 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 558
5a936b46 559/* Index for each coding category in `coding_categories' */
4ed46869 560
df7492f9
KH
561enum coding_category
562 {
563 coding_category_iso_7,
564 coding_category_iso_7_tight,
565 coding_category_iso_8_1,
566 coding_category_iso_8_2,
567 coding_category_iso_7_else,
568 coding_category_iso_8_else,
569 coding_category_utf_8,
570 coding_category_utf_16_auto,
571 coding_category_utf_16_be,
572 coding_category_utf_16_le,
573 coding_category_utf_16_be_nosig,
574 coding_category_utf_16_le_nosig,
575 coding_category_charset,
576 coding_category_sjis,
577 coding_category_big5,
578 coding_category_ccl,
579 coding_category_emacs_mule,
580 /* All above are targets of code detection. */
581 coding_category_raw_text,
582 coding_category_undecided,
583 coding_category_max
584 };
585
586/* Definitions of flag bits used in detect_coding_XXXX. */
587#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
588#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
589#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
590#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
591#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
592#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
593#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 594#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
595#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
596#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
597#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
598#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
599#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
600#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
601#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
602#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
603#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 604#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
605
606/* This value is returned if detect_coding_mask () finds nothing other
607 than ASCII characters. */
608#define CATEGORY_MASK_ANY \
609 (CATEGORY_MASK_ISO_7 \
610 | CATEGORY_MASK_ISO_7_TIGHT \
611 | CATEGORY_MASK_ISO_8_1 \
612 | CATEGORY_MASK_ISO_8_2 \
613 | CATEGORY_MASK_ISO_7_ELSE \
614 | CATEGORY_MASK_ISO_8_ELSE \
615 | CATEGORY_MASK_UTF_8 \
616 | CATEGORY_MASK_UTF_16_BE \
617 | CATEGORY_MASK_UTF_16_LE \
618 | CATEGORY_MASK_UTF_16_BE_NOSIG \
619 | CATEGORY_MASK_UTF_16_LE_NOSIG \
620 | CATEGORY_MASK_CHARSET \
621 | CATEGORY_MASK_SJIS \
622 | CATEGORY_MASK_BIG5 \
623 | CATEGORY_MASK_CCL \
624 | CATEGORY_MASK_EMACS_MULE)
625
626
627#define CATEGORY_MASK_ISO_7BIT \
628 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
629
630#define CATEGORY_MASK_ISO_8BIT \
631 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
632
633#define CATEGORY_MASK_ISO_ELSE \
634 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
635
636#define CATEGORY_MASK_ISO_ESCAPE \
637 (CATEGORY_MASK_ISO_7 \
638 | CATEGORY_MASK_ISO_7_TIGHT \
639 | CATEGORY_MASK_ISO_7_ELSE \
640 | CATEGORY_MASK_ISO_8_ELSE)
641
642#define CATEGORY_MASK_ISO \
643 ( CATEGORY_MASK_ISO_7BIT \
644 | CATEGORY_MASK_ISO_8BIT \
645 | CATEGORY_MASK_ISO_ELSE)
646
647#define CATEGORY_MASK_UTF_16 \
648 (CATEGORY_MASK_UTF_16_BE \
649 | CATEGORY_MASK_UTF_16_LE \
650 | CATEGORY_MASK_UTF_16_BE_NOSIG \
651 | CATEGORY_MASK_UTF_16_LE_NOSIG)
652
653
654/* List of symbols `coding-category-xxx' ordered by priority. This
655 variable is exposed to Emacs Lisp. */
656static Lisp_Object Vcoding_category_list;
657
658/* Table of coding categories (Lisp symbols). This variable is for
659 internal use only. */
660static Lisp_Object Vcoding_category_table;
661
662/* Table of coding-categories ordered by priority. */
663static enum coding_category coding_priorities[coding_category_max];
664
665/* Nth element is a coding context for the coding system bound to the
666 Nth coding category. */
667static struct coding_system coding_categories[coding_category_max];
668
df7492f9
KH
669/*** Commonly used macros and functions ***/
670
671#ifndef min
672#define min(a, b) ((a) < (b) ? (a) : (b))
673#endif
674#ifndef max
675#define max(a, b) ((a) > (b) ? (a) : (b))
676#endif
4ed46869 677
24a73b0a
KH
678#define CODING_GET_INFO(coding, attrs, charset_list) \
679 do { \
680 (attrs) = CODING_ID_ATTRS ((coding)->id); \
681 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 682 } while (0)
4ed46869 683
4ed46869 684
df7492f9
KH
685/* Safely get one byte from the source text pointed by SRC which ends
686 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
687 in the source, it jumps to `no_more_source'. If multibytep is
688 nonzero, and a multibyte character is found at SRC, set C to the
689 negative value of the character code. The caller should declare
690 and set these variables appropriately in advance:
691 src, src_end, src_base, multibytep, consumed_chars, coding */
aa72b389 692
065e3595
KH
693#define ONE_MORE_BYTE(c) \
694 do { \
695 if (src == src_end) \
696 { \
697 if (src_base < src) \
698 record_conversion_result \
699 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
700 goto no_more_source; \
701 } \
702 c = *src++; \
703 if (multibytep && (c & 0x80)) \
704 { \
705 if ((c & 0xFE) == 0xC0) \
706 c = ((c & 1) << 6) | *src++; \
707 else \
708 { \
709 c = - string_char (--src, &src, NULL); \
710 record_conversion_result \
711 (coding, CODING_RESULT_INVALID_SRC); \
712 } \
713 } \
714 consumed_chars++; \
aa72b389
KH
715 } while (0)
716
aa72b389 717
065e3595
KH
718#define ONE_MORE_BYTE_NO_CHECK(c) \
719 do { \
720 c = *src++; \
721 if (multibytep && (c & 0x80)) \
722 { \
723 if ((c & 0xFE) == 0xC0) \
724 c = ((c & 1) << 6) | *src++; \
725 else \
726 { \
727 c = - string_char (--src, &src, NULL); \
728 record_conversion_result \
729 (coding, CODING_RESULT_INVALID_SRC); \
730 } \
731 } \
732 consumed_chars++; \
aa72b389
KH
733 } while (0)
734
aa72b389 735
df7492f9
KH
736/* Store a byte C in the place pointed by DST and increment DST to the
737 next free point, and increment PRODUCED_CHARS. The caller should
738 assure that C is 0..127, and declare and set the variable `dst'
739 appropriately in advance.
740*/
aa72b389
KH
741
742
df7492f9
KH
743#define EMIT_ONE_ASCII_BYTE(c) \
744 do { \
745 produced_chars++; \
746 *dst++ = (c); \
b6871cc7 747 } while (0)
aa72b389
KH
748
749
df7492f9 750/* Like EMIT_ONE_ASCII_BYTE, but store two bytes: C1 and C2. */
aa72b389 751
df7492f9
KH
752#define EMIT_TWO_ASCII_BYTES(c1, c2) \
753 do { \
754 produced_chars += 2; \
755 *dst++ = (c1), *dst++ = (c2); \
756 } while (0)
aa72b389
KH
757
758
df7492f9
KH
759/* Store a byte C in the place pointed by DST and increment DST to the
760 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
761 nonzero, store in an appropriate multibyte form. The caller should
762 declare and set the variables `dst' and `multibytep' appropriately
763 in advance. */
764
765#define EMIT_ONE_BYTE(c) \
766 do { \
767 produced_chars++; \
768 if (multibytep) \
769 { \
770 int ch = (c); \
771 if (ch >= 0x80) \
772 ch = BYTE8_TO_CHAR (ch); \
773 CHAR_STRING_ADVANCE (ch, dst); \
774 } \
775 else \
776 *dst++ = (c); \
aa72b389 777 } while (0)
aa72b389 778
aa72b389 779
df7492f9 780/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 781
e19c3639
KH
782#define EMIT_TWO_BYTES(c1, c2) \
783 do { \
784 produced_chars += 2; \
785 if (multibytep) \
786 { \
787 int ch; \
788 \
789 ch = (c1); \
790 if (ch >= 0x80) \
791 ch = BYTE8_TO_CHAR (ch); \
792 CHAR_STRING_ADVANCE (ch, dst); \
793 ch = (c2); \
794 if (ch >= 0x80) \
795 ch = BYTE8_TO_CHAR (ch); \
796 CHAR_STRING_ADVANCE (ch, dst); \
797 } \
798 else \
799 { \
800 *dst++ = (c1); \
801 *dst++ = (c2); \
802 } \
aa72b389
KH
803 } while (0)
804
805
df7492f9
KH
806#define EMIT_THREE_BYTES(c1, c2, c3) \
807 do { \
808 EMIT_ONE_BYTE (c1); \
809 EMIT_TWO_BYTES (c2, c3); \
810 } while (0)
aa72b389 811
aa72b389 812
df7492f9
KH
813#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
814 do { \
815 EMIT_TWO_BYTES (c1, c2); \
816 EMIT_TWO_BYTES (c3, c4); \
817 } while (0)
aa72b389 818
aa72b389 819
f6cbaf43
KH
820/* Prototypes for static functions. */
821static void record_conversion_result P_ ((struct coding_system *coding,
822 enum coding_result_code result));
823static int detect_coding_utf_8 P_ ((struct coding_system *,
824 struct coding_detection_info *info));
825static void decode_coding_utf_8 P_ ((struct coding_system *));
826static int encode_coding_utf_8 P_ ((struct coding_system *));
827
828static int detect_coding_utf_16 P_ ((struct coding_system *,
829 struct coding_detection_info *info));
830static void decode_coding_utf_16 P_ ((struct coding_system *));
831static int encode_coding_utf_16 P_ ((struct coding_system *));
832
833static int detect_coding_iso_2022 P_ ((struct coding_system *,
834 struct coding_detection_info *info));
835static void decode_coding_iso_2022 P_ ((struct coding_system *));
836static int encode_coding_iso_2022 P_ ((struct coding_system *));
837
838static int detect_coding_emacs_mule P_ ((struct coding_system *,
839 struct coding_detection_info *info));
840static void decode_coding_emacs_mule P_ ((struct coding_system *));
841static int encode_coding_emacs_mule P_ ((struct coding_system *));
842
843static int detect_coding_sjis P_ ((struct coding_system *,
844 struct coding_detection_info *info));
845static void decode_coding_sjis P_ ((struct coding_system *));
846static int encode_coding_sjis P_ ((struct coding_system *));
847
848static int detect_coding_big5 P_ ((struct coding_system *,
849 struct coding_detection_info *info));
850static void decode_coding_big5 P_ ((struct coding_system *));
851static int encode_coding_big5 P_ ((struct coding_system *));
852
853static int detect_coding_ccl P_ ((struct coding_system *,
854 struct coding_detection_info *info));
855static void decode_coding_ccl P_ ((struct coding_system *));
856static int encode_coding_ccl P_ ((struct coding_system *));
857
858static void decode_coding_raw_text P_ ((struct coding_system *));
859static int encode_coding_raw_text P_ ((struct coding_system *));
860
861static void coding_set_source P_ ((struct coding_system *));
862static void coding_set_destination P_ ((struct coding_system *));
863static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
864static void coding_alloc_by_making_gap P_ ((struct coding_system *,
865 EMACS_INT));
866static unsigned char *alloc_destination P_ ((struct coding_system *,
867 EMACS_INT, unsigned char *));
868static void setup_iso_safe_charsets P_ ((Lisp_Object));
869static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
870 int *, int *,
871 unsigned char *));
872static int detect_eol P_ ((const unsigned char *,
873 EMACS_INT, enum coding_category));
874static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
875static void decode_eol P_ ((struct coding_system *));
876static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
877static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
878 int, int *, int *));
879static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
880static INLINE void produce_composition P_ ((struct coding_system *, int *,
881 EMACS_INT));
882static INLINE void produce_charset P_ ((struct coding_system *, int *,
883 EMACS_INT));
884static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
885static int decode_coding P_ ((struct coding_system *));
886static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
887 struct coding_system *,
888 int *, EMACS_INT *));
889static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
890 struct coding_system *,
891 int *, EMACS_INT *));
892static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
893static int encode_coding P_ ((struct coding_system *));
894static Lisp_Object make_conversion_work_buffer P_ ((int));
895static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
896static INLINE int char_encodable_p P_ ((int, Lisp_Object));
897static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
898
065e3595
KH
899static void
900record_conversion_result (struct coding_system *coding,
901 enum coding_result_code result)
902{
903 coding->result = result;
904 switch (result)
905 {
906 case CODING_RESULT_INSUFFICIENT_SRC:
907 Vlast_code_conversion_error = Qinsufficient_source;
908 break;
909 case CODING_RESULT_INCONSISTENT_EOL:
910 Vlast_code_conversion_error = Qinconsistent_eol;
911 break;
912 case CODING_RESULT_INVALID_SRC:
913 Vlast_code_conversion_error = Qinvalid_source;
914 break;
915 case CODING_RESULT_INTERRUPT:
916 Vlast_code_conversion_error = Qinterrupted;
917 break;
918 case CODING_RESULT_INSUFFICIENT_MEM:
919 Vlast_code_conversion_error = Qinsufficient_memory;
920 break;
921 }
922}
923
df7492f9
KH
/* Set C to DECODE_CHAR (CHARSET, CODE).  If charset_map_loaded is
   nonzero afterwards, refresh CODING->source with coding_set_source
   and move SRC, SRC_BASE and SRC_END by the distance CODING->source
   moved, so that they keep pointing at the same source positions.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {									\
    charset_map_loaded = 0;						\
    c = DECODE_CHAR (charset, code);					\
    if (charset_map_loaded)						\
      {									\
	const unsigned char *old_source = coding->source;		\
	EMACS_INT shift;						\
									\
	coding_set_source (coding);					\
	shift = coding->source - old_source;				\
	src += shift;							\
	src_base += shift;						\
	src_end += shift;						\
      }									\
  } while (0)
940
941
df7492f9
KH
/* If storing BYTES more bytes at DST would reach DST_END, enlarge the
   destination with alloc_destination and refresh `dst' and `dst_end'.
   The amount requested is BYTES plus the number of elements still
   pending between `charbuf' and `charbuf_end'.  Uses `coding', `dst',
   `dst_end', `charbuf' and `charbuf_end' of the expansion site.  */
#define ASSURE_DESTINATION(bytes)					\
  do {									\
    if (dst_end <= dst + (bytes))					\
      {									\
	int extra = charbuf_end - charbuf + (bytes);			\
									\
	dst = alloc_destination (coding, extra, dst);			\
	dst_end = coding->destination + coding->dst_bytes;		\
      }									\
  } while (0)
aa72b389 952
aa72b389 953
aa72b389 954
df7492f9
KH
955static void
956coding_set_source (coding)
aa72b389 957 struct coding_system *coding;
aa72b389 958{
df7492f9
KH
959 if (BUFFERP (coding->src_object))
960 {
2cb26057 961 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 962
df7492f9 963 if (coding->src_pos < 0)
2cb26057 964 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 965 else
2cb26057 966 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 967 }
df7492f9 968 else if (STRINGP (coding->src_object))
aa72b389 969 {
8f924df7 970 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 971 }
df7492f9
KH
972 else
973 /* Otherwise, the source is C string and is never relocated
974 automatically. Thus we don't have to update anything. */
975 ;
976}
aa72b389 977
df7492f9
KH
978static void
979coding_set_destination (coding)
980 struct coding_system *coding;
981{
982 if (BUFFERP (coding->dst_object))
aa72b389 983 {
df7492f9 984 if (coding->src_pos < 0)
aa72b389 985 {
28f67a95
KH
986 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
987 coding->dst_bytes = (GAP_END_ADDR
988 - (coding->src_bytes - coding->consumed)
989 - coding->destination);
aa72b389 990 }
df7492f9 991 else
28f67a95
KH
992 {
993 /* We are sure that coding->dst_pos_byte is before the gap
994 of the buffer. */
995 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
996 + coding->dst_pos_byte - 1);
997 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
998 - coding->destination);
999 }
df7492f9
KH
1000 }
1001 else
1002 /* Otherwise, the destination is C string and is never relocated
1003 automatically. Thus we don't have to update anything. */
1004 ;
1005}
1006
1007
1008static void
1009coding_alloc_by_realloc (coding, bytes)
1010 struct coding_system *coding;
1011 EMACS_INT bytes;
1012{
1013 coding->destination = (unsigned char *) xrealloc (coding->destination,
1014 coding->dst_bytes + bytes);
1015 coding->dst_bytes += bytes;
1016}
1017
1018static void
1019coding_alloc_by_making_gap (coding, bytes)
1020 struct coding_system *coding;
1021 EMACS_INT bytes;
1022{
2c78b7e1
KH
1023 if (BUFFERP (coding->dst_object)
1024 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
1025 {
1026 EMACS_INT add = coding->src_bytes - coding->consumed;
1027
1028 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1029 make_gap (bytes);
1030 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1031 }
1032 else
1033 {
2c78b7e1
KH
1034 Lisp_Object this_buffer;
1035
1036 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1037 set_buffer_internal (XBUFFER (coding->dst_object));
1038 make_gap (bytes);
1039 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1040 }
df7492f9 1041}
8f924df7 1042
df7492f9
KH
1043
1044static unsigned char *
1045alloc_destination (coding, nbytes, dst)
1046 struct coding_system *coding;
3e139625 1047 EMACS_INT nbytes;
df7492f9
KH
1048 unsigned char *dst;
1049{
1050 EMACS_INT offset = dst - coding->destination;
1051
1052 if (BUFFERP (coding->dst_object))
1053 coding_alloc_by_making_gap (coding, nbytes);
aa72b389 1054 else
df7492f9 1055 coding_alloc_by_realloc (coding, nbytes);
065e3595 1056 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1057 coding_set_destination (coding);
1058 dst = coding->destination + offset;
1059 return dst;
1060}
aa72b389 1061
ff0dacd7
KH
1062/** Macros for annotations. */
1063
1064/* Maximum length of annotation data (sum of annotations for
1065 composition and charset). */
69a80ea3 1066#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
ff0dacd7
KH
1067
1068/* An annotation data is stored in the array coding->charbuf in this
1069 format:
69a80ea3 1070 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1071 LENGTH is the number of elements in the annotation.
1072 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1073 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1074
1075 The format of the following elements depend on ANNOTATION_MASK.
1076
1077 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1078 follow:
1079 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1080 METHOD is one of enum composition_method.
1081 Optional COMPOSITION-COMPONENTS are characters and composition
1082 rules.
1083
1084 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1085 follows. */
1086
/* Store the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advancing BUF by three elements, and set coding->annotated.
   `coding' is taken from the expansion site.

   The expansion deliberately has no trailing semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is exactly one statement and can be
   used as the body of an `if' that is followed by `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)	\
  do {							\
    *(buf)++ = -(len);					\
    *(buf)++ = (mask);					\
    *(buf)++ = (nchars);				\
    coding->annotated = 1;				\
  } while (0)
1094
69a80ea3
KH
/* Store a composition annotation at BUF: the annotation header with
   length 4 and CODING_ANNOTATE_COMPOSITION_MASK for NCHARS
   characters, followed by METHOD.  BUF is advanced by four
   elements.  BUF and METHOD are parenthesized so that arbitrary
   expressions can be passed, as in ADD_ANNOTATION_DATA.  */
#define ADD_COMPOSITION_DATA(buf, nchars, method)			      \
  do {									      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars);  \
    *(buf)++ = (method);						      \
  } while (0)
1100
1101
69a80ea3
KH
/* Store a charset annotation at BUF: the annotation header with
   length 4 and CODING_ANNOTATE_CHARSET_MASK for NCHARS characters,
   followed by the charset ID.  BUF is advanced by four elements.
   BUF and ID are parenthesized so that arbitrary expressions can be
   passed, as in ADD_ANNOTATION_DATA.  */
#define ADD_CHARSET_DATA(buf, nchars, id)				  \
  do {									  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars);  \
    *(buf)++ = (id);							  \
  } while (0)
1107
df7492f9
KH
1108\f
1109/*** 2. Emacs' internal format (emacs-utf-8) ***/
1110
1111
1112
1113\f
1114/*** 3. UTF-8 ***/
1115
1116/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1117 Check if a text is encoded in UTF-8. If it is, return 1, else
1118 return 0. */
df7492f9
KH
1119
/* Nonzero if the bits of C selected by MASK are equal to BITS.  */
#define UTF_8_OCTET_MATCH_P(c, mask, bits) (((c) & (mask)) == (bits))

/* Classification of an octet C by its high bits: a one-octet
   character (anything below 0x80), a trailing octet (10xxxxxx), or
   the leading octet of a 2- (110xxxxx), 3- (1110xxxx), 4- (11110xxx)
   or 5-octet (111110xx) sequence.  */
#define UTF_8_1_OCTET_P(c)		((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)		UTF_8_OCTET_MATCH_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c)	UTF_8_OCTET_MATCH_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c)	UTF_8_OCTET_MATCH_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c)	UTF_8_OCTET_MATCH_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c)	UTF_8_OCTET_MATCH_P (c, 0xFC, 0xF8)
1126
1127static int
ff0dacd7 1128detect_coding_utf_8 (coding, detect_info)
df7492f9 1129 struct coding_system *coding;
ff0dacd7 1130 struct coding_detection_info *detect_info;
df7492f9 1131{
065e3595 1132 const unsigned char *src = coding->source, *src_base;
8f924df7 1133 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1134 int multibytep = coding->src_multibyte;
1135 int consumed_chars = 0;
1136 int found = 0;
1137
ff0dacd7 1138 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1139 /* A coding system of this category is always ASCII compatible. */
1140 src += coding->head_ascii;
1141
1142 while (1)
aa72b389 1143 {
df7492f9 1144 int c, c1, c2, c3, c4;
aa72b389 1145
065e3595 1146 src_base = src;
df7492f9 1147 ONE_MORE_BYTE (c);
065e3595 1148 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1149 continue;
1150 ONE_MORE_BYTE (c1);
065e3595 1151 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1152 break;
1153 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1154 {
ff0dacd7 1155 found = CATEGORY_MASK_UTF_8;
df7492f9 1156 continue;
aa72b389 1157 }
df7492f9 1158 ONE_MORE_BYTE (c2);
065e3595 1159 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1160 break;
1161 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1162 {
ff0dacd7 1163 found = CATEGORY_MASK_UTF_8;
df7492f9 1164 continue;
aa72b389 1165 }
df7492f9 1166 ONE_MORE_BYTE (c3);
065e3595 1167 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1168 break;
1169 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1170 {
ff0dacd7 1171 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1172 continue;
1173 }
1174 ONE_MORE_BYTE (c4);
065e3595 1175 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1176 break;
1177 if (UTF_8_5_OCTET_LEADING_P (c))
1178 {
ff0dacd7 1179 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1180 continue;
1181 }
1182 break;
aa72b389 1183 }
ff0dacd7 1184 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1185 return 0;
aa72b389 1186
df7492f9 1187 no_more_source:
065e3595 1188 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1189 {
ff0dacd7 1190 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1191 return 0;
aa72b389 1192 }
ff0dacd7
KH
1193 detect_info->found |= found;
1194 return 1;
aa72b389
KH
1195}
1196
4ed46869 1197
b73bfc1c 1198static void
df7492f9 1199decode_coding_utf_8 (coding)
b73bfc1c 1200 struct coding_system *coding;
b73bfc1c 1201{
8f924df7
KH
1202 const unsigned char *src = coding->source + coding->consumed;
1203 const unsigned char *src_end = coding->source + coding->src_bytes;
1204 const unsigned char *src_base;
69a80ea3
KH
1205 int *charbuf = coding->charbuf + coding->charbuf_used;
1206 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1207 int consumed_chars = 0, consumed_chars_base;
1208 int multibytep = coding->src_multibyte;
24a73b0a 1209 Lisp_Object attr, charset_list;
4ed46869 1210
24a73b0a 1211 CODING_GET_INFO (coding, attr, charset_list);
df7492f9
KH
1212
1213 while (1)
b73bfc1c 1214 {
df7492f9 1215 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1216
df7492f9
KH
1217 src_base = src;
1218 consumed_chars_base = consumed_chars;
4af310db 1219
df7492f9
KH
1220 if (charbuf >= charbuf_end)
1221 break;
1222
1223 ONE_MORE_BYTE (c1);
065e3595
KH
1224 if (c1 < 0)
1225 {
1226 c = - c1;
1227 }
1228 else if (UTF_8_1_OCTET_P(c1))
df7492f9
KH
1229 {
1230 c = c1;
4af310db 1231 }
df7492f9 1232 else
4af310db 1233 {
df7492f9 1234 ONE_MORE_BYTE (c2);
065e3595 1235 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1236 goto invalid_code;
1237 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1238 {
b0edb2c5
DL
1239 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1240 /* Reject overlong sequences here and below. Encoders
1241 producing them are incorrect, they can be misleading,
1242 and they mess up read/write invariance. */
1243 if (c < 128)
1244 goto invalid_code;
4af310db 1245 }
df7492f9 1246 else
aa72b389 1247 {
df7492f9 1248 ONE_MORE_BYTE (c3);
065e3595 1249 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1250 goto invalid_code;
1251 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1252 {
1253 c = (((c1 & 0xF) << 12)
1254 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1255 if (c < 0x800
1256 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1257 goto invalid_code;
1258 }
df7492f9
KH
1259 else
1260 {
1261 ONE_MORE_BYTE (c4);
065e3595 1262 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1263 goto invalid_code;
1264 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1265 {
df7492f9
KH
1266 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1267 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1268 if (c < 0x10000)
1269 goto invalid_code;
1270 }
df7492f9
KH
1271 else
1272 {
1273 ONE_MORE_BYTE (c5);
065e3595 1274 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1275 goto invalid_code;
1276 if (UTF_8_5_OCTET_LEADING_P (c1))
1277 {
1278 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1279 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1280 | (c5 & 0x3F));
b0edb2c5 1281 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1282 goto invalid_code;
1283 }
1284 else
1285 goto invalid_code;
1286 }
1287 }
aa72b389 1288 }
b73bfc1c 1289 }
df7492f9
KH
1290
1291 *charbuf++ = c;
1292 continue;
1293
1294 invalid_code:
1295 src = src_base;
1296 consumed_chars = consumed_chars_base;
1297 ONE_MORE_BYTE (c);
1298 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1299 coding->errors++;
aa72b389
KH
1300 }
1301
df7492f9
KH
1302 no_more_source:
1303 coding->consumed_char += consumed_chars_base;
1304 coding->consumed = src_base - coding->source;
1305 coding->charbuf_used = charbuf - coding->charbuf;
1306}
1307
1308
1309static int
1310encode_coding_utf_8 (coding)
1311 struct coding_system *coding;
1312{
1313 int multibytep = coding->dst_multibyte;
1314 int *charbuf = coding->charbuf;
1315 int *charbuf_end = charbuf + coding->charbuf_used;
1316 unsigned char *dst = coding->destination + coding->produced;
1317 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1318 int produced_chars = 0;
df7492f9
KH
1319 int c;
1320
1321 if (multibytep)
aa72b389 1322 {
df7492f9
KH
1323 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1324
1325 while (charbuf < charbuf_end)
b73bfc1c 1326 {
df7492f9 1327 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1328
df7492f9
KH
1329 ASSURE_DESTINATION (safe_room);
1330 c = *charbuf++;
28f67a95
KH
1331 if (CHAR_BYTE8_P (c))
1332 {
1333 c = CHAR_TO_BYTE8 (c);
1334 EMIT_ONE_BYTE (c);
1335 }
1336 else
1337 {
1338 CHAR_STRING_ADVANCE (c, pend);
1339 for (p = str; p < pend; p++)
1340 EMIT_ONE_BYTE (*p);
1341 }
b73bfc1c 1342 }
aa72b389 1343 }
df7492f9
KH
1344 else
1345 {
1346 int safe_room = MAX_MULTIBYTE_LENGTH;
1347
1348 while (charbuf < charbuf_end)
b73bfc1c 1349 {
df7492f9
KH
1350 ASSURE_DESTINATION (safe_room);
1351 c = *charbuf++;
f03caae0
KH
1352 if (CHAR_BYTE8_P (c))
1353 *dst++ = CHAR_TO_BYTE8 (c);
1354 else
1355 dst += CHAR_STRING (c, dst);
df7492f9 1356 produced_chars++;
4ed46869
KH
1357 }
1358 }
065e3595 1359 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1360 coding->produced_char += produced_chars;
1361 coding->produced = dst - coding->destination;
1362 return 0;
4ed46869
KH
1363}
1364
b73bfc1c 1365
df7492f9 1366/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1367 Check if a text is encoded in one of UTF-16 based coding systems.
1368 If it is, return 1, else return 0. */
aa72b389 1369
df7492f9
KH
/* The bits of VAL that tell the two surrogate ranges apart.  */
#define UTF_16_SURROGATE_BITS(val) ((val) & 0xFC00)

/* Nonzero if VAL is a high (leading) surrogate, 0xD800..0xDBFF in
   the low 16 bits.  */
#define UTF_16_HIGH_SURROGATE_P(val)		\
  (UTF_16_SURROGATE_BITS (val) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF in
   the low 16 bits.  */
#define UTF_16_LOW_SURROGATE_P(val)		\
  (UTF_16_SURROGATE_BITS (val) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)	\
  (((val) == 0xFFFE)		\
   || ((val) == 0xFFFF)		\
   || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1380
aa72b389 1381
df7492f9 1382static int
ff0dacd7 1383detect_coding_utf_16 (coding, detect_info)
aa72b389 1384 struct coding_system *coding;
ff0dacd7 1385 struct coding_detection_info *detect_info;
aa72b389 1386{
8f924df7
KH
1387 const unsigned char *src = coding->source, *src_base = src;
1388 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1389 int multibytep = coding->src_multibyte;
1390 int consumed_chars = 0;
1391 int c1, c2;
aa72b389 1392
ff0dacd7 1393 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1394 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1395 && (coding->src_chars & 1))
ff0dacd7
KH
1396 {
1397 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1398 return 0;
1399 }
24a73b0a 1400
df7492f9
KH
1401 ONE_MORE_BYTE (c1);
1402 ONE_MORE_BYTE (c2);
df7492f9 1403 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1404 {
b49a1807
KH
1405 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1406 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1407 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1408 | CATEGORY_MASK_UTF_16_BE_NOSIG
1409 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1410 }
df7492f9 1411 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1412 {
b49a1807
KH
1413 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1414 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1415 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1416 | CATEGORY_MASK_UTF_16_BE_NOSIG
1417 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1418 }
065e3595 1419 else if (c1 >= 0 && c2 >= 0)
24a73b0a
KH
1420 {
1421 unsigned char b1[256], b2[256];
1422 int b1_variants = 1, b2_variants = 1;
1423 int n;
1424
1425 bzero (b1, 256), bzero (b2, 256);
1426 b1[c1]++, b2[c2]++;
1427 for (n = 0; n < 256 && src < src_end; n++)
1428 {
065e3595 1429 src_base = src;
24a73b0a
KH
1430 ONE_MORE_BYTE (c1);
1431 ONE_MORE_BYTE (c2);
065e3595
KH
1432 if (c1 < 0 || c2 < 0)
1433 break;
24a73b0a
KH
1434 if (! b1[c1++]) b1_variants++;
1435 if (! b2[c2++]) b2_variants++;
1436 }
1437 if (b1_variants < b2_variants)
1438 detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1439 else
1440 detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1441 detect_info->rejected
1442 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
ff0dacd7 1443 }
df7492f9 1444 no_more_source:
ff0dacd7 1445 return 1;
df7492f9 1446}
aa72b389 1447
df7492f9
KH
1448static void
1449decode_coding_utf_16 (coding)
1450 struct coding_system *coding;
1451{
8f924df7
KH
1452 const unsigned char *src = coding->source + coding->consumed;
1453 const unsigned char *src_end = coding->source + coding->src_bytes;
1454 const unsigned char *src_base;
69a80ea3
KH
1455 int *charbuf = coding->charbuf + coding->charbuf_used;
1456 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1457 int consumed_chars = 0, consumed_chars_base;
1458 int multibytep = coding->src_multibyte;
1459 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1460 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1461 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1462 Lisp_Object attr, charset_list;
df7492f9 1463
24a73b0a 1464 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1465
b49a1807 1466 if (bom == utf_16_with_bom)
aa72b389 1467 {
df7492f9 1468 int c, c1, c2;
4af310db 1469
aa72b389 1470 src_base = src;
df7492f9
KH
1471 ONE_MORE_BYTE (c1);
1472 ONE_MORE_BYTE (c2);
e19c3639 1473 c = (c1 << 8) | c2;
aa72b389 1474
b49a1807
KH
1475 if (endian == utf_16_big_endian
1476 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1477 {
b49a1807
KH
1478 /* The first two bytes are not BOM. Treat them as bytes
1479 for a normal character. */
1480 src = src_base;
1481 coding->errors++;
aa72b389 1482 }
b49a1807
KH
1483 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1484 }
1485 else if (bom == utf_16_detect_bom)
1486 {
1487 /* We have already tried to detect BOM and failed in
1488 detect_coding. */
1489 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1490 }
aa72b389 1491
df7492f9
KH
1492 while (1)
1493 {
1494 int c, c1, c2;
1495
1496 src_base = src;
1497 consumed_chars_base = consumed_chars;
1498
1499 if (charbuf + 2 >= charbuf_end)
1500 break;
1501
1502 ONE_MORE_BYTE (c1);
065e3595
KH
1503 if (c1 < 0)
1504 {
1505 *charbuf++ = -c1;
1506 continue;
1507 }
df7492f9 1508 ONE_MORE_BYTE (c2);
065e3595
KH
1509 if (c2 < 0)
1510 {
1511 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1512 *charbuf++ = -c2;
1513 continue;
1514 }
df7492f9 1515 c = (endian == utf_16_big_endian
e19c3639 1516 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1517 if (surrogate)
fd3ae0b9 1518 {
df7492f9 1519 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1520 {
df7492f9
KH
1521 if (endian == utf_16_big_endian)
1522 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1523 else
1524 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1525 *charbuf++ = c1;
1526 *charbuf++ = c2;
1527 coding->errors++;
1528 if (UTF_16_HIGH_SURROGATE_P (c))
1529 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1530 else
df7492f9 1531 *charbuf++ = c;
fd3ae0b9
KH
1532 }
1533 else
df7492f9
KH
1534 {
1535 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1536 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1537 *charbuf++ = 0x10000 + c;
df7492f9 1538 }
fd3ae0b9 1539 }
aa72b389 1540 else
df7492f9
KH
1541 {
1542 if (UTF_16_HIGH_SURROGATE_P (c))
1543 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1544 else
1545 *charbuf++ = c;
8f924df7 1546 }
aa72b389 1547 }
df7492f9
KH
1548
1549 no_more_source:
1550 coding->consumed_char += consumed_chars_base;
1551 coding->consumed = src_base - coding->source;
1552 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1553}
b73bfc1c 1554
df7492f9
KH
1555static int
1556encode_coding_utf_16 (coding)
1557 struct coding_system *coding;
1558{
1559 int multibytep = coding->dst_multibyte;
1560 int *charbuf = coding->charbuf;
1561 int *charbuf_end = charbuf + coding->charbuf_used;
1562 unsigned char *dst = coding->destination + coding->produced;
1563 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1564 int safe_room = 8;
1565 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1566 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1567 int produced_chars = 0;
24a73b0a 1568 Lisp_Object attrs, charset_list;
df7492f9 1569 int c;
4ed46869 1570
24a73b0a 1571 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1572
b49a1807 1573 if (bom != utf_16_without_bom)
df7492f9
KH
1574 {
1575 ASSURE_DESTINATION (safe_room);
1576 if (big_endian)
df7492f9 1577 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1578 else
1579 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1580 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1581 }
1582
1583 while (charbuf < charbuf_end)
1584 {
1585 ASSURE_DESTINATION (safe_room);
1586 c = *charbuf++;
e19c3639
KH
1587 if (c >= MAX_UNICODE_CHAR)
1588 c = coding->default_char;
df7492f9
KH
1589
1590 if (c < 0x10000)
1591 {
1592 if (big_endian)
1593 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1594 else
1595 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1596 }
1597 else
1598 {
1599 int c1, c2;
1600
1601 c -= 0x10000;
1602 c1 = (c >> 10) + 0xD800;
1603 c2 = (c & 0x3FF) + 0xDC00;
1604 if (big_endian)
1605 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1606 else
1607 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1608 }
1609 }
065e3595 1610 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1611 coding->produced = dst - coding->destination;
1612 coding->produced_char += produced_chars;
1613 return 0;
1614}
1615
1616\f
1617/*** 6. Old Emacs' internal format (emacs-mule) ***/
1618
1619/* Emacs' internal format for representation of multiple character
1620 sets is a kind of multi-byte encoding, i.e. characters are
1621 represented by variable-length sequences of one-byte codes.
1622
1623 ASCII characters and control characters (e.g. `tab', `newline') are
1624 represented by one-byte sequences which are their ASCII codes, in
1625 the range 0x00 through 0x7F.
1626
1627 8-bit characters of the range 0x80..0x9F are represented by
1628 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1629 code + 0x20).
1630
1631 8-bit characters of the range 0xA0..0xFF are represented by
1632 one-byte sequences which are their 8-bit code.
1633
1634 The other characters are represented by a sequence of `base
1635 leading-code', optional `extended leading-code', and one or two
1636 `position-code's. The length of the sequence is determined by the
1637 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1638 whereas extended leading-code and position-code take the range 0xA0
1639 through 0xFF. See `charset.h' for more details about leading-code
1640 and position-code.
1641
1642 --- CODE RANGE of Emacs' internal format ---
1643 character set range
1644 ------------- -----
1645 ascii 0x00..0x7F
1646 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1647 eight-bit-graphic 0xA0..0xBF
1648 ELSE 0x81..0x9D + [0xA0..0xFF]+
1649 ---------------------------------------------
1650
1651 As this is the internal character representation, the format is
1652 usually not used externally (i.e. in a file or in a data sent to a
1653 process). But, it is possible to have a text externally in this
1654 format (i.e. by encoding by the coding system `emacs-mule').
1655
1656 In that case, a sequence of one-byte codes has a slightly different
1657 form.
1658
1659 At first, all characters in eight-bit-control are represented by
1660 one-byte sequences which are their 8-bit code.
1661
1662 Next, character composition data are represented by the byte
1663 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1664 where,
1665 METHOD is 0xF0 plus one of composition method (enum
1666 composition_method),
1667
1668 BYTES is 0xA0 plus a byte length of this composition data,
1669
1670 CHARS is 0x20 plus a number of characters composed by this
1671 data,
1672
1673 COMPONENTs are characters of multibyte form or composition
1674 rules encoded by two bytes of ASCII codes.
1675
1676 In addition, for backward compatibility, the following formats are
1677 also recognized as composition data on decoding.
1678
1679 0x80 MSEQ ...
1680 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1681
1682 Here,
1683 MSEQ is a multibyte form but in these special format:
1684 ASCII: 0xA0 ASCII_CODE+0x80,
1685 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1686 RULE is a one byte code of the range 0xA0..0xF0 that
1687 represents a composition rule.
1688 */
1689
1690char emacs_mule_bytes[256];
1691
df7492f9 1692int
ff0dacd7 1693emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1694 struct coding_system *coding;
065e3595 1695 const unsigned char *src;
ff0dacd7 1696 int *nbytes, *nchars, *id;
df7492f9 1697{
8f924df7
KH
1698 const unsigned char *src_end = coding->source + coding->src_bytes;
1699 const unsigned char *src_base = src;
df7492f9 1700 int multibytep = coding->src_multibyte;
df7492f9
KH
1701 struct charset *charset;
1702 unsigned code;
1703 int c;
1704 int consumed_chars = 0;
1705
1706 ONE_MORE_BYTE (c);
065e3595 1707 if (c < 0)
df7492f9 1708 {
065e3595
KH
1709 c = -c;
1710 charset = emacs_mule_charset[0];
1711 }
1712 else
1713 {
1714 switch (emacs_mule_bytes[c])
b73bfc1c 1715 {
065e3595 1716 case 2:
df7492f9
KH
1717 if (! (charset = emacs_mule_charset[c]))
1718 goto invalid_code;
1719 ONE_MORE_BYTE (c);
9ffd559c 1720 if (c < 0xA0)
065e3595 1721 goto invalid_code;
df7492f9 1722 code = c & 0x7F;
065e3595
KH
1723 break;
1724
1725 case 3:
1726 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1727 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1728 {
1729 ONE_MORE_BYTE (c);
9ffd559c 1730 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
1731 goto invalid_code;
1732 ONE_MORE_BYTE (c);
9ffd559c 1733 if (c < 0xA0)
065e3595
KH
1734 goto invalid_code;
1735 code = c & 0x7F;
1736 }
1737 else
1738 {
1739 if (! (charset = emacs_mule_charset[c]))
1740 goto invalid_code;
1741 ONE_MORE_BYTE (c);
9ffd559c 1742 if (c < 0xA0)
065e3595
KH
1743 goto invalid_code;
1744 code = (c & 0x7F) << 8;
1745 ONE_MORE_BYTE (c);
9ffd559c 1746 if (c < 0xA0)
065e3595
KH
1747 goto invalid_code;
1748 code |= c & 0x7F;
1749 }
1750 break;
1751
1752 case 4:
1753 ONE_MORE_BYTE (c);
1754 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
1755 goto invalid_code;
1756 ONE_MORE_BYTE (c);
9ffd559c 1757 if (c < 0xA0)
065e3595 1758 goto invalid_code;
781d7a48 1759 code = (c & 0x7F) << 8;
df7492f9 1760 ONE_MORE_BYTE (c);
9ffd559c 1761 if (c < 0xA0)
065e3595 1762 goto invalid_code;
df7492f9 1763 code |= c & 0x7F;
065e3595 1764 break;
df7492f9 1765
065e3595
KH
1766 case 1:
1767 code = c;
1768 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1769 ? charset_ascii : charset_eight_bit);
1770 break;
df7492f9 1771
065e3595
KH
1772 default:
1773 abort ();
1774 }
1775 c = DECODE_CHAR (charset, code);
1776 if (c < 0)
1777 goto invalid_code;
df7492f9 1778 }
df7492f9
KH
1779 *nbytes = src - src_base;
1780 *nchars = consumed_chars;
ff0dacd7
KH
1781 if (id)
1782 *id = charset->id;
df7492f9
KH
1783 return c;
1784
1785 no_more_source:
1786 return -2;
1787
1788 invalid_code:
1789 return -1;
1790}
1791
1792
1793/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1794 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1795 else return 0. */
df7492f9
KH
1796
1797static int
ff0dacd7 1798detect_coding_emacs_mule (coding, detect_info)
df7492f9 1799 struct coding_system *coding;
ff0dacd7 1800 struct coding_detection_info *detect_info;
df7492f9 1801{
065e3595 1802 const unsigned char *src = coding->source, *src_base;
8f924df7 1803 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1804 int multibytep = coding->src_multibyte;
1805 int consumed_chars = 0;
1806 int c;
1807 int found = 0;
1808
ff0dacd7 1809 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1810 /* A coding system of this category is always ASCII compatible. */
1811 src += coding->head_ascii;
1812
1813 while (1)
1814 {
065e3595 1815 src_base = src;
df7492f9 1816 ONE_MORE_BYTE (c);
065e3595
KH
1817 if (c < 0)
1818 continue;
df7492f9
KH
1819 if (c == 0x80)
1820 {
1821 /* Perhaps the start of composite character. We simple skip
1822 it because analyzing it is too heavy for detecting. But,
1823 at least, we check that the composite character
1824 constitues of more than 4 bytes. */
8f924df7 1825 const unsigned char *src_base;
df7492f9
KH
1826
1827 repeat:
1828 src_base = src;
1829 do
1830 {
1831 ONE_MORE_BYTE (c);
1832 }
1833 while (c >= 0xA0);
1834
1835 if (src - src_base <= 4)
1836 break;
ff0dacd7 1837 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1838 if (c == 0x80)
1839 goto repeat;
b73bfc1c 1840 }
df7492f9
KH
1841
1842 if (c < 0x80)
b73bfc1c 1843 {
df7492f9
KH
1844 if (c < 0x20
1845 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1846 break;
1847 }
1848 else
1849 {
0e219d54 1850 int more_bytes = emacs_mule_bytes[*src_base] - 1;
df7492f9 1851
0e219d54 1852 while (more_bytes > 0)
df7492f9
KH
1853 {
1854 ONE_MORE_BYTE (c);
0e219d54
KH
1855 if (c < 0xA0)
1856 {
1857 src--; /* Unread the last byte. */
1858 break;
1859 }
1860 more_bytes--;
df7492f9 1861 }
0e219d54 1862 if (more_bytes != 0)
df7492f9 1863 break;
ff0dacd7 1864 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1865 }
1866 }
ff0dacd7 1867 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1868 return 0;
1869
1870 no_more_source:
065e3595 1871 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 1872 {
ff0dacd7 1873 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1874 return 0;
1875 }
ff0dacd7
KH
1876 detect_info->found |= found;
1877 return 1;
4ed46869
KH
1878}
1879
b73bfc1c 1880
df7492f9
KH
1881/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1882
/* Decode one character that is a component of an Emacs 20/21 style
   composition sequence at SRC and append it to BUF (BUF is
   incremented).  On success SRC and CONSUMED_CHARS are advanced past
   the character.

   If SRC has reached SRC_END, or `emacs_mule_char' returns -2, the
   macro executes `break', which leaves the loop (or the
   `do ... while (0)' body) that directly encloses the invocation.
   Any other negative result of `emacs_mule_char' jumps to the label
   `invalid_code'.

   The expansion has the shape `if (1) { ... } else' so that the
   `break' binds to the caller's enclosing statement; the semicolon
   written after an invocation becomes the empty `else' branch.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  if (1)                                                                \
    {                                                                   \
      int component_char;                                               \
      int char_bytes, char_count;                                       \
                                                                        \
      if (src == src_end)                                               \
        break;                                                          \
      component_char = emacs_mule_char (coding, src, &char_bytes,       \
                                        &char_count, NULL);             \
      if (component_char == -2)                                         \
        break;                                                          \
      if (component_char < 0)                                           \
        goto invalid_code;                                              \
      *buf++ = component_char;                                          \
      src += char_bytes;                                                \
      consumed_chars += char_count;                                     \
    }                                                                   \
  else
1910
1911
/* Decode a composition rule that is a component of an Emacs 20 style
   composition sequence at SRC.  The rule occupies one byte whose
   value is 0x20 + GREF * 9 + NREF.  Store
   COMPOSITION_ENCODE_RULE (GREF, NREF) in *BUF and increment BUF.
   If no byte is left before SRC_END, or the byte is outside the
   range 0x20..0x70, jump to the label `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)              \
  do {                                                          \
    int rule_code;                                              \
    int gref, nref;                                             \
                                                                \
    if (src >= src_end)                                         \
      goto invalid_code;                                        \
    ONE_MORE_BYTE_NO_CHECK (rule_code);                         \
    rule_code -= 0x20;                                          \
    if (! (0 <= rule_code && rule_code < 81))                   \
      goto invalid_code;                                        \
                                                                \
    gref = rule_code / 9;                                       \
    nref = rule_code % 9;                                       \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);              \
  } while (0)
1931
1932
781d7a48
KH
/* Decode a composition rule that is a component of an Emacs 21 style
   composition sequence at SRC.  The rule occupies two bytes,
   GREF + 0x20 followed by NREF + 0x20.  Store
   COMPOSITION_ENCODE_RULE (GREF, NREF) in *BUF and increment BUF.
   If fewer than two bytes are left before SRC_END, or either value
   is out of range, jump to the label `invalid_code'.

   NOTE(review): each reference is accepted up to 80 here, whereas the
   Emacs 20 form splits one value below 81 into two base-9 digits --
   confirm that 81 is the intended bound for the two-byte form.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)              \
  do {                                                          \
    int gref, nref;                                             \
                                                                \
    if (src + 1 >= src_end)                                     \
      goto invalid_code;                                        \
    ONE_MORE_BYTE_NO_CHECK (gref);                              \
    gref -= 0x20;                                               \
    ONE_MORE_BYTE_NO_CHECK (nref);                              \
    nref -= 0x20;                                               \
    if (! (0 <= gref && gref < 81                               \
           && 0 <= nref && nref < 81))                          \
      goto invalid_code;                                        \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);              \
  } while (0)
1953
1954
/* Decode an Emacs 21 style composition.  On entry C holds the byte
   that followed 0x80, which is METHOD + 0xF2.  The next two bytes
   are BYTES + 0xA0 and CHARS + 0xA0; BYTES must be at least 3.

   A composition annotation for CHARS characters is appended to
   CHARBUF.  Unless METHOD is COMPOSITION_RELATIVE, components are
   then decoded into CHARBUF until CONSUMED_CHARS reaches
   CONSUMED_CHARS_BASE + BYTES: every odd-numbered component is a
   rule (except for COMPOSITION_WITH_ALTCHARS, where all components
   are characters), the others are characters.  Finally the number
   of decoded components is subtracted from the first element of the
   annotation.

   Invalid input jumps to `invalid_code'.  Decoding of the components
   stops early if a character component runs out of source.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *annotation_head = charbuf;                                     \
    int consumed_chars_limit;                                           \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nchars = c - 0xA0;                                                  \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    consumed_chars_limit = consumed_chars_base + nbytes;                \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int n_components = 0;                                           \
                                                                        \
        while (consumed_chars < consumed_chars_limit)                   \
          {                                                             \
            if ((n_components % 2) != 0                                 \
                && method != COMPOSITION_WITH_ALTCHARS)                 \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
            n_components++;                                             \
          }                                                             \
        if (consumed_chars < consumed_chars_limit)                      \
          goto invalid_code;                                            \
        annotation_head[0] -= n_components;                             \
      }                                                                 \
  } while (0)
93dec019 1994
aa72b389 1995
df7492f9
KH
/* Decode an Emacs 20 style relative composition: 0x80 followed by
   the characters to be composed.  The characters are first collected
   in a local array (at most MAX_COMPOSITION_COMPONENTS of them);
   fewer than two is treated as invalid.  Then a composition
   annotation and the collected characters are appended to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
  do {                                                                  \
    enum composition_method method = COMPOSITION_RELATIVE;              \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
    int *next = components;                                             \
    int n_chars, k;                                                     \
                                                                        \
    /* Go back to the start of the sequence and read the 0x80 again,   \
       so that the byte already held in C is decoded as part of the    \
       first component.  */                                            \
    src = src_base;                                                     \
    ONE_MORE_BYTE (c);                                                  \
    for (n_chars = 0; n_chars < MAX_COMPOSITION_COMPONENTS; n_chars++)  \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (next);                        \
    if (n_chars < 2)                                                    \
      goto invalid_code;                                                \
    ADD_COMPOSITION_DATA (charbuf, n_chars, method);                    \
    for (k = 0; k < n_chars; k++)                                       \
      *charbuf++ = components[k];                                       \
  } while (0)
2015
2016
/* Decode an Emacs 20 style rule-base composition: a first character
   followed by (RULE, CHARACTER) pairs.  The components are collected
   in a local array; then a composition annotation and the components
   are appended to CHARBUF.  Invalid input jumps to `invalid_code';
   lack of room in CHARBUF jumps to `no_more_source'.

   NOTE(review): the pair loop ends only when the source runs out
   (which then yields an even component count and hence
   `invalid_code') or when the maximum number of pairs has been read,
   because DECODE_EMACS_MULE_COMPOSITION_RULE_20 treats any non-rule
   byte as invalid.  Confirm how a shorter composition is meant to be
   terminated.
   NOTE(review): I counts (RULE, CHARACTER) pairs, yet it is also
   used as the number of array elements copied to CHARBUF; confirm
   the intended counts.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)                    \
  do {                                                                  \
    /* Emacs 20 style format for rule-base composition.  */            \
    /* Store multibyte form of characters to be composed.  */          \
    enum composition_method method = COMPOSITION_WITH_RULE;             \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
    int *buf = components;                                              \
    int i, j;                                                           \
                                                                        \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                           \
    /* Each pass stores two elements after the first character, so     \
       at most MAX_COMPOSITION_COMPONENTS - 1 passes fit into          \
       COMPONENTS.  */                                                 \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS - 1; i++)                \
      {                                                                 \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);                    \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                       \
      }                                                                 \
    if (i < 1 || (buf - components) % 2 == 0)                           \
      goto invalid_code;                                                \
    /* Give up for now if CHARBUF cannot hold the result.  */          \
    if (charbuf + i + (i / 2) + 1 >= charbuf_end)                       \
      goto no_more_source;                                              \
    /* The annotation belongs in CHARBUF, as in the other             \
       composition decoders, not in the local component array.  */     \
    ADD_COMPOSITION_DATA (charbuf, i, method);                          \
    for (j = 0; j < i; j++)                                             \
      *charbuf++ = components[j];                                       \
    for (j = 0; j < i; j += 2)                                          \
      *charbuf++ = components[j];                                       \
  } while (0)
2042
aa72b389
KH
2043
2044static void
df7492f9 2045decode_coding_emacs_mule (coding)
aa72b389 2046 struct coding_system *coding;
aa72b389 2047{
8f924df7
KH
2048 const unsigned char *src = coding->source + coding->consumed;
2049 const unsigned char *src_end = coding->source + coding->src_bytes;
2050 const unsigned char *src_base;
69a80ea3
KH
2051 int *charbuf = coding->charbuf + coding->charbuf_used;
2052 int *charbuf_end
2053 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 2054 int consumed_chars = 0, consumed_chars_base;
df7492f9 2055 int multibytep = coding->src_multibyte;
24a73b0a 2056 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2057 int char_offset = coding->produced_char;
2058 int last_offset = char_offset;
2059 int last_id = charset_ascii;
aa72b389 2060
24a73b0a 2061 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2062
aa72b389
KH
2063 while (1)
2064 {
df7492f9
KH
2065 int c;
2066
aa72b389 2067 src_base = src;
df7492f9
KH
2068 consumed_chars_base = consumed_chars;
2069
2070 if (charbuf >= charbuf_end)
2071 break;
aa72b389 2072
df7492f9 2073 ONE_MORE_BYTE (c);
065e3595
KH
2074 if (c < 0)
2075 {
2076 *charbuf++ = -c;
2077 char_offset++;
2078 }
2079 else if (c < 0x80)
aa72b389 2080 {
df7492f9
KH
2081 *charbuf++ = c;
2082 char_offset++;
aa72b389 2083 }
df7492f9
KH
2084 else if (c == 0x80)
2085 {
df7492f9 2086 ONE_MORE_BYTE (c);
065e3595
KH
2087 if (c < 0)
2088 goto invalid_code;
781d7a48
KH
2089 if (c - 0xF2 >= COMPOSITION_RELATIVE
2090 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2091 DECODE_EMACS_MULE_21_COMPOSITION (c);
2092 else if (c < 0xC0)
2093 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2094 else if (c == 0xFF)
2095 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2096 else
2097 goto invalid_code;
2098 }
2099 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2100 {
2101 int nbytes, nchars;
ff0dacd7
KH
2102 int id;
2103
781d7a48
KH
2104 src = src_base;
2105 consumed_chars = consumed_chars_base;
ff0dacd7 2106 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2107 if (c < 0)
2108 {
2109 if (c == -2)
2110 break;
2111 goto invalid_code;
2112 }
ff0dacd7
KH
2113 if (last_id != id)
2114 {
2115 if (last_id != charset_ascii)
69a80ea3 2116 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
2117 last_id = id;
2118 last_offset = char_offset;
2119 }
df7492f9 2120 *charbuf++ = c;
781d7a48
KH
2121 src += nbytes;
2122 consumed_chars += nchars;
df7492f9
KH
2123 char_offset++;
2124 }
2125 continue;
2126
2127 invalid_code:
2128 src = src_base;
2129 consumed_chars = consumed_chars_base;
2130 ONE_MORE_BYTE (c);
2131 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2132 char_offset++;
df7492f9
KH
2133 coding->errors++;
2134 }
2135
2136 no_more_source:
ff0dacd7 2137 if (last_id != charset_ascii)
69a80ea3 2138 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2139 coding->consumed_char += consumed_chars_base;
2140 coding->consumed = src_base - coding->source;
2141 coding->charbuf_used = charbuf - coding->charbuf;
2142}
2143
2144
/* Store in CODES (an array of at least two bytes) the leading code(s)
   that emacs-mule uses for the charset whose emacs-mule id is ID.
   An id below 0xA0 is itself the only leading code, and CODES[1] is
   set to 0.  Otherwise CODES[0] is 0x9A (ID < 0xE0), 0x9B
   (ID < 0xF0), 0x9C (ID < 0xF5) or 0x9D, and CODES[1] is ID.

   ID is evaluated exactly once.  The expansion is a single statement
   without a trailing semicolon, so an invocation must be followed by
   `;' and may be used as the body of an `if' that has an `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)                     \
  do {                                                          \
    int leading_id_ = (id);                                     \
                                                                \
    if (leading_id_ < 0xA0)                                     \
      (codes)[0] = leading_id_, (codes)[1] = 0;                 \
    else if (leading_id_ < 0xE0)                                \
      (codes)[0] = 0x9A, (codes)[1] = leading_id_;              \
    else if (leading_id_ < 0xF0)                                \
      (codes)[0] = 0x9B, (codes)[1] = leading_id_;              \
    else if (leading_id_ < 0xF5)                                \
      (codes)[0] = 0x9C, (codes)[1] = leading_id_;              \
    else                                                        \
      (codes)[0] = 0x9D, (codes)[1] = leading_id_;              \
  } while (0)
2158
aa72b389 2159
df7492f9
KH
2160static int
2161encode_coding_emacs_mule (coding)
2162 struct coding_system *coding;
2163{
2164 int multibytep = coding->dst_multibyte;
2165 int *charbuf = coding->charbuf;
2166 int *charbuf_end = charbuf + coding->charbuf_used;
2167 unsigned char *dst = coding->destination + coding->produced;
2168 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2169 int safe_room = 8;
df7492f9 2170 int produced_chars = 0;
24a73b0a 2171 Lisp_Object attrs, charset_list;
df7492f9 2172 int c;
ff0dacd7 2173 int preferred_charset_id = -1;
df7492f9 2174
24a73b0a 2175 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2176 if (! EQ (charset_list, Vemacs_mule_charset_list))
2177 {
2178 CODING_ATTR_CHARSET_LIST (attrs)
2179 = charset_list = Vemacs_mule_charset_list;
2180 }
df7492f9
KH
2181
2182 while (charbuf < charbuf_end)
2183 {
2184 ASSURE_DESTINATION (safe_room);
2185 c = *charbuf++;
ff0dacd7
KH
2186
2187 if (c < 0)
2188 {
2189 /* Handle an annotation. */
2190 switch (*charbuf)
2191 {
2192 case CODING_ANNOTATE_COMPOSITION_MASK:
2193 /* Not yet implemented. */
2194 break;
2195 case CODING_ANNOTATE_CHARSET_MASK:
2196 preferred_charset_id = charbuf[3];
2197 if (preferred_charset_id >= 0
2198 && NILP (Fmemq (make_number (preferred_charset_id),
2199 charset_list)))
2200 preferred_charset_id = -1;
2201 break;
2202 default:
2203 abort ();
2204 }
2205 charbuf += -c - 1;
2206 continue;
2207 }
2208
df7492f9
KH
2209 if (ASCII_CHAR_P (c))
2210 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2211 else if (CHAR_BYTE8_P (c))
2212 {
2213 c = CHAR_TO_BYTE8 (c);
2214 EMIT_ONE_BYTE (c);
2215 }
df7492f9 2216 else
aa72b389 2217 {
df7492f9
KH
2218 struct charset *charset;
2219 unsigned code;
2220 int dimension;
2221 int emacs_mule_id;
2222 unsigned char leading_codes[2];
2223
ff0dacd7
KH
2224 if (preferred_charset_id >= 0)
2225 {
2226 charset = CHARSET_FROM_ID (preferred_charset_id);
2227 if (! CHAR_CHARSET_P (c, charset))
2228 charset = char_charset (c, charset_list, NULL);
2229 }
2230 else
2231 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2232 if (! charset)
2233 {
2234 c = coding->default_char;
2235 if (ASCII_CHAR_P (c))
2236 {
2237 EMIT_ONE_ASCII_BYTE (c);
2238 continue;
2239 }
2240 charset = char_charset (c, charset_list, &code);
2241 }
2242 dimension = CHARSET_DIMENSION (charset);
2243 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2244 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2245 EMIT_ONE_BYTE (leading_codes[0]);
2246 if (leading_codes[1])
2247 EMIT_ONE_BYTE (leading_codes[1]);
2248 if (dimension == 1)
1fa663f9 2249 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2250 else
df7492f9 2251 {
1fa663f9 2252 code |= 0x8080;
df7492f9
KH
2253 EMIT_ONE_BYTE (code >> 8);
2254 EMIT_ONE_BYTE (code & 0xFF);
2255 }
aa72b389 2256 }
aa72b389 2257 }
065e3595 2258 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2259 coding->produced_char += produced_chars;
2260 coding->produced = dst - coding->destination;
2261 return 0;
aa72b389 2262}
b73bfc1c 2263
4ed46869 2264\f
df7492f9 2265/*** 7. ISO2022 handlers ***/
4ed46869
KH
2266
2267/* The following note describes the coding system ISO2022 briefly.
39787efd 2268 Since the intention of this note is to help understand the
5a936b46 2269 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2270 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2271 original document of ISO2022. This is equivalent to the standard
cfb43547 2272 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2273
2274 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2275 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2276 is encoded using bytes less than 128. This may make the encoded
2277 text a little bit longer, but the text passes more easily through
cfb43547 2278 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2279 Significant Bit).
b73bfc1c 2280
cfb43547
DL
2281 There are two kinds of character sets: control character sets and
2282 graphic character sets. The former contain control characters such
4ed46869 2283 as `newline' and `escape' to provide control functions (control
39787efd 2284 functions are also provided by escape sequences). The latter
cfb43547 2285 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2286 two control character sets and many graphic character sets.
2287
2288 Graphic character sets are classified into one of the following
39787efd
KH
2289 four classes, according to the number of bytes (DIMENSION) and
2290 number of characters in one dimension (CHARS) of the set:
2291 - DIMENSION1_CHARS94
2292 - DIMENSION1_CHARS96
2293 - DIMENSION2_CHARS94
2294 - DIMENSION2_CHARS96
2295
2296 In addition, each character set is assigned an identification tag,
cfb43547 2297 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2298 hereafter). The <F> of each character set is decided by ECMA(*)
2299 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2300 (0x30..0x3F are for private use only).
4ed46869
KH
2301
2302 Note (*): ECMA = European Computer Manufacturers Association
2303
cfb43547 2304 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2305 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2306 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2307 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2308 o DIMENSION2_CHARS96 -- none for the moment
2309
39787efd 2310 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2311 C0 [0x00..0x1F] -- control character plane 0
2312 GL [0x20..0x7F] -- graphic character plane 0
2313 C1 [0x80..0x9F] -- control character plane 1
2314 GR [0xA0..0xFF] -- graphic character plane 1
2315
2316 A control character set is directly designated and invoked to C0 or
39787efd
KH
2317 C1 by an escape sequence. The most common case is that:
2318 - ISO646's control character set is designated/invoked to C0, and
2319 - ISO6429's control character set is designated/invoked to C1,
2320 and usually these designations/invocations are omitted in encoded
2321 text. In a 7-bit environment, only C0 can be used, and a control
2322 character for C1 is encoded by an appropriate escape sequence to
2323 fit into the environment. All control characters for C1 are
2324 defined to have corresponding escape sequences.
4ed46869
KH
2325
2326 A graphic character set is at first designated to one of four
2327 graphic registers (G0 through G3), then these graphic registers are
2328 invoked to GL or GR. These designations and invocations can be
2329 done independently. The most common case is that G0 is invoked to
39787efd
KH
2330 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2331 these invocations and designations are omitted in encoded text.
2332 In a 7-bit environment, only GL can be used.
4ed46869 2333
39787efd
KH
2334 When a graphic character set of CHARS94 is invoked to GL, codes
2335 0x20 and 0x7F of the GL area work as control characters SPACE and
2336 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2337 be used.
4ed46869
KH
2338
2339 There are two ways of invocation: locking-shift and single-shift.
2340 With locking-shift, the invocation lasts until the next different
39787efd
KH
2341 invocation, whereas with single-shift, the invocation affects the
2342 following character only and doesn't affect the locking-shift
2343 state. Invocations are done by the following control characters or
2344 escape sequences:
4ed46869
KH
2345
2346 ----------------------------------------------------------------------
39787efd 2347 abbrev function cntrl escape seq description
4ed46869 2348 ----------------------------------------------------------------------
39787efd
KH
2349 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2350 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2351 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2352 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2353 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2354 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2355 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2356 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2357 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2358 ----------------------------------------------------------------------
39787efd
KH
2359 (*) These are not used by any known coding system.
2360
2361 Control characters for these functions are defined by macros
2362 ISO_CODE_XXX in `coding.h'.
4ed46869 2363
39787efd 2364 Designations are done by the following escape sequences:
4ed46869
KH
2365 ----------------------------------------------------------------------
2366 escape sequence description
2367 ----------------------------------------------------------------------
2368 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2369 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2370 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2371 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2372 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2373 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2374 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2375 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2376 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2377 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2378 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2379 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2380 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2381 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2382 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2383 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2384 ----------------------------------------------------------------------
2385
2386 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2387 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2388
2389 Note (*): Although these designations are not allowed in ISO2022,
2390 Emacs accepts them on decoding, and produces them on encoding
39787efd 2391 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2392 7-bit environment, non-locking-shift, and non-single-shift.
2393
2394 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2395 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2396
cfb43547 2397 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2398 same multilingual text in ISO2022. Actually, there exist many
   coding systems such as Compound Text (used in X11's inter-client
   communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
4ed46869
KH
2402 localized platforms), and all of these are variants of ISO2022.
2403
2404 In addition to the above, Emacs handles two more kinds of escape
2405 sequences: ISO6429's direction specification and Emacs' private
2406 sequence for specifying character composition.
2407
39787efd 2408 ISO6429's direction specification takes the following form:
4ed46869
KH
2409 o CSI ']' -- end of the current direction
2410 o CSI '0' ']' -- end of the current direction
2411 o CSI '1' ']' -- start of left-to-right text
2412 o CSI '2' ']' -- start of right-to-left text
2413 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2414 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2415
2416 Character composition specification takes the following form:
ec6d2bb8
KH
2417 o ESC '0' -- start relative composition
2418 o ESC '1' -- end composition
2419 o ESC '2' -- start rule-base composition (*)
2420 o ESC '3' -- start relative composition with alternate chars (**)
2421 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2422 Since these are not standard escape sequences of any ISO standard,
cfb43547 2423 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2424
5a936b46
DL
2425 (*) This form is used only in Emacs 20.7 and older versions,
2426 but newer versions can safely decode it.
cfb43547 2427 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2428 and older versions can't decode it.
ec6d2bb8 2429
cfb43547 2430 Here's a list of example usages of these composition escape
b73bfc1c 2431 sequences (categorized by `enum composition_method').
ec6d2bb8 2432
b73bfc1c 2433 COMPOSITION_RELATIVE:
ec6d2bb8 2434 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2435 COMPOSITION_WITH_RULE:
ec6d2bb8 2436 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2437 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2438 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2439 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2440 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2441
2442enum iso_code_class_type iso_code_class[256];
2443
df7492f9
KH
/* Return nonzero if the charset whose id is ID may be used with
   CODING: ID must lie within CODING's `safe_charsets' table
   (0 .. max_charset_id) and its entry must not be 255, the filler
   value that `setup_iso_safe_charsets' stores for charsets without a
   usable register.  The entry is compared as an unsigned char, so
   the result does not depend on whether plain `char' is signed.  A
   negative ID is never safe.  ID is evaluated more than once.  */

#define SAFE_CHARSET_P(coding, id)                              \
  ((id) >= 0                                                    \
   && (id) <= (coding)->max_charset_id                          \
   && (unsigned char) (coding)->safe_charsets[(id)] != 255)
2447
2448
/* Nonzero if CODING_ISO_INITIAL, applied to the coding system stored
   in `coding_categories' for CATEGORY with index 1, is not negative.
   Presumably that value is the charset initially designated to
   graphic register G1 -- confirm against CODING_ISO_INITIAL.  */

#define SHIFT_OUT_OK(category)                                  \
  (0 <= CODING_ISO_INITIAL (&coding_categories[(category)], 1))
2451
2452static void
f0064e1f
DL
2453setup_iso_safe_charsets (attrs)
2454 Lisp_Object attrs;
df7492f9
KH
2455{
2456 Lisp_Object charset_list, safe_charsets;
2457 Lisp_Object request;
2458 Lisp_Object reg_usage;
2459 Lisp_Object tail;
2460 int reg94, reg96;
2461 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2462 int max_charset_id;
2463
2464 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2465 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2466 && ! EQ (charset_list, Viso_2022_charset_list))
2467 {
2468 CODING_ATTR_CHARSET_LIST (attrs)
2469 = charset_list = Viso_2022_charset_list;
2470 ASET (attrs, coding_attr_safe_charsets, Qnil);
2471 }
2472
2473 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2474 return;
2475
2476 max_charset_id = 0;
2477 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2478 {
2479 int id = XINT (XCAR (tail));
2480 if (max_charset_id < id)
2481 max_charset_id = id;
2482 }
d46c5b12 2483
df7492f9
KH
2484 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2485 make_number (255));
2486 request = AREF (attrs, coding_attr_iso_request);
2487 reg_usage = AREF (attrs, coding_attr_iso_usage);
2488 reg94 = XINT (XCAR (reg_usage));
2489 reg96 = XINT (XCDR (reg_usage));
2490
2491 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2492 {
2493 Lisp_Object id;
2494 Lisp_Object reg;
2495 struct charset *charset;
2496
2497 id = XCAR (tail);
2498 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2499 reg = Fcdr (Fassq (id, request));
df7492f9 2500 if (! NILP (reg))
8f924df7 2501 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2502 else if (charset->iso_chars_96)
2503 {
2504 if (reg96 < 4)
8f924df7 2505 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2506 }
2507 else
2508 {
2509 if (reg94 < 4)
8f924df7 2510 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2511 }
2512 }
2513 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2514}
d46c5b12 2515
b6871cc7 2516
4ed46869 2517/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2518 Check if a text is encoded in one of ISO-2022 based codig systems.
2519 If it is, return 1, else return 0. */
4ed46869 2520
0a28aafb 2521static int
ff0dacd7 2522detect_coding_iso_2022 (coding, detect_info)
df7492f9 2523 struct coding_system *coding;
ff0dacd7 2524 struct coding_detection_info *detect_info;
4ed46869 2525{
8f924df7
KH
2526 const unsigned char *src = coding->source, *src_base = src;
2527 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2528 int multibytep = coding->src_multibyte;
ff0dacd7 2529 int single_shifting = 0;
df7492f9
KH
2530 int id;
2531 int c, c1;
2532 int consumed_chars = 0;
2533 int i;
ff0dacd7
KH
2534 int rejected = 0;
2535 int found = 0;
2536
2537 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2538
2539 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2540 {
2541 struct coding_system *this = &(coding_categories[i]);
2542 Lisp_Object attrs, val;
2543
2544 attrs = CODING_ID_ATTRS (this->id);
2545 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2546 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2547 setup_iso_safe_charsets (attrs);
2548 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2549 this->max_charset_id = SCHARS (val) - 1;
2550 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2551 }
2552
2553 /* A coding system of this category is always ASCII compatible. */
2554 src += coding->head_ascii;
3f003981 2555
ff0dacd7 2556 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2557 {
065e3595 2558 src_base = src;
df7492f9 2559 ONE_MORE_BYTE (c);
4ed46869
KH
2560 switch (c)
2561 {
2562 case ISO_CODE_ESC:
74383408
KH
2563 if (inhibit_iso_escape_detection)
2564 break;
f46869e4 2565 single_shifting = 0;
df7492f9 2566 ONE_MORE_BYTE (c);
d46c5b12 2567 if (c >= '(' && c <= '/')
4ed46869 2568 {
bf9cdd4e 2569 /* Designation sequence for a charset of dimension 1. */
df7492f9 2570 ONE_MORE_BYTE (c1);
d46c5b12 2571 if (c1 < ' ' || c1 >= 0x80
df7492f9 2572 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2573 /* Invalid designation sequence. Just ignore. */
2574 break;
bf9cdd4e
KH
2575 }
2576 else if (c == '$')
2577 {
2578 /* Designation sequence for a charset of dimension 2. */
df7492f9 2579 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2580 if (c >= '@' && c <= 'B')
2581 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2582 id = iso_charset_table[1][0][c];
bf9cdd4e 2583 else if (c >= '(' && c <= '/')
bcf26d6a 2584 {
df7492f9 2585 ONE_MORE_BYTE (c1);
d46c5b12 2586 if (c1 < ' ' || c1 >= 0x80
df7492f9 2587 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2588 /* Invalid designation sequence. Just ignore. */
2589 break;
bcf26d6a 2590 }
bf9cdd4e 2591 else
ff0dacd7 2592 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2593 break;
2594 }
ae9ff118 2595 else if (c == 'N' || c == 'O')
d46c5b12 2596 {
ae9ff118 2597 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2598 single_shifting = 1;
2599 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2600 break;
4ed46869 2601 }
ec6d2bb8
KH
2602 else if (c >= '0' && c <= '4')
2603 {
2604 /* ESC <Fp> for start/end composition. */
ff0dacd7 2605 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2606 break;
2607 }
bf9cdd4e 2608 else
df7492f9 2609 {
ff0dacd7 2610 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2611 break;
2612 }
d46c5b12
KH
2613
2614 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2615 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2616 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2617 id))
ff0dacd7 2618 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2619 else
ff0dacd7 2620 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2621 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2622 id))
ff0dacd7 2623 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2624 else
ff0dacd7 2625 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2626 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2627 id))
ff0dacd7 2628 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2629 else
ff0dacd7 2630 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2631 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2632 id))
ff0dacd7 2633 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2634 else
ff0dacd7 2635 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2636 break;
2637
4ed46869 2638 case ISO_CODE_SO:
d46c5b12 2639 case ISO_CODE_SI:
ff0dacd7 2640 /* Locking shift out/in. */
74383408
KH
2641 if (inhibit_iso_escape_detection)
2642 break;
f46869e4 2643 single_shifting = 0;
ff0dacd7
KH
2644 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2645 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2646 break;
2647
4ed46869 2648 case ISO_CODE_CSI:
ff0dacd7 2649 /* Control sequence introducer. */
f46869e4 2650 single_shifting = 0;
ff0dacd7
KH
2651 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2652 found |= CATEGORY_MASK_ISO_8_ELSE;
2653 goto check_extra_latin;
2654
4ed46869
KH
2655 case ISO_CODE_SS2:
2656 case ISO_CODE_SS3:
ff0dacd7
KH
2657 /* Single shift. */
2658 if (inhibit_iso_escape_detection)
2659 break;
75e2a253 2660 single_shifting = 0;
ff0dacd7
KH
2661 rejected |= CATEGORY_MASK_ISO_7BIT;
2662 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2663 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 2664 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
2665 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2666 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
2667 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2668 if (single_shifting)
2669 break;
ff0dacd7 2670 goto check_extra_latin;
4ed46869
KH
2671
2672 default:
065e3595
KH
2673 if (c < 0)
2674 continue;
4ed46869 2675 if (c < 0x80)
f46869e4
KH
2676 {
2677 single_shifting = 0;
2678 break;
2679 }
ff0dacd7 2680 if (c >= 0xA0)
c4825358 2681 {
ff0dacd7
KH
2682 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2683 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2684 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2685 0xA0..0FF. If the byte length is even, we include
2686 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2687 only when we are not single shifting. */
2688 if (! single_shifting
2689 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2690 {
e17de821 2691 int i = 1;
b73bfc1c
KH
2692 while (src < src_end)
2693 {
df7492f9 2694 ONE_MORE_BYTE (c);
b73bfc1c
KH
2695 if (c < 0xA0)
2696 break;
2697 i++;
2698 }
2699
2700 if (i & 1 && src < src_end)
ff0dacd7 2701 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2702 else
ff0dacd7 2703 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2704 }
ff0dacd7 2705 break;
4ed46869 2706 }
ff0dacd7
KH
2707 check_extra_latin:
2708 single_shifting = 0;
2709 if (! VECTORP (Vlatin_extra_code_table)
2710 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2711 {
2712 rejected = CATEGORY_MASK_ISO;
2713 break;
2714 }
2715 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2716 & CODING_ISO_FLAG_LATIN_EXTRA)
2717 found |= CATEGORY_MASK_ISO_8_1;
2718 else
2719 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 2720 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2721 }
2722 }
ff0dacd7
KH
2723 detect_info->rejected |= CATEGORY_MASK_ISO;
2724 return 0;
4ed46869 2725
df7492f9 2726 no_more_source:
ff0dacd7
KH
2727 detect_info->rejected |= rejected;
2728 detect_info->found |= (found & ~rejected);
df7492f9 2729 return 1;
4ed46869 2730}
ec6d2bb8 2731
4ed46869 2732
134b9549
KH
/* Set designation state into CODING.  Set CHARS_96 to -1 if the
   escape sequence should be kept.

   REG is the graphic register being designated, DIM and CHARS_96
   select the row of ISO_CHARSET_TABLE, and FINAL is the final byte of
   the designation sequence.  CHARS_96 must be an lvalue.  The macro
   reads the variable `coding' of the expansion site.

   If FINAL is out of range, names no known charset, or names a
   charset that is not safe for CODING, register REG is marked with -2
   and CHARS_96 is set to -1.  */

#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if ((final) < '0' || (final) >= 128                                 \
        || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
        || !SAFE_CHARSET_P (coding, id))                                \
      {                                                                 \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        (chars_96) = -1;                                                \
        break;                                                          \
      }                                                                 \
    prev = CODING_ISO_DESIGNATION (coding, reg);                        \
    if (id == charset_jisx0201_roman)                                   \
      {                                                                 \
        /* Decode JISX0201-Roman as ASCII when the flag asks so.  */    \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
          id = charset_ascii;                                           \
      }                                                                 \
    else if (id == charset_jisx0208_1978)                               \
      {                                                                 \
        /* Likewise decode JISX0208-1978 as JISX0208.  */               \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
          id = charset_jisx0208;                                        \
      }                                                                 \
    CODING_ISO_DESIGNATION (coding, reg) = id;                          \
    /* If there was an invalid designation to REG previously, and this  \
       designation is ASCII to REG, we should keep this designation     \
       sequence.  */                                                    \
    if (prev == -2 && id == charset_ascii)                              \
      (chars_96) = -1;                                                  \
  } while (0)
2765
d46c5b12 2766
df7492f9
KH
/* If a composition is in progress, abandon it: flush the characters
   collected in `components' to `charbuf' as ordinary characters and
   reset `composition_state' to COMPOSING_NO.  For the rule-based
   methods only every second entry of `components' is flushed.  Jump
   to `no_more_source' if `charbuf' lacks room.  Does nothing when no
   composition is in progress.  */

#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    if (composition_state != COMPOSING_NO)                              \
      {                                                                 \
        int k, stride;                                                  \
                                                                        \
        /* It is assured that we have enough room for producing         \
           characters stored in the table `components'.  */             \
        if (charbuf + component_idx > charbuf_end)                      \
          goto no_more_source;                                          \
        composition_state = COMPOSING_NO;                               \
        stride = ((method == COMPOSITION_RELATIVE                       \
                   || method == COMPOSITION_WITH_ALTCHARS)              \
                  ? 1 : 2);                                             \
        for (k = 0; k < component_idx; k += stride)                     \
          *charbuf++ = components[k];                                   \
        char_offset += (stride == 1                                     \
                        ? component_idx                                 \
                        : (component_idx / 2) + 1);                     \
      }                                                                 \
  } while (0)
2791
d46c5b12 2792
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar  composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte following ESC.  A composition is started only if the
   terminating ESC 1 is found between `src' and `src_end'; otherwise
   jump to `invalid_code' when this is the last block, or to
   `no_more_source' when more input may follow.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if ((c1) == '0'                                                     \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        /* ESC 0 here ends the alternate-component part.  */            \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* `>=' rather than `==': when fewer than two bytes remain      \
           the loop above does not run and P stays at SRC, which may    \
           already be past SRC_END - 1.  */                             \
        if (p >= src_end - 1)                                           \
          {                                                             \
            if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
              goto invalid_code;                                        \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = ((c1) == '0' ? COMPOSITION_RELATIVE                    \
                  : (c1) == '2' ? COMPOSITION_WITH_RULE                 \
                  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS             \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = ((c1) <= '2' ? COMPOSING_CHAR               \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
2835
ec6d2bb8 2836
df7492f9
KH
/* Handle composition end sequence ESC 1.

   Emit a composition annotation to `charbuf' via ADD_COMPOSITION_DATA,
   followed (for every method but COMPOSITION_RELATIVE) by the
   component table, then by the composed characters themselves.
   `char_offset' is advanced once per composed character, and
   `composition_state' is reset to COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int n_composed, k;                                                  \
    int *annotation_head = charbuf;                                     \
                                                                        \
    if (component_len > 0)                                              \
      n_composed = component_idx - component_len;                       \
    else if (method == COMPOSITION_RELATIVE)                            \
      n_composed = component_idx;                                       \
    else                                                                \
      n_composed = (component_idx + 1) / 2;                             \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, n_composed, method);                 \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        /* Copy the component part: everything when no ESC 0            \
           separator was seen, else only what preceded it.  */          \
        int n_components = (component_len == 0                          \
                            ? component_idx : component_len);           \
                                                                        \
        for (k = 0; k < n_components; k++)                              \
          *charbuf++ = components[k];                                   \
        /* Rewrite the head of the annotation now that its extent       \
           is known.  */                                                \
        *annotation_head = annotation_head - charbuf;                   \
      }                                                                 \
    if (method != COMPOSITION_WITH_RULE)                                \
      for (k = component_len; k < component_idx; k++, char_offset++)    \
        *charbuf++ = components[k];                                     \
    else                                                                \
      for (k = 0; k < component_idx; k += 2, char_offset++)             \
        *charbuf++ = components[k];                                     \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
2867
df7492f9 2868
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from SRC into the variable `c2' of the expansion site) and
   store the encoded composition rule back into C1.  C1 must be an
   lvalue.  C1 is set to 0 when the byte is outside both rule
   formats.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) < 81)              /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
        if (gref == 4) gref = 10;                                       \
        if (nref == 4) nref = 10;                                       \
        (c1) = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
    else if ((c1) < 93)         /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                                                                \
      (c1) = 0;                                                         \
  } while (0)
88993dfd 2892
d46c5b12 2893
4ed46869
KH
2894/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2895
b73bfc1c 2896static void
df7492f9 2897decode_coding_iso_2022 (coding)
4ed46869 2898 struct coding_system *coding;
4ed46869 2899{
8f924df7
KH
2900 const unsigned char *src = coding->source + coding->consumed;
2901 const unsigned char *src_end = coding->source + coding->src_bytes;
2902 const unsigned char *src_base;
69a80ea3 2903 int *charbuf = coding->charbuf + coding->charbuf_used;
ff0dacd7 2904 int *charbuf_end
69a80ea3 2905 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2906 int consumed_chars = 0, consumed_chars_base;
df7492f9 2907 int multibytep = coding->src_multibyte;
4ed46869 2908 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
2909 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2910 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 2911 int charset_id_2, charset_id_3;
df7492f9
KH
2912 struct charset *charset;
2913 int c;
2914 /* For handling composition sequence. */
2915#define COMPOSING_NO 0
2916#define COMPOSING_CHAR 1
2917#define COMPOSING_RULE 2
2918#define COMPOSING_COMPONENT_CHAR 3
2919#define COMPOSING_COMPONENT_RULE 4
2920
2921 int composition_state = COMPOSING_NO;
2922 enum composition_method method;
2923 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2924 int component_idx;
2925 int component_len;
24a73b0a 2926 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2927 int char_offset = coding->produced_char;
2928 int last_offset = char_offset;
2929 int last_id = charset_ascii;
df7492f9 2930
24a73b0a 2931 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 2932 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2933
2934 while (1)
4ed46869 2935 {
463f5630 2936 int c1, c2;
b73bfc1c
KH
2937
2938 src_base = src;
df7492f9
KH
2939 consumed_chars_base = consumed_chars;
2940
2941 if (charbuf >= charbuf_end)
2942 break;
2943
b73bfc1c 2944 ONE_MORE_BYTE (c1);
065e3595
KH
2945 if (c1 < 0)
2946 goto invalid_code;
4ed46869 2947
98725083 2948 /* We produce at most one character. */
4ed46869
KH
2949 switch (iso_code_class [c1])
2950 {
2951 case ISO_0x20_or_0x7F:
df7492f9 2952 if (composition_state != COMPOSING_NO)
ec6d2bb8 2953 {
df7492f9
KH
2954 if (composition_state == COMPOSING_RULE
2955 || composition_state == COMPOSING_COMPONENT_RULE)
2956 {
2957 DECODE_COMPOSITION_RULE (c1);
2958 components[component_idx++] = c1;
2959 composition_state--;
2960 continue;
2961 }
4ed46869 2962 }
df7492f9
KH
2963 if (charset_id_0 < 0
2964 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2965 /* This is SPACE or DEL. */
2966 charset = CHARSET_FROM_ID (charset_ascii);
2967 else
2968 charset = CHARSET_FROM_ID (charset_id_0);
2969 break;
4ed46869
KH
2970
2971 case ISO_graphic_plane_0:
781d7a48 2972 if (composition_state != COMPOSING_NO)
b73bfc1c 2973 {
781d7a48
KH
2974 if (composition_state == COMPOSING_RULE
2975 || composition_state == COMPOSING_COMPONENT_RULE)
2976 {
2977 DECODE_COMPOSITION_RULE (c1);
2978 components[component_idx++] = c1;
2979 composition_state--;
2980 continue;
2981 }
b73bfc1c 2982 }
134b9549
KH
2983 if (charset_id_0 < 0)
2984 charset = CHARSET_FROM_ID (charset_ascii);
2985 else
2986 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2987 break;
2988
2989 case ISO_0xA0_or_0xFF:
df7492f9
KH
2990 if (charset_id_1 < 0
2991 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2992 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2993 goto invalid_code;
4ed46869
KH
2994 /* This is a graphic character, we fall down ... */
2995
2996 case ISO_graphic_plane_1:
df7492f9
KH
2997 if (charset_id_1 < 0)
2998 goto invalid_code;
2999 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3000 break;
3001
df7492f9
KH
3002 case ISO_control_0:
3003 MAYBE_FINISH_COMPOSITION ();
3004 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3005 break;
3006
df7492f9
KH
3007 case ISO_control_1:
3008 MAYBE_FINISH_COMPOSITION ();
3009 goto invalid_code;
3010
4ed46869 3011 case ISO_shift_out:
df7492f9
KH
3012 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3013 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3014 goto invalid_code;
3015 CODING_ISO_INVOCATION (coding, 0) = 1;
3016 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3017 continue;
4ed46869
KH
3018
3019 case ISO_shift_in:
df7492f9
KH
3020 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3021 goto invalid_code;
3022 CODING_ISO_INVOCATION (coding, 0) = 0;
3023 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3024 continue;
4ed46869
KH
3025
3026 case ISO_single_shift_2_7:
3027 case ISO_single_shift_2:
df7492f9
KH
3028 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3029 goto invalid_code;
4ed46869
KH
3030 /* SS2 is handled as an escape sequence of ESC 'N' */
3031 c1 = 'N';
3032 goto label_escape_sequence;
3033
3034 case ISO_single_shift_3:
df7492f9
KH
3035 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3036 goto invalid_code;
4ed46869
KH
3037 /* SS2 is handled as an escape sequence of ESC 'O' */
3038 c1 = 'O';
3039 goto label_escape_sequence;
3040
3041 case ISO_control_sequence_introducer:
3042 /* CSI is handled as an escape sequence of ESC '[' ... */
3043 c1 = '[';
3044 goto label_escape_sequence;
3045
3046 case ISO_escape:
3047 ONE_MORE_BYTE (c1);
3048 label_escape_sequence:
df7492f9 3049 /* Escape sequences handled here are invocation,
4ed46869
KH
3050 designation, direction specification, and character
3051 composition specification. */
3052 switch (c1)
3053 {
3054 case '&': /* revision of following character set */
3055 ONE_MORE_BYTE (c1);
3056 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3057 goto invalid_code;
4ed46869
KH
3058 ONE_MORE_BYTE (c1);
3059 if (c1 != ISO_CODE_ESC)
df7492f9 3060 goto invalid_code;
4ed46869
KH
3061 ONE_MORE_BYTE (c1);
3062 goto label_escape_sequence;
3063
3064 case '$': /* designation of 2-byte character set */
df7492f9
KH
3065 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3066 goto invalid_code;
134b9549
KH
3067 {
3068 int reg, chars96;
3069
3070 ONE_MORE_BYTE (c1);
3071 if (c1 >= '@' && c1 <= 'B')
3072 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3073 or JISX0208.1980 */
134b9549
KH
3074 reg = 0, chars96 = 0;
3075 }
3076 else if (c1 >= 0x28 && c1 <= 0x2B)
3077 { /* designation of DIMENSION2_CHARS94 character set */
3078 reg = c1 - 0x28, chars96 = 0;
3079 ONE_MORE_BYTE (c1);
3080 }
3081 else if (c1 >= 0x2C && c1 <= 0x2F)
3082 { /* designation of DIMENSION2_CHARS96 character set */
3083 reg = c1 - 0x2C, chars96 = 1;
3084 ONE_MORE_BYTE (c1);
3085 }
3086 else
3087 goto invalid_code;
3088 DECODE_DESIGNATION (reg, 2, chars96, c1);
3089 /* We must update these variables now. */
3090 if (reg == 0)
3091 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3092 else if (reg == 1)
3093 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3094 if (chars96 < 0)
3095 goto invalid_code;
3096 }
b73bfc1c 3097 continue;
4ed46869
KH
3098
3099 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3100 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3101 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3102 goto invalid_code;
3103 CODING_ISO_INVOCATION (coding, 0) = 2;
3104 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3105 continue;
4ed46869
KH
3106
3107 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3108 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3109 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3110 goto invalid_code;
3111 CODING_ISO_INVOCATION (coding, 0) = 3;
3112 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3113 continue;
4ed46869
KH
3114
3115 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3116 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3117 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3118 goto invalid_code;
134b9549
KH
3119 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3120 if (charset_id_2 < 0)
3121 charset = CHARSET_FROM_ID (charset_ascii);
3122 else
3123 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3124 ONE_MORE_BYTE (c1);
e7046a18 3125 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3126 goto invalid_code;
4ed46869
KH
3127 break;
3128
3129 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3130 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3131 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3132 goto invalid_code;
134b9549
KH
3133 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3134 if (charset_id_3 < 0)
3135 charset = CHARSET_FROM_ID (charset_ascii);
3136 else
3137 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3138 ONE_MORE_BYTE (c1);
e7046a18 3139 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3140 goto invalid_code;
4ed46869
KH
3141 break;
3142
ec6d2bb8 3143 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3144 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3145 goto invalid_code;
ec6d2bb8 3146 DECODE_COMPOSITION_START (c1);
b73bfc1c 3147 continue;
4ed46869 3148
ec6d2bb8 3149 case '1': /* end composition */
df7492f9
KH
3150 if (composition_state == COMPOSING_NO)
3151 goto invalid_code;
3152 DECODE_COMPOSITION_END ();
b73bfc1c 3153 continue;
4ed46869
KH
3154
3155 case '[': /* specification of direction */
df7492f9
KH
3156 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3157 goto invalid_code;
4ed46869 3158 /* For the moment, nested direction is not supported.
d46c5b12 3159 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3160 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3161 ONE_MORE_BYTE (c1);
3162 switch (c1)
3163 {
3164 case ']': /* end of the current direction */
d46c5b12 3165 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3166
3167 case '0': /* end of the current direction */
3168 case '1': /* start of left-to-right direction */
3169 ONE_MORE_BYTE (c1);
3170 if (c1 == ']')
d46c5b12 3171 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3172 else
df7492f9 3173 goto invalid_code;
4ed46869
KH
3174 break;
3175
3176 case '2': /* start of right-to-left direction */
3177 ONE_MORE_BYTE (c1);
3178 if (c1 == ']')
d46c5b12 3179 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3180 else
df7492f9 3181 goto invalid_code;
4ed46869
KH
3182 break;
3183
3184 default:
df7492f9 3185 goto invalid_code;
4ed46869 3186 }
b73bfc1c 3187 continue;
4ed46869 3188
103e0180 3189 case '%':
103e0180
KH
3190 ONE_MORE_BYTE (c1);
3191 if (c1 == '/')
3192 {
3193 /* CTEXT extended segment:
3194 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3195 We keep these bytes as is for the moment.
3196 They may be decoded by post-read-conversion. */
3197 int dim, M, L;
4776e638 3198 int size;
8f924df7 3199
103e0180
KH
3200 ONE_MORE_BYTE (dim);
3201 ONE_MORE_BYTE (M);
3202 ONE_MORE_BYTE (L);
3203 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3204 if (charbuf + 8 + size > charbuf_end)
3205 goto break_loop;
3206 *charbuf++ = ISO_CODE_ESC;
3207 *charbuf++ = '%';
3208 *charbuf++ = '/';
3209 *charbuf++ = dim;
3210 *charbuf++ = BYTE8_TO_CHAR (M);
3211 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3212 while (size-- > 0)
3213 {
3214 ONE_MORE_BYTE (c1);
4776e638 3215 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3216 }
103e0180
KH
3217 }
3218 else if (c1 == 'G')
3219 {
103e0180
KH
3220 /* XFree86 extension for embedding UTF-8 in CTEXT:
3221 ESC % G --UTF-8-BYTES-- ESC % @
3222 We keep these bytes as is for the moment.
3223 They may be decoded by post-read-conversion. */
4776e638
KH
3224 int *p = charbuf;
3225
3226 if (p + 6 > charbuf_end)
3227 goto break_loop;
3228 *p++ = ISO_CODE_ESC;
3229 *p++ = '%';
3230 *p++ = 'G';
3231 while (p < charbuf_end)
103e0180
KH
3232 {
3233 ONE_MORE_BYTE (c1);
3234 if (c1 == ISO_CODE_ESC
3235 && src + 1 < src_end
3236 && src[0] == '%'
3237 && src[1] == '@')
9ffd559c
KH
3238 {
3239 src += 2;
3240 break;
3241 }
4776e638 3242 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3243 }
4776e638
KH
3244 if (p + 3 > charbuf_end)
3245 goto break_loop;
3246 *p++ = ISO_CODE_ESC;
3247 *p++ = '%';
3248 *p++ = '@';
3249 charbuf = p;
103e0180
KH
3250 }
3251 else
4776e638 3252 goto invalid_code;
103e0180 3253 continue;
4776e638 3254 break;
103e0180 3255
4ed46869 3256 default:
df7492f9
KH
3257 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3258 goto invalid_code;
134b9549
KH
3259 {
3260 int reg, chars96;
3261
3262 if (c1 >= 0x28 && c1 <= 0x2B)
3263 { /* designation of DIMENSION1_CHARS94 character set */
3264 reg = c1 - 0x28, chars96 = 0;
3265 ONE_MORE_BYTE (c1);
3266 }
3267 else if (c1 >= 0x2C && c1 <= 0x2F)
3268 { /* designation of DIMENSION1_CHARS96 character set */
3269 reg = c1 - 0x2C, chars96 = 1;
3270 ONE_MORE_BYTE (c1);
3271 }
3272 else
3273 goto invalid_code;
3274 DECODE_DESIGNATION (reg, 1, chars96, c1);
3275 /* We must update these variables now. */
3276 if (reg == 0)
3277 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3278 else if (reg == 1)
3279 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3280 if (chars96 < 0)
3281 goto invalid_code;
3282 }
b73bfc1c 3283 continue;
4ed46869 3284 }
b73bfc1c 3285 }
4ed46869 3286
ff0dacd7
KH
3287 if (charset->id != charset_ascii
3288 && last_id != charset->id)
3289 {
3290 if (last_id != charset_ascii)
69a80ea3 3291 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3292 last_id = charset->id;
3293 last_offset = char_offset;
3294 }
3295
b73bfc1c 3296 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3297 Produce a decoded character while getting 2nd position code
3298 C2 if necessary. */
3299 c1 &= 0x7F;
3300 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3301 {
3302 ONE_MORE_BYTE (c2);
df7492f9 3303 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3304 /* C2 is not in a valid range. */
df7492f9
KH
3305 goto invalid_code;
3306 c1 = (c1 << 8) | (c2 & 0x7F);
3307 if (CHARSET_DIMENSION (charset) > 2)
3308 {
3309 ONE_MORE_BYTE (c2);
3310 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3311 /* C2 is not in a valid range. */
3312 goto invalid_code;
3313 c1 = (c1 << 8) | (c2 & 0x7F);
3314 }
3315 }
3316
3317 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3318 if (c < 0)
3319 {
3320 MAYBE_FINISH_COMPOSITION ();
3321 for (; src_base < src; src_base++, char_offset++)
3322 {
3323 if (ASCII_BYTE_P (*src_base))
3324 *charbuf++ = *src_base;
3325 else
3326 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3327 }
3328 }
3329 else if (composition_state == COMPOSING_NO)
3330 {
3331 *charbuf++ = c;
3332 char_offset++;
4ed46869 3333 }
df7492f9 3334 else
781d7a48
KH
3335 {
3336 components[component_idx++] = c;
3337 if (method == COMPOSITION_WITH_RULE
3338 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3339 && composition_state == COMPOSING_COMPONENT_CHAR))
3340 composition_state++;
4ed46869
KH
3341 }
3342 continue;
3343
df7492f9
KH
3344 invalid_code:
3345 MAYBE_FINISH_COMPOSITION ();
4ed46869 3346 src = src_base;
df7492f9
KH
3347 consumed_chars = consumed_chars_base;
3348 ONE_MORE_BYTE (c);
065e3595 3349 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3350 char_offset++;
df7492f9 3351 coding->errors++;
4776e638
KH
3352 continue;
3353
3354 break_loop:
3355 break;
4ed46869 3356 }
fb88bf2d 3357
df7492f9 3358 no_more_source:
ff0dacd7 3359 if (last_id != charset_ascii)
69a80ea3 3360 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3361 coding->consumed_char += consumed_chars_base;
3362 coding->consumed = src_base - coding->source;
3363 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3364}
3365
b73bfc1c 3366
f4dee582 3367/* ISO2022 encoding stuff. */
4ed46869
KH
3368
3369/*
f4dee582 3370 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3371 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3372 variant has the following specifications:
df7492f9 3373 1. Initial designation to G0 thru G3.
4ed46869
KH
3374 2. Allows short-form designation?
3375 3. ASCII should be designated to G0 before control characters?
3376 4. ASCII should be designated to G0 at end of line?
3377 5. 7-bit environment or 8-bit environment?
3378 6. Use locking-shift?
3379 7. Use Single-shift?
3380 And the following two are only for Japanese:
3381 8. Use ASCII in place of JIS0201-1976-Roman?
3382 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3383 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3384 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3385 details.
4ed46869
KH
3386*/
3387
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.  The optional
   revision prefix (ESC & <rev>) comes first when CODING asks for
   revision numbers and CHARSET has one.  For a multi-dimensional
   94-charset designated to register 0 whose <final-char> is '@',
   'A', or 'B', the intermediate byte is omitted (short form) unless
   CODING requests the long form.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    /* Intermediate bytes selecting REG, for 96- and 94-charsets.  */   \
    const char *intermediates                                           \
      = CHARSET_ISO_CHARS_96 (charset) ? ",-./" : "()*+";               \
    int revision = -1;                                                  \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      revision = CHARSET_ISO_REVISION (charset);                        \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      EMIT_ONE_ASCII_BYTE ('$');                                        \
    if (CHARSET_DIMENSION (charset) == 1                                \
        || CHARSET_ISO_CHARS_96 (charset)                               \
        || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM)      \
        || reg != 0                                                     \
        || final_char < '@' || final_char > 'B')                        \
      EMIT_ONE_ASCII_BYTE (intermediates[reg]);                         \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3435
df7492f9 3436
4ed46869
KH
/* The following two macros emit the ISO2022 single-shift functions
   (single-shift-2 and single-shift-3): the one-byte control code in
   an 8-bit environment, the two-byte escape sequence when CODING is
   restricted to seven bits.  Both mark CODING as single-shifting.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3459
df7492f9 3460
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING which graphic register is now invoked to graphic
   plane 0.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3 is ESC 'o'; ESC 'n' is locking-shift-2.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3491
df7492f9 3492
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_ROMAN set and CHARSET is ASCII, it is replaced
   by JISX0201-Roman.  The macro uses the variables `coding', `dst'
   and `produced_chars' of the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift was just emitted; it covers only this         \
           character.  */                                               \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3535
df7492f9 3536
f4dee582
RS
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS set and CHARSET is JISX0208, it is
   replaced by JISX0208-1978.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
    int shifted, eight_bit;                                             \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* First decide whether the bytes go out with the 8th bit set.  */  \
    shifted = CODING_ISO_SINGLE_SHIFTING (coding) ? 1 : 0;              \
    if (shifted)                                                        \
      eight_bit = ! (CODING_ISO_FLAGS (coding)                          \
                     & CODING_ISO_FLAG_SEVEN_BITS);                     \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      eight_bit = 0;                                                    \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      eight_bit = 1;                                                    \
    else                                                                \
      {                                                                 \
        /* CHARSET is not invoked to any graphic plane yet: designate   \
           and/or invoke it, then go round the loop again to produce    \
           the character itself.  */                                    \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    if (eight_bit)                                                      \
      EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
    else                                                                \
      EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
    if (shifted)                                                        \
      CODING_ISO_SINGLE_SHIFTING (coding) = 0;                          \
    break;                                                              \
  } while (1)
3579
05e6f5dc 3580
df7492f9
KH
/* Produce codes for character C of CHARSET: encode C with ENCODE_CHAR,
   then hand the result to the DIMENSION1 or DIMENSION2 producer
   according to the dimension of CHARSET.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    int position_code = ENCODE_CHAR ((charset),(c));                    \
                                                                        \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), position_code >> 8,   \
                                       position_code & 0xFF);           \
    else                                                                \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), position_code);       \
  } while (0)
bdd9fb48 3590
05e6f5dc 3591
4ed46869 3592/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3593 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3594 Return new DST. */
3595
3596unsigned char *
df7492f9
KH
3597encode_invocation_designation (charset, coding, dst, p_nchars)
3598 struct charset *charset;
4ed46869
KH
3599 struct coding_system *coding;
3600 unsigned char *dst;
df7492f9 3601 int *p_nchars;
4ed46869 3602{
df7492f9
KH
3603 int multibytep = coding->dst_multibyte;
3604 int produced_chars = *p_nchars;
4ed46869 3605 int reg; /* graphic register number */
df7492f9 3606 int id = CHARSET_ID (charset);
4ed46869
KH
3607
3608 /* At first, check designations. */
3609 for (reg = 0; reg < 4; reg++)
df7492f9 3610 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3611 break;
3612
3613 if (reg >= 4)
3614 {
3615 /* CHARSET is not yet designated to any graphic registers. */
3616 /* At first check the requested designation. */
df7492f9
KH
3617 reg = CODING_ISO_REQUEST (coding, id);
3618 if (reg < 0)
1ba9e4ab
KH
3619 /* Since CHARSET requests no special designation, designate it
3620 to graphic register 0. */
4ed46869
KH
3621 reg = 0;
3622
3623 ENCODE_DESIGNATION (charset, reg, coding);
3624 }
3625
df7492f9
KH
3626 if (CODING_ISO_INVOCATION (coding, 0) != reg
3627 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3628 {
3629 /* Since the graphic register REG is not invoked to any graphic
3630 planes, invoke it to graphic plane 0. */
3631 switch (reg)
3632 {
3633 case 0: /* graphic register 0 */
3634 ENCODE_SHIFT_IN;
3635 break;
3636
3637 case 1: /* graphic register 1 */
3638 ENCODE_SHIFT_OUT;
3639 break;
3640
3641 case 2: /* graphic register 2 */
df7492f9 3642 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3643 ENCODE_SINGLE_SHIFT_2;
3644 else
3645 ENCODE_LOCKING_SHIFT_2;
3646 break;
3647
3648 case 3: /* graphic register 3 */
df7492f9 3649 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3650 ENCODE_SINGLE_SHIFT_3;
3651 else
3652 ENCODE_LOCKING_SHIFT_3;
3653 break;
3654 }
3655 }
b73bfc1c 3656
df7492f9 3657 *p_nchars = produced_chars;
4ed46869
KH
3658 return dst;
3659}
3660
/* The following three macros produce codes for indicating direction
   of text.  They write through the EMIT_* macros and read the
   ISO-2022 flags of `coding', so both must be usable at the point of
   expansion.  */

/* Emit a control sequence introducer: the two bytes ESC `[' when the
   coding system is restricted to seven bits, else the single byte
   CSI.  CODING_ISO_FLAGS is a bit mask, so the seven-bit flag is
   tested with `&' as everywhere else in this file.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)


/* Emit the sequence that switches to right-to-left text.  The
   introducer macro is object-like, so it is invoked without an
   argument list.  */
#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)


/* Emit the sequence that switches to left-to-right text.  */
#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
4ed46869 3684
4ed46869
KH
3685
/* Produce the codes that bring the graphic planes and registers back
   to their initial state: shift in if graphic plane 0 does not invoke
   register 0, then re-designate every register whose initial charset
   is set (non-negative) and differs from the current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()				\
  do {									\
    int reg;								\
    struct charset *charset;						\
									\
    if (CODING_ISO_INVOCATION (coding, 0) != 0)				\
      ENCODE_SHIFT_IN;							\
    for (reg = 0; reg < 4; reg++)					\
      {									\
	if (CODING_ISO_INITIAL (coding, reg) < 0			\
	    || (CODING_ISO_DESIGNATION (coding, reg)			\
		== CODING_ISO_INITIAL (coding, reg)))			\
	  continue;							\
	charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));	\
	ENCODE_DESIGNATION (charset, reg, coding);			\
      }									\
  } while (0)
3704
df7492f9 3705
bdd9fb48 3706/* Produce designation sequences of charsets in the line started from
b73bfc1c 3707 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3708
3709 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3710 find all the necessary designations. */
3711
b73bfc1c 3712static unsigned char *
df7492f9 3713encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3714 struct coding_system *coding;
df7492f9
KH
3715 int *charbuf, *charbuf_end;
3716 unsigned char *dst;
e0e989f6 3717{
df7492f9 3718 struct charset *charset;
bdd9fb48
KH
3719 /* Table of charsets to be designated to each graphic register. */
3720 int r[4];
df7492f9
KH
3721 int c, found = 0, reg;
3722 int produced_chars = 0;
3723 int multibytep = coding->dst_multibyte;
3724 Lisp_Object attrs;
3725 Lisp_Object charset_list;
3726
3727 attrs = CODING_ID_ATTRS (coding->id);
3728 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3729 if (EQ (charset_list, Qiso_2022))
3730 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3731
3732 for (reg = 0; reg < 4; reg++)
3733 r[reg] = -1;
3734
b73bfc1c 3735 while (found < 4)
e0e989f6 3736 {
df7492f9
KH
3737 int id;
3738
3739 c = *charbuf++;
b73bfc1c
KH
3740 if (c == '\n')
3741 break;
df7492f9
KH
3742 charset = char_charset (c, charset_list, NULL);
3743 id = CHARSET_ID (charset);
3744 reg = CODING_ISO_REQUEST (coding, id);
3745 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3746 {
3747 found++;
df7492f9 3748 r[reg] = id;
bdd9fb48 3749 }
bdd9fb48
KH
3750 }
3751
3752 if (found)
3753 {
3754 for (reg = 0; reg < 4; reg++)
3755 if (r[reg] >= 0
df7492f9
KH
3756 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3757 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3758 }
b73bfc1c
KH
3759
3760 return dst;
e0e989f6
KH
3761}
3762
4ed46869
KH
3763/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3764
df7492f9
KH
3765static int
3766encode_coding_iso_2022 (coding)
4ed46869 3767 struct coding_system *coding;
4ed46869 3768{
df7492f9
KH
3769 int multibytep = coding->dst_multibyte;
3770 int *charbuf = coding->charbuf;
3771 int *charbuf_end = charbuf + coding->charbuf_used;
3772 unsigned char *dst = coding->destination + coding->produced;
3773 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3774 int safe_room = 16;
3775 int bol_designation
3776 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3777 && CODING_ISO_BOL (coding));
3778 int produced_chars = 0;
3779 Lisp_Object attrs, eol_type, charset_list;
3780 int ascii_compatible;
b73bfc1c 3781 int c;
ff0dacd7 3782 int preferred_charset_id = -1;
05e6f5dc 3783
24a73b0a
KH
3784 CODING_GET_INFO (coding, attrs, charset_list);
3785 eol_type = CODING_ID_EOL_TYPE (coding->id);
3786 if (VECTORP (eol_type))
3787 eol_type = Qunix;
3788
004068e4 3789 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3790 /* Charset list may have been changed. */
3791 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
8f924df7 3792 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3793
df7492f9 3794 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3795
df7492f9 3796 while (charbuf < charbuf_end)
4ed46869 3797 {
df7492f9 3798 ASSURE_DESTINATION (safe_room);
b73bfc1c 3799
df7492f9 3800 if (bol_designation)
b73bfc1c 3801 {
df7492f9 3802 unsigned char *dst_prev = dst;
4ed46869 3803
bdd9fb48 3804 /* We have to produce designation sequences if any now. */
df7492f9
KH
3805 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3806 bol_designation = 0;
3807 /* We are sure that designation sequences are all ASCII bytes. */
3808 produced_chars += dst - dst_prev;
e0e989f6
KH
3809 }
3810
df7492f9 3811 c = *charbuf++;
ec6d2bb8 3812
ff0dacd7
KH
3813 if (c < 0)
3814 {
3815 /* Handle an annotation. */
3816 switch (*charbuf)
ec6d2bb8 3817 {
ff0dacd7
KH
3818 case CODING_ANNOTATE_COMPOSITION_MASK:
3819 /* Not yet implemented. */
3820 break;
3821 case CODING_ANNOTATE_CHARSET_MASK:
3822 preferred_charset_id = charbuf[3];
3823 if (preferred_charset_id >= 0
3824 && NILP (Fmemq (make_number (preferred_charset_id),
3825 charset_list)))
3826 preferred_charset_id = -1;
3827 break;
3828 default:
3829 abort ();
4ed46869 3830 }
ff0dacd7
KH
3831 charbuf += -c - 1;
3832 continue;
4ed46869 3833 }
ec6d2bb8 3834
b73bfc1c
KH
3835 /* Now encode the character C. */
3836 if (c < 0x20 || c == 0x7F)
3837 {
df7492f9
KH
3838 if (c == '\n'
3839 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3840 {
df7492f9
KH
3841 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3842 ENCODE_RESET_PLANE_AND_REGISTER ();
3843 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3844 {
df7492f9
KH
3845 int i;
3846
3847 for (i = 0; i < 4; i++)
3848 CODING_ISO_DESIGNATION (coding, i)
3849 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3850 }
df7492f9
KH
3851 bol_designation
3852 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3853 }
df7492f9
KH
3854 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3855 ENCODE_RESET_PLANE_AND_REGISTER ();
3856 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3857 }
df7492f9 3858 else if (ASCII_CHAR_P (c))
88993dfd 3859 {
df7492f9
KH
3860 if (ascii_compatible)
3861 EMIT_ONE_ASCII_BYTE (c);
93dec019 3862 else
19a8d9e0 3863 {
bf16eb23
KH
3864 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3865 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 3866 }
4ed46869 3867 }
16eafb5d 3868 else if (CHAR_BYTE8_P (c))
88993dfd 3869 {
16eafb5d
KH
3870 c = CHAR_TO_BYTE8 (c);
3871 EMIT_ONE_BYTE (c);
88993dfd 3872 }
b73bfc1c 3873 else
df7492f9 3874 {
ff0dacd7 3875 struct charset *charset;
b73bfc1c 3876
ff0dacd7
KH
3877 if (preferred_charset_id >= 0)
3878 {
3879 charset = CHARSET_FROM_ID (preferred_charset_id);
3880 if (! CHAR_CHARSET_P (c, charset))
3881 charset = char_charset (c, charset_list, NULL);
3882 }
3883 else
3884 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3885 if (!charset)
3886 {
41cbe562
KH
3887 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3888 {
3889 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3890 charset = CHARSET_FROM_ID (charset_ascii);
3891 }
3892 else
3893 {
3894 c = coding->default_char;
3895 charset = char_charset (c, charset_list, NULL);
3896 }
df7492f9
KH
3897 }
3898 ENCODE_ISO_CHARACTER (charset, c);
3899 }
84fbb8a0 3900 }
b73bfc1c 3901
df7492f9
KH
3902 if (coding->mode & CODING_MODE_LAST_BLOCK
3903 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3904 {
3905 ASSURE_DESTINATION (safe_room);
3906 ENCODE_RESET_PLANE_AND_REGISTER ();
3907 }
065e3595 3908 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
3909 CODING_ISO_BOL (coding) = bol_designation;
3910 coding->produced_char += produced_chars;
3911 coding->produced = dst - coding->destination;
3912 return 0;
4ed46869
KH
3913}
3914
3915\f
df7492f9 3916/*** 8. SJIS and BIG5 handlers ***/
4ed46869 3917
df7492f9 3918/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3919 quite widely. So, for the moment, Emacs supports them in the bare
3920 C code. But, in the future, they may be supported only by CCL. */
3921
3922/* SJIS is a coding system encoding three character sets: ASCII, right
3923 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3924 as is. A character of charset katakana-jisx0201 is encoded by
3925 "position-code + 0x80". A character of charset japanese-jisx0208
3926 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3927 so that it fits in the range below.
4ed46869
KH
3928
3929 --- CODE RANGE of SJIS ---
3930 (character set) (range)
3931 ASCII 0x00 .. 0x7F
df7492f9 3932 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3933 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3934 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3935 -------------------------------
3936
3937*/
3938
3939/* BIG5 is a coding system encoding two character sets: ASCII and
3940 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3941 character set and is encoded in two bytes.
4ed46869
KH
3942
3943 --- CODE RANGE of BIG5 ---
3944 (character set) (range)
3945 ASCII 0x00 .. 0x7F
3946 Big5 (1st byte) 0xA1 .. 0xFE
3947 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3948 --------------------------
3949
df7492f9 3950 */
4ed46869
KH
3951
3952/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3953 Check if a text is encoded in SJIS. If it is, return
df7492f9 3954 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3955
0a28aafb 3956static int
ff0dacd7 3957detect_coding_sjis (coding, detect_info)
df7492f9 3958 struct coding_system *coding;
ff0dacd7 3959 struct coding_detection_info *detect_info;
4ed46869 3960{
065e3595 3961 const unsigned char *src = coding->source, *src_base;
8f924df7 3962 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
3963 int multibytep = coding->src_multibyte;
3964 int consumed_chars = 0;
3965 int found = 0;
b73bfc1c 3966 int c;
df7492f9 3967
ff0dacd7 3968 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3969 /* A coding system of this category is always ASCII compatible. */
3970 src += coding->head_ascii;
4ed46869 3971
b73bfc1c 3972 while (1)
4ed46869 3973 {
065e3595 3974 src_base = src;
df7492f9 3975 ONE_MORE_BYTE (c);
682169fe
KH
3976 if (c < 0x80)
3977 continue;
df7492f9 3978 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3979 {
df7492f9 3980 ONE_MORE_BYTE (c);
682169fe 3981 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3982 break;
ff0dacd7 3983 found = CATEGORY_MASK_SJIS;
4ed46869 3984 }
df7492f9 3985 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3986 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3987 else
3988 break;
4ed46869 3989 }
ff0dacd7 3990 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3991 return 0;
3992
3993 no_more_source:
065e3595 3994 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 3995 {
ff0dacd7 3996 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 3997 return 0;
4ed46869 3998 }
ff0dacd7
KH
3999 detect_info->found |= found;
4000 return 1;
4ed46869
KH
4001}
4002
4003/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4004 Check if a text is encoded in BIG5. If it is, return
df7492f9 4005 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4006
0a28aafb 4007static int
ff0dacd7 4008detect_coding_big5 (coding, detect_info)
df7492f9 4009 struct coding_system *coding;
ff0dacd7 4010 struct coding_detection_info *detect_info;
4ed46869 4011{
065e3595 4012 const unsigned char *src = coding->source, *src_base;
8f924df7 4013 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4014 int multibytep = coding->src_multibyte;
4015 int consumed_chars = 0;
4016 int found = 0;
b73bfc1c 4017 int c;
fa42c37f 4018
ff0dacd7 4019 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4020 /* A coding system of this category is always ASCII compatible. */
4021 src += coding->head_ascii;
fa42c37f 4022
b73bfc1c 4023 while (1)
fa42c37f 4024 {
065e3595 4025 src_base = src;
df7492f9
KH
4026 ONE_MORE_BYTE (c);
4027 if (c < 0x80)
fa42c37f 4028 continue;
df7492f9 4029 if (c >= 0xA1)
fa42c37f 4030 {
df7492f9
KH
4031 ONE_MORE_BYTE (c);
4032 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4033 return 0;
ff0dacd7 4034 found = CATEGORY_MASK_BIG5;
fa42c37f 4035 }
df7492f9
KH
4036 else
4037 break;
fa42c37f 4038 }
ff0dacd7 4039 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4040 return 0;
fa42c37f 4041
df7492f9 4042 no_more_source:
065e3595 4043 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4044 {
ff0dacd7 4045 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4046 return 0;
4047 }
ff0dacd7
KH
4048 detect_info->found |= found;
4049 return 1;
fa42c37f
KH
4050}
4051
4ed46869
KH
4052/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4053 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4054
b73bfc1c 4055static void
df7492f9 4056decode_coding_sjis (coding)
4ed46869 4057 struct coding_system *coding;
4ed46869 4058{
8f924df7
KH
4059 const unsigned char *src = coding->source + coding->consumed;
4060 const unsigned char *src_end = coding->source + coding->src_bytes;
4061 const unsigned char *src_base;
69a80ea3
KH
4062 int *charbuf = coding->charbuf + coding->charbuf_used;
4063 int *charbuf_end
4064 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4065 int consumed_chars = 0, consumed_chars_base;
4066 int multibytep = coding->src_multibyte;
4067 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4068 struct charset *charset_kanji2;
24a73b0a 4069 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4070 int char_offset = coding->produced_char;
4071 int last_offset = char_offset;
4072 int last_id = charset_ascii;
a5d301df 4073
24a73b0a 4074 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4075
4076 val = charset_list;
4077 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4078 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4079 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4080 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4081
b73bfc1c 4082 while (1)
4ed46869 4083 {
df7492f9 4084 int c, c1;
24a73b0a 4085 struct charset *charset;
fa42c37f 4086
b73bfc1c 4087 src_base = src;
df7492f9 4088 consumed_chars_base = consumed_chars;
fa42c37f 4089
df7492f9
KH
4090 if (charbuf >= charbuf_end)
4091 break;
4092
4093 ONE_MORE_BYTE (c);
065e3595
KH
4094 if (c < 0)
4095 goto invalid_code;
24a73b0a
KH
4096 if (c < 0x80)
4097 charset = charset_roman;
57a47f8a 4098 else if (c == 0x80 || c == 0xA0)
8e921c4b 4099 goto invalid_code;
57a47f8a
KH
4100 else if (c >= 0xA1 && c <= 0xDF)
4101 {
4102 /* SJIS -> JISX0201-Kana */
4103 c &= 0x7F;
4104 charset = charset_kana;
4105 }
4106 else if (c <= 0xEF)
df7492f9 4107 {
57a47f8a
KH
4108 /* SJIS -> JISX0208 */
4109 ONE_MORE_BYTE (c1);
4110 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4111 goto invalid_code;
57a47f8a
KH
4112 c = (c << 8) | c1;
4113 SJIS_TO_JIS (c);
4114 charset = charset_kanji;
4115 }
4116 else if (c <= 0xFC && charset_kanji2)
4117 {
c6876370 4118 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4119 ONE_MORE_BYTE (c1);
4120 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4121 goto invalid_code;
57a47f8a
KH
4122 c = (c << 8) | c1;
4123 SJIS_TO_JIS2 (c);
4124 charset = charset_kanji2;
df7492f9 4125 }
57a47f8a
KH
4126 else
4127 goto invalid_code;
24a73b0a
KH
4128 if (charset->id != charset_ascii
4129 && last_id != charset->id)
4130 {
4131 if (last_id != charset_ascii)
69a80ea3 4132 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4133 last_id = charset->id;
4134 last_offset = char_offset;
4135 }
4136 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4137 *charbuf++ = c;
ff0dacd7 4138 char_offset++;
df7492f9 4139 continue;
b73bfc1c 4140
df7492f9
KH
4141 invalid_code:
4142 src = src_base;
4143 consumed_chars = consumed_chars_base;
4144 ONE_MORE_BYTE (c);
065e3595 4145 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4146 char_offset++;
df7492f9
KH
4147 coding->errors++;
4148 }
fa42c37f 4149
df7492f9 4150 no_more_source:
ff0dacd7 4151 if (last_id != charset_ascii)
69a80ea3 4152 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4153 coding->consumed_char += consumed_chars_base;
4154 coding->consumed = src_base - coding->source;
4155 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4156}
4157
b73bfc1c 4158static void
df7492f9 4159decode_coding_big5 (coding)
4ed46869 4160 struct coding_system *coding;
4ed46869 4161{
8f924df7
KH
4162 const unsigned char *src = coding->source + coding->consumed;
4163 const unsigned char *src_end = coding->source + coding->src_bytes;
4164 const unsigned char *src_base;
69a80ea3
KH
4165 int *charbuf = coding->charbuf + coding->charbuf_used;
4166 int *charbuf_end
4167 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4168 int consumed_chars = 0, consumed_chars_base;
4169 int multibytep = coding->src_multibyte;
4170 struct charset *charset_roman, *charset_big5;
24a73b0a 4171 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4172 int char_offset = coding->produced_char;
4173 int last_offset = char_offset;
4174 int last_id = charset_ascii;
df7492f9 4175
24a73b0a 4176 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4177 val = charset_list;
4178 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4179 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4180
b73bfc1c 4181 while (1)
4ed46869 4182 {
df7492f9 4183 int c, c1;
24a73b0a 4184 struct charset *charset;
b73bfc1c
KH
4185
4186 src_base = src;
df7492f9
KH
4187 consumed_chars_base = consumed_chars;
4188
4189 if (charbuf >= charbuf_end)
4190 break;
4191
4192 ONE_MORE_BYTE (c);
b73bfc1c 4193
065e3595
KH
4194 if (c < 0)
4195 goto invalid_code;
24a73b0a
KH
4196 if (c < 0x80)
4197 charset = charset_roman;
4198 else
4ed46869 4199 {
24a73b0a
KH
4200 /* BIG5 -> Big5 */
4201 if (c < 0xA1 || c > 0xFE)
4202 goto invalid_code;
4203 ONE_MORE_BYTE (c1);
4204 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4205 goto invalid_code;
4206 c = c << 8 | c1;
4207 charset = charset_big5;
4ed46869 4208 }
24a73b0a
KH
4209 if (charset->id != charset_ascii
4210 && last_id != charset->id)
df7492f9 4211 {
24a73b0a 4212 if (last_id != charset_ascii)
69a80ea3 4213 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4214 last_id = charset->id;
4215 last_offset = char_offset;
4ed46869 4216 }
24a73b0a 4217 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4218 *charbuf++ = c;
ff0dacd7 4219 char_offset++;
fb88bf2d
KH
4220 continue;
4221
df7492f9 4222 invalid_code:
4ed46869 4223 src = src_base;
df7492f9
KH
4224 consumed_chars = consumed_chars_base;
4225 ONE_MORE_BYTE (c);
065e3595 4226 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4227 char_offset++;
df7492f9 4228 coding->errors++;
fb88bf2d 4229 }
d46c5b12 4230
df7492f9 4231 no_more_source:
ff0dacd7 4232 if (last_id != charset_ascii)
69a80ea3 4233 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4234 coding->consumed_char += consumed_chars_base;
4235 coding->consumed = src_base - coding->source;
4236 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4237}
4238
4239/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4240 This function can encode charsets `ascii', `katakana-jisx0201',
4241 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4242 are sure that all these charsets are registered as official charset
4ed46869
KH
4243 (i.e. do not have extended leading-codes). Characters of other
4244 charsets are produced without any encoding. If SJIS_P is 1, encode
4245 SJIS text, else encode BIG5 text. */
4246
df7492f9
KH
4247static int
4248encode_coding_sjis (coding)
4ed46869 4249 struct coding_system *coding;
4ed46869 4250{
df7492f9
KH
4251 int multibytep = coding->dst_multibyte;
4252 int *charbuf = coding->charbuf;
4253 int *charbuf_end = charbuf + coding->charbuf_used;
4254 unsigned char *dst = coding->destination + coding->produced;
4255 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4256 int safe_room = 4;
4257 int produced_chars = 0;
24a73b0a 4258 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4259 int ascii_compatible;
4260 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4261 struct charset *charset_kanji2;
df7492f9 4262 int c;
a5d301df 4263
24a73b0a 4264 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4265 val = charset_list;
4266 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4267 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4268 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4269 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4270
df7492f9 4271 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4272
df7492f9
KH
4273 while (charbuf < charbuf_end)
4274 {
4275 ASSURE_DESTINATION (safe_room);
4276 c = *charbuf++;
b73bfc1c 4277 /* Now encode the character C. */
df7492f9
KH
4278 if (ASCII_CHAR_P (c) && ascii_compatible)
4279 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4280 else if (CHAR_BYTE8_P (c))
4281 {
4282 c = CHAR_TO_BYTE8 (c);
4283 EMIT_ONE_BYTE (c);
4284 }
df7492f9 4285 else
b73bfc1c 4286 {
df7492f9
KH
4287 unsigned code;
4288 struct charset *charset = char_charset (c, charset_list, &code);
4289
4290 if (!charset)
4ed46869 4291 {
41cbe562 4292 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4293 {
41cbe562
KH
4294 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4295 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4296 }
41cbe562 4297 else
b73bfc1c 4298 {
41cbe562
KH
4299 c = coding->default_char;
4300 charset = char_charset (c, charset_list, &code);
b73bfc1c 4301 }
b73bfc1c 4302 }
df7492f9
KH
4303 if (code == CHARSET_INVALID_CODE (charset))
4304 abort ();
4305 if (charset == charset_kanji)
4306 {
4307 int c1, c2;
4308 JIS_TO_SJIS (code);
4309 c1 = code >> 8, c2 = code & 0xFF;
4310 EMIT_TWO_BYTES (c1, c2);
4311 }
4312 else if (charset == charset_kana)
4313 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4314 else if (charset_kanji2 && charset == charset_kanji2)
4315 {
4316 int c1, c2;
4317
4318 c1 = code >> 8;
4319 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4320 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4321 {
4322 JIS_TO_SJIS2 (code);
4323 c1 = code >> 8, c2 = code & 0xFF;
4324 EMIT_TWO_BYTES (c1, c2);
4325 }
4326 else
4327 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4328 }
df7492f9
KH
4329 else
4330 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4331 }
4332 }
065e3595 4333 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4334 coding->produced_char += produced_chars;
4335 coding->produced = dst - coding->destination;
4336 return 0;
4337}
4338
4339static int
4340encode_coding_big5 (coding)
4341 struct coding_system *coding;
4342{
4343 int multibytep = coding->dst_multibyte;
4344 int *charbuf = coding->charbuf;
4345 int *charbuf_end = charbuf + coding->charbuf_used;
4346 unsigned char *dst = coding->destination + coding->produced;
4347 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4348 int safe_room = 4;
4349 int produced_chars = 0;
24a73b0a 4350 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4351 int ascii_compatible;
4352 struct charset *charset_roman, *charset_big5;
4353 int c;
4354
24a73b0a 4355 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4356 val = charset_list;
4357 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4358 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4359 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4360
4361 while (charbuf < charbuf_end)
4362 {
4363 ASSURE_DESTINATION (safe_room);
4364 c = *charbuf++;
4365 /* Now encode the character C. */
4366 if (ASCII_CHAR_P (c) && ascii_compatible)
4367 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4368 else if (CHAR_BYTE8_P (c))
4369 {
4370 c = CHAR_TO_BYTE8 (c);
4371 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4372 }
4373 else
4374 {
df7492f9
KH
4375 unsigned code;
4376 struct charset *charset = char_charset (c, charset_list, &code);
4377
4378 if (! charset)
b73bfc1c 4379 {
41cbe562 4380 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4381 {
41cbe562
KH
4382 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4383 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4384 }
41cbe562 4385 else
0eecad43 4386 {
41cbe562
KH
4387 c = coding->default_char;
4388 charset = char_charset (c, charset_list, &code);
0eecad43 4389 }
4ed46869 4390 }
df7492f9
KH
4391 if (code == CHARSET_INVALID_CODE (charset))
4392 abort ();
4393 if (charset == charset_big5)
b73bfc1c 4394 {
df7492f9
KH
4395 int c1, c2;
4396
4397 c1 = code >> 8, c2 = code & 0xFF;
4398 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4399 }
df7492f9
KH
4400 else
4401 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4402 }
4ed46869 4403 }
065e3595 4404 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4405 coding->produced_char += produced_chars;
4406 coding->produced = dst - coding->destination;
4407 return 0;
4ed46869
KH
4408}
4409
4410\f
df7492f9 4411/*** 9. CCL handlers ***/
1397dc18
KH
4412
4413/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4414 Check if a text is encoded in a coding system of which
4415 encoder/decoder are written in CCL program. If it is, return
df7492f9 4416 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4417
0a28aafb 4418static int
ff0dacd7 4419detect_coding_ccl (coding, detect_info)
df7492f9 4420 struct coding_system *coding;
ff0dacd7 4421 struct coding_detection_info *detect_info;
1397dc18 4422{
065e3595 4423 const unsigned char *src = coding->source, *src_base;
8f924df7 4424 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4425 int multibytep = coding->src_multibyte;
4426 int consumed_chars = 0;
4427 int found = 0;
0e219d54 4428 unsigned char *valids;
df7492f9
KH
4429 int head_ascii = coding->head_ascii;
4430 Lisp_Object attrs;
4431
ff0dacd7
KH
4432 detect_info->checked |= CATEGORY_MASK_CCL;
4433
df7492f9 4434 coding = &coding_categories[coding_category_ccl];
0e219d54 4435 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
4436 attrs = CODING_ID_ATTRS (coding->id);
4437 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4438 src += head_ascii;
1397dc18 4439
b73bfc1c 4440 while (1)
1397dc18 4441 {
df7492f9 4442 int c;
065e3595
KH
4443
4444 src_base = src;
df7492f9 4445 ONE_MORE_BYTE (c);
065e3595 4446 if (c < 0 || ! valids[c])
df7492f9 4447 break;
ff0dacd7
KH
4448 if ((valids[c] > 1))
4449 found = CATEGORY_MASK_CCL;
df7492f9 4450 }
ff0dacd7 4451 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4452 return 0;
4453
4454 no_more_source:
ff0dacd7
KH
4455 detect_info->found |= found;
4456 return 1;
df7492f9
KH
4457}
4458
4459static void
4460decode_coding_ccl (coding)
4461 struct coding_system *coding;
4462{
7c78e542 4463 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4464 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
4465 int *charbuf = coding->charbuf + coding->charbuf_used;
4466 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
4467 int consumed_chars = 0;
4468 int multibytep = coding->src_multibyte;
4469 struct ccl_program ccl;
4470 int source_charbuf[1024];
4471 int source_byteidx[1024];
24a73b0a 4472 Lisp_Object attrs, charset_list;
df7492f9 4473
24a73b0a 4474 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4475 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4476
4477 while (src < src_end)
4478 {
7c78e542 4479 const unsigned char *p = src;
df7492f9
KH
4480 int *source, *source_end;
4481 int i = 0;
4482
4483 if (multibytep)
4484 while (i < 1024 && p < src_end)
4485 {
4486 source_byteidx[i] = p - src;
4487 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4488 }
4489 else
4490 while (i < 1024 && p < src_end)
4491 source_charbuf[i++] = *p++;
8f924df7 4492
df7492f9
KH
4493 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4494 ccl.last_block = 1;
4495
4496 source = source_charbuf;
4497 source_end = source + i;
4498 while (source < source_end)
4499 {
4500 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4501 source_end - source, charbuf_end - charbuf,
4502 charset_list);
df7492f9
KH
4503 source += ccl.consumed;
4504 charbuf += ccl.produced;
4505 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4506 break;
4507 }
4508 if (source < source_end)
4509 src += source_byteidx[source - source_charbuf];
4510 else
4511 src = p;
4512 consumed_chars += source - source_charbuf;
4513
4514 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4515 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4516 break;
4517 }
4518
4519 switch (ccl.status)
4520 {
4521 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4522 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4523 break;
4524 case CCL_STAT_SUSPEND_BY_DST:
4525 break;
4526 case CCL_STAT_QUIT:
4527 case CCL_STAT_INVALID_CMD:
065e3595 4528 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4529 break;
4530 default:
065e3595 4531 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4532 break;
4533 }
4534 coding->consumed_char += consumed_chars;
4535 coding->consumed = src - coding->source;
4536 coding->charbuf_used = charbuf - coding->charbuf;
4537}
4538
4539static int
4540encode_coding_ccl (coding)
4541 struct coding_system *coding;
4542{
4543 struct ccl_program ccl;
4544 int multibytep = coding->dst_multibyte;
4545 int *charbuf = coding->charbuf;
4546 int *charbuf_end = charbuf + coding->charbuf_used;
4547 unsigned char *dst = coding->destination + coding->produced;
4548 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4549 unsigned char *adjusted_dst_end = dst_end - 1;
4550 int destination_charbuf[1024];
4551 int i, produced_chars = 0;
24a73b0a 4552 Lisp_Object attrs, charset_list;
df7492f9 4553
24a73b0a 4554 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4555 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4556
4557 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4558 ccl.dst_multibyte = coding->dst_multibyte;
4559
4560 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4561 {
4562 int dst_bytes = dst_end - dst;
4563 if (dst_bytes > 1024)
4564 dst_bytes = 1024;
4565
4566 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4567 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4568 charbuf += ccl.consumed;
4569 if (multibytep)
4570 for (i = 0; i < ccl.produced; i++)
4571 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4572 else
4573 {
4574 for (i = 0; i < ccl.produced; i++)
4575 *dst++ = destination_charbuf[i] & 0xFF;
4576 produced_chars += ccl.produced;
4577 }
4578 }
4579
4580 switch (ccl.status)
4581 {
4582 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4583 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4584 break;
4585 case CCL_STAT_SUSPEND_BY_DST:
065e3595 4586 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
4587 break;
4588 case CCL_STAT_QUIT:
4589 case CCL_STAT_INVALID_CMD:
065e3595 4590 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4591 break;
4592 default:
065e3595 4593 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4594 break;
1397dc18 4595 }
df7492f9
KH
4596
4597 coding->produced_char += produced_chars;
4598 coding->produced = dst - coding->destination;
4599 return 0;
1397dc18
KH
4600}
4601
df7492f9 4602
1397dc18 4603\f
df7492f9 4604/*** 10, 11. no-conversion handlers ***/
4ed46869 4605
b73bfc1c 4606/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4607
b73bfc1c 4608static void
df7492f9 4609decode_coding_raw_text (coding)
4ed46869 4610 struct coding_system *coding;
4ed46869 4611{
df7492f9 4612 coding->chars_at_source = 1;
2c78b7e1
KH
4613 coding->consumed_char = 0;
4614 coding->consumed = 0;
065e3595 4615 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4616}
4ed46869 4617
df7492f9
KH
4618static int
4619encode_coding_raw_text (coding)
4620 struct coding_system *coding;
4621{
4622 int multibytep = coding->dst_multibyte;
4623 int *charbuf = coding->charbuf;
4624 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4625 unsigned char *dst = coding->destination + coding->produced;
4626 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4627 int produced_chars = 0;
b73bfc1c
KH
4628 int c;
4629
df7492f9 4630 if (multibytep)
b73bfc1c 4631 {
df7492f9 4632 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4633
df7492f9
KH
4634 if (coding->src_multibyte)
4635 while (charbuf < charbuf_end)
4636 {
4637 ASSURE_DESTINATION (safe_room);
4638 c = *charbuf++;
4639 if (ASCII_CHAR_P (c))
4640 EMIT_ONE_ASCII_BYTE (c);
4641 else if (CHAR_BYTE8_P (c))
4642 {
4643 c = CHAR_TO_BYTE8 (c);
4644 EMIT_ONE_BYTE (c);
4645 }
4646 else
4647 {
4648 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4649
df7492f9
KH
4650 CHAR_STRING_ADVANCE (c, p1);
4651 while (p0 < p1)
9d123124
KH
4652 {
4653 EMIT_ONE_BYTE (*p0);
4654 p0++;
4655 }
df7492f9
KH
4656 }
4657 }
b73bfc1c 4658 else
df7492f9
KH
4659 while (charbuf < charbuf_end)
4660 {
4661 ASSURE_DESTINATION (safe_room);
4662 c = *charbuf++;
4663 EMIT_ONE_BYTE (c);
4664 }
4665 }
4666 else
4ed46869 4667 {
df7492f9 4668 if (coding->src_multibyte)
d46c5b12 4669 {
df7492f9
KH
4670 int safe_room = MAX_MULTIBYTE_LENGTH;
4671
4672 while (charbuf < charbuf_end)
d46c5b12 4673 {
df7492f9
KH
4674 ASSURE_DESTINATION (safe_room);
4675 c = *charbuf++;
4676 if (ASCII_CHAR_P (c))
4677 *dst++ = c;
4678 else if (CHAR_BYTE8_P (c))
4679 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4680 else
df7492f9
KH
4681 CHAR_STRING_ADVANCE (c, dst);
4682 produced_chars++;
d46c5b12
KH
4683 }
4684 }
df7492f9
KH
4685 else
4686 {
4687 ASSURE_DESTINATION (charbuf_end - charbuf);
4688 while (charbuf < charbuf_end && dst < dst_end)
4689 *dst++ = *charbuf++;
4690 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4691 }
4ed46869 4692 }
065e3595 4693 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4694 coding->produced_char += produced_chars;
4695 coding->produced = dst - coding->destination;
4696 return 0;
4ed46869
KH
4697}
4698
ff0dacd7
KH
4699/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4700 Check if a text is encoded in a charset-based coding system. If it
4701 is, return 1, else return 0. */
4702
0a28aafb 4703static int
ff0dacd7 4704detect_coding_charset (coding, detect_info)
df7492f9 4705 struct coding_system *coding;
ff0dacd7 4706 struct coding_detection_info *detect_info;
1397dc18 4707{
065e3595 4708 const unsigned char *src = coding->source, *src_base;
8f924df7 4709 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4710 int multibytep = coding->src_multibyte;
4711 int consumed_chars = 0;
4712 Lisp_Object attrs, valids;
584948ac 4713 int found = 0;
1397dc18 4714
ff0dacd7
KH
4715 detect_info->checked |= CATEGORY_MASK_CHARSET;
4716
df7492f9
KH
4717 coding = &coding_categories[coding_category_charset];
4718 attrs = CODING_ID_ATTRS (coding->id);
4719 valids = AREF (attrs, coding_attr_charset_valids);
4720
4721 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4722 src += coding->head_ascii;
1397dc18 4723
b73bfc1c 4724 while (1)
1397dc18 4725 {
df7492f9 4726 int c;
1397dc18 4727
065e3595 4728 src_base = src;
df7492f9 4729 ONE_MORE_BYTE (c);
065e3595
KH
4730 if (c < 0)
4731 continue;
df7492f9
KH
4732 if (NILP (AREF (valids, c)))
4733 break;
584948ac 4734 if (c >= 0x80)
ff0dacd7 4735 found = CATEGORY_MASK_CHARSET;
df7492f9 4736 }
ff0dacd7 4737 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4738 return 0;
4ed46869 4739
df7492f9 4740 no_more_source:
ff0dacd7
KH
4741 detect_info->found |= found;
4742 return 1;
df7492f9 4743}
b73bfc1c 4744
b73bfc1c 4745static void
df7492f9 4746decode_coding_charset (coding)
4ed46869 4747 struct coding_system *coding;
4ed46869 4748{
8f924df7
KH
4749 const unsigned char *src = coding->source + coding->consumed;
4750 const unsigned char *src_end = coding->source + coding->src_bytes;
4751 const unsigned char *src_base;
69a80ea3
KH
4752 int *charbuf = coding->charbuf + coding->charbuf_used;
4753 int *charbuf_end
4754 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4755 int consumed_chars = 0, consumed_chars_base;
4756 int multibytep = coding->src_multibyte;
24a73b0a 4757 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
4758 int char_offset = coding->produced_char;
4759 int last_offset = char_offset;
4760 int last_id = charset_ascii;
df7492f9 4761
24a73b0a 4762 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 4763 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4764
df7492f9 4765 while (1)
4ed46869 4766 {
4eb6d3f1 4767 int c;
24a73b0a
KH
4768 Lisp_Object val;
4769 struct charset *charset;
4770 int dim;
4771 int len = 1;
4772 unsigned code;
df7492f9
KH
4773
4774 src_base = src;
4775 consumed_chars_base = consumed_chars;
b73bfc1c 4776
df7492f9
KH
4777 if (charbuf >= charbuf_end)
4778 break;
4779
4eb6d3f1 4780 ONE_MORE_BYTE (c);
065e3595
KH
4781 if (c < 0)
4782 goto invalid_code;
24a73b0a
KH
4783 code = c;
4784
4785 val = AREF (valids, c);
4786 if (NILP (val))
4787 goto invalid_code;
4788 if (INTEGERP (val))
d46c5b12 4789 {
24a73b0a
KH
4790 charset = CHARSET_FROM_ID (XFASTINT (val));
4791 dim = CHARSET_DIMENSION (charset);
4792 while (len < dim)
b73bfc1c 4793 {
24a73b0a
KH
4794 ONE_MORE_BYTE (c);
4795 code = (code << 8) | c;
4796 len++;
b73bfc1c 4797 }
24a73b0a
KH
4798 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4799 charset, code, c);
d46c5b12 4800 }
df7492f9 4801 else
d46c5b12 4802 {
24a73b0a
KH
4803 /* VAL is a list of charset IDs. It is assured that the
4804 list is sorted by charset dimensions (smaller one
4805 comes first). */
4806 while (CONSP (val))
4eb6d3f1 4807 {
24a73b0a 4808 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 4809 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4810 while (len < dim)
4eb6d3f1 4811 {
acb2a965
KH
4812 ONE_MORE_BYTE (c);
4813 code = (code << 8) | c;
f9d71dcd 4814 len++;
4eb6d3f1 4815 }
24a73b0a
KH
4816 CODING_DECODE_CHAR (coding, src, src_base,
4817 src_end, charset, code, c);
4818 if (c >= 0)
4819 break;
4820 val = XCDR (val);
ff0dacd7 4821 }
d46c5b12 4822 }
24a73b0a
KH
4823 if (c < 0)
4824 goto invalid_code;
4825 if (charset->id != charset_ascii
4826 && last_id != charset->id)
4827 {
4828 if (last_id != charset_ascii)
69a80ea3 4829 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4830 last_id = charset->id;
4831 last_offset = char_offset;
4832 }
4833
df7492f9 4834 *charbuf++ = c;
ff0dacd7 4835 char_offset++;
df7492f9
KH
4836 continue;
4837
4838 invalid_code:
4839 src = src_base;
4840 consumed_chars = consumed_chars_base;
4841 ONE_MORE_BYTE (c);
065e3595 4842 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4843 char_offset++;
df7492f9 4844 coding->errors++;
4ed46869
KH
4845 }
4846
df7492f9 4847 no_more_source:
ff0dacd7 4848 if (last_id != charset_ascii)
69a80ea3 4849 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4850 coding->consumed_char += consumed_chars_base;
4851 coding->consumed = src_base - coding->source;
4852 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4853}
4854
df7492f9
KH
4855static int
4856encode_coding_charset (coding)
4ed46869 4857 struct coding_system *coding;
4ed46869 4858{
df7492f9
KH
4859 int multibytep = coding->dst_multibyte;
4860 int *charbuf = coding->charbuf;
4861 int *charbuf_end = charbuf + coding->charbuf_used;
4862 unsigned char *dst = coding->destination + coding->produced;
4863 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4864 int safe_room = MAX_MULTIBYTE_LENGTH;
4865 int produced_chars = 0;
24a73b0a 4866 Lisp_Object attrs, charset_list;
df7492f9 4867 int ascii_compatible;
b73bfc1c 4868 int c;
b73bfc1c 4869
24a73b0a 4870 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 4871 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4872
df7492f9 4873 while (charbuf < charbuf_end)
4ed46869 4874 {
4eb6d3f1 4875 struct charset *charset;
df7492f9 4876 unsigned code;
8f924df7 4877
df7492f9
KH
4878 ASSURE_DESTINATION (safe_room);
4879 c = *charbuf++;
4880 if (ascii_compatible && ASCII_CHAR_P (c))
4881 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 4882 else if (CHAR_BYTE8_P (c))
4ed46869 4883 {
16eafb5d
KH
4884 c = CHAR_TO_BYTE8 (c);
4885 EMIT_ONE_BYTE (c);
d46c5b12 4886 }
d46c5b12 4887 else
b73bfc1c 4888 {
4eb6d3f1
KH
4889 charset = char_charset (c, charset_list, &code);
4890 if (charset)
4891 {
4892 if (CHARSET_DIMENSION (charset) == 1)
4893 EMIT_ONE_BYTE (code);
4894 else if (CHARSET_DIMENSION (charset) == 2)
4895 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4896 else if (CHARSET_DIMENSION (charset) == 3)
4897 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4898 else
4899 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4900 (code >> 8) & 0xFF, code & 0xFF);
4901 }
4902 else
41cbe562
KH
4903 {
4904 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4905 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4906 else
4907 c = coding->default_char;
4908 EMIT_ONE_BYTE (c);
4909 }
4ed46869 4910 }
4ed46869
KH
4911 }
4912
065e3595 4913 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4914 coding->produced_char += produced_chars;
4915 coding->produced = dst - coding->destination;
4916 return 0;
4ed46869
KH
4917}
4918
4919\f
1397dc18 4920/*** 7. C library functions ***/
4ed46869 4921
df7492f9
KH
4922/* Setup coding context CODING from information about CODING_SYSTEM.
4923 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4924 CODING_SYSTEM is invalid, signal an error. */
4ed46869 4925
ec6d2bb8 4926void
e0e989f6
KH
4927setup_coding_system (coding_system, coding)
4928 Lisp_Object coding_system;
4ed46869
KH
4929 struct coding_system *coding;
4930{
df7492f9
KH
4931 Lisp_Object attrs;
4932 Lisp_Object eol_type;
4933 Lisp_Object coding_type;
4608c386 4934 Lisp_Object val;
4ed46869 4935
df7492f9
KH
4936 if (NILP (coding_system))
4937 coding_system = Qno_conversion;
c07c8e12 4938
df7492f9 4939 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 4940
df7492f9
KH
4941 attrs = CODING_ID_ATTRS (coding->id);
4942 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 4943
df7492f9
KH
4944 coding->mode = 0;
4945 coding->head_ascii = -1;
4946 coding->common_flags
4947 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
5e5c78be
KH
4948 if (! NILP (CODING_ATTR_POST_READ (attrs)))
4949 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4950 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4951 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
4952 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4953 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 4954
df7492f9 4955 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4956 coding->max_charset_id = SCHARS (val) - 1;
4957 coding->safe_charsets = (char *) SDATA (val);
df7492f9 4958 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 4959
df7492f9
KH
4960 coding_type = CODING_ATTR_TYPE (attrs);
4961 if (EQ (coding_type, Qundecided))
d46c5b12 4962 {
df7492f9
KH
4963 coding->detector = NULL;
4964 coding->decoder = decode_coding_raw_text;
4965 coding->encoder = encode_coding_raw_text;
4966 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 4967 }
df7492f9 4968 else if (EQ (coding_type, Qiso_2022))
d46c5b12 4969 {
df7492f9
KH
4970 int i;
4971 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4972
4973 /* Invoke graphic register 0 to plane 0. */
4974 CODING_ISO_INVOCATION (coding, 0) = 0;
4975 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4976 CODING_ISO_INVOCATION (coding, 1)
4977 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4978 /* Setup the initial status of designation. */
4979 for (i = 0; i < 4; i++)
4980 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4981 /* Not single shifting initially. */
4982 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4983 /* Beginning of buffer should also be regarded as bol. */
4984 CODING_ISO_BOL (coding) = 1;
4985 coding->detector = detect_coding_iso_2022;
4986 coding->decoder = decode_coding_iso_2022;
4987 coding->encoder = encode_coding_iso_2022;
4988 if (flags & CODING_ISO_FLAG_SAFE)
4989 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 4990 coding->common_flags
df7492f9
KH
4991 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4992 | CODING_REQUIRE_FLUSHING_MASK);
4993 if (flags & CODING_ISO_FLAG_COMPOSITION)
4994 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4995 if (flags & CODING_ISO_FLAG_DESIGNATION)
4996 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4997 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4998 {
4999 setup_iso_safe_charsets (attrs);
5000 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
5001 coding->max_charset_id = SCHARS (val) - 1;
5002 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
5003 }
5004 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 5005 }
df7492f9 5006 else if (EQ (coding_type, Qcharset))
d46c5b12 5007 {
df7492f9
KH
5008 coding->detector = detect_coding_charset;
5009 coding->decoder = decode_coding_charset;
5010 coding->encoder = encode_coding_charset;
d46c5b12 5011 coding->common_flags
df7492f9 5012 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5013 }
df7492f9 5014 else if (EQ (coding_type, Qutf_8))
d46c5b12 5015 {
df7492f9
KH
5016 coding->detector = detect_coding_utf_8;
5017 coding->decoder = decode_coding_utf_8;
5018 coding->encoder = encode_coding_utf_8;
5019 coding->common_flags
5020 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5021 }
5022 else if (EQ (coding_type, Qutf_16))
5023 {
5024 val = AREF (attrs, coding_attr_utf_16_bom);
5025 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5026 : EQ (val, Qt) ? utf_16_with_bom
5027 : utf_16_without_bom);
5028 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5029 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5030 : utf_16_little_endian);
e19c3639 5031 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5032 coding->detector = detect_coding_utf_16;
5033 coding->decoder = decode_coding_utf_16;
5034 coding->encoder = encode_coding_utf_16;
5035 coding->common_flags
5036 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
5037 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5038 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5039 }
df7492f9 5040 else if (EQ (coding_type, Qccl))
4ed46869 5041 {
df7492f9
KH
5042 coding->detector = detect_coding_ccl;
5043 coding->decoder = decode_coding_ccl;
5044 coding->encoder = encode_coding_ccl;
c952af22 5045 coding->common_flags
df7492f9
KH
5046 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5047 | CODING_REQUIRE_FLUSHING_MASK);
5048 }
5049 else if (EQ (coding_type, Qemacs_mule))
5050 {
5051 coding->detector = detect_coding_emacs_mule;
5052 coding->decoder = decode_coding_emacs_mule;
5053 coding->encoder = encode_coding_emacs_mule;
c952af22 5054 coding->common_flags
df7492f9
KH
5055 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5056 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5057 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5058 {
5059 Lisp_Object tail, safe_charsets;
5060 int max_charset_id = 0;
5061
5062 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5063 tail = XCDR (tail))
5064 if (max_charset_id < XFASTINT (XCAR (tail)))
5065 max_charset_id = XFASTINT (XCAR (tail));
5066 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5067 make_number (255));
5068 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5069 tail = XCDR (tail))
8f924df7 5070 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5071 coding->max_charset_id = max_charset_id;
8f924df7 5072 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
5073 }
5074 }
5075 else if (EQ (coding_type, Qshift_jis))
5076 {
5077 coding->detector = detect_coding_sjis;
5078 coding->decoder = decode_coding_sjis;
5079 coding->encoder = encode_coding_sjis;
c952af22 5080 coding->common_flags
df7492f9
KH
5081 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5082 }
5083 else if (EQ (coding_type, Qbig5))
5084 {
5085 coding->detector = detect_coding_big5;
5086 coding->decoder = decode_coding_big5;
5087 coding->encoder = encode_coding_big5;
c952af22 5088 coding->common_flags
df7492f9
KH
5089 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5090 }
5091 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5092 {
df7492f9
KH
5093 coding->detector = NULL;
5094 coding->decoder = decode_coding_raw_text;
5095 coding->encoder = encode_coding_raw_text;
4ed46869 5096 }
4ed46869 5097
df7492f9 5098 return;
4ed46869
KH
5099}
5100
df7492f9
KH
5101/* Return raw-text or one of its subsidiaries that has the same
5102 eol_type as CODING-SYSTEM. */
ec6d2bb8 5103
df7492f9
KH
5104Lisp_Object
5105raw_text_coding_system (coding_system)
5106 Lisp_Object coding_system;
ec6d2bb8 5107{
0be8721c 5108 Lisp_Object spec, attrs;
df7492f9 5109 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5110
d3e4cb56
KH
5111 if (NILP (coding_system))
5112 return Qraw_text;
df7492f9
KH
5113 spec = CODING_SYSTEM_SPEC (coding_system);
5114 attrs = AREF (spec, 0);
ec6d2bb8 5115
df7492f9
KH
5116 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5117 return coding_system;
ec6d2bb8 5118
df7492f9
KH
5119 eol_type = AREF (spec, 2);
5120 if (VECTORP (eol_type))
5121 return Qraw_text;
5122 spec = CODING_SYSTEM_SPEC (Qraw_text);
5123 raw_text_eol_type = AREF (spec, 2);
5124 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5125 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5126 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5127}
5128
54f78171 5129
df7492f9
KH
5130/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5131 does, return one of the subsidiary that has the same eol-spec as
5132 PARENT. Otherwise, return CODING_SYSTEM. */
5133
5134Lisp_Object
5135coding_inherit_eol_type (coding_system, parent)
b74e4686 5136 Lisp_Object coding_system, parent;
54f78171 5137{
3e139625 5138 Lisp_Object spec, eol_type;
54f78171 5139
d3e4cb56
KH
5140 if (NILP (coding_system))
5141 coding_system = Qraw_text;
df7492f9 5142 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5143 eol_type = AREF (spec, 2);
d3e4cb56
KH
5144 if (VECTORP (eol_type)
5145 && ! NILP (parent))
df7492f9
KH
5146 {
5147 Lisp_Object parent_spec;
df7492f9
KH
5148 Lisp_Object parent_eol_type;
5149
5150 parent_spec
5151 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5152 parent_eol_type = AREF (parent_spec, 2);
5153 if (EQ (parent_eol_type, Qunix))
5154 coding_system = AREF (eol_type, 0);
5155 else if (EQ (parent_eol_type, Qdos))
5156 coding_system = AREF (eol_type, 1);
5157 else if (EQ (parent_eol_type, Qmac))
5158 coding_system = AREF (eol_type, 2);
54f78171 5159 }
df7492f9 5160 return coding_system;
54f78171
KH
5161}
5162
4ed46869
KH
5163/* Emacs has a mechanism to automatically detect a coding system if it
5164 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5165 it's impossible to distinguish some coding systems accurately
5166 because they use the same range of codes. So, at first, coding
5167 systems are categorized into the following categories:
5168
0ef69138 5169 o coding-category-emacs-mule
4ed46869
KH
5170
5171 The category for a coding system which has the same code range
5172 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5173 symbol) `emacs-mule' by default.
4ed46869
KH
5174
5175 o coding-category-sjis
5176
5177 The category for a coding system which has the same code range
5178 as SJIS. Assigned the coding-system (Lisp
7717c392 5179 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5180
5181 o coding-category-iso-7
5182
5183 The category for a coding system which has the same code range
7717c392 5184 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5185 shift and single shift functions. This can encode/decode all
5186 charsets. Assigned the coding-system (Lisp symbol)
5187 `iso-2022-7bit' by default.
5188
5189 o coding-category-iso-7-tight
5190
5191 Same as coding-category-iso-7 except that this can
5192 encode/decode only the specified charsets.
4ed46869
KH
5193
5194 o coding-category-iso-8-1
5195
5196 The category for a coding system which has the same code range
5197 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5198 for DIMENSION1 charset. This doesn't use any locking shift
5199 and single shift functions. Assigned the coding-system (Lisp
5200 symbol) `iso-latin-1' by default.
4ed46869
KH
5201
5202 o coding-category-iso-8-2
5203
5204 The category for a coding system which has the same code range
5205 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5206 for DIMENSION2 charset. This doesn't use any locking shift
5207 and single shift functions. Assigned the coding-system (Lisp
5208 symbol) `japanese-iso-8bit' by default.
4ed46869 5209
7717c392 5210 o coding-category-iso-7-else
4ed46869
KH
5211
5212 The category for a coding system which has the same code range
df7492f9 5213 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5214 single shift functions. Assigned the coding-system (Lisp
5215 symbol) `iso-2022-7bit-lock' by default.
5216
5217 o coding-category-iso-8-else
5218
5219 The category for a coding system which has the same code range
df7492f9 5220 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5221 single shift functions. Assigned the coding-system (Lisp
5222 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5223
5224 o coding-category-big5
5225
5226 The category for a coding system which has the same code range
5227 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5228 `cn-big5' by default.
4ed46869 5229
fa42c37f
KH
5230 o coding-category-utf-8
5231
5232 The category for a coding system which has the same code range
6e76ae91 5233 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
5234 symbol) `utf-8' by default.
5235
5236 o coding-category-utf-16-be
5237
5238 The category for a coding system in which a text has an
5239 Unicode signature (cf. Unicode Standard) in the order of BIG
5240 endian at the head. Assigned the coding-system (Lisp symbol)
5241 `utf-16-be' by default.
5242
5243 o coding-category-utf-16-le
5244
5245 The category for a coding system in which a text has an
5246 Unicode signature (cf. Unicode Standard) in the order of
5247 LITTLE endian at the head. Assigned the coding-system (Lisp
5248 symbol) `utf-16-le' by default.
5249
1397dc18
KH
5250 o coding-category-ccl
5251
5252 The category for a coding system of which encoder/decoder is
5253 written in CCL programs. The default value is nil, i.e., no
5254 coding system is assigned.
5255
4ed46869
KH
5256 o coding-category-binary
5257
5258 The category for a coding system not categorized in any of the
5259 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5260 `no-conversion' by default.
4ed46869
KH
5261
5262 Each of them is a Lisp symbol and the value is an actual
df7492f9 5263 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5264 What Emacs does actually is to detect a category of coding system.
5265 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5266 decide only one possible category, it selects a category of the
4ed46869
KH
5267 highest priority. Priorities of categories are also specified by a
5268 user in a Lisp variable `coding-category-list'.
5269
5270*/
5271
df7492f9
KH
5272#define EOL_SEEN_NONE 0
5273#define EOL_SEEN_LF 1
5274#define EOL_SEEN_CR 2
5275#define EOL_SEEN_CRLF 4
66cfb530 5276
ff0dacd7
KH
5277/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5278 SOURCE is encoded. If CATEGORY is one of
5279 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5280 two-byte, else they are encoded by one-byte.
5281
5282 Return one of EOL_SEEN_XXX. */
4ed46869 5283
bc4bc72a 5284#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5285
5286static int
89528eb3 5287detect_eol (source, src_bytes, category)
f6cbaf43 5288 const unsigned char *source;
df7492f9 5289 EMACS_INT src_bytes;
89528eb3 5290 enum coding_category category;
4ed46869 5291{
f6cbaf43 5292 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5293 unsigned char c;
df7492f9
KH
5294 int total = 0;
5295 int eol_seen = EOL_SEEN_NONE;
4ed46869 5296
89528eb3 5297 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5298 {
df7492f9 5299 int msb, lsb;
fa42c37f 5300
89528eb3
KH
5301 msb = category == (coding_category_utf_16_le
5302 | coding_category_utf_16_le_nosig);
df7492f9 5303 lsb = 1 - msb;
fa42c37f 5304
df7492f9 5305 while (src + 1 < src_end)
fa42c37f 5306 {
df7492f9
KH
5307 c = src[lsb];
5308 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5309 {
df7492f9
KH
5310 int this_eol;
5311
5312 if (c == '\n')
5313 this_eol = EOL_SEEN_LF;
5314 else if (src + 3 >= src_end
5315 || src[msb + 2] != 0
5316 || src[lsb + 2] != '\n')
5317 this_eol = EOL_SEEN_CR;
fa42c37f 5318 else
8f924df7 5319 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5320
5321 if (eol_seen == EOL_SEEN_NONE)
5322 /* This is the first end-of-line. */
5323 eol_seen = this_eol;
5324 else if (eol_seen != this_eol)
fa42c37f 5325 {
df7492f9
KH
5326 /* The found type is different from what found before. */
5327 eol_seen = EOL_SEEN_LF;
5328 break;
fa42c37f 5329 }
df7492f9
KH
5330 if (++total == MAX_EOL_CHECK_COUNT)
5331 break;
fa42c37f 5332 }
df7492f9 5333 src += 2;
fa42c37f 5334 }
bcf26d6a 5335 }
d46c5b12 5336 else
c4825358 5337 {
df7492f9 5338 while (src < src_end)
27901516 5339 {
df7492f9
KH
5340 c = *src++;
5341 if (c == '\n' || c == '\r')
5342 {
5343 int this_eol;
d46c5b12 5344
df7492f9
KH
5345 if (c == '\n')
5346 this_eol = EOL_SEEN_LF;
5347 else if (src >= src_end || *src != '\n')
5348 this_eol = EOL_SEEN_CR;
5349 else
5350 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5351
df7492f9
KH
5352 if (eol_seen == EOL_SEEN_NONE)
5353 /* This is the first end-of-line. */
5354 eol_seen = this_eol;
5355 else if (eol_seen != this_eol)
5356 {
5357 /* The found type is different from what found before. */
5358 eol_seen = EOL_SEEN_LF;
5359 break;
5360 }
5361 if (++total == MAX_EOL_CHECK_COUNT)
5362 break;
5363 }
5364 }
73be902c 5365 }
df7492f9 5366 return eol_seen;
73be902c
KH
5367}
5368
df7492f9 5369
24a73b0a 5370static Lisp_Object
df7492f9
KH
5371adjust_coding_eol_type (coding, eol_seen)
5372 struct coding_system *coding;
5373 int eol_seen;
73be902c 5374{
0be8721c 5375 Lisp_Object eol_type;
8f924df7 5376
df7492f9
KH
5377 eol_type = CODING_ID_EOL_TYPE (coding->id);
5378 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
5379 {
5380 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5381 eol_type = Qunix;
5382 }
6f197c07 5383 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
5384 {
5385 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5386 eol_type = Qdos;
5387 }
6f197c07 5388 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
5389 {
5390 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5391 eol_type = Qmac;
5392 }
5393 return eol_type;
d46c5b12 5394}
4ed46869 5395
df7492f9
KH
5396/* Detect how a text specified in CODING is encoded. If a coding
5397 system is detected, update fields of CODING by the detected coding
5398 system. */
0a28aafb 5399
df7492f9
KH
5400void
5401detect_coding (coding)
d46c5b12 5402 struct coding_system *coding;
d46c5b12 5403{
8f924df7 5404 const unsigned char *src, *src_end;
d46c5b12 5405
df7492f9
KH
5406 coding->consumed = coding->consumed_char = 0;
5407 coding->produced = coding->produced_char = 0;
5408 coding_set_source (coding);
1c3478b0 5409
df7492f9 5410 src_end = coding->source + coding->src_bytes;
1c3478b0 5411
df7492f9
KH
5412 /* If we have not yet decided the text encoding type, detect it
5413 now. */
5414 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5415 {
df7492f9
KH
5416 int c, i;
5417
24a73b0a 5418 for (i = 0, src = coding->source; src < src_end; i++, src++)
d46c5b12 5419 {
df7492f9 5420 c = *src;
75e2a253 5421 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
df7492f9
KH
5422 || c == ISO_CODE_SI
5423 || c == ISO_CODE_SO)))
5424 break;
d46c5b12 5425 }
df7492f9
KH
5426 coding->head_ascii = src - (coding->source + coding->consumed);
5427
5428 if (coding->head_ascii < coding->src_bytes)
d46c5b12 5429 {
ff0dacd7
KH
5430 struct coding_detection_info detect_info;
5431 enum coding_category category;
5432 struct coding_system *this;
df7492f9 5433
ff0dacd7 5434 detect_info.checked = detect_info.found = detect_info.rejected = 0;
df7492f9 5435 for (i = 0; i < coding_category_raw_text; i++)
d46c5b12 5436 {
ff0dacd7
KH
5437 category = coding_priorities[i];
5438 this = coding_categories + category;
df7492f9 5439 if (this->id < 0)
fa42c37f 5440 {
df7492f9 5441 /* No coding system of this category is defined. */
ff0dacd7 5442 detect_info.rejected |= (1 << category);
fa42c37f 5443 }
ff0dacd7 5444 else if (category >= coding_category_raw_text)
89528eb3 5445 continue;
ff0dacd7 5446 else if (detect_info.checked & (1 << category))
fa42c37f 5447 {
ff0dacd7
KH
5448 if (detect_info.found & (1 << category))
5449 break;
fa42c37f 5450 }
ff0dacd7
KH
5451 else if ((*(this->detector)) (coding, &detect_info)
5452 && detect_info.found & (1 << category))
24a73b0a
KH
5453 {
5454 if (category == coding_category_utf_16_auto)
5455 {
5456 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5457 category = coding_category_utf_16_le;
5458 else
5459 category = coding_category_utf_16_be;
5460 }
5461 break;
5462 }
d46c5b12 5463 }
ff0dacd7
KH
5464 if (i < coding_category_raw_text)
5465 setup_coding_system (CODING_ID_NAME (this->id), coding);
5466 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5467 setup_coding_system (Qraw_text, coding);
ff0dacd7 5468 else if (detect_info.rejected)
df7492f9 5469 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5470 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5471 {
5472 this = coding_categories + coding_priorities[i];
5473 setup_coding_system (CODING_ID_NAME (this->id), coding);
5474 break;
5475 }
d46c5b12 5476 }
b73bfc1c 5477 }
24a73b0a
KH
5478 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5479 == coding_category_utf_16_auto)
b49a1807
KH
5480 {
5481 Lisp_Object coding_systems;
5482 struct coding_detection_info detect_info;
5483
5484 coding_systems
5485 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5486 detect_info.found = detect_info.rejected = 0;
5487 if (CONSP (coding_systems)
24a73b0a 5488 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
5489 {
5490 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5491 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 5492 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
5493 setup_coding_system (XCDR (coding_systems), coding);
5494 }
5495 }
4ed46869 5496}
4ed46869 5497
d46c5b12 5498
aaaf0b1e 5499static void
df7492f9 5500decode_eol (coding)
aaaf0b1e 5501 struct coding_system *coding;
aaaf0b1e 5502{
24a73b0a
KH
5503 Lisp_Object eol_type;
5504 unsigned char *p, *pbeg, *pend;
5505
5506 eol_type = CODING_ID_EOL_TYPE (coding->id);
5507 if (EQ (eol_type, Qunix))
5508 return;
5509
5510 if (NILP (coding->dst_object))
5511 pbeg = coding->destination;
5512 else
5513 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5514 pend = pbeg + coding->produced;
5515
5516 if (VECTORP (eol_type))
aaaf0b1e 5517 {
df7492f9 5518 int eol_seen = EOL_SEEN_NONE;
4ed46869 5519
24a73b0a 5520 for (p = pbeg; p < pend; p++)
aaaf0b1e 5521 {
df7492f9
KH
5522 if (*p == '\n')
5523 eol_seen |= EOL_SEEN_LF;
5524 else if (*p == '\r')
aaaf0b1e 5525 {
df7492f9 5526 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5527 {
df7492f9
KH
5528 eol_seen |= EOL_SEEN_CRLF;
5529 p++;
aaaf0b1e 5530 }
aaaf0b1e 5531 else
df7492f9 5532 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5533 }
aaaf0b1e 5534 }
24a73b0a
KH
5535 if (eol_seen != EOL_SEEN_NONE
5536 && eol_seen != EOL_SEEN_LF
5537 && eol_seen != EOL_SEEN_CRLF
5538 && eol_seen != EOL_SEEN_CR)
5539 eol_seen = EOL_SEEN_LF;
df7492f9 5540 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 5541 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5542 }
d46c5b12 5543
24a73b0a 5544 if (EQ (eol_type, Qmac))
27901516 5545 {
24a73b0a 5546 for (p = pbeg; p < pend; p++)
df7492f9
KH
5547 if (*p == '\r')
5548 *p = '\n';
4ed46869 5549 }
24a73b0a 5550 else if (EQ (eol_type, Qdos))
df7492f9 5551 {
24a73b0a 5552 int n = 0;
b73bfc1c 5553
24a73b0a
KH
5554 if (NILP (coding->dst_object))
5555 {
5556 for (p = pend - 2; p >= pbeg; p--)
5557 if (*p == '\r')
5558 {
5559 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5560 n++;
5561 }
5562 }
5563 else
5564 {
5565 for (p = pend - 2; p >= pbeg; p--)
5566 if (*p == '\r')
5567 {
5568 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5569 int pos = BYTE_TO_CHAR (pos_byte);
5570
5571 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5572 n++;
5573 }
5574 }
5575 coding->produced -= n;
5576 coding->produced_char -= n;
aaaf0b1e 5577 }
4ed46869
KH
5578}
5579
7d64c6ad 5580
a6f87d34
KH
5581/* Return a translation table (or list of them) from coding system
5582 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5583 decoding (ENCODEP is zero). */
7d64c6ad 5584
e6a54062 5585static Lisp_Object
09ee6fdd
KH
5586get_translation_table (attrs, encodep, max_lookup)
5587 Lisp_Object attrs;
5588 int encodep, *max_lookup;
7d64c6ad
KH
5589{
5590 Lisp_Object standard, translation_table;
09ee6fdd 5591 Lisp_Object val;
7d64c6ad
KH
5592
5593 if (encodep)
5594 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5595 standard = Vstandard_translation_table_for_encode;
5596 else
5597 translation_table = CODING_ATTR_DECODE_TBL (attrs),
5598 standard = Vstandard_translation_table_for_decode;
7d64c6ad 5599 if (NILP (translation_table))
09ee6fdd
KH
5600 translation_table = standard;
5601 else
a6f87d34 5602 {
09ee6fdd
KH
5603 if (SYMBOLP (translation_table))
5604 translation_table = Fget (translation_table, Qtranslation_table);
5605 else if (CONSP (translation_table))
5606 {
5607 translation_table = Fcopy_sequence (translation_table);
5608 for (val = translation_table; CONSP (val); val = XCDR (val))
5609 if (SYMBOLP (XCAR (val)))
5610 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5611 }
5612 if (CHAR_TABLE_P (standard))
5613 {
5614 if (CONSP (translation_table))
5615 translation_table = nconc2 (translation_table,
5616 Fcons (standard, Qnil));
5617 else
5618 translation_table = Fcons (translation_table,
5619 Fcons (standard, Qnil));
5620 }
a6f87d34 5621 }
2170c8f0
KH
5622
5623 if (max_lookup)
09ee6fdd 5624 {
2170c8f0
KH
5625 *max_lookup = 1;
5626 if (CHAR_TABLE_P (translation_table)
5627 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5628 {
5629 val = XCHAR_TABLE (translation_table)->extras[1];
5630 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5631 *max_lookup = XFASTINT (val);
5632 }
5633 else if (CONSP (translation_table))
5634 {
5635 Lisp_Object tail, val;
09ee6fdd 5636
2170c8f0
KH
5637 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5638 if (CHAR_TABLE_P (XCAR (tail))
5639 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5640 {
5641 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5642 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5643 *max_lookup = XFASTINT (val);
5644 }
5645 }
a6f87d34 5646 }
7d64c6ad
KH
5647 return translation_table;
5648}
5649
/* Look up character C in TABLE, which is a char table or a list of
   char tables, and set TRANS to the result.

   Whenever a lookup yields a character, C is replaced by it and TRANS
   is reset to nil.  For a list, the remaining tables are then tried
   with the new C, and the walk stops at the first lookup that yields a
   non-nil value that is not a character, which is left in TRANS.

   C and TRANS must be lvalues; all arguments are evaluated more than
   once.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    trans = Qnil;                                                       \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        trans = CHAR_TABLE_REF (table, c);                              \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            c = XFASTINT (trans);                                       \
            trans = Qnil;                                               \
          }                                                             \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object tail;                                               \
                                                                        \
        for (tail = table; CONSP (tail); tail = XCDR (tail))            \
          if (CHAR_TABLE_P (XCAR (tail)))                               \
            {                                                           \
              trans = CHAR_TABLE_REF (XCAR (tail), c);                  \
              if (CHARACTERP (trans))                                   \
                {                                                       \
                  c = XFASTINT (trans);                                 \
                  trans = Qnil;                                         \
                }                                                       \
              else if (! NILP (trans))                                  \
                break;                                                  \
            }                                                           \
      }                                                                 \
  } while (0)
5674
7d64c6ad 5675
69a80ea3
KH
5676static Lisp_Object
5677get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5678 Lisp_Object val;
5679 int *buf, *buf_end;
5680 int last_block;
5681 int *from_nchars, *to_nchars;
5682{
433f7f87
KH
5683 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5684 [TO-CHAR ...]. */
69a80ea3
KH
5685 if (CONSP (val))
5686 {
433f7f87 5687 Lisp_Object from, tail;
69a80ea3
KH
5688 int i, len;
5689
433f7f87 5690 for (tail = val; CONSP (tail); tail = XCDR (tail))
69a80ea3 5691 {
433f7f87
KH
5692 val = XCAR (tail);
5693 from = XCAR (val);
5694 len = ASIZE (from);
5695 for (i = 0; i < len; i++)
5696 {
5697 if (buf + i == buf_end)
5698 {
5699 if (! last_block)
5700 return Qt;
5701 break;
5702 }
5703 if (XINT (AREF (from, i)) != buf[i])
5704 break;
5705 }
5706 if (i == len)
5707 {
5708 val = XCDR (val);
5709 *from_nchars = len;
5710 break;
5711 }
69a80ea3 5712 }
433f7f87
KH
5713 if (! CONSP (tail))
5714 return Qnil;
69a80ea3
KH
5715 }
5716 if (VECTORP (val))
5717 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5718 else
5719 *buf = XINT (val);
5720 return val;
5721}
5722
5723
d46c5b12 5724static int
69a80ea3 5725produce_chars (coding, translation_table, last_block)
df7492f9 5726 struct coding_system *coding;
69a80ea3
KH
5727 Lisp_Object translation_table;
5728 int last_block;
4ed46869 5729{
df7492f9
KH
5730 unsigned char *dst = coding->destination + coding->produced;
5731 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5732 int produced;
5733 int produced_chars = 0;
69a80ea3 5734 int carryover = 0;
4ed46869 5735
df7492f9 5736 if (! coding->chars_at_source)
4ed46869 5737 {
df7492f9 5738 /* Characters are in coding->charbuf. */
fba4576f
AS
5739 int *buf = coding->charbuf;
5740 int *buf_end = buf + coding->charbuf_used;
4ed46869 5741
df7492f9
KH
5742 if (BUFFERP (coding->src_object)
5743 && EQ (coding->src_object, coding->dst_object))
8f924df7 5744 dst_end = ((unsigned char *) coding->source) + coding->consumed;
4ed46869 5745
df7492f9 5746 while (buf < buf_end)
4ed46869 5747 {
69a80ea3 5748 int c = *buf, i;
bc4bc72a 5749
df7492f9
KH
5750 if (c >= 0)
5751 {
69a80ea3
KH
5752 int from_nchars = 1, to_nchars = 1;
5753 Lisp_Object trans = Qnil;
5754
09ee6fdd 5755 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 5756 if (! NILP (trans))
69a80ea3
KH
5757 {
5758 trans = get_translation (trans, buf, buf_end, last_block,
5759 &from_nchars, &to_nchars);
5760 if (EQ (trans, Qt))
5761 break;
5762 c = *buf;
5763 }
5764
5765 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5766 {
5767 dst = alloc_destination (coding,
5768 buf_end - buf
5769 + MAX_MULTIBYTE_LENGTH * to_nchars,
5770 dst);
5771 dst_end = coding->destination + coding->dst_bytes;
5772 }
5773
433f7f87 5774 for (i = 0; i < to_nchars; i++)
69a80ea3 5775 {
433f7f87
KH
5776 if (i > 0)
5777 c = XINT (AREF (trans, i));
69a80ea3
KH
5778 if (coding->dst_multibyte
5779 || ! CHAR_BYTE8_P (c))
5780 CHAR_STRING_ADVANCE (c, dst);
5781 else
5782 *dst++ = CHAR_TO_BYTE8 (c);
5783 }
5784 produced_chars += to_nchars;
5785 *buf++ = to_nchars;
5786 while (--from_nchars > 0)
5787 *buf++ = 0;
d46c5b12 5788 }
df7492f9 5789 else
69a80ea3
KH
5790 /* This is an annotation datum. (-C) is the length. */
5791 buf += -c;
4ed46869 5792 }
69a80ea3 5793 carryover = buf_end - buf;
4ed46869 5794 }
fa42c37f 5795 else
fa42c37f 5796 {
8f924df7
KH
5797 const unsigned char *src = coding->source;
5798 const unsigned char *src_end = src + coding->src_bytes;
df7492f9 5799 Lisp_Object eol_type;
fa42c37f 5800
df7492f9 5801 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5802
df7492f9 5803 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 5804 {
df7492f9 5805 if (coding->src_multibyte)
fa42c37f 5806 {
71c81426 5807 int multibytep = 1;
df7492f9 5808 int consumed_chars;
d46c5b12 5809
df7492f9
KH
5810 while (1)
5811 {
8f924df7 5812 const unsigned char *src_base = src;
df7492f9 5813 int c;
b73bfc1c 5814
df7492f9
KH
5815 ONE_MORE_BYTE (c);
5816 if (c == '\r')
5817 {
5818 if (EQ (eol_type, Qdos))
5819 {
98725083
KH
5820 if (src == src_end)
5821 {
065e3595
KH
5822 record_conversion_result
5823 (coding, CODING_RESULT_INSUFFICIENT_SRC);
98725083
KH
5824 goto no_more_source;
5825 }
5826 if (*src == '\n')
df7492f9
KH
5827 c = *src++;
5828 }
5829 else if (EQ (eol_type, Qmac))
5830 c = '\n';
5831 }
5832 if (dst == dst_end)
5833 {
2c78b7e1 5834 coding->consumed = src - coding->source;
b73bfc1c 5835
2c78b7e1 5836 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5837 dst_end = (unsigned char *) src;
2c78b7e1
KH
5838 if (dst == dst_end)
5839 {
5840 dst = alloc_destination (coding, src_end - src + 1,
5841 dst);
5842 dst_end = coding->destination + coding->dst_bytes;
5843 coding_set_source (coding);
5844 src = coding->source + coding->consumed;
5845 src_end = coding->source + coding->src_bytes;
5846 }
df7492f9
KH
5847 }
5848 *dst++ = c;
5849 produced_chars++;
5850 }
5851 no_more_source:
5852 ;
fa42c37f
KH
5853 }
5854 else
df7492f9
KH
5855 while (src < src_end)
5856 {
71c81426 5857 int multibytep = 1;
df7492f9 5858 int c = *src++;
b73bfc1c 5859
df7492f9
KH
5860 if (c == '\r')
5861 {
5862 if (EQ (eol_type, Qdos))
5863 {
5864 if (src < src_end
5865 && *src == '\n')
5866 c = *src++;
5867 }
5868 else if (EQ (eol_type, Qmac))
5869 c = '\n';
5870 }
5871 if (dst >= dst_end - 1)
5872 {
2c78b7e1 5873 coding->consumed = src - coding->source;
df7492f9 5874
2c78b7e1 5875 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5876 dst_end = (unsigned char *) src;
2c78b7e1
KH
5877 if (dst >= dst_end - 1)
5878 {
5879 dst = alloc_destination (coding, src_end - src + 2,
5880 dst);
5881 dst_end = coding->destination + coding->dst_bytes;
5882 coding_set_source (coding);
5883 src = coding->source + coding->consumed;
5884 src_end = coding->source + coding->src_bytes;
5885 }
df7492f9
KH
5886 }
5887 EMIT_ONE_BYTE (c);
5888 }
d46c5b12 5889 }
df7492f9
KH
5890 else
5891 {
5892 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 5893 {
df7492f9 5894 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5895
df7492f9 5896 if (require > 0)
fa42c37f 5897 {
df7492f9
KH
5898 EMACS_INT offset = src - coding->source;
5899
5900 dst = alloc_destination (coding, require, dst);
5901 coding_set_source (coding);
5902 src = coding->source + offset;
5903 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
5904 }
5905 }
df7492f9
KH
5906 produced_chars = coding->src_chars;
5907 while (src < src_end)
fa42c37f 5908 {
df7492f9
KH
5909 int c = *src++;
5910
5911 if (c == '\r')
5912 {
5913 if (EQ (eol_type, Qdos))
5914 {
5915 if (src < src_end
5916 && *src == '\n')
5917 c = *src++;
5918 produced_chars--;
5919 }
5920 else if (EQ (eol_type, Qmac))
5921 c = '\n';
5922 }
5923 *dst++ = c;
fa42c37f
KH
5924 }
5925 }
2c78b7e1
KH
5926 coding->consumed = coding->src_bytes;
5927 coding->consumed_char = coding->src_chars;
fa42c37f
KH
5928 }
5929
df7492f9
KH
5930 produced = dst - (coding->destination + coding->produced);
5931 if (BUFFERP (coding->dst_object))
5932 insert_from_gap (produced_chars, produced);
5933 coding->produced += produced;
5934 coding->produced_char += produced_chars;
69a80ea3 5935 return carryover;
fa42c37f
KH
5936}
5937
ff0dacd7
KH
5938/* Compose text in CODING->object according to the annotation data at
5939 CHARBUF. CHARBUF is an array:
5940 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5941 */
4ed46869 5942
df7492f9 5943static INLINE void
69a80ea3 5944produce_composition (coding, charbuf, pos)
4ed46869 5945 struct coding_system *coding;
df7492f9 5946 int *charbuf;
69a80ea3 5947 EMACS_INT pos;
4ed46869 5948{
df7492f9 5949 int len;
69a80ea3 5950 EMACS_INT to;
df7492f9 5951 enum composition_method method;
df7492f9 5952 Lisp_Object components;
fa42c37f 5953
df7492f9 5954 len = -charbuf[0];
69a80ea3 5955 to = pos + charbuf[2];
9ffd559c
KH
5956 if (to <= pos)
5957 return;
69a80ea3 5958 method = (enum composition_method) (charbuf[3]);
d46c5b12 5959
df7492f9
KH
5960 if (method == COMPOSITION_RELATIVE)
5961 components = Qnil;
9ffd559c
KH
5962 else if (method >= COMPOSITION_WITH_RULE
5963 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
d46c5b12 5964 {
df7492f9
KH
5965 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5966 int i;
b73bfc1c 5967
69a80ea3
KH
5968 len -= 4;
5969 charbuf += 4;
df7492f9 5970 for (i = 0; i < len; i++)
9ffd559c
KH
5971 {
5972 args[i] = make_number (charbuf[i]);
5973 if (args[i] < 0)
5974 return;
5975 }
df7492f9
KH
5976 components = (method == COMPOSITION_WITH_ALTCHARS
5977 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 5978 }
9ffd559c
KH
5979 else
5980 return;
69a80ea3 5981 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
5982}
5983
d46c5b12 5984
ff0dacd7
KH
5985/* Put `charset' property on text in CODING->object according to
5986 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 5987 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 5988 */
d46c5b12 5989
ff0dacd7 5990static INLINE void
69a80ea3 5991produce_charset (coding, charbuf, pos)
d46c5b12 5992 struct coding_system *coding;
ff0dacd7 5993 int *charbuf;
69a80ea3 5994 EMACS_INT pos;
d46c5b12 5995{
69a80ea3
KH
5996 EMACS_INT from = pos - charbuf[2];
5997 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 5998
69a80ea3 5999 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6000 Qcharset, CHARSET_NAME (charset),
6001 coding->dst_object);
d46c5b12
KH
6002}
6003
d46c5b12 6004
/* Initial number of `int' elements requested for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record its size in
   CODING->charbuf_size.  The request starts at CHARBUF_SIZE elements
   and is halved while alloca returns NULL and the size is above 1024.

   If no area could be obtained, record
   CODING_RESULT_INSUFFICIENT_MEM and execute `return coding->result'
   in the calling function, so this macro can be used only in
   functions returning that value.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    coding->charbuf = NULL;                                             \
    while (size > 1024)                                                 \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
d46c5b12
KH
6027
6028static void
69a80ea3 6029produce_annotation (coding, pos)
d46c5b12 6030 struct coding_system *coding;
69a80ea3 6031 EMACS_INT pos;
d46c5b12 6032{
df7492f9
KH
6033 int *charbuf = coding->charbuf;
6034 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6035
ff0dacd7
KH
6036 if (NILP (coding->dst_object))
6037 return;
d46c5b12 6038
df7492f9 6039 while (charbuf < charbuf_end)
a84f1519 6040 {
df7492f9 6041 if (*charbuf >= 0)
69a80ea3 6042 pos += *charbuf++;
d46c5b12 6043 else
d46c5b12 6044 {
df7492f9 6045 int len = -*charbuf;
ff0dacd7 6046 switch (charbuf[1])
df7492f9
KH
6047 {
6048 case CODING_ANNOTATE_COMPOSITION_MASK:
69a80ea3 6049 produce_composition (coding, charbuf, pos);
df7492f9 6050 break;
ff0dacd7 6051 case CODING_ANNOTATE_CHARSET_MASK:
69a80ea3 6052 produce_charset (coding, charbuf, pos);
ff0dacd7 6053 break;
df7492f9
KH
6054 default:
6055 abort ();
6056 }
6057 charbuf += len;
d46c5b12 6058 }
a84f1519 6059 }
d46c5b12
KH
6060}
6061
df7492f9
KH
6062/* Decode the data at CODING->src_object into CODING->dst_object.
6063 CODING->src_object is a buffer, a string, or nil.
6064 CODING->dst_object is a buffer.
d46c5b12 6065
df7492f9
KH
6066 If CODING->src_object is a buffer, it must be the current buffer.
6067 In this case, if CODING->src_pos is positive, it is a position of
6068 the source text in the buffer, otherwise, the source text is in the
6069 gap area of the buffer, and CODING->src_pos specifies the offset of
6070 the text from GPT (which must be the same as PT). If this is the
6071 same buffer as CODING->dst_object, CODING->src_pos must be
6072 negative.
d46c5b12 6073
df7492f9
KH
6074 If CODING->src_object is a string, CODING->src_pos in an index to
6075 that string.
d46c5b12 6076
df7492f9
KH
6077 If CODING->src_object is nil, CODING->source must already point to
6078 the non-relocatable memory area. In this case, CODING->src_pos is
6079 an offset from CODING->source.
73be902c 6080
df7492f9
KH
6081 The decoded data is inserted at the current point of the buffer
6082 CODING->dst_object.
6083*/
d46c5b12 6084
df7492f9
KH
6085static int
6086decode_coding (coding)
d46c5b12 6087 struct coding_system *coding;
d46c5b12 6088{
df7492f9 6089 Lisp_Object attrs;
24a73b0a 6090 Lisp_Object undo_list;
7d64c6ad 6091 Lisp_Object translation_table;
69a80ea3
KH
6092 int carryover;
6093 int i;
d46c5b12 6094
df7492f9
KH
6095 if (BUFFERP (coding->src_object)
6096 && coding->src_pos > 0
6097 && coding->src_pos < GPT
6098 && coding->src_pos + coding->src_chars > GPT)
6099 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 6100
24a73b0a 6101 undo_list = Qt;
df7492f9 6102 if (BUFFERP (coding->dst_object))
1c3478b0 6103 {
df7492f9
KH
6104 if (current_buffer != XBUFFER (coding->dst_object))
6105 set_buffer_internal (XBUFFER (coding->dst_object));
6106 if (GPT != PT)
6107 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
6108 undo_list = current_buffer->undo_list;
6109 current_buffer->undo_list = Qt;
1c3478b0
KH
6110 }
6111
df7492f9
KH
6112 coding->consumed = coding->consumed_char = 0;
6113 coding->produced = coding->produced_char = 0;
6114 coding->chars_at_source = 0;
065e3595 6115 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 6116 coding->errors = 0;
1c3478b0 6117
df7492f9
KH
6118 ALLOC_CONVERSION_WORK_AREA (coding);
6119
6120 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 6121 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 6122
69a80ea3 6123 carryover = 0;
df7492f9 6124 do
b73bfc1c 6125 {
69a80ea3
KH
6126 EMACS_INT pos = coding->dst_pos + coding->produced_char;
6127
df7492f9
KH
6128 coding_set_source (coding);
6129 coding->annotated = 0;
69a80ea3 6130 coding->charbuf_used = carryover;
df7492f9 6131 (*(coding->decoder)) (coding);
df7492f9 6132 coding_set_destination (coding);
69a80ea3 6133 carryover = produce_chars (coding, translation_table, 0);
df7492f9 6134 if (coding->annotated)
69a80ea3
KH
6135 produce_annotation (coding, pos);
6136 for (i = 0; i < carryover; i++)
6137 coding->charbuf[i]
6138 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 6139 }
df7492f9
KH
6140 while (coding->consumed < coding->src_bytes
6141 && ! coding->result);
d46c5b12 6142
69a80ea3
KH
6143 if (carryover > 0)
6144 {
6145 coding_set_destination (coding);
6146 coding->charbuf_used = carryover;
6147 produce_chars (coding, translation_table, 1);
6148 }
6149
df7492f9
KH
6150 coding->carryover_bytes = 0;
6151 if (coding->consumed < coding->src_bytes)
d46c5b12 6152 {
df7492f9 6153 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 6154 const unsigned char *src;
df7492f9
KH
6155
6156 coding_set_source (coding);
6157 coding_set_destination (coding);
6158 src = coding->source + coding->consumed;
6159
6160 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 6161 {
df7492f9
KH
6162 /* Flush out unprocessed data as binary chars. We are sure
6163 that the number of data is less than the size of
6164 coding->charbuf. */
065e3595 6165 coding->charbuf_used = 0;
df7492f9 6166 while (nbytes-- > 0)
1c3478b0 6167 {
df7492f9 6168 int c = *src++;
98725083
KH
6169
6170 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
1c3478b0 6171 }
f6cbaf43 6172 produce_chars (coding, Qnil, 1);
d46c5b12 6173 }
d46c5b12 6174 else
df7492f9
KH
6175 {
6176 /* Record unprocessed bytes in coding->carryover. We are
6177 sure that the number of data is less than the size of
6178 coding->carryover. */
6179 unsigned char *p = coding->carryover;
6180
6181 coding->carryover_bytes = nbytes;
6182 while (nbytes-- > 0)
6183 *p++ = *src++;
1c3478b0 6184 }
df7492f9 6185 coding->consumed = coding->src_bytes;
b73bfc1c 6186 }
69f76525 6187
24a73b0a
KH
6188 if (BUFFERP (coding->dst_object))
6189 {
6190 current_buffer->undo_list = undo_list;
6191 record_insert (coding->dst_pos, coding->produced_char);
6192 }
6193 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6194 decode_eol (coding);
73be902c 6195 return coding->result;
4ed46869
KH
6196}
6197
aaaf0b1e 6198
e1c23804 6199/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
6200 ending before LIMIT of CODING->src_object (buffer or string), store
6201 the data in BUF, set *STOP to a starting position of the next
6202 composition (if any) or to LIMIT, and return the address of the
6203 next element of BUF.
6204
6205 If such an annotation is not found, set *STOP to a starting
6206 position of a composition after POS (if any) or to LIMIT, and
6207 return BUF. */
6208
6209static INLINE int *
6210handle_composition_annotation (pos, limit, coding, buf, stop)
6211 EMACS_INT pos, limit;
aaaf0b1e 6212 struct coding_system *coding;
ff0dacd7
KH
6213 int *buf;
6214 EMACS_INT *stop;
aaaf0b1e 6215{
ff0dacd7
KH
6216 EMACS_INT start, end;
6217 Lisp_Object prop;
aaaf0b1e 6218
ff0dacd7
KH
6219 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6220 || end > limit)
6221 *stop = limit;
6222 else if (start > pos)
6223 *stop = start;
6224 else
aaaf0b1e 6225 {
ff0dacd7 6226 if (start == pos)
aaaf0b1e 6227 {
ff0dacd7
KH
6228 /* We found a composition. Store the corresponding
6229 annotation data in BUF. */
6230 int *head = buf;
6231 enum composition_method method = COMPOSITION_METHOD (prop);
6232 int nchars = COMPOSITION_LENGTH (prop);
6233
69a80ea3 6234 ADD_COMPOSITION_DATA (buf, nchars, method);
ff0dacd7 6235 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 6236 {
ff0dacd7
KH
6237 Lisp_Object components;
6238 int len, i, i_byte;
6239
6240 components = COMPOSITION_COMPONENTS (prop);
6241 if (VECTORP (components))
aaaf0b1e 6242 {
ff0dacd7
KH
6243 len = XVECTOR (components)->size;
6244 for (i = 0; i < len; i++)
6245 *buf++ = XINT (AREF (components, i));
aaaf0b1e 6246 }
ff0dacd7 6247 else if (STRINGP (components))
aaaf0b1e 6248 {
8f924df7 6249 len = SCHARS (components);
ff0dacd7
KH
6250 i = i_byte = 0;
6251 while (i < len)
6252 {
6253 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6254 buf++;
6255 }
6256 }
6257 else if (INTEGERP (components))
6258 {
6259 len = 1;
6260 *buf++ = XINT (components);
6261 }
6262 else if (CONSP (components))
6263 {
6264 for (len = 0; CONSP (components);
6265 len++, components = XCDR (components))
6266 *buf++ = XINT (XCAR (components));
aaaf0b1e 6267 }
aaaf0b1e 6268 else
ff0dacd7
KH
6269 abort ();
6270 *head -= len;
aaaf0b1e 6271 }
aaaf0b1e 6272 }
ff0dacd7
KH
6273
6274 if (find_composition (end, limit, &start, &end, &prop,
6275 coding->src_object)
6276 && end <= limit)
6277 *stop = start;
6278 else
6279 *stop = limit;
aaaf0b1e 6280 }
ff0dacd7
KH
6281 return buf;
6282}
6283
6284
e1c23804 6285/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
6286 CODING->src_object (buffer of string), store the data in BUF, set
6287 *STOP to the position where the value of `charset' property changes
6288 (limiting by LIMIT), and return the address of the next element of
6289 BUF.
6290
6291 If the property value is nil, set *STOP to the position where the
6292 property value is non-nil (limiting by LIMIT), and return BUF. */
6293
6294static INLINE int *
6295handle_charset_annotation (pos, limit, coding, buf, stop)
6296 EMACS_INT pos, limit;
6297 struct coding_system *coding;
6298 int *buf;
6299 EMACS_INT *stop;
6300{
6301 Lisp_Object val, next;
6302 int id;
6303
6304 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6305 if (! NILP (val) && CHARSETP (val))
6306 id = XINT (CHARSET_SYMBOL_ID (val));
6307 else
6308 id = -1;
69a80ea3 6309 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
6310 next = Fnext_single_property_change (make_number (pos), Qcharset,
6311 coding->src_object,
6312 make_number (limit));
6313 *stop = XINT (next);
6314 return buf;
6315}
6316
6317
df7492f9 6318static void
09ee6fdd 6319consume_chars (coding, translation_table, max_lookup)
df7492f9 6320 struct coding_system *coding;
433f7f87 6321 Lisp_Object translation_table;
09ee6fdd 6322 int max_lookup;
df7492f9
KH
6323{
6324 int *buf = coding->charbuf;
ff0dacd7 6325 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6326 const unsigned char *src = coding->source + coding->consumed;
4776e638 6327 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6328 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6329 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6330 int multibytep = coding->src_multibyte;
6331 Lisp_Object eol_type;
6332 int c;
ff0dacd7 6333 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 6334 int *lookup_buf = NULL;
433f7f87
KH
6335
6336 if (! NILP (translation_table))
09ee6fdd 6337 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 6338
df7492f9
KH
6339 eol_type = CODING_ID_EOL_TYPE (coding->id);
6340 if (VECTORP (eol_type))
6341 eol_type = Qunix;
88993dfd 6342
df7492f9
KH
6343 /* Note: composition handling is not yet implemented. */
6344 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6345
0b5670c9
KH
6346 if (NILP (coding->src_object))
6347 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6348 else
0b5670c9
KH
6349 {
6350 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6351 stop = stop_composition = pos;
6352 else
6353 stop = stop_composition = end_pos;
6354 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6355 stop = stop_charset = pos;
6356 else
6357 stop_charset = end_pos;
6358 }
ec6d2bb8 6359
24a73b0a 6360 /* Compensate for CRLF and conversion. */
ff0dacd7 6361 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6362 while (buf < buf_end)
aaaf0b1e 6363 {
433f7f87
KH
6364 Lisp_Object trans;
6365
df7492f9 6366 if (pos == stop)
ec6d2bb8 6367 {
df7492f9
KH
6368 if (pos == end_pos)
6369 break;
ff0dacd7
KH
6370 if (pos == stop_composition)
6371 buf = handle_composition_annotation (pos, end_pos, coding,
6372 buf, &stop_composition);
6373 if (pos == stop_charset)
6374 buf = handle_charset_annotation (pos, end_pos, coding,
6375 buf, &stop_charset);
6376 stop = (stop_composition < stop_charset
6377 ? stop_composition : stop_charset);
df7492f9
KH
6378 }
6379
6380 if (! multibytep)
4776e638 6381 {
d3e4cb56 6382 EMACS_INT bytes;
aaaf0b1e 6383
f03caae0 6384 if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
4776e638
KH
6385 c = STRING_CHAR_ADVANCE (src), pos += bytes;
6386 else
f03caae0 6387 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 6388 }
df7492f9 6389 else
4776e638 6390 c = STRING_CHAR_ADVANCE (src), pos++;
df7492f9
KH
6391 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6392 c = '\n';
6393 if (! EQ (eol_type, Qunix))
aaaf0b1e 6394 {
df7492f9 6395 if (c == '\n')
aaaf0b1e 6396 {
df7492f9
KH
6397 if (EQ (eol_type, Qdos))
6398 *buf++ = '\r';
6399 else
6400 c = '\r';
aaaf0b1e
KH
6401 }
6402 }
433f7f87 6403
e6a54062 6404 trans = Qnil;
09ee6fdd 6405 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6406 if (NILP (trans))
433f7f87
KH
6407 *buf++ = c;
6408 else
6409 {
6410 int from_nchars = 1, to_nchars = 1;
6411 int *lookup_buf_end;
6412 const unsigned char *p = src;
6413 int i;
6414
6415 lookup_buf[0] = c;
6416 for (i = 1; i < max_lookup && p < src_end; i++)
6417 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6418 lookup_buf_end = lookup_buf + i;
6419 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6420 &from_nchars, &to_nchars);
6421 if (EQ (trans, Qt)
6422 || buf + to_nchars > buf_end)
6423 break;
6424 *buf++ = *lookup_buf;
6425 for (i = 1; i < to_nchars; i++)
6426 *buf++ = XINT (AREF (trans, i));
6427 for (i = 1; i < from_nchars; i++, pos++)
6428 src += MULTIBYTE_LENGTH_NO_CHECK (src);
6429 }
aaaf0b1e 6430 }
ec6d2bb8 6431
df7492f9
KH
6432 coding->consumed = src - coding->source;
6433 coding->consumed_char = pos - coding->src_pos;
6434 coding->charbuf_used = buf - coding->charbuf;
6435 coding->chars_at_source = 0;
aaaf0b1e
KH
6436}
6437
4ed46869 6438
df7492f9
KH
6439/* Encode the text at CODING->src_object into CODING->dst_object.
6440 CODING->src_object is a buffer or a string.
6441 CODING->dst_object is a buffer or nil.
6442
6443 If CODING->src_object is a buffer, it must be the current buffer.
6444 In this case, if CODING->src_pos is positive, it is a position of
6445 the source text in the buffer, otherwise. the source text is in the
6446 gap area of the buffer, and coding->src_pos specifies the offset of
6447 the text from GPT (which must be the same as PT). If this is the
6448 same buffer as CODING->dst_object, CODING->src_pos must be
6449 negative and CODING should not have `pre-write-conversion'.
6450
6451 If CODING->src_object is a string, CODING should not have
6452 `pre-write-conversion'.
6453
6454 If CODING->dst_object is a buffer, the encoded data is inserted at
6455 the current point of that buffer.
6456
6457 If CODING->dst_object is nil, the encoded data is placed at the
6458 memory area specified by CODING->destination. */
6459
6460static int
6461encode_coding (coding)
4ed46869 6462 struct coding_system *coding;
4ed46869 6463{
df7492f9 6464 Lisp_Object attrs;
7d64c6ad 6465 Lisp_Object translation_table;
09ee6fdd 6466 int max_lookup;
9861e777 6467
df7492f9 6468 attrs = CODING_ID_ATTRS (coding->id);
09ee6fdd 6469 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 6470
df7492f9 6471 if (BUFFERP (coding->dst_object))
8844fa83 6472 {
df7492f9
KH
6473 set_buffer_internal (XBUFFER (coding->dst_object));
6474 coding->dst_multibyte
6475 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6476 }
4ed46869 6477
b73bfc1c 6478 coding->consumed = coding->consumed_char = 0;
df7492f9 6479 coding->produced = coding->produced_char = 0;
065e3595 6480 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 6481 coding->errors = 0;
b73bfc1c 6482
df7492f9 6483 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6484
df7492f9
KH
6485 do {
6486 coding_set_source (coding);
09ee6fdd 6487 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
6488 coding_set_destination (coding);
6489 (*(coding->encoder)) (coding);
6490 } while (coding->consumed_char < coding->src_chars);
6491
6492 if (BUFFERP (coding->dst_object))
6493 insert_from_gap (coding->produced_char, coding->produced);
6494
6495 return (coding->result);
ec6d2bb8
KH
6496}
6497
fb88bf2d 6498
24a73b0a
KH
6499/* Name (or base name) of work buffer for code conversion. */
6500static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 6501
24a73b0a
KH
6502/* A working buffer used by the top level conversion. Once it is
6503 created, it is never destroyed. It has the name
6504 Vcode_conversion_workbuf_name. The other working buffers are
6505 destroyed after the use is finished, and their names are modified
6506 versions of Vcode_conversion_workbuf_name. */
6507static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 6508
24a73b0a
KH
6509/* 1 iff Vcode_conversion_reused_workbuf is already in use. */
6510static int reused_workbuf_in_use;
4ed46869 6511
24a73b0a
KH
6512
6513/* Return a working buffer for code conversion. MULTIBYTE specifies the
6514 multibyteness of the returned buffer. */
b73bfc1c 6515
f6cbaf43 6516static Lisp_Object
24a73b0a 6517make_conversion_work_buffer (multibyte)
f6cbaf43 6518 int multibyte;
df7492f9 6519{
24a73b0a
KH
6520 Lisp_Object name, workbuf;
6521 struct buffer *current;
4ed46869 6522
24a73b0a 6523 if (reused_workbuf_in_use++)
065e3595
KH
6524 {
6525 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6526 workbuf = Fget_buffer_create (name);
6527 }
df7492f9 6528 else
065e3595
KH
6529 {
6530 name = Vcode_conversion_workbuf_name;
6531 workbuf = Fget_buffer_create (name);
6532 if (NILP (Vcode_conversion_reused_workbuf))
6533 Vcode_conversion_reused_workbuf = workbuf;
6534 }
24a73b0a
KH
6535 current = current_buffer;
6536 set_buffer_internal (XBUFFER (workbuf));
6537 Ferase_buffer ();
df7492f9 6538 current_buffer->undo_list = Qt;
24a73b0a 6539 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 6540 set_buffer_internal (current);
24a73b0a 6541 return workbuf;
df7492f9 6542}
d46c5b12 6543
24a73b0a 6544
4776e638 6545static Lisp_Object
24a73b0a
KH
6546code_conversion_restore (arg)
6547 Lisp_Object arg;
4776e638 6548{
24a73b0a
KH
6549 Lisp_Object current, workbuf;
6550
6551 current = XCAR (arg);
6552 workbuf = XCDR (arg);
6553 if (! NILP (workbuf))
6554 {
6555 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6556 reused_workbuf_in_use = 0;
6557 else if (! NILP (Fbuffer_live_p (workbuf)))
6558 Fkill_buffer (workbuf);
6559 }
6560 set_buffer_internal (XBUFFER (current));
4776e638
KH
6561 return Qnil;
6562}
b73bfc1c 6563
24a73b0a
KH
6564Lisp_Object
6565code_conversion_save (with_work_buf, multibyte)
4776e638 6566 int with_work_buf, multibyte;
df7492f9 6567{
24a73b0a 6568 Lisp_Object workbuf = Qnil;
b73bfc1c 6569
4776e638 6570 if (with_work_buf)
24a73b0a
KH
6571 workbuf = make_conversion_work_buffer (multibyte);
6572 record_unwind_protect (code_conversion_restore,
6573 Fcons (Fcurrent_buffer (), workbuf));
4776e638 6574 return workbuf;
df7492f9 6575}
d46c5b12 6576
df7492f9
KH
6577int
6578decode_coding_gap (coding, chars, bytes)
6579 struct coding_system *coding;
6580 EMACS_INT chars, bytes;
6581{
6582 int count = specpdl_ptr - specpdl;
5e5c78be 6583 Lisp_Object attrs;
fb88bf2d 6584
24a73b0a 6585 code_conversion_save (0, 0);
ec6d2bb8 6586
24a73b0a 6587 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6588 coding->src_chars = chars;
6589 coding->src_bytes = bytes;
6590 coding->src_pos = -chars;
6591 coding->src_pos_byte = -bytes;
6592 coding->src_multibyte = chars < bytes;
24a73b0a 6593 coding->dst_object = coding->src_object;
df7492f9
KH
6594 coding->dst_pos = PT;
6595 coding->dst_pos_byte = PT_BYTE;
71c81426 6596 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6597 coding->mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6598
df7492f9
KH
6599 if (CODING_REQUIRE_DETECTION (coding))
6600 detect_coding (coding);
8f924df7 6601
df7492f9 6602 decode_coding (coding);
d46c5b12 6603
5e5c78be
KH
6604 attrs = CODING_ID_ATTRS (coding->id);
6605 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6606 {
5e5c78be
KH
6607 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6608 Lisp_Object val;
6609
6610 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6611 val = call1 (CODING_ATTR_POST_READ (attrs),
6612 make_number (coding->produced_char));
5e5c78be
KH
6613 CHECK_NATNUM (val);
6614 coding->produced_char += Z - prev_Z;
6615 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6616 }
4ed46869 6617
df7492f9 6618 unbind_to (count, Qnil);
b73bfc1c
KH
6619 return coding->result;
6620}
52d41803 6621
4ed46869 6622int
df7492f9 6623encode_coding_gap (coding, chars, bytes)
4ed46869 6624 struct coding_system *coding;
df7492f9 6625 EMACS_INT chars, bytes;
4ed46869 6626{
df7492f9 6627 int count = specpdl_ptr - specpdl;
4ed46869 6628
24a73b0a 6629 code_conversion_save (0, 0);
4ed46869 6630
24a73b0a 6631 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6632 coding->src_chars = chars;
6633 coding->src_bytes = bytes;
6634 coding->src_pos = -chars;
6635 coding->src_pos_byte = -bytes;
6636 coding->src_multibyte = chars < bytes;
6637 coding->dst_object = coding->src_object;
6638 coding->dst_pos = PT;
6639 coding->dst_pos_byte = PT_BYTE;
4ed46869 6640
df7492f9 6641 encode_coding (coding);
b73bfc1c 6642
df7492f9
KH
6643 unbind_to (count, Qnil);
6644 return coding->result;
6645}
4ed46869 6646
d46c5b12 6647
df7492f9
KH
6648/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6649 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6650
df7492f9 6651 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 6652
df7492f9
KH
6653 If it is a buffer, the text is at point of the buffer. FROM and TO
6654 are positions in the buffer.
b73bfc1c 6655
df7492f9
KH
6656 If it is a string, the text is at the beginning of the string.
6657 FROM and TO are indices to the string.
4ed46869 6658
df7492f9
KH
6659 If it is nil, the text is at coding->source. FROM and TO are
6660 indices to coding->source.
bb10be8b 6661
df7492f9 6662 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 6663
df7492f9
KH
6664 If it is a buffer, the decoded text is inserted at point of the
6665 buffer. If the buffer is the same as SRC_OBJECT, the source text
6666 is deleted.
4ed46869 6667
df7492f9
KH
6668 If it is Qt, a string is made from the decoded text, and
6669 set in CODING->dst_object.
d46c5b12 6670
df7492f9 6671 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6672 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6673 CODING->destination by xmalloc. If the decoded text is longer than
6674 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6675 */
d46c5b12 6676
df7492f9
KH
6677void
6678decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6679 dst_object)
d46c5b12 6680 struct coding_system *coding;
df7492f9
KH
6681 Lisp_Object src_object;
6682 EMACS_INT from, from_byte, to, to_byte;
6683 Lisp_Object dst_object;
d46c5b12 6684{
df7492f9
KH
6685 int count = specpdl_ptr - specpdl;
6686 unsigned char *destination;
6687 EMACS_INT dst_bytes;
6688 EMACS_INT chars = to - from;
6689 EMACS_INT bytes = to_byte - from_byte;
6690 Lisp_Object attrs;
4776e638
KH
6691 Lisp_Object buffer;
6692 int saved_pt = -1, saved_pt_byte;
d46c5b12 6693
4776e638 6694 buffer = Fcurrent_buffer ();
93dec019 6695
df7492f9 6696 if (NILP (dst_object))
d46c5b12 6697 {
df7492f9
KH
6698 destination = coding->destination;
6699 dst_bytes = coding->dst_bytes;
d46c5b12 6700 }
93dec019 6701
df7492f9
KH
6702 coding->src_object = src_object;
6703 coding->src_chars = chars;
6704 coding->src_bytes = bytes;
6705 coding->src_multibyte = chars < bytes;
70ad9fc4 6706
df7492f9 6707 if (STRINGP (src_object))
d46c5b12 6708 {
df7492f9
KH
6709 coding->src_pos = from;
6710 coding->src_pos_byte = from_byte;
d46c5b12 6711 }
df7492f9 6712 else if (BUFFERP (src_object))
88993dfd 6713 {
df7492f9
KH
6714 set_buffer_internal (XBUFFER (src_object));
6715 if (from != GPT)
6716 move_gap_both (from, from_byte);
6717 if (EQ (src_object, dst_object))
fb88bf2d 6718 {
4776e638 6719 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6720 TEMP_SET_PT_BOTH (from, from_byte);
6721 del_range_both (from, from_byte, to, to_byte, 1);
6722 coding->src_pos = -chars;
6723 coding->src_pos_byte = -bytes;
fb88bf2d 6724 }
df7492f9 6725 else
fb88bf2d 6726 {
df7492f9
KH
6727 coding->src_pos = from;
6728 coding->src_pos_byte = from_byte;
fb88bf2d 6729 }
88993dfd
KH
6730 }
6731
df7492f9
KH
6732 if (CODING_REQUIRE_DETECTION (coding))
6733 detect_coding (coding);
6734 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 6735
2cb26057
KH
6736 if (EQ (dst_object, Qt)
6737 || (! NILP (CODING_ATTR_POST_READ (attrs))
6738 && NILP (dst_object)))
b73bfc1c 6739 {
24a73b0a 6740 coding->dst_object = code_conversion_save (1, 1);
df7492f9
KH
6741 coding->dst_pos = BEG;
6742 coding->dst_pos_byte = BEG_BYTE;
6743 coding->dst_multibyte = 1;
b73bfc1c 6744 }
df7492f9 6745 else if (BUFFERP (dst_object))
d46c5b12 6746 {
24a73b0a 6747 code_conversion_save (0, 0);
df7492f9
KH
6748 coding->dst_object = dst_object;
6749 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6750 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6751 coding->dst_multibyte
6752 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
6753 }
6754 else
6755 {
24a73b0a 6756 code_conversion_save (0, 0);
df7492f9
KH
6757 coding->dst_object = Qnil;
6758 coding->dst_multibyte = 1;
d46c5b12
KH
6759 }
6760
df7492f9 6761 decode_coding (coding);
fa46990e 6762
df7492f9
KH
6763 if (BUFFERP (coding->dst_object))
6764 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 6765
df7492f9 6766 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6767 {
df7492f9
KH
6768 struct gcpro gcpro1, gcpro2;
6769 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6770 Lisp_Object val;
d46c5b12 6771
c0cc7f7f 6772 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6773 GCPRO2 (coding->src_object, coding->dst_object);
6774 val = call1 (CODING_ATTR_POST_READ (attrs),
6775 make_number (coding->produced_char));
6776 UNGCPRO;
6777 CHECK_NATNUM (val);
6778 coding->produced_char += Z - prev_Z;
6779 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6780 }
de79a6a5 6781
df7492f9 6782 if (EQ (dst_object, Qt))
ec6d2bb8 6783 {
df7492f9
KH
6784 coding->dst_object = Fbuffer_string ();
6785 }
6786 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6787 {
6788 set_buffer_internal (XBUFFER (coding->dst_object));
6789 if (dst_bytes < coding->produced)
6790 {
6791 destination
6792 = (unsigned char *) xrealloc (destination, coding->produced);
6793 if (! destination)
6794 {
065e3595
KH
6795 record_conversion_result (coding,
6796 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
6797 unbind_to (count, Qnil);
6798 return;
6799 }
6800 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6801 move_gap_both (BEGV, BEGV_BYTE);
6802 bcopy (BEGV_ADDR, destination, coding->produced);
6803 coding->destination = destination;
d46c5b12 6804 }
ec6d2bb8 6805 }
b73bfc1c 6806
4776e638
KH
6807 if (saved_pt >= 0)
6808 {
6809 /* This is the case of:
6810 (BUFFERP (src_object) && EQ (src_object, dst_object))
6811 As we have moved PT while replacing the original buffer
6812 contents, we must recover it now. */
6813 set_buffer_internal (XBUFFER (src_object));
6814 if (saved_pt < from)
6815 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6816 else if (saved_pt < from + chars)
6817 TEMP_SET_PT_BOTH (from, from_byte);
6818 else if (! NILP (current_buffer->enable_multibyte_characters))
6819 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6820 saved_pt_byte + (coding->produced - bytes));
6821 else
6822 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6823 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6824 }
4776e638 6825
065e3595 6826 unbind_to (count, coding->dst_object);
d46c5b12
KH
6827}
6828
d46c5b12 6829
df7492f9
KH
6830void
6831encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6832 dst_object)
d46c5b12 6833 struct coding_system *coding;
df7492f9
KH
6834 Lisp_Object src_object;
6835 EMACS_INT from, from_byte, to, to_byte;
6836 Lisp_Object dst_object;
d46c5b12 6837{
b73bfc1c 6838 int count = specpdl_ptr - specpdl;
df7492f9
KH
6839 EMACS_INT chars = to - from;
6840 EMACS_INT bytes = to_byte - from_byte;
6841 Lisp_Object attrs;
4776e638
KH
6842 Lisp_Object buffer;
6843 int saved_pt = -1, saved_pt_byte;
df7492f9 6844
4776e638 6845 buffer = Fcurrent_buffer ();
df7492f9
KH
6846
6847 coding->src_object = src_object;
6848 coding->src_chars = chars;
6849 coding->src_bytes = bytes;
6850 coding->src_multibyte = chars < bytes;
6851
6852 attrs = CODING_ID_ATTRS (coding->id);
6853
6854 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6855 {
24a73b0a 6856 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
6857 set_buffer_internal (XBUFFER (coding->src_object));
6858 if (STRINGP (src_object))
6859 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6860 else if (BUFFERP (src_object))
6861 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6862 else
6863 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 6864
df7492f9
KH
6865 if (EQ (src_object, dst_object))
6866 {
6867 set_buffer_internal (XBUFFER (src_object));
4776e638 6868 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6869 del_range_both (from, from_byte, to, to_byte, 1);
6870 set_buffer_internal (XBUFFER (coding->src_object));
6871 }
6872
ac87bbef
KH
6873 call2 (CODING_ATTR_PRE_WRITE (attrs),
6874 make_number (BEG), make_number (Z));
6875 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6876 if (BEG != GPT)
6877 move_gap_both (BEG, BEG_BYTE);
6878 coding->src_chars = Z - BEG;
6879 coding->src_bytes = Z_BYTE - BEG_BYTE;
6880 coding->src_pos = BEG;
6881 coding->src_pos_byte = BEG_BYTE;
6882 coding->src_multibyte = Z < Z_BYTE;
6883 }
6884 else if (STRINGP (src_object))
d46c5b12 6885 {
24a73b0a 6886 code_conversion_save (0, 0);
df7492f9
KH
6887 coding->src_pos = from;
6888 coding->src_pos_byte = from_byte;
b73bfc1c 6889 }
df7492f9 6890 else if (BUFFERP (src_object))
b73bfc1c 6891 {
24a73b0a 6892 code_conversion_save (0, 0);
df7492f9 6893 set_buffer_internal (XBUFFER (src_object));
df7492f9 6894 if (EQ (src_object, dst_object))
d46c5b12 6895 {
4776e638 6896 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
6897 coding->src_object = del_range_1 (from, to, 1, 1);
6898 coding->src_pos = 0;
6899 coding->src_pos_byte = 0;
d46c5b12 6900 }
df7492f9 6901 else
d46c5b12 6902 {
ff0dacd7
KH
6903 if (from < GPT && to >= GPT)
6904 move_gap_both (from, from_byte);
df7492f9
KH
6905 coding->src_pos = from;
6906 coding->src_pos_byte = from_byte;
d46c5b12 6907 }
d46c5b12 6908 }
4776e638 6909 else
24a73b0a 6910 code_conversion_save (0, 0);
d46c5b12 6911
df7492f9 6912 if (BUFFERP (dst_object))
88993dfd 6913 {
df7492f9 6914 coding->dst_object = dst_object;
28f67a95
KH
6915 if (EQ (src_object, dst_object))
6916 {
6917 coding->dst_pos = from;
6918 coding->dst_pos_byte = from_byte;
6919 }
6920 else
6921 {
6922 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6923 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6924 }
df7492f9
KH
6925 coding->dst_multibyte
6926 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 6927 }
df7492f9 6928 else if (EQ (dst_object, Qt))
d46c5b12 6929 {
df7492f9 6930 coding->dst_object = Qnil;
df7492f9 6931 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6932 if (coding->dst_bytes == 0)
6933 coding->dst_bytes = 1;
6934 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6935 coding->dst_multibyte = 0;
d46c5b12
KH
6936 }
6937 else
6938 {
df7492f9
KH
6939 coding->dst_object = Qnil;
6940 coding->dst_multibyte = 0;
d46c5b12
KH
6941 }
6942
df7492f9 6943 encode_coding (coding);
d46c5b12 6944
df7492f9 6945 if (EQ (dst_object, Qt))
d46c5b12 6946 {
df7492f9
KH
6947 if (BUFFERP (coding->dst_object))
6948 coding->dst_object = Fbuffer_string ();
6949 else
d46c5b12 6950 {
df7492f9
KH
6951 coding->dst_object
6952 = make_unibyte_string ((char *) coding->destination,
6953 coding->produced);
6954 xfree (coding->destination);
d46c5b12 6955 }
4ed46869 6956 }
d46c5b12 6957
4776e638
KH
6958 if (saved_pt >= 0)
6959 {
6960 /* This is the case of:
6961 (BUFFERP (src_object) && EQ (src_object, dst_object))
6962 As we have moved PT while replacing the original buffer
6963 contents, we must recover it now. */
6964 set_buffer_internal (XBUFFER (src_object));
6965 if (saved_pt < from)
6966 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6967 else if (saved_pt < from + chars)
6968 TEMP_SET_PT_BOTH (from, from_byte);
6969 else if (! NILP (current_buffer->enable_multibyte_characters))
6970 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6971 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6972 else
4776e638
KH
6973 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6974 saved_pt_byte + (coding->produced - bytes));
6975 }
6976
df7492f9 6977 unbind_to (count, Qnil);
b73bfc1c
KH
6978}
6979
df7492f9 6980
b73bfc1c 6981Lisp_Object
df7492f9 6982preferred_coding_system ()
b73bfc1c 6983{
df7492f9 6984 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6985
df7492f9 6986 return CODING_ID_NAME (id);
4ed46869
KH
6987}
6988
6989\f
6990#ifdef emacs
1397dc18 6991/*** 11. Emacs Lisp library functions ***/
4ed46869 6992
4ed46869 6993DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6994 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6995See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6996about coding-system objects. */)
6997 (obj)
4ed46869
KH
6998 Lisp_Object obj;
6999{
df7492f9 7000 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
7001}
7002
9d991de8
RS
7003DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7004 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
7005 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
7006 (prompt)
4ed46869
KH
7007 Lisp_Object prompt;
7008{
e0e989f6 7009 Lisp_Object val;
9d991de8
RS
7010 do
7011 {
4608c386
KH
7012 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7013 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 7014 }
8f924df7 7015 while (SCHARS (val) == 0);
e0e989f6 7016 return (Fintern (val, Qnil));
4ed46869
KH
7017}
7018
9b787f3e 7019DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
7020 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7021If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
7022 (prompt, default_coding_system)
9b787f3e 7023 Lisp_Object prompt, default_coding_system;
4ed46869 7024{
f44d27ce 7025 Lisp_Object val;
9b787f3e 7026 if (SYMBOLP (default_coding_system))
a3181084 7027 XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
4608c386 7028 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
7029 Qt, Qnil, Qcoding_system_history,
7030 default_coding_system, Qnil);
8f924df7 7031 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
7032}
7033
7034DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7035 1, 1, 0,
48b0f3ae 7036 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
7037If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7038It is valid if it is nil or a symbol defined as a coding system by the
7039function `define-coding-system'. */)
df7492f9 7040 (coding_system)
4ed46869
KH
7041 Lisp_Object coding_system;
7042{
b7826503 7043 CHECK_SYMBOL (coding_system);
4ed46869
KH
7044 if (!NILP (Fcoding_system_p (coding_system)))
7045 return coding_system;
7046 while (1)
02ba4723 7047 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 7048}
df7492f9 7049
3a73fa5d 7050\f
89528eb3
KH
7051/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7052 HIGHEST is nonzero, return the coding system of the highest
7053 priority among the detected coding systems. Otherwise return a
7054 list of detected coding systems sorted by their priorities. If
7055 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7056 multibyte form but contains only ASCII and eight-bit chars.
7057 Otherwise, the bytes are raw bytes.
7058
7059 CODING-SYSTEM controls the detection as below:
7060
7061 If it is nil, detect both text-format and eol-format. If the
7062 text-format part of CODING-SYSTEM is already specified
7063 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7064 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7065 detect only text-format. */
7066
d46c5b12 7067Lisp_Object
24a73b0a
KH
7068detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7069 coding_system)
8f924df7 7070 const unsigned char *src;
24a73b0a 7071 int src_chars, src_bytes, highest;
0a28aafb 7072 int multibytep;
df7492f9 7073 Lisp_Object coding_system;
4ed46869 7074{
8f924df7 7075 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
7076 Lisp_Object attrs, eol_type;
7077 Lisp_Object val;
7078 struct coding_system coding;
89528eb3 7079 int id;
ff0dacd7 7080 struct coding_detection_info detect_info;
24a73b0a 7081 enum coding_category base_category;
b73bfc1c 7082
df7492f9
KH
7083 if (NILP (coding_system))
7084 coding_system = Qundecided;
7085 setup_coding_system (coding_system, &coding);
7086 attrs = CODING_ID_ATTRS (coding.id);
7087 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 7088 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 7089
df7492f9 7090 coding.source = src;
24a73b0a 7091 coding.src_chars = src_chars;
df7492f9
KH
7092 coding.src_bytes = src_bytes;
7093 coding.src_multibyte = multibytep;
7094 coding.consumed = 0;
89528eb3 7095 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 7096
ff0dacd7 7097 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 7098
89528eb3 7099 /* At first, detect text-format if necessary. */
24a73b0a
KH
7100 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7101 if (base_category == coding_category_undecided)
4ed46869 7102 {
ff0dacd7
KH
7103 enum coding_category category;
7104 struct coding_system *this;
7105 int c, i;
88993dfd 7106
24a73b0a
KH
7107 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7108 for (i = 0; src < src_end; i++, src++)
4ed46869 7109 {
df7492f9 7110 c = *src;
75e2a253 7111 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
24a73b0a
KH
7112 || c == ISO_CODE_SI
7113 || c == ISO_CODE_SO)))
d46c5b12 7114 break;
4ed46869 7115 }
df7492f9 7116 coding.head_ascii = src - coding.source;
88993dfd 7117
df7492f9
KH
7118 if (src < src_end)
7119 for (i = 0; i < coding_category_raw_text; i++)
7120 {
ff0dacd7
KH
7121 category = coding_priorities[i];
7122 this = coding_categories + category;
b843d1ae 7123
df7492f9
KH
7124 if (this->id < 0)
7125 {
7126 /* No coding system of this category is defined. */
ff0dacd7 7127 detect_info.rejected |= (1 << category);
df7492f9 7128 }
ff0dacd7 7129 else if (category >= coding_category_raw_text)
89528eb3 7130 continue;
ff0dacd7
KH
7131 else if (detect_info.checked & (1 << category))
7132 {
7133 if (highest
7134 && (detect_info.found & (1 << category)))
7135 break;
7136 }
df7492f9
KH
7137 else
7138 {
ff0dacd7 7139 if ((*(this->detector)) (&coding, &detect_info)
89528eb3 7140 && highest
ff0dacd7 7141 && (detect_info.found & (1 << category)))
24a73b0a
KH
7142 {
7143 if (category == coding_category_utf_16_auto)
7144 {
7145 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7146 category = coding_category_utf_16_le;
7147 else
7148 category = coding_category_utf_16_be;
7149 }
7150 break;
7151 }
df7492f9
KH
7152 }
7153 }
ec6d2bb8 7154
ff0dacd7 7155 if (detect_info.rejected == CATEGORY_MASK_ANY)
ec6d2bb8 7156 {
ff0dacd7 7157 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
7158 id = coding_categories[coding_category_raw_text].id;
7159 val = Fcons (make_number (id), Qnil);
7160 }
ff0dacd7 7161 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 7162 {
ff0dacd7 7163 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
7164 id = coding_categories[coding_category_undecided].id;
7165 val = Fcons (make_number (id), Qnil);
7166 }
7167 else if (highest)
7168 {
ff0dacd7 7169 if (detect_info.found)
ec6d2bb8 7170 {
ff0dacd7
KH
7171 detect_info.found = 1 << category;
7172 val = Fcons (make_number (this->id), Qnil);
7173 }
7174 else
7175 for (i = 0; i < coding_category_raw_text; i++)
7176 if (! (detect_info.rejected & (1 << coding_priorities[i])))
7177 {
7178 detect_info.found = 1 << coding_priorities[i];
7179 id = coding_categories[coding_priorities[i]].id;
7180 val = Fcons (make_number (id), Qnil);
7181 break;
7182 }
7183 }
89528eb3
KH
7184 else
7185 {
ff0dacd7
KH
7186 int mask = detect_info.rejected | detect_info.found;
7187 int found = 0;
89528eb3 7188 val = Qnil;
ec6d2bb8 7189
89528eb3 7190 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
7191 {
7192 category = coding_priorities[i];
7193 if (! (mask & (1 << category)))
ec6d2bb8 7194 {
ff0dacd7
KH
7195 found |= 1 << category;
7196 id = coding_categories[category].id;
7197 val = Fcons (make_number (id), val);
7198 }
7199 }
7200 for (i = coding_category_raw_text - 1; i >= 0; i--)
7201 {
7202 category = coding_priorities[i];
7203 if (detect_info.found & (1 << category))
7204 {
7205 id = coding_categories[category].id;
7206 val = Fcons (make_number (id), val);
ec6d2bb8 7207 }
ec6d2bb8 7208 }
ff0dacd7 7209 detect_info.found |= found;
ec6d2bb8 7210 }
ec6d2bb8 7211 }
24a73b0a
KH
7212 else if (base_category == coding_category_utf_16_auto)
7213 {
7214 if (detect_coding_utf_16 (&coding, &detect_info))
7215 {
24a73b0a
KH
7216 struct coding_system *this;
7217
7218 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7219 this = coding_categories + coding_category_utf_16_le;
7220 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7221 this = coding_categories + coding_category_utf_16_be;
7222 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7223 this = coding_categories + coding_category_utf_16_be_nosig;
7224 else
7225 this = coding_categories + coding_category_utf_16_le_nosig;
7226 val = Fcons (make_number (this->id), Qnil);
7227 }
7228 }
df7492f9
KH
7229 else
7230 {
ff0dacd7 7231 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 7232 val = Fcons (make_number (coding.id), Qnil);
4ed46869 7233 }
df7492f9 7234
89528eb3 7235 /* Then, detect eol-format if necessary. */
df7492f9 7236 {
89528eb3 7237 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
7238 Lisp_Object tail;
7239
89528eb3
KH
7240 if (VECTORP (eol_type))
7241 {
ff0dacd7 7242 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
7243 normal_eol = detect_eol (coding.source, src_bytes,
7244 coding_category_raw_text);
ff0dacd7
KH
7245 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7246 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
7247 utf_16_be_eol = detect_eol (coding.source, src_bytes,
7248 coding_category_utf_16_be);
ff0dacd7
KH
7249 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7250 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
7251 utf_16_le_eol = detect_eol (coding.source, src_bytes,
7252 coding_category_utf_16_le);
7253 }
7254 else
7255 {
7256 if (EQ (eol_type, Qunix))
7257 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7258 else if (EQ (eol_type, Qdos))
7259 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7260 else
7261 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7262 }
7263
df7492f9
KH
7264 for (tail = val; CONSP (tail); tail = XCDR (tail))
7265 {
89528eb3 7266 enum coding_category category;
df7492f9 7267 int this_eol;
89528eb3
KH
7268
7269 id = XINT (XCAR (tail));
7270 attrs = CODING_ID_ATTRS (id);
7271 category = XINT (CODING_ATTR_CATEGORY (attrs));
7272 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
7273 if (VECTORP (eol_type))
7274 {
89528eb3
KH
7275 if (category == coding_category_utf_16_be
7276 || category == coding_category_utf_16_be_nosig)
7277 this_eol = utf_16_be_eol;
7278 else if (category == coding_category_utf_16_le
7279 || category == coding_category_utf_16_le_nosig)
7280 this_eol = utf_16_le_eol;
df7492f9 7281 else
89528eb3
KH
7282 this_eol = normal_eol;
7283
df7492f9
KH
7284 if (this_eol == EOL_SEEN_LF)
7285 XSETCAR (tail, AREF (eol_type, 0));
7286 else if (this_eol == EOL_SEEN_CRLF)
7287 XSETCAR (tail, AREF (eol_type, 1));
7288 else if (this_eol == EOL_SEEN_CR)
7289 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
7290 else
7291 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 7292 }
89528eb3
KH
7293 else
7294 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
7295 }
7296 }
ec6d2bb8 7297
03699b14 7298 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
7299}
7300
ec6d2bb8 7301
d46c5b12
KH
7302DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7303 2, 3, 0,
48b0f3ae
PJ
7304 doc: /* Detect coding system of the text in the region between START and END.
7305Return a list of possible coding systems ordered by priority.
ec6d2bb8 7306
48b0f3ae
PJ
7307If only ASCII characters are found, it returns a list of single element
7308`undecided' or its subsidiary coding system according to a detected
7309end-of-line format.
ec6d2bb8 7310
48b0f3ae
PJ
7311If optional argument HIGHEST is non-nil, return the coding system of
7312highest priority. */)
7313 (start, end, highest)
d46c5b12
KH
7314 Lisp_Object start, end, highest;
7315{
7316 int from, to;
7317 int from_byte, to_byte;
ec6d2bb8 7318
b7826503
PJ
7319 CHECK_NUMBER_COERCE_MARKER (start);
7320 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 7321
d46c5b12
KH
7322 validate_region (&start, &end);
7323 from = XINT (start), to = XINT (end);
7324 from_byte = CHAR_TO_BYTE (from);
7325 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 7326
d46c5b12
KH
7327 if (from < GPT && to >= GPT)
7328 move_gap_both (to, to_byte);
c210f766 7329
d46c5b12 7330 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 7331 to - from, to_byte - from_byte,
0a28aafb
KH
7332 !NILP (highest),
7333 !NILP (current_buffer
df7492f9
KH
7334 ->enable_multibyte_characters),
7335 Qnil);
ec6d2bb8
KH
7336}
7337
d46c5b12
KH
7338DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7339 1, 2, 0,
48b0f3ae
PJ
7340 doc: /* Detect coding system of the text in STRING.
7341Return a list of possible coding systems ordered by priority.
fb88bf2d 7342
48b0f3ae
PJ
7343If only ASCII characters are found, it returns a list of single element
7344`undecided' or its subsidiary coding system according to a detected
7345end-of-line format.
d46c5b12 7346
48b0f3ae
PJ
7347If optional argument HIGHEST is non-nil, return the coding system of
7348highest priority. */)
7349 (string, highest)
d46c5b12
KH
7350 Lisp_Object string, highest;
7351{
b7826503 7352 CHECK_STRING (string);
b73bfc1c 7353
24a73b0a
KH
7354 return detect_coding_system (SDATA (string),
7355 SCHARS (string), SBYTES (string),
8f924df7 7356 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 7357 Qnil);
4ed46869 7358}
4ed46869 7359
b73bfc1c 7360
df7492f9
KH
7361static INLINE int
7362char_encodable_p (c, attrs)
7363 int c;
7364 Lisp_Object attrs;
05e6f5dc 7365{
df7492f9 7366 Lisp_Object tail;
df7492f9 7367 struct charset *charset;
7d64c6ad 7368 Lisp_Object translation_table;
d46c5b12 7369
7d64c6ad 7370 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 7371 if (! NILP (translation_table))
7d64c6ad 7372 c = translate_char (translation_table, c);
df7492f9
KH
7373 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7374 CONSP (tail); tail = XCDR (tail))
e133c8fa 7375 {
df7492f9
KH
7376 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7377 if (CHAR_CHARSET_P (c, charset))
7378 break;
e133c8fa 7379 }
df7492f9 7380 return (! NILP (tail));
05e6f5dc 7381}
83fa074f 7382
fb88bf2d 7383
df7492f9
KH
7384/* Return a list of coding systems that safely encode the text between
7385 START and END. If EXCLUDE is non-nil, it is a list of coding
7386 systems not to check. The returned list doesn't contain any such
48468dac 7387 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7388 unibyte, return t. */
e077cc80 7389
df7492f9
KH
7390DEFUN ("find-coding-systems-region-internal",
7391 Ffind_coding_systems_region_internal,
7392 Sfind_coding_systems_region_internal, 2, 3, 0,
7393 doc: /* Internal use only. */)
7394 (start, end, exclude)
7395 Lisp_Object start, end, exclude;
7396{
7397 Lisp_Object coding_attrs_list, safe_codings;
7398 EMACS_INT start_byte, end_byte;
7c78e542 7399 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7400 int c;
7401 Lisp_Object tail, elt;
d46c5b12 7402
df7492f9
KH
7403 if (STRINGP (start))
7404 {
7405 if (!STRING_MULTIBYTE (start)
8f924df7 7406 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7407 return Qt;
7408 start_byte = 0;
8f924df7 7409 end_byte = SBYTES (start);
df7492f9
KH
7410 }
7411 else
d46c5b12 7412 {
df7492f9
KH
7413 CHECK_NUMBER_COERCE_MARKER (start);
7414 CHECK_NUMBER_COERCE_MARKER (end);
7415 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7416 args_out_of_range (start, end);
7417 if (NILP (current_buffer->enable_multibyte_characters))
7418 return Qt;
7419 start_byte = CHAR_TO_BYTE (XINT (start));
7420 end_byte = CHAR_TO_BYTE (XINT (end));
7421 if (XINT (end) - XINT (start) == end_byte - start_byte)
7422 return Qt;
d46c5b12 7423
e1c23804 7424 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7425 {
e1c23804
DL
7426 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7427 move_gap_both (XINT (start), start_byte);
df7492f9 7428 else
e1c23804 7429 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7430 }
7431 }
7432
df7492f9
KH
7433 coding_attrs_list = Qnil;
7434 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7435 if (NILP (exclude)
7436 || NILP (Fmemq (XCAR (tail), exclude)))
7437 {
7438 Lisp_Object attrs;
d46c5b12 7439
df7492f9
KH
7440 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7441 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7442 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
7443 {
7444 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 7445 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
7446 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7447 }
df7492f9 7448 }
d46c5b12 7449
df7492f9 7450 if (STRINGP (start))
8f924df7 7451 p = pbeg = SDATA (start);
df7492f9
KH
7452 else
7453 p = pbeg = BYTE_POS_ADDR (start_byte);
7454 pend = p + (end_byte - start_byte);
b843d1ae 7455
df7492f9
KH
7456 while (p < pend && ASCII_BYTE_P (*p)) p++;
7457 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7458
05e6f5dc 7459 while (p < pend)
72d1a715 7460 {
df7492f9
KH
7461 if (ASCII_BYTE_P (*p))
7462 p++;
72d1a715
RS
7463 else
7464 {
df7492f9 7465 c = STRING_CHAR_ADVANCE (p);
12410ef1 7466
df7492f9
KH
7467 charset_map_loaded = 0;
7468 for (tail = coding_attrs_list; CONSP (tail);)
7469 {
7470 elt = XCAR (tail);
7471 if (NILP (elt))
7472 tail = XCDR (tail);
7473 else if (char_encodable_p (c, elt))
7474 tail = XCDR (tail);
7475 else if (CONSP (XCDR (tail)))
7476 {
7477 XSETCAR (tail, XCAR (XCDR (tail)));
7478 XSETCDR (tail, XCDR (XCDR (tail)));
7479 }
7480 else
7481 {
7482 XSETCAR (tail, Qnil);
7483 tail = XCDR (tail);
7484 }
7485 }
7486 if (charset_map_loaded)
7487 {
7488 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7489
df7492f9 7490 if (STRINGP (start))
8f924df7 7491 pbeg = SDATA (start);
df7492f9
KH
7492 else
7493 pbeg = BYTE_POS_ADDR (start_byte);
7494 p = pbeg + p_offset;
7495 pend = pbeg + pend_offset;
7496 }
7497 }
ec6d2bb8 7498 }
fb88bf2d 7499
988b3759 7500 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
7501 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7502 if (! NILP (XCAR (tail)))
7503 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 7504
05e6f5dc
KH
7505 return safe_codings;
7506}
4956c225 7507
d46c5b12 7508
8f924df7
KH
7509DEFUN ("unencodable-char-position", Funencodable_char_position,
7510 Sunencodable_char_position, 3, 5, 0,
7511 doc: /*
7512Return position of first un-encodable character in a region.
7513START and END specfiy the region and CODING-SYSTEM specifies the
7514encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 7515
8f924df7
KH
7516If optional 4th argument COUNT is non-nil, it specifies at most how
7517many un-encodable characters to search. In this case, the value is a
7518list of positions.
d46c5b12 7519
8f924df7
KH
7520If optional 5th argument STRING is non-nil, it is a string to search
7521for un-encodable characters. In that case, START and END are indexes
7522to the string. */)
7523 (start, end, coding_system, count, string)
7524 Lisp_Object start, end, coding_system, count, string;
7525{
7526 int n;
7527 struct coding_system coding;
7d64c6ad 7528 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
7529 Lisp_Object positions;
7530 int from, to;
7531 const unsigned char *p, *stop, *pend;
7532 int ascii_compatible;
fb88bf2d 7533
8f924df7
KH
7534 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7535 attrs = CODING_ID_ATTRS (coding.id);
7536 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7537 return Qnil;
7538 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7539 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 7540 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 7541
8f924df7
KH
7542 if (NILP (string))
7543 {
7544 validate_region (&start, &end);
7545 from = XINT (start);
7546 to = XINT (end);
7547 if (NILP (current_buffer->enable_multibyte_characters)
7548 || (ascii_compatible
7549 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7550 return Qnil;
7551 p = CHAR_POS_ADDR (from);
7552 pend = CHAR_POS_ADDR (to);
7553 if (from < GPT && to >= GPT)
7554 stop = GPT_ADDR;
7555 else
7556 stop = pend;
7557 }
7558 else
7559 {
7560 CHECK_STRING (string);
7561 CHECK_NATNUM (start);
7562 CHECK_NATNUM (end);
7563 from = XINT (start);
7564 to = XINT (end);
7565 if (from > to
7566 || to > SCHARS (string))
7567 args_out_of_range_3 (string, start, end);
7568 if (! STRING_MULTIBYTE (string))
7569 return Qnil;
7570 p = SDATA (string) + string_char_to_byte (string, from);
7571 stop = pend = SDATA (string) + string_char_to_byte (string, to);
7572 if (ascii_compatible && (to - from) == (pend - p))
7573 return Qnil;
7574 }
f2558efd 7575
8f924df7
KH
7576 if (NILP (count))
7577 n = 1;
7578 else
b73bfc1c 7579 {
8f924df7
KH
7580 CHECK_NATNUM (count);
7581 n = XINT (count);
b73bfc1c
KH
7582 }
7583
8f924df7
KH
7584 positions = Qnil;
7585 while (1)
d46c5b12 7586 {
8f924df7 7587 int c;
ec6d2bb8 7588
8f924df7
KH
7589 if (ascii_compatible)
7590 while (p < stop && ASCII_BYTE_P (*p))
7591 p++, from++;
7592 if (p >= stop)
0e79d667 7593 {
8f924df7
KH
7594 if (p >= pend)
7595 break;
7596 stop = pend;
7597 p = GAP_END_ADDR;
0e79d667 7598 }
ec6d2bb8 7599
8f924df7
KH
7600 c = STRING_CHAR_ADVANCE (p);
7601 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
7602 && ! char_charset (translate_char (translation_table, c),
7603 charset_list, NULL))
ec6d2bb8 7604 {
8f924df7
KH
7605 positions = Fcons (make_number (from), positions);
7606 n--;
7607 if (n == 0)
7608 break;
ec6d2bb8
KH
7609 }
7610
8f924df7
KH
7611 from++;
7612 }
d46c5b12 7613
8f924df7
KH
7614 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7615}
d46c5b12 7616
d46c5b12 7617
df7492f9
KH
7618DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7619 Scheck_coding_systems_region, 3, 3, 0,
7620 doc: /* Check if the region is encodable by coding systems.
d46c5b12 7621
df7492f9
KH
7622START and END are buffer positions specifying the region.
7623CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 7624
df7492f9
KH
7625The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7626CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7627whole region, POS0, POS1, ... are buffer positions where non-encodable
7628characters are found.
93dec019 7629
df7492f9
KH
7630If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7631value is nil.
93dec019 7632
df7492f9
KH
7633START may be a string. In that case, check if the string is
7634encodable, and the value contains indices to the string instead of
7635buffer positions. END is ignored. */)
7636 (start, end, coding_system_list)
7637 Lisp_Object start, end, coding_system_list;
05e6f5dc 7638{
df7492f9
KH
7639 Lisp_Object list;
7640 EMACS_INT start_byte, end_byte;
7641 int pos;
7c78e542 7642 const unsigned char *p, *pbeg, *pend;
df7492f9 7643 int c;
7d64c6ad 7644 Lisp_Object tail, elt, attrs;
70ad9fc4 7645
05e6f5dc
KH
7646 if (STRINGP (start))
7647 {
df7492f9 7648 if (!STRING_MULTIBYTE (start)
8f924df7 7649 && SCHARS (start) != SBYTES (start))
df7492f9
KH
7650 return Qnil;
7651 start_byte = 0;
8f924df7 7652 end_byte = SBYTES (start);
df7492f9 7653 pos = 0;
d46c5b12 7654 }
05e6f5dc 7655 else
b73bfc1c 7656 {
b7826503
PJ
7657 CHECK_NUMBER_COERCE_MARKER (start);
7658 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7659 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7660 args_out_of_range (start, end);
7661 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7662 return Qnil;
7663 start_byte = CHAR_TO_BYTE (XINT (start));
7664 end_byte = CHAR_TO_BYTE (XINT (end));
7665 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7666 return Qt;
df7492f9 7667
e1c23804 7668 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 7669 {
e1c23804
DL
7670 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7671 move_gap_both (XINT (start), start_byte);
df7492f9 7672 else
e1c23804 7673 move_gap_both (XINT (end), end_byte);
b73bfc1c 7674 }
e1c23804 7675 pos = XINT (start);
b73bfc1c 7676 }
7553d0e1 7677
df7492f9
KH
7678 list = Qnil;
7679 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 7680 {
df7492f9 7681 elt = XCAR (tail);
7d64c6ad 7682 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
7683 ASET (attrs, coding_attr_trans_tbl,
7684 get_translation_table (attrs, 1, NULL));
7d64c6ad 7685 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
7686 }
7687
df7492f9 7688 if (STRINGP (start))
8f924df7 7689 p = pbeg = SDATA (start);
72d1a715 7690 else
df7492f9
KH
7691 p = pbeg = BYTE_POS_ADDR (start_byte);
7692 pend = p + (end_byte - start_byte);
4ed46869 7693
df7492f9
KH
7694 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7695 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 7696
df7492f9 7697 while (p < pend)
d46c5b12 7698 {
df7492f9
KH
7699 if (ASCII_BYTE_P (*p))
7700 p++;
e133c8fa 7701 else
05e6f5dc 7702 {
df7492f9
KH
7703 c = STRING_CHAR_ADVANCE (p);
7704
7705 charset_map_loaded = 0;
7706 for (tail = list; CONSP (tail); tail = XCDR (tail))
7707 {
7708 elt = XCDR (XCAR (tail));
7709 if (! char_encodable_p (c, XCAR (elt)))
7710 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7711 }
7712 if (charset_map_loaded)
7713 {
7714 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7715
7716 if (STRINGP (start))
8f924df7 7717 pbeg = SDATA (start);
df7492f9
KH
7718 else
7719 pbeg = BYTE_POS_ADDR (start_byte);
7720 p = pbeg + p_offset;
7721 pend = pbeg + pend_offset;
7722 }
05e6f5dc 7723 }
df7492f9 7724 pos++;
d46c5b12 7725 }
4ed46869 7726
df7492f9
KH
7727 tail = list;
7728 list = Qnil;
7729 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 7730 {
df7492f9
KH
7731 elt = XCAR (tail);
7732 if (CONSP (XCDR (XCDR (elt))))
7733 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7734 list);
ec6d2bb8 7735 }
2b4f9037 7736
df7492f9 7737 return list;
d46c5b12
KH
7738}
7739
3fd9494b 7740
b73bfc1c 7741Lisp_Object
df7492f9
KH
7742code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7743 Lisp_Object start, end, coding_system, dst_object;
7744 int encodep, norecord;
4ed46869 7745{
3a73fa5d 7746 struct coding_system coding;
df7492f9
KH
7747 EMACS_INT from, from_byte, to, to_byte;
7748 Lisp_Object src_object;
4ed46869 7749
b7826503
PJ
7750 CHECK_NUMBER_COERCE_MARKER (start);
7751 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7752 if (NILP (coding_system))
7753 coding_system = Qno_conversion;
7754 else
7755 CHECK_CODING_SYSTEM (coding_system);
7756 src_object = Fcurrent_buffer ();
7757 if (NILP (dst_object))
7758 dst_object = src_object;
7759 else if (! EQ (dst_object, Qt))
7760 CHECK_BUFFER (dst_object);
3a73fa5d 7761
d46c5b12
KH
7762 validate_region (&start, &end);
7763 from = XFASTINT (start);
df7492f9 7764 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7765 to = XFASTINT (end);
df7492f9 7766 to_byte = CHAR_TO_BYTE (to);
764ca8da 7767
df7492f9
KH
7768 setup_coding_system (coding_system, &coding);
7769 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 7770
df7492f9
KH
7771 if (encodep)
7772 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7773 dst_object);
7774 else
7775 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7776 dst_object);
7777 if (! norecord)
7778 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7779
df7492f9
KH
7780 return (BUFFERP (dst_object)
7781 ? make_number (coding.produced_char)
7782 : coding.dst_object);
4031e2bf 7783}
78108bcd 7784
4ed46869 7785
4031e2bf 7786DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7787 3, 4, "r\nzCoding system: ",
48b0f3ae 7788 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7789When called from a program, takes four arguments:
7790 START, END, CODING-SYSTEM, and DESTINATION.
7791START and END are buffer positions.
8844fa83 7792
df7492f9
KH
7793Optional 4th arguments DESTINATION specifies where the decoded text goes.
7794If nil, the region between START and END is replace by the decoded text.
7795If buffer, the decoded text is inserted in the buffer.
7796If t, the decoded text is returned.
8844fa83 7797
48b0f3ae
PJ
7798This function sets `last-coding-system-used' to the precise coding system
7799used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7800not fully specified.)
7801It returns the length of the decoded text. */)
df7492f9
KH
7802 (start, end, coding_system, destination)
7803 Lisp_Object start, end, coding_system, destination;
4031e2bf 7804{
df7492f9 7805 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 7806}
8844fa83 7807
3a73fa5d 7808DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7809 3, 4, "r\nzCoding system: ",
7810 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7811When called from a program, takes three arguments:
7812START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 7813
df7492f9
KH
7814Optional 4th arguments DESTINATION specifies where the encoded text goes.
7815If nil, the region between START and END is replace by the encoded text.
7816If buffer, the encoded text is inserted in the buffer.
7817If t, the encoded text is returned.
2391eaa4 7818
48b0f3ae
PJ
7819This function sets `last-coding-system-used' to the precise coding system
7820used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7821not fully specified.)
7822It returns the length of the encoded text. */)
df7492f9
KH
7823 (start, end, coding_system, destination)
7824 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7825{
df7492f9 7826 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
7827}
7828
7829Lisp_Object
df7492f9
KH
7830code_convert_string (string, coding_system, dst_object,
7831 encodep, nocopy, norecord)
7832 Lisp_Object string, coding_system, dst_object;
7833 int encodep, nocopy, norecord;
b73bfc1c 7834{
4031e2bf 7835 struct coding_system coding;
df7492f9 7836 EMACS_INT chars, bytes;
ec6d2bb8 7837
b7826503 7838 CHECK_STRING (string);
d46c5b12 7839 if (NILP (coding_system))
4956c225 7840 {
df7492f9
KH
7841 if (! norecord)
7842 Vlast_coding_system_used = Qno_conversion;
7843 if (NILP (dst_object))
7844 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 7845 }
b73bfc1c 7846
df7492f9
KH
7847 if (NILP (coding_system))
7848 coding_system = Qno_conversion;
7849 else
7850 CHECK_CODING_SYSTEM (coding_system);
7851 if (NILP (dst_object))
7852 dst_object = Qt;
7853 else if (! EQ (dst_object, Qt))
7854 CHECK_BUFFER (dst_object);
73be902c 7855
df7492f9 7856 setup_coding_system (coding_system, &coding);
d46c5b12 7857 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
7858 chars = SCHARS (string);
7859 bytes = SBYTES (string);
df7492f9
KH
7860 if (encodep)
7861 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7862 else
7863 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7864 if (! norecord)
7865 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 7866
df7492f9
KH
7867 return (BUFFERP (dst_object)
7868 ? make_number (coding.produced_char)
7869 : coding.dst_object);
4ed46869 7870}
73be902c 7871
b73bfc1c 7872
ecec61c1 7873/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 7874 Do not set Vlast_coding_system_used.
4ed46869 7875
ec6d2bb8
KH
7876 This function is called only from macros DECODE_FILE and
7877 ENCODE_FILE, thus we ignore character composition. */
4ed46869 7878
ecec61c1
KH
7879Lisp_Object
7880code_convert_string_norecord (string, coding_system, encodep)
7881 Lisp_Object string, coding_system;
7882 int encodep;
4ed46869 7883{
0be8721c 7884 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
7885}
7886
4ed46869 7887
df7492f9
KH
7888DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7889 2, 4, 0,
7890 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7891
7892Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7893if the decoding operation is trivial.
ecec61c1 7894
df7492f9 7895Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 7896inserted in BUFFER instead of returned as a string. In this case,
df7492f9 7897the return value is BUFFER.
ecec61c1 7898
df7492f9
KH
7899This function sets `last-coding-system-used' to the precise coding system
7900used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7901not fully specified. */)
7902 (string, coding_system, nocopy, buffer)
7903 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 7904{
df7492f9
KH
7905 return code_convert_string (string, coding_system, buffer,
7906 0, ! NILP (nocopy), 0);
4ed46869
KH
7907}
7908
df7492f9
KH
7909DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7910 2, 4, 0,
7911 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7912
7913Optional third arg NOCOPY non-nil means it is OK to return STRING
7914itself if the encoding operation is trivial.
7915
7916Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 7917inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
7918the return value is BUFFER.
7919
7920This function sets `last-coding-system-used' to the precise coding system
7921used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7922not fully specified.) */)
7923 (string, coding_system, nocopy, buffer)
7924 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 7925{
df7492f9 7926 return code_convert_string (string, coding_system, buffer,
c197f191 7927 1, ! NILP (nocopy), 1);
4ed46869 7928}
df7492f9 7929
3a73fa5d 7930\f
4ed46869 7931DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7932 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7933Return the corresponding character. */)
7934 (code)
4ed46869 7935 Lisp_Object code;
4ed46869 7936{
df7492f9
KH
7937 Lisp_Object spec, attrs, val;
7938 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7939 int c;
4ed46869 7940
df7492f9
KH
7941 CHECK_NATNUM (code);
7942 c = XFASTINT (code);
7943 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7944 attrs = AREF (spec, 0);
4ed46869 7945
df7492f9
KH
7946 if (ASCII_BYTE_P (c)
7947 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7948 return code;
4ed46869 7949
df7492f9
KH
7950 val = CODING_ATTR_CHARSET_LIST (attrs);
7951 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7952 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7953 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 7954
df7492f9
KH
7955 if (c <= 0x7F)
7956 charset = charset_roman;
7957 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7958 {
df7492f9
KH
7959 charset = charset_kana;
7960 c -= 0x80;
4ed46869 7961 }
55ab7be3 7962 else
4ed46869 7963 {
004068e4 7964 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7965
7966 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7967 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7968 error ("Invalid code: %d", code);
7969 SJIS_TO_JIS (c);
7970 charset = charset_kanji;
4ed46869 7971 }
df7492f9
KH
7972 c = DECODE_CHAR (charset, c);
7973 if (c < 0)
7974 error ("Invalid code: %d", code);
7975 return make_number (c);
93dec019 7976}
4ed46869 7977
48b0f3ae 7978
4ed46869 7979DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7980 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7981Return the corresponding code in SJIS. */)
7982 (ch)
df7492f9 7983 Lisp_Object ch;
4ed46869 7984{
df7492f9
KH
7985 Lisp_Object spec, attrs, charset_list;
7986 int c;
7987 struct charset *charset;
7988 unsigned code;
48b0f3ae 7989
df7492f9
KH
7990 CHECK_CHARACTER (ch);
7991 c = XFASTINT (ch);
7992 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7993 attrs = AREF (spec, 0);
7994
7995 if (ASCII_CHAR_P (c)
7996 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7997 return ch;
7998
7999 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8000 charset = char_charset (c, charset_list, &code);
8001 if (code == CHARSET_INVALID_CODE (charset))
8002 error ("Can't encode by shift_jis encoding: %d", c);
8003 JIS_TO_SJIS (code);
8004
8005 return make_number (code);
4ed46869
KH
8006}
8007
8008DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8009 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8010Return the corresponding character. */)
8011 (code)
4ed46869 8012 Lisp_Object code;
d46c5b12 8013{
df7492f9
KH
8014 Lisp_Object spec, attrs, val;
8015 struct charset *charset_roman, *charset_big5, *charset;
8016 int c;
6289dd10 8017
df7492f9
KH
8018 CHECK_NATNUM (code);
8019 c = XFASTINT (code);
8020 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8021 attrs = AREF (spec, 0);
4ed46869 8022
df7492f9
KH
8023 if (ASCII_BYTE_P (c)
8024 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8025 return code;
6289dd10 8026
df7492f9
KH
8027 val = CODING_ATTR_CHARSET_LIST (attrs);
8028 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8029 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 8030
df7492f9
KH
8031 if (c <= 0x7F)
8032 charset = charset_roman;
c28a9453
KH
8033 else
8034 {
df7492f9
KH
8035 int b1 = c >> 8, b2 = c & 0x7F;
8036 if (b1 < 0xA1 || b1 > 0xFE
8037 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8038 error ("Invalid code: %d", code);
8039 charset = charset_big5;
c28a9453 8040 }
df7492f9
KH
8041 c = DECODE_CHAR (charset, (unsigned )c);
8042 if (c < 0)
8043 error ("Invalid code: %d", code);
8044 return make_number (c);
d46c5b12 8045}
6289dd10 8046
4ed46869 8047DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8048 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
8049Return the corresponding character code in Big5. */)
8050 (ch)
4ed46869
KH
8051 Lisp_Object ch;
8052{
df7492f9
KH
8053 Lisp_Object spec, attrs, charset_list;
8054 struct charset *charset;
8055 int c;
8056 unsigned code;
8057
8058 CHECK_CHARACTER (ch);
8059 c = XFASTINT (ch);
8060 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8061 attrs = AREF (spec, 0);
8062 if (ASCII_CHAR_P (c)
8063 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8064 return ch;
8065
8066 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8067 charset = char_charset (c, charset_list, &code);
8068 if (code == CHARSET_INVALID_CODE (charset))
8069 error ("Can't encode by Big5 encoding: %d", c);
8070
8071 return make_number (code);
4ed46869 8072}
48b0f3ae 8073
3a73fa5d 8074\f
1ba9e4ab
KH
8075DEFUN ("set-terminal-coding-system-internal",
8076 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
8077 Sset_terminal_coding_system_internal, 1, 1, 0,
8078 doc: /* Internal use only. */)
8079 (coding_system)
b74e4686 8080 Lisp_Object coding_system;
4ed46869 8081{
b7826503 8082 CHECK_SYMBOL (coding_system);
df7492f9
KH
8083 setup_coding_system (Fcheck_coding_system (coding_system),
8084 &terminal_coding);
48b0f3ae 8085
70c22245 8086 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
8087 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
8088 /* Characer composition should be disabled. */
8089 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8090 terminal_coding.src_multibyte = 1;
8091 terminal_coding.dst_multibyte = 0;
4ed46869
KH
8092 return Qnil;
8093}
8094
c4825358
KH
8095DEFUN ("set-safe-terminal-coding-system-internal",
8096 Fset_safe_terminal_coding_system_internal,
48b0f3ae 8097 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 8098 doc: /* Internal use only. */)
48b0f3ae 8099 (coding_system)
b74e4686 8100 Lisp_Object coding_system;
d46c5b12 8101{
b7826503 8102 CHECK_SYMBOL (coding_system);
c4825358
KH
8103 setup_coding_system (Fcheck_coding_system (coding_system),
8104 &safe_terminal_coding);
df7492f9
KH
8105 /* Characer composition should be disabled. */
8106 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8107 safe_terminal_coding.src_multibyte = 1;
8108 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
8109 return Qnil;
8110}
4ed46869 8111
4ed46869
KH
8112DEFUN ("terminal-coding-system",
8113 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
8114 doc: /* Return coding system specified for terminal output. */)
8115 ()
4ed46869 8116{
df7492f9 8117 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
8118}
8119
1ba9e4ab
KH
8120DEFUN ("set-keyboard-coding-system-internal",
8121 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
8122 Sset_keyboard_coding_system_internal, 1, 1, 0,
8123 doc: /* Internal use only. */)
8124 (coding_system)
4ed46869
KH
8125 Lisp_Object coding_system;
8126{
b7826503 8127 CHECK_SYMBOL (coding_system);
df7492f9
KH
8128 setup_coding_system (Fcheck_coding_system (coding_system),
8129 &keyboard_coding);
8130 /* Characer composition should be disabled. */
8131 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
8132 return Qnil;
8133}
8134
8135DEFUN ("keyboard-coding-system",
8136 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
8137 doc: /* Return coding system specified for decoding keyboard input. */)
8138 ()
4ed46869 8139{
df7492f9 8140 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
8141}
8142
4ed46869 8143\f
a5d301df
KH
8144DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8145 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
8146 doc: /* Choose a coding system for an operation based on the target name.
8147The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8148DECODING-SYSTEM is the coding system to use for decoding
8149\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8150for encoding (in case OPERATION does encoding).
05e6f5dc 8151
48b0f3ae
PJ
8152The first argument OPERATION specifies an I/O primitive:
8153 For file I/O, `insert-file-contents' or `write-region'.
8154 For process I/O, `call-process', `call-process-region', or `start-process'.
8155 For network I/O, `open-network-stream'.
05e6f5dc 8156
48b0f3ae
PJ
8157The remaining arguments should be the same arguments that were passed
8158to the primitive. Depending on which primitive, one of those arguments
8159is selected as the TARGET. For example, if OPERATION does file I/O,
8160whichever argument specifies the file name is TARGET.
05e6f5dc 8161
48b0f3ae
PJ
8162TARGET has a meaning which depends on OPERATION:
8163 For file I/O, TARGET is a file name.
8164 For process I/O, TARGET is a process name.
8165 For network I/O, TARGET is a service name or a port number
05e6f5dc 8166
48b0f3ae
PJ
8167This function looks up what specified for TARGET in,
8168`file-coding-system-alist', `process-coding-system-alist',
8169or `network-coding-system-alist' depending on OPERATION.
8170They may specify a coding system, a cons of coding systems,
8171or a function symbol to call.
8172In the last case, we call the function with one argument,
8173which is a list of all the arguments given to this function.
8174
8175usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
8176 (nargs, args)
4ed46869
KH
8177 int nargs;
8178 Lisp_Object *args;
6b89e3aa 8179{
4ed46869
KH
8180 Lisp_Object operation, target_idx, target, val;
8181 register Lisp_Object chain;
177c0ea7 8182
4ed46869
KH
8183 if (nargs < 2)
8184 error ("Too few arguments");
8185 operation = args[0];
8186 if (!SYMBOLP (operation)
8187 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 8188 error ("Invalid first arguement");
4ed46869
KH
8189 if (nargs < 1 + XINT (target_idx))
8190 error ("Too few arguments for operation: %s",
8f924df7 8191 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
8192 target = args[XINT (target_idx) + 1];
8193 if (!(STRINGP (target)
8194 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 8195 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 8196
2e34157c
RS
8197 chain = ((EQ (operation, Qinsert_file_contents)
8198 || EQ (operation, Qwrite_region))
02ba4723 8199 ? Vfile_coding_system_alist
2e34157c 8200 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
8201 ? Vnetwork_coding_system_alist
8202 : Vprocess_coding_system_alist));
4ed46869
KH
8203 if (NILP (chain))
8204 return Qnil;
8205
03699b14 8206 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 8207 {
f44d27ce 8208 Lisp_Object elt;
6b89e3aa 8209
df7492f9 8210 elt = XCAR (chain);
4ed46869
KH
8211 if (CONSP (elt)
8212 && ((STRINGP (target)
03699b14
KR
8213 && STRINGP (XCAR (elt))
8214 && fast_string_match (XCAR (elt), target) >= 0)
8215 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 8216 {
03699b14 8217 val = XCDR (elt);
b19fd4c5
KH
8218 /* Here, if VAL is both a valid coding system and a valid
8219 function symbol, we return VAL as a coding system. */
02ba4723
KH
8220 if (CONSP (val))
8221 return val;
8222 if (! SYMBOLP (val))
8223 return Qnil;
8224 if (! NILP (Fcoding_system_p (val)))
8225 return Fcons (val, val);
b19fd4c5 8226 if (! NILP (Ffboundp (val)))
6b89e3aa 8227 {
b19fd4c5
KH
8228 val = call1 (val, Flist (nargs, args));
8229 if (CONSP (val))
8230 return val;
8231 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8232 return Fcons (val, val);
6b89e3aa 8233 }
02ba4723 8234 return Qnil;
6b89e3aa
KH
8235 }
8236 }
4ed46869 8237 return Qnil;
6b89e3aa
KH
8238}
8239
df7492f9 8240DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 8241 Sset_coding_system_priority, 0, MANY, 0,
da7db224 8242 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 8243If multiple coding systems belongs to the same category,
a3181084
DL
8244all but the first one are ignored.
8245
8246usage: (set-coding-system-priority ...) */)
df7492f9
KH
8247 (nargs, args)
8248 int nargs;
8249 Lisp_Object *args;
8250{
8251 int i, j;
8252 int changed[coding_category_max];
8253 enum coding_category priorities[coding_category_max];
8254
8255 bzero (changed, sizeof changed);
6b89e3aa 8256
df7492f9 8257 for (i = j = 0; i < nargs; i++)
6b89e3aa 8258 {
df7492f9
KH
8259 enum coding_category category;
8260 Lisp_Object spec, attrs;
6b89e3aa 8261
df7492f9
KH
8262 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8263 attrs = AREF (spec, 0);
8264 category = XINT (CODING_ATTR_CATEGORY (attrs));
8265 if (changed[category])
8266 /* Ignore this coding system because a coding system of the
8267 same category already had a higher priority. */
8268 continue;
8269 changed[category] = 1;
8270 priorities[j++] = category;
8271 if (coding_categories[category].id >= 0
8272 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8273 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 8274 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 8275 }
6b89e3aa 8276
df7492f9
KH
8277 /* Now we have decided top J priorities. Reflect the order of the
8278 original priorities to the remaining priorities. */
6b89e3aa 8279
df7492f9 8280 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 8281 {
df7492f9
KH
8282 while (j < coding_category_max
8283 && changed[coding_priorities[j]])
8284 j++;
8285 if (j == coding_category_max)
8286 abort ();
8287 priorities[i] = coding_priorities[j];
8288 }
6b89e3aa 8289
df7492f9 8290 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 8291
ff563fce
KH
8292 /* Update `coding-category-list'. */
8293 Vcoding_category_list = Qnil;
8294 for (i = coding_category_max - 1; i >= 0; i--)
8295 Vcoding_category_list
8296 = Fcons (AREF (Vcoding_category_table, priorities[i]),
8297 Vcoding_category_list);
6b89e3aa 8298
df7492f9 8299 return Qnil;
6b89e3aa
KH
8300}
8301
df7492f9
KH
8302DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8303 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
8304 doc: /* Return a list of coding systems ordered by their priorities.
8305HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
8306 (highestp)
8307 Lisp_Object highestp;
d46c5b12
KH
8308{
8309 int i;
df7492f9 8310 Lisp_Object val;
6b89e3aa 8311
df7492f9 8312 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 8313 {
df7492f9
KH
8314 enum coding_category category = coding_priorities[i];
8315 int id = coding_categories[category].id;
8316 Lisp_Object attrs;
068a9dbd 8317
df7492f9
KH
8318 if (id < 0)
8319 continue;
8320 attrs = CODING_ID_ATTRS (id);
8321 if (! NILP (highestp))
8322 return CODING_ATTR_BASE_NAME (attrs);
8323 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8324 }
8325 return Fnreverse (val);
8326}
068a9dbd 8327
f0064e1f 8328static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
8329
8330static Lisp_Object
df7492f9
KH
8331make_subsidiaries (base)
8332 Lisp_Object base;
068a9dbd 8333{
df7492f9 8334 Lisp_Object subsidiaries;
8f924df7 8335 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
8336 char *buf = (char *) alloca (base_name_len + 6);
8337 int i;
068a9dbd 8338
8f924df7 8339 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
8340 subsidiaries = Fmake_vector (make_number (3), Qnil);
8341 for (i = 0; i < 3; i++)
068a9dbd 8342 {
df7492f9
KH
8343 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8344 ASET (subsidiaries, i, intern (buf));
068a9dbd 8345 }
df7492f9 8346 return subsidiaries;
068a9dbd
KH
8347}
8348
8349
df7492f9
KH
8350DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8351 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
8352 doc: /* For internal use only.
8353usage: (define-coding-system-internal ...) */)
df7492f9
KH
8354 (nargs, args)
8355 int nargs;
8356 Lisp_Object *args;
068a9dbd 8357{
df7492f9
KH
8358 Lisp_Object name;
8359 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
8360 Lisp_Object attrs; /* Vector of attributes. */
8361 Lisp_Object eol_type;
8362 Lisp_Object aliases;
8363 Lisp_Object coding_type, charset_list, safe_charsets;
8364 enum coding_category category;
8365 Lisp_Object tail, val;
8366 int max_charset_id = 0;
8367 int i;
068a9dbd 8368
df7492f9
KH
8369 if (nargs < coding_arg_max)
8370 goto short_args;
068a9dbd 8371
df7492f9 8372 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 8373
df7492f9
KH
8374 name = args[coding_arg_name];
8375 CHECK_SYMBOL (name);
8376 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8377
df7492f9
KH
8378 val = args[coding_arg_mnemonic];
8379 if (! STRINGP (val))
8380 CHECK_CHARACTER (val);
8381 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8382
df7492f9
KH
8383 coding_type = args[coding_arg_coding_type];
8384 CHECK_SYMBOL (coding_type);
8385 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8386
df7492f9
KH
8387 charset_list = args[coding_arg_charset_list];
8388 if (SYMBOLP (charset_list))
8389 {
8390 if (EQ (charset_list, Qiso_2022))
8391 {
8392 if (! EQ (coding_type, Qiso_2022))
8393 error ("Invalid charset-list");
8394 charset_list = Viso_2022_charset_list;
8395 }
8396 else if (EQ (charset_list, Qemacs_mule))
8397 {
8398 if (! EQ (coding_type, Qemacs_mule))
8399 error ("Invalid charset-list");
8400 charset_list = Vemacs_mule_charset_list;
8401 }
8402 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8403 if (max_charset_id < XFASTINT (XCAR (tail)))
8404 max_charset_id = XFASTINT (XCAR (tail));
8405 }
068a9dbd
KH
8406 else
8407 {
df7492f9
KH
8408 charset_list = Fcopy_sequence (charset_list);
8409 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
068a9dbd 8410 {
df7492f9
KH
8411 struct charset *charset;
8412
8413 val = Fcar (tail);
8414 CHECK_CHARSET_GET_CHARSET (val, charset);
8415 if (EQ (coding_type, Qiso_2022)
8416 ? CHARSET_ISO_FINAL (charset) < 0
8417 : EQ (coding_type, Qemacs_mule)
8418 ? CHARSET_EMACS_MULE_ID (charset) < 0
8419 : 0)
8420 error ("Can't handle charset `%s'",
8f924df7 8421 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8422
8f924df7 8423 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8424 if (max_charset_id < charset->id)
8425 max_charset_id = charset->id;
068a9dbd
KH
8426 }
8427 }
df7492f9 8428 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8429
df7492f9
KH
8430 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8431 make_number (255));
8432 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8433 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8434 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8435
584948ac 8436 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8437
df7492f9 8438 val = args[coding_arg_decode_translation_table];
a6f87d34 8439 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8440 CHECK_SYMBOL (val);
df7492f9 8441 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8442
df7492f9 8443 val = args[coding_arg_encode_translation_table];
a6f87d34 8444 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8445 CHECK_SYMBOL (val);
df7492f9 8446 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8447
df7492f9
KH
8448 val = args[coding_arg_post_read_conversion];
8449 CHECK_SYMBOL (val);
8450 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8451
df7492f9
KH
8452 val = args[coding_arg_pre_write_conversion];
8453 CHECK_SYMBOL (val);
8454 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 8455
df7492f9
KH
8456 val = args[coding_arg_default_char];
8457 if (NILP (val))
8458 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8459 else
8460 {
8f924df7 8461 CHECK_CHARACTER (val);
df7492f9
KH
8462 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8463 }
4031e2bf 8464
8f924df7
KH
8465 val = args[coding_arg_for_unibyte];
8466 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 8467
df7492f9
KH
8468 val = args[coding_arg_plist];
8469 CHECK_LIST (val);
8470 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 8471
df7492f9
KH
8472 if (EQ (coding_type, Qcharset))
8473 {
c7c66a95
KH
8474 /* Generate a lisp vector of 256 elements. Each element is nil,
8475 integer, or a list of charset IDs.
3a73fa5d 8476
c7c66a95
KH
8477 If Nth element is nil, the byte code N is invalid in this
8478 coding system.
4ed46869 8479
c7c66a95
KH
8480 If Nth element is a number NUM, N is the first byte of a
8481 charset whose ID is NUM.
4ed46869 8482
c7c66a95
KH
8483 If Nth element is a list of charset IDs, N is the first byte
8484 of one of them. The list is sorted by dimensions of the
2bc515e4 8485 charsets. A charset of smaller dimension comes firtst. */
df7492f9 8486 val = Fmake_vector (make_number (256), Qnil);
4ed46869 8487
5c99c2e6 8488 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 8489 {
c7c66a95
KH
8490 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8491 int dim = CHARSET_DIMENSION (charset);
8492 int idx = (dim - 1) * 4;
4ed46869 8493
5c99c2e6 8494 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 8495 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 8496
15d143f7
KH
8497 for (i = charset->code_space[idx];
8498 i <= charset->code_space[idx + 1]; i++)
8499 {
c7c66a95
KH
8500 Lisp_Object tmp, tmp2;
8501 int dim2;
ec6d2bb8 8502
c7c66a95
KH
8503 tmp = AREF (val, i);
8504 if (NILP (tmp))
8505 tmp = XCAR (tail);
8506 else if (NUMBERP (tmp))
8507 {
8508 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8509 if (dim < dim2)
c7c66a95 8510 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8511 else
8512 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8513 }
15d143f7 8514 else
c7c66a95
KH
8515 {
8516 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8517 {
8518 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8519 if (dim < dim2)
8520 break;
8521 }
8522 if (NILP (tmp2))
8523 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8524 else
8525 {
8526 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8527 XSETCAR (tmp2, XCAR (tail));
8528 }
8529 }
8530 ASET (val, i, tmp);
15d143f7 8531 }
df7492f9
KH
8532 }
8533 ASET (attrs, coding_attr_charset_valids, val);
8534 category = coding_category_charset;
8535 }
8536 else if (EQ (coding_type, Qccl))
8537 {
8538 Lisp_Object valids;
ecec61c1 8539
df7492f9
KH
8540 if (nargs < coding_arg_ccl_max)
8541 goto short_args;
ecec61c1 8542
df7492f9
KH
8543 val = args[coding_arg_ccl_decoder];
8544 CHECK_CCL_PROGRAM (val);
8545 if (VECTORP (val))
8546 val = Fcopy_sequence (val);
8547 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 8548
df7492f9
KH
8549 val = args[coding_arg_ccl_encoder];
8550 CHECK_CCL_PROGRAM (val);
8551 if (VECTORP (val))
8552 val = Fcopy_sequence (val);
8553 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 8554
df7492f9
KH
8555 val = args[coding_arg_ccl_valids];
8556 valids = Fmake_string (make_number (256), make_number (0));
8557 for (tail = val; !NILP (tail); tail = Fcdr (tail))
8558 {
8dcbea82 8559 int from, to;
ecec61c1 8560
df7492f9
KH
8561 val = Fcar (tail);
8562 if (INTEGERP (val))
8dcbea82
KH
8563 {
8564 from = to = XINT (val);
8565 if (from < 0 || from > 255)
8566 args_out_of_range_3 (val, make_number (0), make_number (255));
8567 }
df7492f9
KH
8568 else
8569 {
df7492f9 8570 CHECK_CONS (val);
8f924df7
KH
8571 CHECK_NATNUM_CAR (val);
8572 CHECK_NATNUM_CDR (val);
df7492f9 8573 from = XINT (XCAR (val));
8f924df7 8574 if (from > 255)
8dcbea82
KH
8575 args_out_of_range_3 (XCAR (val),
8576 make_number (0), make_number (255));
df7492f9 8577 to = XINT (XCDR (val));
8dcbea82
KH
8578 if (to < from || to > 255)
8579 args_out_of_range_3 (XCDR (val),
8580 XCAR (val), make_number (255));
df7492f9 8581 }
8dcbea82 8582 for (i = from; i <= to; i++)
8f924df7 8583 SSET (valids, i, 1);
df7492f9
KH
8584 }
8585 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 8586
df7492f9 8587 category = coding_category_ccl;
55ab7be3 8588 }
df7492f9 8589 else if (EQ (coding_type, Qutf_16))
55ab7be3 8590 {
df7492f9 8591 Lisp_Object bom, endian;
4ed46869 8592
584948ac 8593 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 8594
df7492f9
KH
8595 if (nargs < coding_arg_utf16_max)
8596 goto short_args;
4ed46869 8597
df7492f9
KH
8598 bom = args[coding_arg_utf16_bom];
8599 if (! NILP (bom) && ! EQ (bom, Qt))
8600 {
8601 CHECK_CONS (bom);
8f924df7
KH
8602 val = XCAR (bom);
8603 CHECK_CODING_SYSTEM (val);
8604 val = XCDR (bom);
8605 CHECK_CODING_SYSTEM (val);
df7492f9
KH
8606 }
8607 ASET (attrs, coding_attr_utf_16_bom, bom);
8608
8609 endian = args[coding_arg_utf16_endian];
b49a1807
KH
8610 CHECK_SYMBOL (endian);
8611 if (NILP (endian))
8612 endian = Qbig;
8613 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 8614 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
8615 ASET (attrs, coding_attr_utf_16_endian, endian);
8616
8617 category = (CONSP (bom)
8618 ? coding_category_utf_16_auto
8619 : NILP (bom)
b49a1807 8620 ? (EQ (endian, Qbig)
df7492f9
KH
8621 ? coding_category_utf_16_be_nosig
8622 : coding_category_utf_16_le_nosig)
b49a1807 8623 : (EQ (endian, Qbig)
df7492f9
KH
8624 ? coding_category_utf_16_be
8625 : coding_category_utf_16_le));
8626 }
8627 else if (EQ (coding_type, Qiso_2022))
8628 {
8629 Lisp_Object initial, reg_usage, request, flags;
4776e638 8630 int i;
1397dc18 8631
df7492f9
KH
8632 if (nargs < coding_arg_iso2022_max)
8633 goto short_args;
8634
8635 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8636 CHECK_VECTOR (initial);
8637 for (i = 0; i < 4; i++)
8638 {
8639 val = Faref (initial, make_number (i));
8640 if (! NILP (val))
8641 {
584948ac
KH
8642 struct charset *charset;
8643
8644 CHECK_CHARSET_GET_CHARSET (val, charset);
8645 ASET (initial, i, make_number (CHARSET_ID (charset)));
8646 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8647 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8648 }
8649 else
8650 ASET (initial, i, make_number (-1));
8651 }
8652
8653 reg_usage = args[coding_arg_iso2022_reg_usage];
8654 CHECK_CONS (reg_usage);
8f924df7
KH
8655 CHECK_NUMBER_CAR (reg_usage);
8656 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
8657
8658 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8659 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8660 {
df7492f9 8661 int id;
8f924df7 8662 Lisp_Object tmp;
df7492f9
KH
8663
8664 val = Fcar (tail);
8665 CHECK_CONS (val);
8f924df7
KH
8666 tmp = XCAR (val);
8667 CHECK_CHARSET_GET_ID (tmp, id);
8668 CHECK_NATNUM_CDR (val);
df7492f9
KH
8669 if (XINT (XCDR (val)) >= 4)
8670 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 8671 XSETCAR (val, make_number (id));
1397dc18 8672 }
4ed46869 8673
df7492f9
KH
8674 flags = args[coding_arg_iso2022_flags];
8675 CHECK_NATNUM (flags);
8676 i = XINT (flags);
8677 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8678 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8679
8680 ASET (attrs, coding_attr_iso_initial, initial);
8681 ASET (attrs, coding_attr_iso_usage, reg_usage);
8682 ASET (attrs, coding_attr_iso_request, request);
8683 ASET (attrs, coding_attr_iso_flags, flags);
8684 setup_iso_safe_charsets (attrs);
8685
8686 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8687 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8688 | CODING_ISO_FLAG_SINGLE_SHIFT))
8689 ? coding_category_iso_7_else
8690 : EQ (args[coding_arg_charset_list], Qiso_2022)
8691 ? coding_category_iso_7
8692 : coding_category_iso_7_tight);
8693 else
8694 {
8695 int id = XINT (AREF (initial, 1));
8696
c6fb6e98 8697 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8698 || EQ (args[coding_arg_charset_list], Qiso_2022)
8699 || id < 0)
8700 ? coding_category_iso_8_else
8701 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8702 ? coding_category_iso_8_1
8703 : coding_category_iso_8_2);
8704 }
0ce7886f
KH
8705 if (category != coding_category_iso_8_1
8706 && category != coding_category_iso_8_2)
8707 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8708 }
8709 else if (EQ (coding_type, Qemacs_mule))
c28a9453 8710 {
df7492f9
KH
8711 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8712 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8713 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 8714 category = coding_category_emacs_mule;
c28a9453 8715 }
df7492f9 8716 else if (EQ (coding_type, Qshift_jis))
c28a9453 8717 {
df7492f9
KH
8718
8719 struct charset *charset;
8720
7d64c6ad 8721 if (XINT (Flength (charset_list)) != 3
6e07c25f 8722 && XINT (Flength (charset_list)) != 4)
7d64c6ad 8723 error ("There should be three or four charsets");
df7492f9
KH
8724
8725 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8726 if (CHARSET_DIMENSION (charset) != 1)
8727 error ("Dimension of charset %s is not one",
8f924df7 8728 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8729 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8730 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8731
8732 charset_list = XCDR (charset_list);
8733 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8734 if (CHARSET_DIMENSION (charset) != 1)
8735 error ("Dimension of charset %s is not one",
8f924df7 8736 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
8737
8738 charset_list = XCDR (charset_list);
8739 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8740 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
8741 error ("Dimension of charset %s is not two",
8742 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8743
8744 charset_list = XCDR (charset_list);
2b917a06
KH
8745 if (! NILP (charset_list))
8746 {
8747 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8748 if (CHARSET_DIMENSION (charset) != 2)
8749 error ("Dimension of charset %s is not two",
8750 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8751 }
df7492f9
KH
8752
8753 category = coding_category_sjis;
8754 Vsjis_coding_system = name;
c28a9453 8755 }
df7492f9
KH
8756 else if (EQ (coding_type, Qbig5))
8757 {
8758 struct charset *charset;
4ed46869 8759
df7492f9
KH
8760 if (XINT (Flength (charset_list)) != 2)
8761 error ("There should be just two charsets");
8762
8763 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8764 if (CHARSET_DIMENSION (charset) != 1)
8765 error ("Dimension of charset %s is not one",
8f924df7 8766 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8767 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8768 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8769
8770 charset_list = XCDR (charset_list);
8771 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8772 if (CHARSET_DIMENSION (charset) != 2)
8773 error ("Dimension of charset %s is not two",
8f924df7 8774 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 8775
df7492f9
KH
8776 category = coding_category_big5;
8777 Vbig5_coding_system = name;
8778 }
8779 else if (EQ (coding_type, Qraw_text))
c28a9453 8780 {
584948ac
KH
8781 category = coding_category_raw_text;
8782 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 8783 }
df7492f9 8784 else if (EQ (coding_type, Qutf_8))
4ed46869 8785 {
584948ac
KH
8786 category = coding_category_utf_8;
8787 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 8788 }
df7492f9
KH
8789 else if (EQ (coding_type, Qundecided))
8790 category = coding_category_undecided;
4ed46869 8791 else
df7492f9 8792 error ("Invalid coding system type: %s",
8f924df7 8793 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 8794
df7492f9 8795 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
8796 CODING_ATTR_PLIST (attrs)
8797 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8798 CODING_ATTR_PLIST (attrs)));
c4825358 8799
df7492f9
KH
8800 eol_type = args[coding_arg_eol_type];
8801 if (! NILP (eol_type)
8802 && ! EQ (eol_type, Qunix)
8803 && ! EQ (eol_type, Qdos)
8804 && ! EQ (eol_type, Qmac))
8805 error ("Invalid eol-type");
4ed46869 8806
df7492f9 8807 aliases = Fcons (name, Qnil);
4ed46869 8808
df7492f9
KH
8809 if (NILP (eol_type))
8810 {
8811 eol_type = make_subsidiaries (name);
8812 for (i = 0; i < 3; i++)
1397dc18 8813 {
df7492f9
KH
8814 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8815
8816 this_name = AREF (eol_type, i);
8817 this_aliases = Fcons (this_name, Qnil);
8818 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8819 this_spec = Fmake_vector (make_number (3), attrs);
8820 ASET (this_spec, 1, this_aliases);
8821 ASET (this_spec, 2, this_eol_type);
8822 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8823 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8824 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8825 Vcoding_system_alist);
1397dc18 8826 }
d46c5b12 8827 }
4ed46869 8828
df7492f9
KH
8829 spec_vec = Fmake_vector (make_number (3), attrs);
8830 ASET (spec_vec, 1, aliases);
8831 ASET (spec_vec, 2, eol_type);
48b0f3ae 8832
df7492f9
KH
8833 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8834 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8835 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8836 Vcoding_system_alist);
48b0f3ae 8837
df7492f9
KH
8838 {
8839 int id = coding_categories[category].id;
48b0f3ae 8840
df7492f9
KH
8841 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8842 setup_coding_system (name, &coding_categories[category]);
8843 }
48b0f3ae 8844
d46c5b12 8845 return Qnil;
48b0f3ae 8846
df7492f9
KH
8847 short_args:
8848 return Fsignal (Qwrong_number_of_arguments,
8849 Fcons (intern ("define-coding-system-internal"),
8850 make_number (nargs)));
d46c5b12 8851}
4ed46869 8852
d6925f38 8853
a6f87d34
KH
8854DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
8855 3, 3, 0,
8856 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
8857 (coding_system, prop, val)
8858 Lisp_Object coding_system, prop, val;
8859{
3dbe7859 8860 Lisp_Object spec, attrs;
a6f87d34
KH
8861
8862 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8863 attrs = AREF (spec, 0);
8864 if (EQ (prop, QCmnemonic))
8865 {
8866 if (! STRINGP (val))
8867 CHECK_CHARACTER (val);
8868 CODING_ATTR_MNEMONIC (attrs) = val;
8869 }
8870 else if (EQ (prop, QCdefalut_char))
8871 {
8872 if (NILP (val))
8873 val = make_number (' ');
8874 else
8875 CHECK_CHARACTER (val);
8876 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8877 }
8878 else if (EQ (prop, QCdecode_translation_table))
8879 {
8880 if (! CHAR_TABLE_P (val) && ! CONSP (val))
8881 CHECK_SYMBOL (val);
8882 CODING_ATTR_DECODE_TBL (attrs) = val;
8883 }
8884 else if (EQ (prop, QCencode_translation_table))
8885 {
8886 if (! CHAR_TABLE_P (val) && ! CONSP (val))
8887 CHECK_SYMBOL (val);
8888 CODING_ATTR_ENCODE_TBL (attrs) = val;
8889 }
8890 else if (EQ (prop, QCpost_read_conversion))
8891 {
8892 CHECK_SYMBOL (val);
8893 CODING_ATTR_POST_READ (attrs) = val;
8894 }
8895 else if (EQ (prop, QCpre_write_conversion))
8896 {
8897 CHECK_SYMBOL (val);
8898 CODING_ATTR_PRE_WRITE (attrs) = val;
8899 }
8900
8901 CODING_ATTR_PLIST (attrs)
8902 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
8903 return val;
8904}
8905
8906
df7492f9
KH
8907DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8908 Sdefine_coding_system_alias, 2, 2, 0,
8909 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8910 (alias, coding_system)
8911 Lisp_Object alias, coding_system;
66cfb530 8912{
df7492f9 8913 Lisp_Object spec, aliases, eol_type;
4ed46869 8914
df7492f9
KH
8915 CHECK_SYMBOL (alias);
8916 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8917 aliases = AREF (spec, 1);
d6925f38
KH
8918 /* ALISES should be a list of length more than zero, and the first
8919 element is a base coding system. Append ALIAS at the tail of the
8920 list. */
df7492f9
KH
8921 while (!NILP (XCDR (aliases)))
8922 aliases = XCDR (aliases);
8f924df7 8923 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 8924
df7492f9
KH
8925 eol_type = AREF (spec, 2);
8926 if (VECTORP (eol_type))
4ed46869 8927 {
df7492f9
KH
8928 Lisp_Object subsidiaries;
8929 int i;
4ed46869 8930
df7492f9
KH
8931 subsidiaries = make_subsidiaries (alias);
8932 for (i = 0; i < 3; i++)
8933 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8934 AREF (eol_type, i));
4ed46869 8935 }
df7492f9
KH
8936
8937 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 8938 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
5bad0796
DL
8939 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8940 Vcoding_system_alist);
66cfb530 8941
4ed46869
KH
8942 return Qnil;
8943}
8944
df7492f9
KH
8945DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8946 1, 1, 0,
8947 doc: /* Return the base of CODING-SYSTEM.
da7db224 8948Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
8949 (coding_system)
8950 Lisp_Object coding_system;
d46c5b12 8951{
df7492f9 8952 Lisp_Object spec, attrs;
d46c5b12 8953
df7492f9
KH
8954 if (NILP (coding_system))
8955 return (Qno_conversion);
8956 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8957 attrs = AREF (spec, 0);
8958 return CODING_ATTR_BASE_NAME (attrs);
8959}
1397dc18 8960
df7492f9
KH
8961DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8962 1, 1, 0,
8963 doc: "Return the property list of CODING-SYSTEM.")
8964 (coding_system)
8965 Lisp_Object coding_system;
8966{
8967 Lisp_Object spec, attrs;
1397dc18 8968
df7492f9
KH
8969 if (NILP (coding_system))
8970 coding_system = Qno_conversion;
8971 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8972 attrs = AREF (spec, 0);
8973 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
8974}
8975
df7492f9
KH
8976
8977DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8978 1, 1, 0,
da7db224 8979 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
8980 (coding_system)
8981 Lisp_Object coding_system;
66cfb530 8982{
df7492f9 8983 Lisp_Object spec;
84d60297 8984
df7492f9
KH
8985 if (NILP (coding_system))
8986 coding_system = Qno_conversion;
8987 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 8988 return AREF (spec, 1);
df7492f9 8989}
66cfb530 8990
df7492f9
KH
8991DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8992 Scoding_system_eol_type, 1, 1, 0,
8993 doc: /* Return eol-type of CODING-SYSTEM.
8994An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 8995
df7492f9
KH
8996Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8997and CR respectively.
66cfb530 8998
df7492f9
KH
8999A vector value indicates that a format of end-of-line should be
9000detected automatically. Nth element of the vector is the subsidiary
9001coding system whose eol-type is N. */)
6b89e3aa
KH
9002 (coding_system)
9003 Lisp_Object coding_system;
9004{
df7492f9
KH
9005 Lisp_Object spec, eol_type;
9006 int n;
6b89e3aa 9007
df7492f9
KH
9008 if (NILP (coding_system))
9009 coding_system = Qno_conversion;
9010 if (! CODING_SYSTEM_P (coding_system))
9011 return Qnil;
9012 spec = CODING_SYSTEM_SPEC (coding_system);
9013 eol_type = AREF (spec, 2);
9014 if (VECTORP (eol_type))
9015 return Fcopy_sequence (eol_type);
9016 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9017 return make_number (n);
6b89e3aa
KH
9018}
9019
4ed46869
KH
9020#endif /* emacs */
9021
9022\f
1397dc18 9023/*** 12. Postamble ***/
4ed46869 9024
dfcf069d 9025void
4ed46869
KH
9026init_coding_once ()
9027{
9028 int i;
9029
df7492f9
KH
9030 for (i = 0; i < coding_category_max; i++)
9031 {
9032 coding_categories[i].id = -1;
9033 coding_priorities[i] = i;
9034 }
4ed46869
KH
9035
9036 /* ISO2022 specific initialize routine. */
9037 for (i = 0; i < 0x20; i++)
b73bfc1c 9038 iso_code_class[i] = ISO_control_0;
4ed46869
KH
9039 for (i = 0x21; i < 0x7F; i++)
9040 iso_code_class[i] = ISO_graphic_plane_0;
9041 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 9042 iso_code_class[i] = ISO_control_1;
4ed46869
KH
9043 for (i = 0xA1; i < 0xFF; i++)
9044 iso_code_class[i] = ISO_graphic_plane_1;
9045 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9046 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
9047 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9048 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9049 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9050 iso_code_class[ISO_CODE_ESC] = ISO_escape;
9051 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9052 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9053 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9054
df7492f9
KH
9055 for (i = 0; i < 256; i++)
9056 {
9057 emacs_mule_bytes[i] = 1;
9058 }
7c78e542
KH
9059 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9060 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9061 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9062 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
9063}
9064
9065#ifdef emacs
9066
dfcf069d 9067void
e0e989f6
KH
9068syms_of_coding ()
9069{
df7492f9 9070 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
9071 {
9072 Lisp_Object args[2];
9073 args[0] = QCtest;
9074 args[1] = Qeq;
9075 Vcoding_system_hash_table = Fmake_hash_table (2, args);
9076 }
df7492f9
KH
9077
9078 staticpro (&Vsjis_coding_system);
9079 Vsjis_coding_system = Qnil;
e0e989f6 9080
df7492f9
KH
9081 staticpro (&Vbig5_coding_system);
9082 Vbig5_coding_system = Qnil;
9083
24a73b0a
KH
9084 staticpro (&Vcode_conversion_reused_workbuf);
9085 Vcode_conversion_reused_workbuf = Qnil;
9086
9087 staticpro (&Vcode_conversion_workbuf_name);
9088 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
e0e989f6 9089
24a73b0a 9090 reused_workbuf_in_use = 0;
df7492f9
KH
9091
9092 DEFSYM (Qcharset, "charset");
9093 DEFSYM (Qtarget_idx, "target-idx");
9094 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
9095 Fset (Qcoding_system_history, Qnil);
9096
9ce27fde 9097 /* Target FILENAME is the first argument. */
e0e989f6 9098 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 9099 /* Target FILENAME is the third argument. */
e0e989f6
KH
9100 Fput (Qwrite_region, Qtarget_idx, make_number (2));
9101
df7492f9 9102 DEFSYM (Qcall_process, "call-process");
9ce27fde 9103 /* Target PROGRAM is the first argument. */
e0e989f6
KH
9104 Fput (Qcall_process, Qtarget_idx, make_number (0));
9105
df7492f9 9106 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 9107 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9108 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9109
df7492f9 9110 DEFSYM (Qstart_process, "start-process");
9ce27fde 9111 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9112 Fput (Qstart_process, Qtarget_idx, make_number (2));
9113
df7492f9 9114 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 9115 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
9116 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9117
df7492f9
KH
9118 DEFSYM (Qcoding_system, "coding-system");
9119 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 9120
df7492f9
KH
9121 DEFSYM (Qeol_type, "eol-type");
9122 DEFSYM (Qunix, "unix");
9123 DEFSYM (Qdos, "dos");
4ed46869 9124
df7492f9
KH
9125 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9126 DEFSYM (Qpost_read_conversion, "post-read-conversion");
9127 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9128 DEFSYM (Qdefault_char, "default-char");
9129 DEFSYM (Qundecided, "undecided");
9130 DEFSYM (Qno_conversion, "no-conversion");
9131 DEFSYM (Qraw_text, "raw-text");
4ed46869 9132
df7492f9 9133 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 9134
df7492f9 9135 DEFSYM (Qutf_8, "utf-8");
8f924df7 9136 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 9137
df7492f9 9138 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
9139 DEFSYM (Qbig, "big");
9140 DEFSYM (Qlittle, "little");
27901516 9141
df7492f9
KH
9142 DEFSYM (Qshift_jis, "shift-jis");
9143 DEFSYM (Qbig5, "big5");
4ed46869 9144
df7492f9 9145 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 9146
df7492f9 9147 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
9148 Fput (Qcoding_system_error, Qerror_conditions,
9149 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9150 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 9151 build_string ("Invalid coding system"));
4ed46869 9152
05e6f5dc
KH
9153 /* Intern this now in case it isn't already done.
9154 Setting this variable twice is harmless.
9155 But don't staticpro it here--that is done in alloc.c. */
9156 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 9157
df7492f9 9158 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 9159 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
9160 DEFSYM (Qtranslation_table_id, "translation-table-id");
9161 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9162 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 9163
df7492f9 9164 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 9165
df7492f9 9166 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 9167
01378f49 9168 DEFSYM (QCcategory, ":category");
a6f87d34
KH
9169 DEFSYM (QCmnemonic, ":mnemonic");
9170 DEFSYM (QCdefalut_char, ":default-char");
9171 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9172 DEFSYM (QCencode_translation_table, ":encode-translation-table");
9173 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9174 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
01378f49 9175
df7492f9
KH
9176 Vcoding_category_table
9177 = Fmake_vector (make_number (coding_category_max), Qnil);
9178 staticpro (&Vcoding_category_table);
9179 /* The following are targets of code detection. */
9180 ASET (Vcoding_category_table, coding_category_iso_7,
9181 intern ("coding-category-iso-7"));
9182 ASET (Vcoding_category_table, coding_category_iso_7_tight,
9183 intern ("coding-category-iso-7-tight"));
9184 ASET (Vcoding_category_table, coding_category_iso_8_1,
9185 intern ("coding-category-iso-8-1"));
9186 ASET (Vcoding_category_table, coding_category_iso_8_2,
9187 intern ("coding-category-iso-8-2"));
9188 ASET (Vcoding_category_table, coding_category_iso_7_else,
9189 intern ("coding-category-iso-7-else"));
9190 ASET (Vcoding_category_table, coding_category_iso_8_else,
9191 intern ("coding-category-iso-8-else"));
9192 ASET (Vcoding_category_table, coding_category_utf_8,
9193 intern ("coding-category-utf-8"));
9194 ASET (Vcoding_category_table, coding_category_utf_16_be,
9195 intern ("coding-category-utf-16-be"));
ff563fce
KH
9196 ASET (Vcoding_category_table, coding_category_utf_16_auto,
9197 intern ("coding-category-utf-16-auto"));
df7492f9
KH
9198 ASET (Vcoding_category_table, coding_category_utf_16_le,
9199 intern ("coding-category-utf-16-le"));
9200 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9201 intern ("coding-category-utf-16-be-nosig"));
9202 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9203 intern ("coding-category-utf-16-le-nosig"));
9204 ASET (Vcoding_category_table, coding_category_charset,
9205 intern ("coding-category-charset"));
9206 ASET (Vcoding_category_table, coding_category_sjis,
9207 intern ("coding-category-sjis"));
9208 ASET (Vcoding_category_table, coding_category_big5,
9209 intern ("coding-category-big5"));
9210 ASET (Vcoding_category_table, coding_category_ccl,
9211 intern ("coding-category-ccl"));
9212 ASET (Vcoding_category_table, coding_category_emacs_mule,
9213 intern ("coding-category-emacs-mule"));
9214 /* The following are NOT targets of code detection. */
9215 ASET (Vcoding_category_table, coding_category_raw_text,
9216 intern ("coding-category-raw-text"));
9217 ASET (Vcoding_category_table, coding_category_undecided,
9218 intern ("coding-category-undecided"));
ecf488bc 9219
065e3595
KH
9220 DEFSYM (Qinsufficient_source, "insufficient-source");
9221 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9222 DEFSYM (Qinvalid_source, "invalid-source");
9223 DEFSYM (Qinterrupted, "interrupted");
9224 DEFSYM (Qinsufficient_memory, "insufficient-memory");
9225
4ed46869
KH
9226 defsubr (&Scoding_system_p);
9227 defsubr (&Sread_coding_system);
9228 defsubr (&Sread_non_nil_coding_system);
9229 defsubr (&Scheck_coding_system);
9230 defsubr (&Sdetect_coding_region);
d46c5b12 9231 defsubr (&Sdetect_coding_string);
05e6f5dc 9232 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 9233 defsubr (&Sunencodable_char_position);
df7492f9 9234 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
9235 defsubr (&Sdecode_coding_region);
9236 defsubr (&Sencode_coding_region);
9237 defsubr (&Sdecode_coding_string);
9238 defsubr (&Sencode_coding_string);
9239 defsubr (&Sdecode_sjis_char);
9240 defsubr (&Sencode_sjis_char);
9241 defsubr (&Sdecode_big5_char);
9242 defsubr (&Sencode_big5_char);
1ba9e4ab 9243 defsubr (&Sset_terminal_coding_system_internal);
c4825358 9244 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 9245 defsubr (&Sterminal_coding_system);
1ba9e4ab 9246 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 9247 defsubr (&Skeyboard_coding_system);
a5d301df 9248 defsubr (&Sfind_operation_coding_system);
df7492f9 9249 defsubr (&Sset_coding_system_priority);
6b89e3aa 9250 defsubr (&Sdefine_coding_system_internal);
df7492f9 9251 defsubr (&Sdefine_coding_system_alias);
a6f87d34 9252 defsubr (&Scoding_system_put);
df7492f9
KH
9253 defsubr (&Scoding_system_base);
9254 defsubr (&Scoding_system_plist);
9255 defsubr (&Scoding_system_aliases);
9256 defsubr (&Scoding_system_eol_type);
9257 defsubr (&Scoding_system_priority_list);
4ed46869 9258
4608c386 9259 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
9260 doc: /* List of coding systems.
9261
9262Do not alter the value of this variable manually. This variable should be
df7492f9 9263updated by the functions `define-coding-system' and
48b0f3ae 9264`define-coding-system-alias'. */);
4608c386
KH
9265 Vcoding_system_list = Qnil;
9266
9267 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
9268 doc: /* Alist of coding system names.
9269Each element is a one-element list containing a coding system name.
9270This variable is given to `completing-read' as TABLE argument.
9271
9272Do not alter the value of this variable manually. This variable should be
9273updated by the functions `make-coding-system' and
9274`define-coding-system-alias'. */);
4608c386
KH
9275 Vcoding_system_alist = Qnil;
9276
4ed46869 9277 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
9278 doc: /* List of coding-categories (symbols) ordered by priority.
9279
9280On detecting a coding system, Emacs tries code detection algorithms
9281associated with each coding-category one by one in this order. When
9282one algorithm agrees with a byte sequence of source text, the coding
9283system bound to the corresponding coding-category is selected. */);
4ed46869
KH
9284 {
9285 int i;
9286
9287 Vcoding_category_list = Qnil;
df7492f9 9288 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 9289 Vcoding_category_list
d46c5b12
KH
9290 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9291 Vcoding_category_list);
4ed46869
KH
9292 }
9293
9294 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
9295 doc: /* Specify the coding system for read operations.
9296It is useful to bind this variable with `let', but do not set it globally.
9297If the value is a coding system, it is used for decoding on read operation.
9298If not, an appropriate element is used from one of the coding system alists:
9299There are three such tables, `file-coding-system-alist',
9300`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
9301 Vcoding_system_for_read = Qnil;
9302
9303 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
9304 doc: /* Specify the coding system for write operations.
9305Programs bind this variable with `let', but you should not set it globally.
9306If the value is a coding system, it is used for encoding of output,
9307when writing it to a file and when sending it to a file or subprocess.
9308
9309If this does not specify a coding system, an appropriate element
9310is used from one of the coding system alists:
9311There are three such tables, `file-coding-system-alist',
9312`process-coding-system-alist', and `network-coding-system-alist'.
9313For output to files, if the above procedure does not specify a coding system,
9314the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
9315 Vcoding_system_for_write = Qnil;
9316
9317 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
9318 doc: /*
9319Coding system used in the latest file or process I/O. */);
4ed46869
KH
9320 Vlast_coding_system_used = Qnil;
9321
065e3595
KH
9322 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9323 doc: /*
9324Error status of the last code conversion.
9325
9326When an error was detected in the last code conversion, this variable
9327is set to one of the following symbols.
9328 `insufficient-source'
9329 `inconsistent-eol'
9330 `invalid-source'
9331 `interrupted'
9332 `insufficient-memory'
9333When no error was detected, the value doesn't change. So, to check
9334the error status of a code conversion by this variable, you must
9335explicitly set this variable to nil before performing code
9336conversion. */);
9337 Vlast_code_conversion_error = Qnil;
9338
9ce27fde 9339 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
9340 doc: /*
9341*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
9342See info node `Coding Systems' and info node `Text and Binary' concerning
9343such conversion. */);
9ce27fde
KH
9344 inhibit_eol_conversion = 0;
9345
ed29121d 9346 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
9347 doc: /*
9348Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
9349Bind it to t if the process output is to be treated as if it were a file
9350read from some filesystem. */);
ed29121d
EZ
9351 inherit_process_coding_system = 0;
9352
02ba4723 9353 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
9354 doc: /*
9355Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
9356The format is ((PATTERN . VAL) ...),
9357where PATTERN is a regular expression matching a file name,
9358VAL is a coding system, a cons of coding systems, or a function symbol.
9359If VAL is a coding system, it is used for both decoding and encoding
9360the file contents.
9361If VAL is a cons of coding systems, the car part is used for decoding,
9362and the cdr part is used for encoding.
9363If VAL is a function symbol, the function must return a coding system
0192762c
DL
9364or a cons of coding systems which are used as above. The function gets
9365the arguments with which `find-operation-coding-system' was called.
48b0f3ae
PJ
9366
9367See also the function `find-operation-coding-system'
9368and the variable `auto-coding-alist'. */);
02ba4723
KH
9369 Vfile_coding_system_alist = Qnil;
9370
9371 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
9372 doc: /*
9373Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
9374The format is ((PATTERN . VAL) ...),
9375where PATTERN is a regular expression matching a program name,
9376VAL is a coding system, a cons of coding systems, or a function symbol.
9377If VAL is a coding system, it is used for both decoding what received
9378from the program and encoding what sent to the program.
9379If VAL is a cons of coding systems, the car part is used for decoding,
9380and the cdr part is used for encoding.
9381If VAL is a function symbol, the function must return a coding system
9382or a cons of coding systems which are used as above.
9383
9384See also the function `find-operation-coding-system'. */);
02ba4723
KH
9385 Vprocess_coding_system_alist = Qnil;
9386
9387 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
9388 doc: /*
9389Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
9390The format is ((PATTERN . VAL) ...),
9391where PATTERN is a regular expression matching a network service name
9392or is a port number to connect to,
9393VAL is a coding system, a cons of coding systems, or a function symbol.
9394If VAL is a coding system, it is used for both decoding what received
9395from the network stream and encoding what sent to the network stream.
9396If VAL is a cons of coding systems, the car part is used for decoding,
9397and the cdr part is used for encoding.
9398If VAL is a function symbol, the function must return a coding system
9399or a cons of coding systems which are used as above.
9400
9401See also the function `find-operation-coding-system'. */);
02ba4723 9402 Vnetwork_coding_system_alist = Qnil;
4ed46869 9403
68c45bf0 9404 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
9405 doc: /* Coding system to use with system messages.
9406Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
9407 Vlocale_coding_system = Qnil;
9408
005f0d35 9409 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 9410 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
9411 doc: /*
9412*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 9413 eol_mnemonic_unix = build_string (":");
4ed46869 9414
7722baf9 9415 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
9416 doc: /*
9417*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 9418 eol_mnemonic_dos = build_string ("\\");
4ed46869 9419
7722baf9 9420 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
9421 doc: /*
9422*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 9423 eol_mnemonic_mac = build_string ("/");
4ed46869 9424
7722baf9 9425 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
9426 doc: /*
9427*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 9428 eol_mnemonic_undecided = build_string (":");
4ed46869 9429
84fbb8a0 9430 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
9431 doc: /*
9432*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 9433 Venable_character_translation = Qt;
bdd9fb48 9434
f967223b 9435 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
9436 &Vstandard_translation_table_for_decode,
9437 doc: /* Table for translating characters while decoding. */);
f967223b 9438 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 9439
f967223b 9440 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
9441 &Vstandard_translation_table_for_encode,
9442 doc: /* Table for translating characters while encoding. */);
f967223b 9443 Vstandard_translation_table_for_encode = Qnil;
4ed46869 9444
df7492f9 9445 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
9446 doc: /* Alist of charsets vs revision numbers.
9447While encoding, if a charset (car part of an element) is found,
df7492f9
KH
9448designate it with the escape sequence identifying revision (cdr part
9449of the element). */);
9450 Vcharset_revision_table = Qnil;
02ba4723
KH
9451
9452 DEFVAR_LISP ("default-process-coding-system",
9453 &Vdefault_process_coding_system,
48b0f3ae
PJ
9454 doc: /* Cons of coding systems used for process I/O by default.
9455The car part is used for decoding a process output,
9456the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 9457 Vdefault_process_coding_system = Qnil;
c4825358 9458
3f003981 9459 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
9460 doc: /*
9461Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
9462This is a vector of length 256.
9463If Nth element is non-nil, the existence of code N in a file
9464\(or output of subprocess) doesn't prevent it from being detected as
9465a coding system of ISO 2022 variant which has a flag
9466`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9467or reading output of a subprocess.
9468Only the 128th through 159th elements have a meaning. */);
3f003981 9469 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
9470
9471 DEFVAR_LISP ("select-safe-coding-system-function",
9472 &Vselect_safe_coding_system_function,
df7492f9
KH
9473 doc: /*
9474Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
9475
9476If set, this function is called to force a user to select a proper
9477coding system which can encode the text in the case that a default
9478coding system used in each operation can't encode the text.
9479
9480The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
9481 Vselect_safe_coding_system_function = Qnil;
9482
5d5bf4d8
KH
9483 DEFVAR_BOOL ("coding-system-require-warning",
9484 &coding_system_require_warning,
9485 doc: /* Internal use only.
6b89e3aa
KH
9486If non-nil, on writing a file, `select-safe-coding-system-function' is
9487called even if `coding-system-for-write' is non-nil. The command
9488`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
9489 coding_system_require_warning = 0;
9490
9491
22ab2303 9492 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 9493 &inhibit_iso_escape_detection,
df7492f9
KH
9494 doc: /*
9495If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
9496
9497By default, on reading a file, Emacs tries to detect how the text is
9498encoded. This code detection is sensitive to escape sequences. If
9499the sequence is valid as ISO2022, the code is determined as one of
9500the ISO2022 encodings, and the file is decoded by the corresponding
9501coding system (e.g. `iso-2022-7bit').
9502
9503However, there may be a case that you want to read escape sequences in
9504a file as is. In such a case, you can set this variable to non-nil.
9505Then, as the code detection ignores any escape sequences, no file is
9506detected as encoded in some ISO2022 encoding. The result is that all
9507escape sequences become visible in a buffer.
9508
9509The default value is nil, and it is strongly recommended not to change
9510it. That is because many Emacs Lisp source files that contain
9511non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9512in Emacs's distribution, and they won't be decoded correctly on
9513reading if you suppress escape sequence detection.
9514
9515The other way to read escape sequences in a file without decoding is
9516to explicitly specify some coding system that doesn't use ISO2022's
9517escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 9518 inhibit_iso_escape_detection = 0;
002fdb44
DL
9519
9520 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
9521 doc: /* Char table for translating self-inserting characters.
9522This is applied to the result of input methods, not their input. See also
9523`keyboard-translate-table'. */);
002fdb44 9524 Vtranslation_table_for_input = Qnil;
8f924df7 9525
2c78b7e1
KH
9526 {
9527 Lisp_Object args[coding_arg_max];
8f924df7 9528 Lisp_Object plist[16];
2c78b7e1
KH
9529 int i;
9530
9531 for (i = 0; i < coding_arg_max; i++)
9532 args[i] = Qnil;
9533
9534 plist[0] = intern (":name");
9535 plist[1] = args[coding_arg_name] = Qno_conversion;
9536 plist[2] = intern (":mnemonic");
9537 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9538 plist[4] = intern (":coding-type");
9539 plist[5] = args[coding_arg_coding_type] = Qraw_text;
9540 plist[6] = intern (":ascii-compatible-p");
9541 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9542 plist[8] = intern (":default-char");
9543 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
9544 plist[10] = intern (":for-unibyte");
9545 plist[11] = args[coding_arg_for_unibyte] = Qt;
9546 plist[12] = intern (":docstring");
9547 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
9548\n\
9549When you visit a file with this coding, the file is read into a\n\
9550unibyte buffer as is, thus each byte of a file is treated as a\n\
9551character.");
8f924df7
KH
9552 plist[14] = intern (":eol-type");
9553 plist[15] = args[coding_arg_eol_type] = Qunix;
9554 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1
KH
9555 Fdefine_coding_system_internal (coding_arg_max, args);
9556 }
9557
9558 setup_coding_system (Qno_conversion, &keyboard_coding);
9559 setup_coding_system (Qno_conversion, &terminal_coding);
9560 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
9561
9562 {
9563 int i;
9564
9565 for (i = 0; i < coding_category_max; i++)
9566 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9567 }
4ed46869
KH
9568}
9569
68c45bf0
PE
9570char *
9571emacs_strerror (error_number)
9572 int error_number;
9573{
9574 char *str;
9575
ca9c0567 9576 synchronize_system_messages_locale ();
68c45bf0
PE
9577 str = strerror (error_number);
9578
9579 if (! NILP (Vlocale_coding_system))
9580 {
9581 Lisp_Object dec = code_convert_string_norecord (build_string (str),
9582 Vlocale_coding_system,
9583 0);
d5db4077 9584 str = (char *) SDATA (dec);
68c45bf0
PE
9585 }
9586
9587 return str;
9588}
9589
4ed46869 9590#endif /* emacs */
9ffd559c
KH
9591
9592/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
9593 (do not change this comment) */