*** empty log message ***
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
5a936b46 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we set up a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
4ed46869 154#if 0
df7492f9 155static int
ff0dacd7 156detect_coding_XXX (coding, detect_info)
df7492f9 157 struct coding_system *coding;
ff0dacd7 158 struct coding_detection_info *detect_info;
4ed46869 159{
df7492f9
KH
160 unsigned char *src = coding->source;
161 unsigned char *src_end = coding->source + coding->src_bytes;
162 int multibytep = coding->src_multibyte;
ff0dacd7 163 int consumed_chars = 0;
df7492f9
KH
164 int found = 0;
165 ...;
166
167 while (1)
168 {
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
171 ONE_MORE_BYTE (c);
ff0dacd7
KH
172
173 if (! __C_conforms_to_XXX___ (c))
174 break;
175 if (! __C_strongly_suggests_XXX__ (c))
176 found = CATEGORY_MASK_XXX;
df7492f9 177 }
ff0dacd7
KH
178 /* The byte sequence is invalid for XXX. */
179 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 180 return 0;
ff0dacd7 181
df7492f9 182 no_more_source:
ff0dacd7
KH
183 /* The source was exhausted successfully. */
184 detect_info->found |= found;
df7492f9 185 return 1;
4ed46869
KH
186}
187#endif
188
189/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
df7492f9
KH
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
d46c5b12 195
df7492f9
KH
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
d46c5b12 200
df7492f9 201 Below is the template of these functions. */
d46c5b12 202
4ed46869 203#if 0
b73bfc1c 204static void
df7492f9 205decode_coding_XXXX (coding)
4ed46869 206 struct coding_system *coding;
4ed46869 207{
df7492f9
KH
208 unsigned char *src = coding->source + coding->consumed;
209 unsigned char *src_end = coding->source + coding->src_bytes;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base;
214 /* A buffer to produce decoded characters. */
215 int *charbuf = coding->charbuf;
216 int *charbuf_end = charbuf + coding->charbuf_size;
217 int multibytep = coding->src_multibyte;
218
219 while (1)
220 {
221 src_base = src;
222 if (charbuf >= charbuf_end)
223 /* No more room to produce a decoded character. */
224 break;
225 ONE_MORE_BYTE (c);
226 /* Decode it. */
227 }
228
229 no_more_source:
230 if (src_base < src_end
231 && coding->mode & CODING_MODE_LAST_BLOCK)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base < src_end && charbuf < charbuf_end)
235 *charbuf++ = *src_base++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding->consumed = coding->consumed_char = src_base - coding->source;
239 /* Remember how many characters we produced. */
240 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
241}
242#endif
243
244/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
df7492f9
KH
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
d46c5b12 250
df7492f9
KH
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 255
df7492f9
KH
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of not-yet-encoded source text.
d46c5b12 259
df7492f9 260 Below is a template of these functions. */
4ed46869 261#if 0
b73bfc1c 262static void
df7492f9 263encode_coding_XXX (coding)
4ed46869 264 struct coding_system *coding;
4ed46869 265{
df7492f9
KH
266 int multibytep = coding->dst_multibyte;
267 int *charbuf = coding->charbuf;
268 int *charbuf_end = coding->charbuf + coding->charbuf_used;
269 unsigned char *dst = coding->destination + coding->produced;
270 unsigned char *dst_end = coding->destination + coding->dst_bytes;
271 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
272 int produced_chars = 0;
273
274 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
275 {
276 int c = *charbuf;
277 /* Encode C into DST, and increment DST. */
278 }
279 label_no_more_destination:
280 /* How many chars and bytes we produced. */
281 coding->produced_char += produced_chars;
282 coding->produced = dst - coding->destination;
4ed46869
KH
283}
284#endif
285
4ed46869
KH
286\f
287/*** 1. Preamble ***/
288
68c45bf0 289#include <config.h>
4ed46869
KH
290#include <stdio.h>
291
4ed46869
KH
292#include "lisp.h"
293#include "buffer.h"
df7492f9 294#include "character.h"
4ed46869
KH
295#include "charset.h"
296#include "ccl.h"
df7492f9 297#include "composite.h"
4ed46869
KH
298#include "coding.h"
299#include "window.h"
300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
304Lisp_Object Qunix, Qdos;
305extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
306Lisp_Object Qbuffer_file_coding_system;
307Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 308Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
df7492f9 310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
df7492f9 311Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 312Lisp_Object Qcoding_system_history;
1397dc18 313Lisp_Object Qvalid_codes;
4ed46869
KH
314
315extern Lisp_Object Qinsert_file_contents, Qwrite_region;
316Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
317Lisp_Object Qstart_process, Qopen_network_stream;
318Lisp_Object Qtarget_idx;
319
d46c5b12
KH
320Lisp_Object Vselect_safe_coding_system_function;
321
7722baf9
EZ
322/* Mnemonic string for each format of end-of-line. */
323Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
324/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 325 decided. */
7722baf9 326Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
327
328#ifdef emacs
329
4608c386
KH
330Lisp_Object Vcoding_system_list, Vcoding_system_alist;
331
332Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 333
d46c5b12
KH
334/* Coding system emacs-mule and raw-text are for converting only
335 end-of-line format. */
336Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 337
4ed46869
KH
338/* Coding-systems are handed between Emacs Lisp programs and C internal
339 routines by the following three variables. */
340/* Coding-system for reading files and receiving data from process. */
341Lisp_Object Vcoding_system_for_read;
342/* Coding-system for writing files and sending data to process. */
343Lisp_Object Vcoding_system_for_write;
344/* Coding-system actually used in the latest I/O. */
345Lisp_Object Vlast_coding_system_used;
346
c4825358 347/* A vector of length 256 which contains information about special
94487c4e 348 Latin codes (especially for dealing with Microsoft codes). */
3f003981 349Lisp_Object Vlatin_extra_code_table;
c4825358 350
9ce27fde
KH
351/* Flag to inhibit code conversion of end-of-line format. */
352int inhibit_eol_conversion;
353
74383408
KH
354/* Flag to inhibit ISO2022 escape sequence detection. */
355int inhibit_iso_escape_detection;
356
ed29121d
EZ
357/* Flag to make buffer-file-coding-system inherit from process-coding. */
358int inherit_process_coding_system;
359
c4825358 360/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
361struct coding_system terminal_coding;
362
c4825358
KH
363/* Coding system to be used to encode text for terminal display when
364 terminal coding system is nil. */
365struct coding_system safe_terminal_coding;
366
367/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
368struct coding_system keyboard_coding;
369
02ba4723
KH
370Lisp_Object Vfile_coding_system_alist;
371Lisp_Object Vprocess_coding_system_alist;
372Lisp_Object Vnetwork_coding_system_alist;
4ed46869 373
68c45bf0
PE
374Lisp_Object Vlocale_coding_system;
375
4ed46869
KH
376#endif /* emacs */
377
f967223b
KH
378/* Flag to tell if we look up translation table on character code
379 conversion. */
84fbb8a0 380Lisp_Object Venable_character_translation;
f967223b
KH
381/* Standard translation table to look up on decoding (reading). */
382Lisp_Object Vstandard_translation_table_for_decode;
383/* Standard translation table to look up on encoding (writing). */
384Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 385
f967223b
KH
386Lisp_Object Qtranslation_table;
387Lisp_Object Qtranslation_table_id;
388Lisp_Object Qtranslation_table_for_decode;
389Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
390
391/* Alist of charsets vs revision number. */
df7492f9 392static Lisp_Object Vcharset_revision_table;
4ed46869 393
02ba4723
KH
394/* Default coding systems used for process I/O. */
395Lisp_Object Vdefault_process_coding_system;
396
b843d1ae
KH
397/* Global flag to tell that we can't call post-read-conversion and
398 pre-write-conversion functions. Usually the value is zero, but it
399 is set to 1 temporarily while such functions are running. This is
400 to avoid infinite recursive call. */
401static int inhibit_pre_post_conversion;
402
df7492f9
KH
403/* Two special coding systems. */
404Lisp_Object Vsjis_coding_system;
405Lisp_Object Vbig5_coding_system;
406
407
ff0dacd7
KH
408static int detect_coding_utf_8 P_ ((struct coding_system *,
409 struct coding_detection_info *info));
df7492f9
KH
410static void decode_coding_utf_8 P_ ((struct coding_system *));
411static int encode_coding_utf_8 P_ ((struct coding_system *));
412
ff0dacd7
KH
413static int detect_coding_utf_16 P_ ((struct coding_system *,
414 struct coding_detection_info *info));
df7492f9
KH
415static void decode_coding_utf_16 P_ ((struct coding_system *));
416static int encode_coding_utf_16 P_ ((struct coding_system *));
417
ff0dacd7
KH
418static int detect_coding_iso_2022 P_ ((struct coding_system *,
419 struct coding_detection_info *info));
df7492f9
KH
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
ff0dacd7
KH
423static int detect_coding_emacs_mule P_ ((struct coding_system *,
424 struct coding_detection_info *info));
df7492f9
KH
425static void decode_coding_emacs_mule P_ ((struct coding_system *));
426static int encode_coding_emacs_mule P_ ((struct coding_system *));
427
ff0dacd7
KH
428static int detect_coding_sjis P_ ((struct coding_system *,
429 struct coding_detection_info *info));
df7492f9
KH
430static void decode_coding_sjis P_ ((struct coding_system *));
431static int encode_coding_sjis P_ ((struct coding_system *));
432
ff0dacd7
KH
433static int detect_coding_big5 P_ ((struct coding_system *,
434 struct coding_detection_info *info));
df7492f9
KH
435static void decode_coding_big5 P_ ((struct coding_system *));
436static int encode_coding_big5 P_ ((struct coding_system *));
437
ff0dacd7
KH
438static int detect_coding_ccl P_ ((struct coding_system *,
439 struct coding_detection_info *info));
df7492f9
KH
440static void decode_coding_ccl P_ ((struct coding_system *));
441static int encode_coding_ccl P_ ((struct coding_system *));
442
443static void decode_coding_raw_text P_ ((struct coding_system *));
444static int encode_coding_raw_text P_ ((struct coding_system *));
445
446
447/* ISO2022 section */
448
449#define CODING_ISO_INITIAL(coding, reg) \
450 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
451 coding_attr_iso_initial), \
452 reg)))
453
454
455#define CODING_ISO_REQUEST(coding, charset_id) \
456 ((charset_id <= (coding)->max_charset_id \
457 ? (coding)->safe_charsets[charset_id] \
458 : -1))
459
460
461#define CODING_ISO_FLAGS(coding) \
462 ((coding)->spec.iso_2022.flags)
463#define CODING_ISO_DESIGNATION(coding, reg) \
464 ((coding)->spec.iso_2022.current_designation[reg])
465#define CODING_ISO_INVOCATION(coding, plane) \
466 ((coding)->spec.iso_2022.current_invocation[plane])
467#define CODING_ISO_SINGLE_SHIFTING(coding) \
468 ((coding)->spec.iso_2022.single_shifting)
469#define CODING_ISO_BOL(coding) \
470 ((coding)->spec.iso_2022.bol)
471#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
472 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
473
474/* Control characters of ISO2022. */
475 /* code */ /* function */
476#define ISO_CODE_LF 0x0A /* line-feed */
477#define ISO_CODE_CR 0x0D /* carriage-return */
478#define ISO_CODE_SO 0x0E /* shift-out */
479#define ISO_CODE_SI 0x0F /* shift-in */
480#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
481#define ISO_CODE_ESC 0x1B /* escape */
482#define ISO_CODE_SS2 0x8E /* single-shift-2 */
483#define ISO_CODE_SS3 0x8F /* single-shift-3 */
484#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
485
486/* All code (1-byte) of ISO2022 is classified into one of the
487 followings. */
488enum iso_code_class_type
489 {
490 ISO_control_0, /* Control codes in the range
491 0x00..0x1F and 0x7F, except for the
492 following 5 codes. */
493 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
494 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
495 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
496 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
497 ISO_escape, /* ISO_CODE_ESC (0x1B) */
498 ISO_control_1, /* Control codes in the range
499 0x80..0x9F, except for the
500 following 3 codes. */
501 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
502 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
503 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
504 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
505 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
506 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
507 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
508 };
05e6f5dc 509
df7492f9
KH
510/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
511 `iso-flags' attribute of an iso2022 coding system. */
93dec019 512
df7492f9
KH
513/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
514 instead of the correct short-form sequence (e.g. ESC $ A). */
515#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 516
df7492f9
KH
517/* If set, reset graphic planes and registers at end-of-line to the
518 initial state. */
519#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 520
df7492f9
KH
521/* If set, reset graphic planes and registers before any control
522 characters to the initial state. */
523#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 524
df7492f9
KH
525/* If set, encode by 7-bit environment. */
526#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 527
df7492f9
KH
528/* If set, use locking-shift function. */
529#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 530
df7492f9
KH
531/* If set, use single-shift function. Overwrite
532 CODING_ISO_FLAG_LOCKING_SHIFT. */
533#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 534
df7492f9
KH
535/* If set, use designation escape sequence. */
536#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 537
df7492f9
KH
538/* If set, produce revision number sequence. */
539#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 540
df7492f9
KH
541/* If set, produce ISO6429's direction specifying sequence. */
542#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 543
df7492f9
KH
544/* If set, assume designation states are reset at beginning of line on
545 output. */
546#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 547
df7492f9
KH
548/* If set, designation sequence should be placed at beginning of line
549 on output. */
550#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 551
df7492f9
KH
552/* If set, do not encode unsafe characters on output. */
553#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 554
df7492f9
KH
555/* If set, extra latin codes (128..159) are accepted as a valid code
556 on input. */
557#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 558
df7492f9 559#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 560
df7492f9 561#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 562
bf16eb23
KH
563#define CODING_ISO_FLAG_USE_ROMAN 0x8000
564
565#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
566
567#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 568
df7492f9
KH
569/* A character to be produced on output if encoding of the original
570 character is prohibited by CODING_ISO_FLAG_SAFE. */
571#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 572
aa72b389 573
df7492f9
KH
574/* UTF-16 section */
575#define CODING_UTF_16_BOM(coding) \
576 ((coding)->spec.utf_16.bom)
4ed46869 577
df7492f9
KH
578#define CODING_UTF_16_ENDIAN(coding) \
579 ((coding)->spec.utf_16.endian)
4ed46869 580
df7492f9
KH
581#define CODING_UTF_16_SURROGATE(coding) \
582 ((coding)->spec.utf_16.surrogate)
4ed46869 583
4ed46869 584
df7492f9
KH
585/* CCL section */
586#define CODING_CCL_DECODER(coding) \
587 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
588#define CODING_CCL_ENCODER(coding) \
589 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
590#define CODING_CCL_VALIDS(coding) \
591 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
592 ->data)
4ed46869 593
5a936b46 594/* Index for each coding category in `coding_categories' */
4ed46869 595
df7492f9
KH
596enum coding_category
597 {
598 coding_category_iso_7,
599 coding_category_iso_7_tight,
600 coding_category_iso_8_1,
601 coding_category_iso_8_2,
602 coding_category_iso_7_else,
603 coding_category_iso_8_else,
604 coding_category_utf_8,
605 coding_category_utf_16_auto,
606 coding_category_utf_16_be,
607 coding_category_utf_16_le,
608 coding_category_utf_16_be_nosig,
609 coding_category_utf_16_le_nosig,
610 coding_category_charset,
611 coding_category_sjis,
612 coding_category_big5,
613 coding_category_ccl,
614 coding_category_emacs_mule,
615 /* All above are targets of code detection. */
616 coding_category_raw_text,
617 coding_category_undecided,
618 coding_category_max
619 };
620
621/* Definitions of flag bits used in detect_coding_XXXX. */
622#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
623#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
624#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
625#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
626#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
627#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
628#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
629#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
630#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
631#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
632#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
633#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
634#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
635#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
636#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
637#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 638#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
639
640/* This value is returned if detect_coding_mask () finds nothing other
641 than ASCII characters. */
642#define CATEGORY_MASK_ANY \
643 (CATEGORY_MASK_ISO_7 \
644 | CATEGORY_MASK_ISO_7_TIGHT \
645 | CATEGORY_MASK_ISO_8_1 \
646 | CATEGORY_MASK_ISO_8_2 \
647 | CATEGORY_MASK_ISO_7_ELSE \
648 | CATEGORY_MASK_ISO_8_ELSE \
649 | CATEGORY_MASK_UTF_8 \
650 | CATEGORY_MASK_UTF_16_BE \
651 | CATEGORY_MASK_UTF_16_LE \
652 | CATEGORY_MASK_UTF_16_BE_NOSIG \
653 | CATEGORY_MASK_UTF_16_LE_NOSIG \
654 | CATEGORY_MASK_CHARSET \
655 | CATEGORY_MASK_SJIS \
656 | CATEGORY_MASK_BIG5 \
657 | CATEGORY_MASK_CCL \
658 | CATEGORY_MASK_EMACS_MULE)
659
660
661#define CATEGORY_MASK_ISO_7BIT \
662 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
663
664#define CATEGORY_MASK_ISO_8BIT \
665 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
666
667#define CATEGORY_MASK_ISO_ELSE \
668 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
669
670#define CATEGORY_MASK_ISO_ESCAPE \
671 (CATEGORY_MASK_ISO_7 \
672 | CATEGORY_MASK_ISO_7_TIGHT \
673 | CATEGORY_MASK_ISO_7_ELSE \
674 | CATEGORY_MASK_ISO_8_ELSE)
675
676#define CATEGORY_MASK_ISO \
677 ( CATEGORY_MASK_ISO_7BIT \
678 | CATEGORY_MASK_ISO_8BIT \
679 | CATEGORY_MASK_ISO_ELSE)
680
681#define CATEGORY_MASK_UTF_16 \
682 (CATEGORY_MASK_UTF_16_BE \
683 | CATEGORY_MASK_UTF_16_LE \
684 | CATEGORY_MASK_UTF_16_BE_NOSIG \
685 | CATEGORY_MASK_UTF_16_LE_NOSIG)
686
687
688/* List of symbols `coding-category-xxx' ordered by priority. This
689 variable is exposed to Emacs Lisp. */
690static Lisp_Object Vcoding_category_list;
691
692/* Table of coding categories (Lisp symbols). This variable is for
693 internal use only. */
694static Lisp_Object Vcoding_category_table;
695
696/* Table of coding-categories ordered by priority. */
697static enum coding_category coding_priorities[coding_category_max];
698
699/* Nth element is a coding context for the coding system bound to the
700 Nth coding category. */
701static struct coding_system coding_categories[coding_category_max];
702
/* One entry per coding category that is a target of code detection,
   i.e. the enumerators of `enum coding_category' that precede
   coding_category_raw_text, listed in the same order as the enum.
   Each entry is the mask of the whole category family the
   corresponding category belongs to (all ISO2022 categories map to
   CATEGORY_MASK_ISO, all UTF-16 categories to CATEGORY_MASK_UTF_16,
   the others to their own single-bit mask).
   NOTE(review): presumably indexed by an `enum coding_category'
   value -- confirm against the users of this table. */
703static int detected_mask[coding_category_raw_text] =
704 { CATEGORY_MASK_ISO,
705 CATEGORY_MASK_ISO,
706 CATEGORY_MASK_ISO,
707 CATEGORY_MASK_ISO,
708 CATEGORY_MASK_ISO,
709 CATEGORY_MASK_ISO,
710 CATEGORY_MASK_UTF_8,
711 CATEGORY_MASK_UTF_16,
712 CATEGORY_MASK_UTF_16,
713 CATEGORY_MASK_UTF_16,
714 CATEGORY_MASK_UTF_16,
715 CATEGORY_MASK_UTF_16,
716 CATEGORY_MASK_CHARSET,
717 CATEGORY_MASK_SJIS,
718 CATEGORY_MASK_BIG5,
719 CATEGORY_MASK_CCL,
720 CATEGORY_MASK_EMACS_MULE
721 };
722
723/*** Commonly used macros and functions ***/
724
725#ifndef min
726#define min(a, b) ((a) < (b) ? (a) : (b))
727#endif
728#ifndef max
729#define max(a, b) ((a) > (b) ? (a) : (b))
730#endif
4ed46869 731
df7492f9
KH
732#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
733 do { \
734 attrs = CODING_ID_ATTRS (coding->id); \
735 eol_type = CODING_ID_EOL_TYPE (coding->id); \
736 if (VECTORP (eol_type)) \
737 eol_type = Qunix; \
738 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
739 } while (0)
4ed46869 740
4ed46869 741
df7492f9
KH
742/* Safely get one byte from the source text pointed by SRC which ends
743 at SRC_END, and set C to that byte. If there are not enough bytes
744 in the source, it jumps to `no_more_source'. The caller
745 should declare and set these variables appropriately in advance:
746 src, src_base, src_end, multibytep, consumed_chars, coding
747*/
aa72b389 748
df7492f9 749#define ONE_MORE_BYTE(c) \
aa72b389 750 do { \
df7492f9
KH
751 if (src == src_end) \
752 { \
753 if (src_base < src) \
754 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
755 goto no_more_source; \
756 } \
757 c = *src++; \
758 if (multibytep && (c & 0x80)) \
759 { \
760 if ((c & 0xFE) != 0xC0) \
761 error ("Undecodable char found"); \
762 c = ((c & 1) << 6) | *src++; \
763 } \
764 consumed_chars++; \
aa72b389
KH
765 } while (0)
766
aa72b389 767
df7492f9
KH
768#define ONE_MORE_BYTE_NO_CHECK(c) \
769 do { \
770 c = *src++; \
771 if (multibytep && (c & 0x80)) \
772 { \
773 if ((c & 0xFE) != 0xC0) \
774 error ("Undecodable char found"); \
775 c = ((c & 1) << 6) | *src++; \
776 } \
781d7a48 777 consumed_chars++; \
aa72b389
KH
778 } while (0)
779
aa72b389 780
df7492f9
KH
781/* Store a byte C in the place pointed by DST and increment DST to the
782 next free point, and increment PRODUCED_CHARS. The caller should
783 assure that C is 0..127, and declare and set the variable `dst'
784 appropriately in advance.
785*/
aa72b389
KH
786
787
df7492f9
KH
788#define EMIT_ONE_ASCII_BYTE(c) \
789 do { \
790 produced_chars++; \
791 *dst++ = (c); \
792 } while (0)
aa72b389 793
aa72b389 794
df7492f9 795/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
aa72b389 796
df7492f9
KH
797#define EMIT_TWO_ASCII_BYTES(c1, c2) \
798 do { \
799 produced_chars += 2; \
800 *dst++ = (c1), *dst++ = (c2); \
801 } while (0)
aa72b389 802
df7492f9
KH
803
804/* Store a byte C in the place pointed by DST and increment DST to the
805 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
806 nonzero, store in an appropriate multibyte from. The caller should
807 declare and set the variables `dst' and `multibytep' appropriately
808 in advance. */
809
810#define EMIT_ONE_BYTE(c) \
811 do { \
812 produced_chars++; \
813 if (multibytep) \
814 { \
815 int ch = (c); \
816 if (ch >= 0x80) \
817 ch = BYTE8_TO_CHAR (ch); \
818 CHAR_STRING_ADVANCE (ch, dst); \
819 } \
820 else \
821 *dst++ = (c); \
aa72b389
KH
822 } while (0)
823
824
/* Like EMIT_ONE_BYTE, but emit two bytes, C1 then C2, and add two to
   PRODUCED_CHARS.  In the multibyte case each byte is converted
   independently (BYTE8_TO_CHAR for values >= 0x80) before being
   written with CHAR_STRING_ADVANCE.  */

#define EMIT_TWO_BYTES(c1, c2)                  \
  do {                                          \
    produced_chars += 2;                        \
    if (! multibytep)                           \
      {                                         \
        *dst++ = (c1);                          \
        *dst++ = (c2);                          \
      }                                         \
    else                                        \
      {                                         \
        int ch;                                 \
                                                \
        /* First byte.  */                      \
        ch = (c1);                              \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
        /* Second byte.  */                     \
        ch = (c2);                              \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
      }                                         \
  } while (0)
849
850
df7492f9
KH
/* Emit the three bytes C1, C2 and C3, in that order, with the same
   multibyte handling as EMIT_ONE_BYTE.  */

#define EMIT_THREE_BYTES(c1, c2, c3)    \
  do {                                  \
    EMIT_TWO_BYTES (c1, c2);            \
    EMIT_ONE_BYTE (c3);                 \
  } while (0)
aa72b389 856
aa72b389 857
df7492f9
KH
/* Emit the four bytes C1, C2, C3 and C4, in that order, with the same
   multibyte handling as EMIT_ONE_BYTE.  */

#define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
  do {                                          \
    EMIT_ONE_BYTE (c1);                         \
    EMIT_ONE_BYTE (c2);                         \
    EMIT_TWO_BYTES (c3, c4);                    \
  } while (0)
aa72b389 863
aa72b389 864
df7492f9
KH
/* Set C to DECODE_CHAR (CHARSET, CODE).  `charset_map_loaded' is
   cleared first; if it is nonzero after the call, CODING's source
   pointer is refreshed with coding_set_source and SRC, SRC_BASE and
   SRC_END are all shifted by the distance coding->source moved, so
   that they keep pointing at the same text.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                  \
    charset_map_loaded = 0;                                             \
    c = DECODE_CHAR (charset, code);                                    \
    if (charset_map_loaded)                                             \
      {                                                                 \
        /* Remember the old address before recomputing it.  */          \
        unsigned char *orig = coding->source;                           \
        EMACS_INT offset;                                               \
                                                                        \
        coding_set_source (coding);                                     \
        offset = coding->source - orig;                                 \
        src_end += offset;                                              \
        src_base += offset;                                             \
        src += offset;                                                  \
      }                                                                 \
  } while (0)
aa72b389 881
aa72b389 882
df7492f9
KH
/* Make sure BYTES more bytes can be stored at DST.  If DST + BYTES
   reaches DST_END, ask alloc_destination for room for BYTES plus one
   byte per element still pending between CHARBUF and CHARBUF_END,
   then reload DST and DST_END from CODING.  The caller must have
   `coding', `dst', `dst_end', `charbuf' and `charbuf_end' in scope.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (! (dst + (bytes) < dst_end))                            \
      {                                                         \
        int more_bytes = charbuf_end - charbuf + (bytes);       \
                                                                \
        dst = alloc_destination (coding, more_bytes, dst);      \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
b1887814 893
df7492f9
KH
894
895
896static void
897coding_set_source (coding)
898 struct coding_system *coding;
899{
900 if (BUFFERP (coding->src_object))
901 {
2cb26057
KH
902 struct buffer *buf = XBUFFER (coding->src_object);
903
df7492f9 904 if (coding->src_pos < 0)
2cb26057 905 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 906 else
2cb26057 907 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 908 }
df7492f9 909 else if (STRINGP (coding->src_object))
aa72b389 910 {
df7492f9
KH
911 coding->source = (XSTRING (coding->src_object)->data
912 + coding->src_pos_byte);
913 }
914 else
915 /* Otherwise, the source is C string and is never relocated
916 automatically. Thus we don't have to update anything. */
917 ;
918}
919
920static void
921coding_set_destination (coding)
922 struct coding_system *coding;
923{
924 if (BUFFERP (coding->dst_object))
925 {
df7492f9 926 if (coding->src_pos < 0)
28f67a95
KH
927 {
928 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
929 coding->dst_bytes = (GAP_END_ADDR
930 - (coding->src_bytes - coding->consumed)
931 - coding->destination);
932 }
df7492f9 933 else
28f67a95
KH
934 {
935 /* We are sure that coding->dst_pos_byte is before the gap
936 of the buffer. */
937 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
938 + coding->dst_pos_byte - 1);
939 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
940 - coding->destination);
941 }
df7492f9
KH
942 }
943 else
944 /* Otherwise, the destination is C string and is never relocated
945 automatically. Thus we don't have to update anything. */
946 ;
947}
948
949
950static void
951coding_alloc_by_realloc (coding, bytes)
952 struct coding_system *coding;
953 EMACS_INT bytes;
954{
955 coding->destination = (unsigned char *) xrealloc (coding->destination,
956 coding->dst_bytes + bytes);
957 coding->dst_bytes += bytes;
958}
959
960static void
961coding_alloc_by_making_gap (coding, bytes)
962 struct coding_system *coding;
963 EMACS_INT bytes;
964{
2c78b7e1
KH
965 if (BUFFERP (coding->dst_object)
966 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
967 {
968 EMACS_INT add = coding->src_bytes - coding->consumed;
969
970 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
971 make_gap (bytes);
972 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
973 }
974 else
975 {
2c78b7e1
KH
976 Lisp_Object this_buffer;
977
978 this_buffer = Fcurrent_buffer ();
df7492f9
KH
979 set_buffer_internal (XBUFFER (coding->dst_object));
980 make_gap (bytes);
981 set_buffer_internal (XBUFFER (this_buffer));
982 }
983}
984
985
986static unsigned char *
987alloc_destination (coding, nbytes, dst)
988 struct coding_system *coding;
989 int nbytes;
990 unsigned char *dst;
991{
992 EMACS_INT offset = dst - coding->destination;
993
994 if (BUFFERP (coding->dst_object))
995 coding_alloc_by_making_gap (coding, nbytes);
996 else
997 coding_alloc_by_realloc (coding, nbytes);
998 coding->result = CODING_RESULT_SUCCESS;
999 coding_set_destination (coding);
1000 dst = coding->destination + offset;
1001 return dst;
1002}
aa72b389 1003
ff0dacd7
KH
1004/** Macros for annotations. */
1005
/* Maximum length of annotation data: the sum of one composition
   annotation and one charset annotation.  The terms are presumably
   5 elements of composition header, up to
   2 * MAX_COMPOSITION_COMPONENTS - 1 composition components, and 5
   elements of charset annotation.  */
#define MAX_ANNOTATION_LENGTH \
  (5 + (2 * MAX_COMPOSITION_COMPONENTS - 1) + 5)
1009
1010/* An annotation data is stored in the array coding->charbuf in this
1011 format:
1012 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1013 LENGTH is the number of elements in the annotation.
1014 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1015 FROM and TO specify the range of text annotated. They are relative
1016 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1017
1018 The format of the following elements depend on ANNOTATION_MASK.
1019
1020 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1021 follows:
1022 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1023 METHOD is one of enum composition_method.
1024 Optional COMPOSITION-COMPONENTS are characters and composition
1025 rules.
1026
1027 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1028 follows. */
1029
/* Store an annotation header at BUF, advancing BUF by four elements:
   -LEN, MASK, FROM and TO, in that order.  Also mark CODING (which
   must be in scope at the point of use) as annotated.

   The expansion deliberately does not end in a semicolon, so that the
   macro can be used as a single statement, including as the body of
   an unbraced `if' that is followed by `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (from);                                  \
    *(buf)++ = (to);                                    \
    coding->annotated = 1;                              \
  } while (0)

/* Store a composition annotation at BUF: a five-element record made
   of the header for CODING_ANNOTATE_COMPOSITION_MASK covering FROM..TO
   followed by METHOD.  BUF ends up five elements further on.  */

#define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
  do {                                                                        \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
    *(buf)++ = (method);                                                      \
  } while (0)


/* Store a charset annotation at BUF: a five-element record made of
   the header for CODING_ANNOTATE_CHARSET_MASK covering FROM..TO
   followed by the charset ID.  BUF ends up five elements further on.  */

#define ADD_CHARSET_DATA(buf, from, to, id)                               \
  do {                                                                    \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
    *(buf)++ = (id);                                                      \
  } while (0)
1051
df7492f9
KH
1052\f
1053/*** 2. Emacs' internal format (emacs-utf-8) ***/
1054
1055
1056
1057\f
1058/*** 3. UTF-8 ***/
1059
1060/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1061 Check if a text is encoded in UTF-8. If it is, return 1, else
1062 return 0. */
df7492f9
KH
1063
/* Classify an octet C of a UTF-8 sequence by its leading bits.  The
   N_OCTET_LEADING predicates recognize the first octet of an N-octet
   sequence; EXTRA_OCTET recognizes a continuation octet.  */
#define UTF_8_1_OCTET_P(c)         (0x80 > (c))             /* 0xxxxxxx */
#define UTF_8_EXTRA_OCTET_P(c)     (0x80 == ((c) & 0xC0))   /* 10xxxxxx */
#define UTF_8_2_OCTET_LEADING_P(c) (0xC0 == ((c) & 0xE0))   /* 110xxxxx */
#define UTF_8_3_OCTET_LEADING_P(c) (0xE0 == ((c) & 0xF0))   /* 1110xxxx */
#define UTF_8_4_OCTET_LEADING_P(c) (0xF0 == ((c) & 0xF8))   /* 11110xxx */
#define UTF_8_5_OCTET_LEADING_P(c) (0xF8 == ((c) & 0xFC))   /* 111110xx */
1070
1071static int
ff0dacd7 1072detect_coding_utf_8 (coding, detect_info)
df7492f9 1073 struct coding_system *coding;
ff0dacd7 1074 struct coding_detection_info *detect_info;
df7492f9
KH
1075{
1076 unsigned char *src = coding->source, *src_base = src;
1077 unsigned char *src_end = coding->source + coding->src_bytes;
1078 int multibytep = coding->src_multibyte;
1079 int consumed_chars = 0;
1080 int found = 0;
89528eb3 1081 int incomplete;
df7492f9 1082
ff0dacd7 1083 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1084 /* A coding system of this category is always ASCII compatible. */
1085 src += coding->head_ascii;
1086
1087 while (1)
1088 {
1089 int c, c1, c2, c3, c4;
1090
89528eb3 1091 incomplete = 0;
df7492f9
KH
1092 ONE_MORE_BYTE (c);
1093 if (UTF_8_1_OCTET_P (c))
1094 continue;
89528eb3 1095 incomplete = 1;
df7492f9
KH
1096 ONE_MORE_BYTE (c1);
1097 if (! UTF_8_EXTRA_OCTET_P (c1))
1098 break;
1099 if (UTF_8_2_OCTET_LEADING_P (c))
1100 {
ff0dacd7 1101 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1102 continue;
1103 }
1104 ONE_MORE_BYTE (c2);
1105 if (! UTF_8_EXTRA_OCTET_P (c2))
1106 break;
1107 if (UTF_8_3_OCTET_LEADING_P (c))
1108 {
ff0dacd7 1109 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1110 continue;
1111 }
1112 ONE_MORE_BYTE (c3);
1113 if (! UTF_8_EXTRA_OCTET_P (c3))
1114 break;
1115 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1116 {
ff0dacd7 1117 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1118 continue;
1119 }
1120 ONE_MORE_BYTE (c4);
1121 if (! UTF_8_EXTRA_OCTET_P (c4))
1122 break;
1123 if (UTF_8_5_OCTET_LEADING_P (c))
1124 {
ff0dacd7 1125 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1126 continue;
1127 }
1128 break;
1129 }
ff0dacd7 1130 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1131 return 0;
1132
1133 no_more_source:
89528eb3
KH
1134 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1135 {
ff0dacd7 1136 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3
KH
1137 return 0;
1138 }
ff0dacd7
KH
1139 detect_info->found |= found;
1140 return 1;
df7492f9
KH
1141}
1142
1143
1144static void
1145decode_coding_utf_8 (coding)
1146 struct coding_system *coding;
1147{
1148 unsigned char *src = coding->source + coding->consumed;
1149 unsigned char *src_end = coding->source + coding->src_bytes;
1150 unsigned char *src_base;
1151 int *charbuf = coding->charbuf;
1152 int *charbuf_end = charbuf + coding->charbuf_size;
1153 int consumed_chars = 0, consumed_chars_base;
1154 int multibytep = coding->src_multibyte;
1155 Lisp_Object attr, eol_type, charset_list;
1156
1157 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1158
1159 while (1)
1160 {
1161 int c, c1, c2, c3, c4, c5;
1162
1163 src_base = src;
1164 consumed_chars_base = consumed_chars;
1165
1166 if (charbuf >= charbuf_end)
1167 break;
1168
1169 ONE_MORE_BYTE (c1);
1170 if (UTF_8_1_OCTET_P(c1))
1171 {
1172 c = c1;
1173 if (c == '\r')
aa72b389 1174 {
df7492f9
KH
1175 if (EQ (eol_type, Qdos))
1176 {
1177 if (src == src_end)
98725083
KH
1178 {
1179 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1180 goto no_more_source;
1181 }
df7492f9
KH
1182 if (*src == '\n')
1183 ONE_MORE_BYTE (c);
1184 }
1185 else if (EQ (eol_type, Qmac))
1186 c = '\n';
aa72b389 1187 }
aa72b389 1188 }
df7492f9 1189 else
aa72b389 1190 {
df7492f9
KH
1191 ONE_MORE_BYTE (c2);
1192 if (! UTF_8_EXTRA_OCTET_P (c2))
1193 goto invalid_code;
1194 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1195 {
1196 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1197 /* Reject overlong sequences here and below. Encoders
1198 producing them are incorrect, they can be misleading,
1199 and they mess up read/write invariance. */
1200 if (c < 128)
1201 goto invalid_code;
1202 }
df7492f9 1203 else
aa72b389 1204 {
df7492f9
KH
1205 ONE_MORE_BYTE (c3);
1206 if (! UTF_8_EXTRA_OCTET_P (c3))
1207 goto invalid_code;
1208 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1209 {
1210 c = (((c1 & 0xF) << 12)
1211 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1212 if (c < 0x800
1213 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1214 goto invalid_code;
1215 }
df7492f9
KH
1216 else
1217 {
1218 ONE_MORE_BYTE (c4);
1219 if (! UTF_8_EXTRA_OCTET_P (c4))
1220 goto invalid_code;
1221 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1222 {
df7492f9
KH
1223 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1224 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1225 if (c < 0x10000)
1226 goto invalid_code;
1227 }
df7492f9
KH
1228 else
1229 {
1230 ONE_MORE_BYTE (c5);
1231 if (! UTF_8_EXTRA_OCTET_P (c5))
1232 goto invalid_code;
1233 if (UTF_8_5_OCTET_LEADING_P (c1))
1234 {
1235 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1236 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1237 | (c5 & 0x3F));
b0edb2c5 1238 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1239 goto invalid_code;
1240 }
1241 else
1242 goto invalid_code;
1243 }
1244 }
aa72b389 1245 }
aa72b389 1246 }
df7492f9
KH
1247
1248 *charbuf++ = c;
1249 continue;
1250
1251 invalid_code:
1252 src = src_base;
1253 consumed_chars = consumed_chars_base;
1254 ONE_MORE_BYTE (c);
1255 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1256 coding->errors++;
aa72b389
KH
1257 }
1258
df7492f9
KH
1259 no_more_source:
1260 coding->consumed_char += consumed_chars_base;
1261 coding->consumed = src_base - coding->source;
1262 coding->charbuf_used = charbuf - coding->charbuf;
1263}
1264
1265
1266static int
1267encode_coding_utf_8 (coding)
1268 struct coding_system *coding;
1269{
1270 int multibytep = coding->dst_multibyte;
1271 int *charbuf = coding->charbuf;
1272 int *charbuf_end = charbuf + coding->charbuf_used;
1273 unsigned char *dst = coding->destination + coding->produced;
1274 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1275 int produced_chars = 0;
df7492f9
KH
1276 int c;
1277
1278 if (multibytep)
aa72b389 1279 {
df7492f9
KH
1280 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1281
1282 while (charbuf < charbuf_end)
aa72b389 1283 {
df7492f9
KH
1284 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1285
1286 ASSURE_DESTINATION (safe_room);
1287 c = *charbuf++;
28f67a95
KH
1288 if (CHAR_BYTE8_P (c))
1289 {
1290 c = CHAR_TO_BYTE8 (c);
1291 EMIT_ONE_BYTE (c);
1292 }
1293 else
1294 {
1295 CHAR_STRING_ADVANCE (c, pend);
1296 for (p = str; p < pend; p++)
1297 EMIT_ONE_BYTE (*p);
1298 }
aa72b389 1299 }
aa72b389 1300 }
df7492f9
KH
1301 else
1302 {
1303 int safe_room = MAX_MULTIBYTE_LENGTH;
1304
1305 while (charbuf < charbuf_end)
1306 {
1307 ASSURE_DESTINATION (safe_room);
1308 c = *charbuf++;
1309 dst += CHAR_STRING (c, dst);
1310 produced_chars++;
1311 }
1312 }
1313 coding->result = CODING_RESULT_SUCCESS;
1314 coding->produced_char += produced_chars;
1315 coding->produced = dst - coding->destination;
1316 return 0;
aa72b389
KH
1317}
1318
4ed46869 1319
df7492f9 1320/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1321 Check if a text is encoded in one of UTF-16 based coding systems.
1322 If it is, return 1, else return 0. */
df7492f9
KH
1323
/* Nonzero if the 16-bit code unit VAL is a high surrogate
   (0xD800..0xDBFF).  */
#define UTF_16_HIGH_SURROGATE_P(val)    \
  (0xD800 == ((val) & 0xFC00))

/* Nonzero if the 16-bit code unit VAL is a low surrogate
   (0xDC00..0xDFFF).  */
#define UTF_16_LOW_SURROGATE_P(val)     \
  (0xDC00 == ((val) & 0xFC00))

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)           \
  (UTF_16_LOW_SURROGATE_P (val)         \
   || ((val) == 0xFFFF)                 \
   || ((val) == 0xFFFE))
1334
1335
1336static int
ff0dacd7 1337detect_coding_utf_16 (coding, detect_info)
b73bfc1c 1338 struct coding_system *coding;
ff0dacd7 1339 struct coding_detection_info *detect_info;
b73bfc1c 1340{
df7492f9
KH
1341 unsigned char *src = coding->source, *src_base = src;
1342 unsigned char *src_end = coding->source + coding->src_bytes;
1343 int multibytep = coding->src_multibyte;
1344 int consumed_chars = 0;
1345 int c1, c2;
1346
ff0dacd7 1347 detect_info->checked |= CATEGORY_MASK_UTF_16;
89528eb3 1348
ff0dacd7
KH
1349 if (coding->mode & CODING_MODE_LAST_BLOCK
1350 && (coding->src_bytes & 1))
1351 {
1352 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1353 return 0;
1354 }
df7492f9
KH
1355 ONE_MORE_BYTE (c1);
1356 ONE_MORE_BYTE (c2);
4ed46869 1357
df7492f9 1358 if ((c1 == 0xFF) && (c2 == 0xFE))
ff0dacd7
KH
1359 {
1360 detect_info->found |= CATEGORY_MASK_UTF_16_LE;
1361 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1362 }
df7492f9 1363 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7
KH
1364 {
1365 detect_info->found |= CATEGORY_MASK_UTF_16_BE;
1366 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1367 }
df7492f9 1368 no_more_source:
ff0dacd7 1369 return 1;
df7492f9 1370}
ec6d2bb8 1371
df7492f9
KH
1372static void
1373decode_coding_utf_16 (coding)
1374 struct coding_system *coding;
1375{
1376 unsigned char *src = coding->source + coding->consumed;
1377 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1378 unsigned char *src_base;
df7492f9
KH
1379 int *charbuf = coding->charbuf;
1380 int *charbuf_end = charbuf + coding->charbuf_size;
1381 int consumed_chars = 0, consumed_chars_base;
1382 int multibytep = coding->src_multibyte;
1383 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1384 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1385 int surrogate = CODING_UTF_16_SURROGATE (coding);
1386 Lisp_Object attr, eol_type, charset_list;
1387
1388 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1389
1390 if (bom != utf_16_without_bom)
1391 {
1392 int c, c1, c2;
4af310db 1393
df7492f9
KH
1394 src_base = src;
1395 ONE_MORE_BYTE (c1);
1396 ONE_MORE_BYTE (c2);
e19c3639 1397 c = (c1 << 8) | c2;
df7492f9
KH
1398 if (bom == utf_16_with_bom)
1399 {
1400 if (endian == utf_16_big_endian
880cf180 1401 ? c != 0xFEFF : c != 0xFFFE)
4af310db 1402 {
df7492f9
KH
1403 /* We are sure that there's enouph room at CHARBUF. */
1404 *charbuf++ = c1;
1405 *charbuf++ = c2;
1406 coding->errors++;
4af310db 1407 }
4af310db 1408 }
df7492f9 1409 else
4af310db 1410 {
880cf180 1411 if (c == 0xFEFF)
df7492f9
KH
1412 CODING_UTF_16_ENDIAN (coding)
1413 = endian = utf_16_big_endian;
880cf180 1414 else if (c == 0xFFFE)
df7492f9
KH
1415 CODING_UTF_16_ENDIAN (coding)
1416 = endian = utf_16_little_endian;
1417 else
4af310db 1418 {
df7492f9
KH
1419 CODING_UTF_16_ENDIAN (coding)
1420 = endian = utf_16_big_endian;
1421 src = src_base;
4af310db 1422 }
4af310db 1423 }
df7492f9
KH
1424 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1425 }
1426
1427 while (1)
1428 {
1429 int c, c1, c2;
1430
1431 src_base = src;
1432 consumed_chars_base = consumed_chars;
1433
1434 if (charbuf + 2 >= charbuf_end)
1435 break;
1436
1437 ONE_MORE_BYTE (c1);
1438 ONE_MORE_BYTE (c2);
1439 c = (endian == utf_16_big_endian
e19c3639 1440 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1441 if (surrogate)
aa72b389 1442 {
df7492f9 1443 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1444 {
df7492f9
KH
1445 if (endian == utf_16_big_endian)
1446 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1447 else
1448 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1449 *charbuf++ = c1;
1450 *charbuf++ = c2;
1451 coding->errors++;
1452 if (UTF_16_HIGH_SURROGATE_P (c))
1453 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1454 else
1455 *charbuf++ = c;
aa72b389 1456 }
df7492f9
KH
1457 else
1458 {
1459 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1460 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1461 *charbuf++ = c;
1462 }
1463 }
1464 else
1465 {
1466 if (UTF_16_HIGH_SURROGATE_P (c))
1467 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1468 else
1469 *charbuf++ = c;
1470 }
1471 }
1472
1473 no_more_source:
1474 coding->consumed_char += consumed_chars_base;
1475 coding->consumed = src_base - coding->source;
1476 coding->charbuf_used = charbuf - coding->charbuf;
1477}
1478
1479static int
1480encode_coding_utf_16 (coding)
1481 struct coding_system *coding;
1482{
1483 int multibytep = coding->dst_multibyte;
1484 int *charbuf = coding->charbuf;
1485 int *charbuf_end = charbuf + coding->charbuf_used;
1486 unsigned char *dst = coding->destination + coding->produced;
1487 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1488 int safe_room = 8;
1489 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1490 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1491 int produced_chars = 0;
1492 Lisp_Object attrs, eol_type, charset_list;
1493 int c;
1494
1495 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1496
1497 if (bom == utf_16_with_bom)
1498 {
1499 ASSURE_DESTINATION (safe_room);
1500 if (big_endian)
df7492f9 1501 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1502 else
1503 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1504 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1505 }
1506
1507 while (charbuf < charbuf_end)
1508 {
1509 ASSURE_DESTINATION (safe_room);
1510 c = *charbuf++;
e19c3639
KH
1511 if (c >= MAX_UNICODE_CHAR)
1512 c = coding->default_char;
df7492f9
KH
1513
1514 if (c < 0x10000)
1515 {
1516 if (big_endian)
1517 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1518 else
1519 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1520 }
1521 else
1522 {
1523 int c1, c2;
1524
1525 c -= 0x10000;
1526 c1 = (c >> 10) + 0xD800;
1527 c2 = (c & 0x3FF) + 0xDC00;
1528 if (big_endian)
1529 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1530 else
1531 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1532 }
1533 }
1534 coding->result = CODING_RESULT_SUCCESS;
1535 coding->produced = dst - coding->destination;
1536 coding->produced_char += produced_chars;
1537 return 0;
1538}
1539
1540\f
1541/*** 6. Old Emacs' internal format (emacs-mule) ***/
1542
1543/* Emacs' internal format for representation of multiple character
1544 sets is a kind of multi-byte encoding, i.e. characters are
1545 represented by variable-length sequences of one-byte codes.
1546
1547 ASCII characters and control characters (e.g. `tab', `newline') are
1548 represented by one-byte sequences which are their ASCII codes, in
1549 the range 0x00 through 0x7F.
1550
1551 8-bit characters of the range 0x80..0x9F are represented by
1552 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1553 code + 0x20).
1554
1555 8-bit characters of the range 0xA0..0xFF are represented by
1556 one-byte sequences which are their 8-bit code.
1557
1558 The other characters are represented by a sequence of `base
1559 leading-code', optional `extended leading-code', and one or two
1560 `position-code's. The length of the sequence is determined by the
1561 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1562 whereas extended leading-code and position-code take the range 0xA0
1563 through 0xFF. See `charset.h' for more details about leading-code
1564 and position-code.
1565
1566 --- CODE RANGE of Emacs' internal format ---
1567 character set range
1568 ------------- -----
1569 ascii 0x00..0x7F
1570 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1571 eight-bit-graphic 0xA0..0xBF
1572 ELSE 0x81..0x9D + [0xA0..0xFF]+
1573 ---------------------------------------------
1574
1575 As this is the internal character representation, the format is
1576 usually not used externally (i.e. in a file or in a data sent to a
1577 process). But, it is possible to have a text externally in this
1578 format (i.e. by encoding by the coding system `emacs-mule').
1579
1580 In that case, a sequence of one-byte codes has a slightly different
1581 form.
1582
1583 At first, all characters in eight-bit-control are represented by
1584 one-byte sequences which are their 8-bit code.
1585
1586 Next, character composition data are represented by the byte
1587 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1588 where,
1589 METHOD is 0xF0 plus one of composition method (enum
1590 composition_method),
1591
1592 BYTES is 0xA0 plus a byte length of this composition data,
1593
1594 CHARS is 0x20 plus a number of characters composed by this
1595 data,
1596
1597 COMPONENTs are characters of multibyte form or composition
1598 rules encoded by two-byte of ASCII codes.
1599
1600 In addition, for backward compatibility, the following formats are
1601 also recognized as composition data on decoding.
1602
1603 0x80 MSEQ ...
1604 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1605
1606 Here,
1607 MSEQ is a multibyte form but in these special format:
1608 ASCII: 0xA0 ASCII_CODE+0x80,
1609 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1610 RULE is a one byte code of the range 0xA0..0xF0 that
1611 represents a composition rule.
1612 */
1613
1614char emacs_mule_bytes[256];
1615
df7492f9 1616int
ff0dacd7 1617emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1618 struct coding_system *coding;
781d7a48 1619 unsigned char *src;
ff0dacd7 1620 int *nbytes, *nchars, *id;
df7492f9 1621{
df7492f9
KH
1622 unsigned char *src_end = coding->source + coding->src_bytes;
1623 int multibytep = coding->src_multibyte;
1624 unsigned char *src_base = src;
1625 struct charset *charset;
1626 unsigned code;
1627 int c;
1628 int consumed_chars = 0;
1629
1630 ONE_MORE_BYTE (c);
df7492f9
KH
1631 switch (emacs_mule_bytes[c])
1632 {
1633 case 2:
1634 if (! (charset = emacs_mule_charset[c]))
1635 goto invalid_code;
1636 ONE_MORE_BYTE (c);
1637 code = c & 0x7F;
1638 break;
1639
1640 case 3:
7c78e542
KH
1641 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1642 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
b73bfc1c 1643 {
df7492f9
KH
1644 ONE_MORE_BYTE (c);
1645 if (! (charset = emacs_mule_charset[c]))
1646 goto invalid_code;
1647 ONE_MORE_BYTE (c);
1648 code = c & 0x7F;
b73bfc1c
KH
1649 }
1650 else
1651 {
df7492f9
KH
1652 if (! (charset = emacs_mule_charset[c]))
1653 goto invalid_code;
1654 ONE_MORE_BYTE (c);
781d7a48 1655 code = (c & 0x7F) << 8;
df7492f9
KH
1656 ONE_MORE_BYTE (c);
1657 code |= c & 0x7F;
1658 }
1659 break;
1660
1661 case 4:
781d7a48 1662 ONE_MORE_BYTE (c);
df7492f9
KH
1663 if (! (charset = emacs_mule_charset[c]))
1664 goto invalid_code;
1665 ONE_MORE_BYTE (c);
781d7a48 1666 code = (c & 0x7F) << 8;
df7492f9
KH
1667 ONE_MORE_BYTE (c);
1668 code |= c & 0x7F;
1669 break;
1670
1671 case 1:
1672 code = c;
9d123124
KH
1673 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1674 ? charset_ascii : charset_eight_bit);
df7492f9
KH
1675 break;
1676
1677 default:
1678 abort ();
1679 }
1680 c = DECODE_CHAR (charset, code);
1681 if (c < 0)
1682 goto invalid_code;
1683 *nbytes = src - src_base;
1684 *nchars = consumed_chars;
ff0dacd7
KH
1685 if (id)
1686 *id = charset->id;
df7492f9
KH
1687 return c;
1688
1689 no_more_source:
1690 return -2;
1691
1692 invalid_code:
1693 return -1;
1694}
1695
1696
1697/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1698 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1699 else return 0. */
df7492f9
KH
1700
1701static int
ff0dacd7 1702detect_coding_emacs_mule (coding, detect_info)
df7492f9 1703 struct coding_system *coding;
ff0dacd7 1704 struct coding_detection_info *detect_info;
df7492f9
KH
1705{
1706 unsigned char *src = coding->source, *src_base = src;
1707 unsigned char *src_end = coding->source + coding->src_bytes;
1708 int multibytep = coding->src_multibyte;
1709 int consumed_chars = 0;
1710 int c;
1711 int found = 0;
89528eb3 1712 int incomplete;
df7492f9 1713
ff0dacd7 1714 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1715 /* A coding system of this category is always ASCII compatible. */
1716 src += coding->head_ascii;
1717
1718 while (1)
1719 {
89528eb3 1720 incomplete = 0;
df7492f9 1721 ONE_MORE_BYTE (c);
89528eb3 1722 incomplete = 1;
df7492f9
KH
1723
1724 if (c == 0x80)
1725 {
1726 /* Perhaps the start of composite character. We simple skip
1727 it because analyzing it is too heavy for detecting. But,
1728 at least, we check that the composite character
1729 constitues of more than 4 bytes. */
1730 unsigned char *src_base;
1731
1732 repeat:
1733 src_base = src;
1734 do
1735 {
1736 ONE_MORE_BYTE (c);
1737 }
1738 while (c >= 0xA0);
1739
1740 if (src - src_base <= 4)
1741 break;
ff0dacd7 1742 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1743 if (c == 0x80)
1744 goto repeat;
b73bfc1c 1745 }
df7492f9
KH
1746
1747 if (c < 0x80)
b73bfc1c 1748 {
df7492f9
KH
1749 if (c < 0x20
1750 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1751 break;
1752 }
1753 else
1754 {
1755 unsigned char *src_base = src - 1;
1756
1757 do
1758 {
1759 ONE_MORE_BYTE (c);
1760 }
1761 while (c >= 0xA0);
1762 if (src - src_base != emacs_mule_bytes[*src_base])
1763 break;
ff0dacd7 1764 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1765 }
1766 }
ff0dacd7 1767 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1768 return 0;
1769
1770 no_more_source:
89528eb3
KH
1771 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1772 {
ff0dacd7 1773 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1774 return 0;
1775 }
ff0dacd7
KH
1776 detect_info->found |= found;
1777 return 1;
4ed46869
KH
1778}
1779
b73bfc1c 1780
df7492f9
KH
1781/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1782
/* Decode, with emacs_mule_char, one character that is a component of
   an Emacs 20/21 style composition sequence at SRC.  On success store
   the character at *BUF, increment BUF, and advance SRC and
   CONSUMED_CHARS past it.

   If SRC is already at SRC_END, or emacs_mule_char reports that the
   source ends inside the character (-2), execute `break'; for any
   other negative result jump to `invalid_code'.  The macro is written
   as `if (1) { ... } else' rather than as a do-while so that the
   `break' acts on the loop (or other breakable statement) enclosing
   the use of the macro.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  if (1)                                                                \
    {                                                                   \
      int c, nbytes, nchars;                                            \
                                                                        \
      if (src == src_end)                                               \
        break;                                                          \
      c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);        \
      if (c == -2)                                                      \
        break;                                                          \
      if (c < 0)                                                        \
        goto invalid_code;                                              \
      src += nbytes;                                                    \
      consumed_chars += nchars;                                         \
      *buf++ = c;                                                       \
    }                                                                   \
  else
1810
1811
/* Decode a composition rule that is a component of an Emacs 20 style
   composition sequence at SRC.  The rule is one byte holding
   0x20 + GREF * 9 + NREF.  Store COMPOSITION_ENCODE_RULE (GREF, NREF)
   at *BUF and increment BUF.  Jump to `invalid_code' if no byte is
   left before SRC_END or if the byte is outside 0x20..0x70.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
  do {                                                  \
    int c, gref, nref;                                  \
                                                        \
    if (! (src < src_end))                              \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (c);                         \
    if (c < 0x20 || c > 0x20 + 80)                      \
      goto invalid_code;                                \
    c -= 0x20;                                          \
                                                        \
    gref = c / 9;                                       \
    nref = c % 9;                                       \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1831
1832
781d7a48
KH
/* Decode a composition rule that is a component of an Emacs 21 style
   composition sequence at SRC.  The rule is two bytes, 0x20 + GREF
   and 0x20 + NREF.  Store COMPOSITION_ENCODE_RULE (GREF, NREF) at
   *BUF and increment BUF.  Jump to `invalid_code' if fewer than two
   bytes are left before SRC_END - 1, or if either value falls outside
   0..80 after the 0x20 bias is removed.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    if (! (src + 1 < src_end))                          \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (gref);                      \
    ONE_MORE_BYTE_NO_CHECK (nref);                      \
    gref -= 0x20;                                       \
    nref -= 0x20;                                       \
    if (gref < 0 || nref < 0                            \
        || gref >= 81 || nref >= 81)                    \
      goto invalid_code;                                \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1853
1854
/* Decode an Emacs 21 style composition header and its components.

   On entry C is the METHOD byte.  The macro reads the BYTES and CHARS
   bytes, stores a composition annotation in CHARBUF, and, unless the
   method is COMPOSITION_RELATIVE, decodes the components that follow
   (alternating characters and rules, or characters only for
   COMPOSITION_WITH_ALTCHARS) into CHARBUF until BYTES have been
   consumed.  The annotation's length field is then extended by the
   number of components stored.  Malformed data jumps to
   `invalid_code'.

   The annotated range FROM..TO is expressed in characters, so FROM is
   derived from CODING->produced_char, as in the Emacs 20 style macros;
   CODING->produced counts bytes and must not be mixed with the
   character count NCHARS.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    /* Emacs 21 style format.  The first three bytes at SRC are         \
       (METHOD + 0xF2), (BYTES + 0xA0), (CHARS + 0xA0), where BYTES is  \
       the byte length of this composition information, CHARS is the    \
       number of characters composed by this composition.  */           \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int from, to;                                                       \
    int consumed_chars_limit;                                           \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    from = coding->produced_char + char_offset;                         \
    to = from + nchars;                                                 \
    ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
    consumed_chars_limit = consumed_chars_base + nbytes;                \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int i = 0;                                                      \
        while (consumed_chars < consumed_chars_limit)                   \
          {                                                             \
            /* Odd components are rules, except with altchars.  */      \
            if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
            i++;                                                        \
          }                                                             \
        /* Reached only when the loop was left early by `break'.  */    \
        if (consumed_chars < consumed_chars_limit)                      \
          goto invalid_code;                                            \
        /* The length is stored negated; account for I components.  */  \
        charbuf_base[0] -= i;                                           \
      }                                                                 \
  } while (0)
93dec019 1893
aa72b389 1894
df7492f9
KH
/* Decode an Emacs 20 style relative composition.

   SRC is reset to SRC_BASE and the introducing 0x80 is skipped; then
   up to MAX_COMPOSITION_COMPONENTS characters are decoded into a
   local array.  Fewer than two characters is an error
   (`invalid_code').  Otherwise a COMPOSITION_RELATIVE annotation
   covering the decoded characters is stored in CHARBUF, followed by
   the characters themselves.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for relative composition.  */      \
    enum composition_method method = COMPOSITION_RELATIVE;      \
    /* Characters to be composed.  */                           \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
    int from, to;                                               \
                                                                \
    src = src_base;                                             \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
      {                                                         \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
      }                                                         \
    if (i < 2)                                                  \
      goto invalid_code;                                        \
    from = coding->produced_char + char_offset;                 \
    to = from + i;                                              \
    ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
    j = 0;                                                      \
    while (j < i)                                               \
      {                                                         \
        *charbuf++ = components[j];                             \
        j++;                                                    \
      }                                                         \
  } while (0)
1917
1918
/* Decode an Emacs 20 style rule-base composition.  Characters and
   rules are collected alternately into a local table; then a
   composition annotation, the collected table entries, and the
   characters alone are stored into CHARBUF.  Jumps to `invalid_code'
   on a malformed sequence and to `no_more_source' when CHARBUF does
   not have enough room left.  */
#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for rule-base composition.  */     \
    /* Store multibyte form of characters to be composed.  */   \
    enum composition_method method = COMPOSITION_WITH_RULE;     \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
    int from, to;                                               \
                                                                \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
      {                                                         \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
      }                                                         \
    if (i < 1 || (buf - components) % 2 == 0)                   \
      goto invalid_code;                                        \
    /* Give up only when the output would NOT fit; the former   \
       `<' test bailed out exactly when there was room.  */     \
    if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
      goto no_more_source;                                      \
    from = coding->produced_char + char_offset;                 \
    to = from + i;                                              \
    /* The annotation belongs in the output buffer, not in the  \
       local scratch table BUF points into.  */                 \
    ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
    /* NOTE(review): both copy loops use I as the element count \
       although COMPONENTS holds (buf - components) entries --  \
       confirm the intended counts.  */                         \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
    for (j = 0; j < i; j += 2)                                  \
      *charbuf++ = components[j];                               \
  } while (0)
1947
aa72b389
KH
1948
1949static void
df7492f9 1950decode_coding_emacs_mule (coding)
aa72b389 1951 struct coding_system *coding;
aa72b389 1952{
df7492f9
KH
1953 unsigned char *src = coding->source + coding->consumed;
1954 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1955 unsigned char *src_base;
df7492f9 1956 int *charbuf = coding->charbuf;
ff0dacd7 1957 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 1958 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
1959 int multibytep = coding->src_multibyte;
1960 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
1961 int char_offset = coding->produced_char;
1962 int last_offset = char_offset;
1963 int last_id = charset_ascii;
aa72b389 1964
df7492f9 1965 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1966
aa72b389
KH
1967 while (1)
1968 {
df7492f9
KH
1969 int c;
1970
aa72b389 1971 src_base = src;
df7492f9
KH
1972 consumed_chars_base = consumed_chars;
1973
1974 if (charbuf >= charbuf_end)
1975 break;
aa72b389 1976
df7492f9
KH
1977 ONE_MORE_BYTE (c);
1978
1979 if (c < 0x80)
aa72b389 1980 {
df7492f9
KH
1981 if (c == '\r')
1982 {
1983 if (EQ (eol_type, Qdos))
1984 {
1985 if (src == src_end)
98725083
KH
1986 {
1987 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1988 goto no_more_source;
1989 }
df7492f9
KH
1990 if (*src == '\n')
1991 ONE_MORE_BYTE (c);
1992 }
1993 else if (EQ (eol_type, Qmac))
1994 c = '\n';
1995 }
1996 *charbuf++ = c;
1997 char_offset++;
aa72b389 1998 }
df7492f9
KH
1999 else if (c == 0x80)
2000 {
df7492f9 2001 ONE_MORE_BYTE (c);
781d7a48
KH
2002 if (c - 0xF2 >= COMPOSITION_RELATIVE
2003 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2004 DECODE_EMACS_MULE_21_COMPOSITION (c);
2005 else if (c < 0xC0)
2006 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2007 else if (c == 0xFF)
2008 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2009 else
2010 goto invalid_code;
2011 }
2012 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2013 {
2014 int nbytes, nchars;
ff0dacd7
KH
2015 int id;
2016
781d7a48
KH
2017 src = src_base;
2018 consumed_chars = consumed_chars_base;
ff0dacd7 2019 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2020 if (c < 0)
2021 {
2022 if (c == -2)
2023 break;
2024 goto invalid_code;
2025 }
ff0dacd7
KH
2026 if (last_id != id)
2027 {
2028 if (last_id != charset_ascii)
2029 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2030 last_id = id;
2031 last_offset = char_offset;
2032 }
df7492f9 2033 *charbuf++ = c;
781d7a48
KH
2034 src += nbytes;
2035 consumed_chars += nchars;
df7492f9
KH
2036 char_offset++;
2037 }
2038 continue;
2039
2040 invalid_code:
2041 src = src_base;
2042 consumed_chars = consumed_chars_base;
2043 ONE_MORE_BYTE (c);
2044 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2045 char_offset++;
df7492f9
KH
2046 coding->errors++;
2047 }
2048
2049 no_more_source:
ff0dacd7
KH
2050 if (last_id != charset_ascii)
2051 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
2052 coding->consumed_char += consumed_chars_base;
2053 coding->consumed = src_base - coding->source;
2054 coding->charbuf_used = charbuf - coding->charbuf;
2055}
2056
2057
/* Store in CODES (an array of at least two elements) the leading
   code(s) for the emacs-mule charset id ID.  CODES[1] is 0 when ID
   itself is the only leading code (ID < 0xA0); otherwise CODES[0] is
   one of 0x9A..0x9D, selected by the range ID falls in, and CODES[1]
   is ID.

   The expansion deliberately has no trailing semicolon, so that
   `if (x) EMACS_MULE_LEADING_CODES (id, codes); else ...' is valid.  */
#define EMACS_MULE_LEADING_CODES(id, codes)     \
  do {                                          \
    if ((id) < 0xA0)                            \
      (codes)[0] = (id), (codes)[1] = 0;        \
    else if ((id) < 0xE0)                       \
      (codes)[0] = 0x9A, (codes)[1] = (id);     \
    else if ((id) < 0xF0)                       \
      (codes)[0] = 0x9B, (codes)[1] = (id);     \
    else if ((id) < 0xF5)                       \
      (codes)[0] = 0x9C, (codes)[1] = (id);     \
    else                                        \
      (codes)[0] = 0x9D, (codes)[1] = (id);     \
  } while (0)
2071
aa72b389 2072
df7492f9
KH
2073static int
2074encode_coding_emacs_mule (coding)
2075 struct coding_system *coding;
2076{
2077 int multibytep = coding->dst_multibyte;
2078 int *charbuf = coding->charbuf;
2079 int *charbuf_end = charbuf + coding->charbuf_used;
2080 unsigned char *dst = coding->destination + coding->produced;
2081 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2082 int safe_room = 8;
df7492f9
KH
2083 int produced_chars = 0;
2084 Lisp_Object attrs, eol_type, charset_list;
2085 int c;
ff0dacd7 2086 int preferred_charset_id = -1;
df7492f9
KH
2087
2088 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2089
2090 while (charbuf < charbuf_end)
2091 {
2092 ASSURE_DESTINATION (safe_room);
2093 c = *charbuf++;
ff0dacd7
KH
2094
2095 if (c < 0)
2096 {
2097 /* Handle an annotation. */
2098 switch (*charbuf)
2099 {
2100 case CODING_ANNOTATE_COMPOSITION_MASK:
2101 /* Not yet implemented. */
2102 break;
2103 case CODING_ANNOTATE_CHARSET_MASK:
2104 preferred_charset_id = charbuf[3];
2105 if (preferred_charset_id >= 0
2106 && NILP (Fmemq (make_number (preferred_charset_id),
2107 charset_list)))
2108 preferred_charset_id = -1;
2109 break;
2110 default:
2111 abort ();
2112 }
2113 charbuf += -c - 1;
2114 continue;
2115 }
2116
df7492f9
KH
2117 if (ASCII_CHAR_P (c))
2118 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2119 else if (CHAR_BYTE8_P (c))
2120 {
2121 c = CHAR_TO_BYTE8 (c);
2122 EMIT_ONE_BYTE (c);
2123 }
df7492f9 2124 else
aa72b389 2125 {
df7492f9
KH
2126 struct charset *charset;
2127 unsigned code;
2128 int dimension;
2129 int emacs_mule_id;
2130 unsigned char leading_codes[2];
2131
ff0dacd7
KH
2132 if (preferred_charset_id >= 0)
2133 {
2134 charset = CHARSET_FROM_ID (preferred_charset_id);
2135 if (! CHAR_CHARSET_P (c, charset))
2136 charset = char_charset (c, charset_list, NULL);
2137 }
2138 else
2139 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2140 if (! charset)
2141 {
2142 c = coding->default_char;
2143 if (ASCII_CHAR_P (c))
2144 {
2145 EMIT_ONE_ASCII_BYTE (c);
2146 continue;
2147 }
2148 charset = char_charset (c, charset_list, &code);
2149 }
2150 dimension = CHARSET_DIMENSION (charset);
2151 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2152 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2153 EMIT_ONE_BYTE (leading_codes[0]);
2154 if (leading_codes[1])
2155 EMIT_ONE_BYTE (leading_codes[1]);
2156 if (dimension == 1)
2157 EMIT_ONE_BYTE (code);
aa72b389 2158 else
df7492f9
KH
2159 {
2160 EMIT_ONE_BYTE (code >> 8);
2161 EMIT_ONE_BYTE (code & 0xFF);
2162 }
aa72b389 2163 }
aa72b389 2164 }
df7492f9
KH
2165 coding->result = CODING_RESULT_SUCCESS;
2166 coding->produced_char += produced_chars;
2167 coding->produced = dst - coding->destination;
2168 return 0;
aa72b389 2169}
b73bfc1c 2170
4ed46869 2171\f
df7492f9 2172/*** 7. ISO2022 handlers ***/
4ed46869
KH
2173
2174/* The following note describes the coding system ISO2022 briefly.
39787efd 2175 Since the intention of this note is to help understand the
5a936b46 2176 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2177 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46
DL
2178 original document of ISO2022. This is equivalent to the standard
2179 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2180
2181 ISO2022 provides many mechanisms to encode several character sets
5a936b46 2182 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2183 is encoded using bytes less than 128. This may make the encoded
2184 text a little bit longer, but the text passes more easily through
5a936b46
DL
2185 several types of gateway, some of which strip off the MSB (Most
2186 Significant Bit).
b73bfc1c 2187
5a936b46
DL
2188 There are two kinds of character sets: control character sets and
2189 graphic character sets. The former contain control characters such
4ed46869 2190 as `newline' and `escape' to provide control functions (control
39787efd 2191 functions are also provided by escape sequences). The latter
5a936b46 2192 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2193 two control character sets and many graphic character sets.
2194
2195 Graphic character sets are classified into one of the following
39787efd
KH
2196 four classes, according to the number of bytes (DIMENSION) and
2197 number of characters in one dimension (CHARS) of the set:
2198 - DIMENSION1_CHARS94
2199 - DIMENSION1_CHARS96
2200 - DIMENSION2_CHARS94
2201 - DIMENSION2_CHARS96
2202
2203 In addition, each character set is assigned an identification tag,
5a936b46 2204 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2205 hereafter). The <F> of each character set is decided by ECMA(*)
2206 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2207 (0x30..0x3F are for private use only).
4ed46869
KH
2208
2209 Note (*): ECMA = European Computer Manufacturers Association
2210
5a936b46 2211 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2212 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2213 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2214 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2215 o DIMENSION2_CHARS96 -- none for the moment
2216
39787efd 2217 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2218 C0 [0x00..0x1F] -- control character plane 0
2219 GL [0x20..0x7F] -- graphic character plane 0
2220 C1 [0x80..0x9F] -- control character plane 1
2221 GR [0xA0..0xFF] -- graphic character plane 1
2222
2223 A control character set is directly designated and invoked to C0 or
39787efd
KH
2224 C1 by an escape sequence. The most common case is that:
2225 - ISO646's control character set is designated/invoked to C0, and
2226 - ISO6429's control character set is designated/invoked to C1,
2227 and usually these designations/invocations are omitted in encoded
2228 text. In a 7-bit environment, only C0 can be used, and a control
2229 character for C1 is encoded by an appropriate escape sequence to
2230 fit into the environment. All control characters for C1 are
2231 defined to have corresponding escape sequences.
4ed46869
KH
2232
2233 A graphic character set is at first designated to one of four
2234 graphic registers (G0 through G3), then these graphic registers are
2235 invoked to GL or GR. These designations and invocations can be
2236 done independently. The most common case is that G0 is invoked to
39787efd
KH
2237 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2238 these invocations and designations are omitted in encoded text.
2239 In a 7-bit environment, only GL can be used.
4ed46869 2240
39787efd
KH
2241 When a graphic character set of CHARS94 is invoked to GL, codes
2242 0x20 and 0x7F of the GL area work as control characters SPACE and
2243 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2244 be used.
4ed46869
KH
2245
2246 There are two ways of invocation: locking-shift and single-shift.
2247 With locking-shift, the invocation lasts until the next different
39787efd
KH
2248 invocation, whereas with single-shift, the invocation affects the
2249 following character only and doesn't affect the locking-shift
2250 state. Invocations are done by the following control characters or
2251 escape sequences:
4ed46869
KH
2252
2253 ----------------------------------------------------------------------
39787efd 2254 abbrev function cntrl escape seq description
4ed46869 2255 ----------------------------------------------------------------------
39787efd
KH
2256 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2257 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2258 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2259 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2260 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2261 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2262 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2263 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2264 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2265 ----------------------------------------------------------------------
39787efd
KH
2266 (*) These are not used by any known coding system.
2267
2268 Control characters for these functions are defined by macros
2269 ISO_CODE_XXX in `coding.h'.
4ed46869 2270
39787efd 2271 Designations are done by the following escape sequences:
4ed46869
KH
2272 ----------------------------------------------------------------------
2273 escape sequence description
2274 ----------------------------------------------------------------------
2275 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2276 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2277 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2278 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2279 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2280 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2281 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2282 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2283 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2284 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2285 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2286 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2287 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2288 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2289 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2290 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2291 ----------------------------------------------------------------------
2292
2293 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2294 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2295
2296 Note (*): Although these designations are not allowed in ISO2022,
2297 Emacs accepts them on decoding, and produces them on encoding
39787efd 2298 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2299 7-bit environment, non-locking-shift, and non-single-shift.
2300
2301 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2302 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2303
5a936b46 2304 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2305 same multilingual text in ISO2022. Actually, there exist many
2306 coding systems such as Compound Text (used in X11's inter client
5a936b46
DL
2307 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2308 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2309 localized platforms), and all of these are variants of ISO2022.
2310
2311 In addition to the above, Emacs handles two more kinds of escape
2312 sequences: ISO6429's direction specification and Emacs' private
2313 sequence for specifying character composition.
2314
39787efd 2315 ISO6429's direction specification takes the following form:
4ed46869
KH
2316 o CSI ']' -- end of the current direction
2317 o CSI '0' ']' -- end of the current direction
2318 o CSI '1' ']' -- start of left-to-right text
2319 o CSI '2' ']' -- start of right-to-left text
2320 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2321 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2322
2323 Character composition specification takes the following form:
ec6d2bb8
KH
2324 o ESC '0' -- start relative composition
2325 o ESC '1' -- end composition
2326 o ESC '2' -- start rule-base composition (*)
2327 o ESC '3' -- start relative composition with alternate chars (**)
2328 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2329 Since these are not standard escape sequences of any ISO standard,
5a936b46 2330 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2331
5a936b46
DL
2332 (*) This form is used only in Emacs 20.7 and older versions,
2333 but newer versions can safely decode it.
2334 (**) This form is used only in Emacs 21.1 and newer versions,
2335 and older versions can't decode it.
ec6d2bb8 2336
5a936b46 2337 Here's a list of example usages of these composition escape
b73bfc1c 2338 sequences (categorized by `enum composition_method').
ec6d2bb8 2339
b73bfc1c 2340 COMPOSITION_RELATIVE:
ec6d2bb8 2341 ESC 0 CHAR [ CHAR ] ESC 1
5a936b46 2342 COMPOSITION_WITH_RULE:
ec6d2bb8 2343 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2344 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2345 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2346 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2347 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2348
/* Table holding one `enum iso_code_class_type' value for each of the
   256 possible byte values.  NOTE(review): presumably indexed by a
   byte to classify it for the ISO2022 handlers below -- confirm where
   the table is filled in.  */
2349enum iso_code_class_type iso_code_class[256];
2350
df7492f9
KH
/* Nonzero if the charset whose id is ID has a non-negative entry in
   the `safe_charsets' table of CODING.  An ID outside the range
   0..CODING->max_charset_id is never safe; in particular a negative
   ID (e.g. a "not found" result of a table lookup) must not be used
   as an index into the table.  ID is evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)      \
  ((id) >= 0                            \
   && (id) <= (coding)->max_charset_id  \
   && (coding)->safe_charsets[id] >= 0)
2354
2355
/* Nonzero if CODING_ISO_INITIAL, applied to the element of
   `coding_categories' selected by CATEGORY with 1 as its second
   argument, is non-negative.  NOTE(review): presumably this means a
   charset is initially designated to graphic register G1, so a shift
   out is meaningful -- confirm against CODING_ISO_INITIAL.  */
2356#define SHIFT_OUT_OK(category) \
2357 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2358
2359static void
f0064e1f
DL
2360setup_iso_safe_charsets (attrs)
2361 Lisp_Object attrs;
df7492f9
KH
2362{
2363 Lisp_Object charset_list, safe_charsets;
2364 Lisp_Object request;
2365 Lisp_Object reg_usage;
2366 Lisp_Object tail;
2367 int reg94, reg96;
2368 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2369 int max_charset_id;
2370
2371 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2372 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2373 && ! EQ (charset_list, Viso_2022_charset_list))
2374 {
2375 CODING_ATTR_CHARSET_LIST (attrs)
2376 = charset_list = Viso_2022_charset_list;
2377 ASET (attrs, coding_attr_safe_charsets, Qnil);
2378 }
2379
2380 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2381 return;
2382
2383 max_charset_id = 0;
2384 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2385 {
2386 int id = XINT (XCAR (tail));
2387 if (max_charset_id < id)
2388 max_charset_id = id;
2389 }
2390
2391 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2392 make_number (255));
2393 request = AREF (attrs, coding_attr_iso_request);
2394 reg_usage = AREF (attrs, coding_attr_iso_usage);
2395 reg94 = XINT (XCAR (reg_usage));
2396 reg96 = XINT (XCDR (reg_usage));
2397
2398 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2399 {
2400 Lisp_Object id;
2401 Lisp_Object reg;
2402 struct charset *charset;
2403
2404 id = XCAR (tail);
2405 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2406 reg = Fcdr (Fassq (id, request));
df7492f9
KH
2407 if (! NILP (reg))
2408 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2409 else if (charset->iso_chars_96)
2410 {
2411 if (reg96 < 4)
2412 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2413 }
2414 else
2415 {
2416 if (reg94 < 4)
2417 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2418 }
2419 }
2420 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2421}
d46c5b12 2422
d46c5b12 2423
4ed46869 2424/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2425 Check if a text is encoded in one of ISO-2022 based coding systems.
2426 If it is, return 1, else return 0. */
4ed46869 2427
0a28aafb 2428static int
ff0dacd7 2429detect_coding_iso_2022 (coding, detect_info)
df7492f9 2430 struct coding_system *coding;
ff0dacd7 2431 struct coding_detection_info *detect_info;
4ed46869 2432{
df7492f9
KH
2433 unsigned char *src = coding->source, *src_base = src;
2434 unsigned char *src_end = coding->source + coding->src_bytes;
2435 int multibytep = coding->src_multibyte;
ff0dacd7 2436 int single_shifting = 0;
df7492f9
KH
2437 int id;
2438 int c, c1;
2439 int consumed_chars = 0;
2440 int i;
ff0dacd7
KH
2441 int rejected = 0;
2442 int found = 0;
2443
2444 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2445
2446 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2447 {
2448 struct coding_system *this = &(coding_categories[i]);
2449 Lisp_Object attrs, val;
2450
2451 attrs = CODING_ID_ATTRS (this->id);
2452 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2453 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2454 setup_iso_safe_charsets (attrs);
2455 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2456 this->max_charset_id = XSTRING (val)->size - 1;
2457 this->safe_charsets = (char *) XSTRING (val)->data;
2458 }
2459
2460 /* A coding system of this category is always ASCII compatible. */
2461 src += coding->head_ascii;
3f003981 2462
ff0dacd7 2463 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2464 {
df7492f9 2465 ONE_MORE_BYTE (c);
4ed46869
KH
2466 switch (c)
2467 {
2468 case ISO_CODE_ESC:
74383408
KH
2469 if (inhibit_iso_escape_detection)
2470 break;
f46869e4 2471 single_shifting = 0;
df7492f9 2472 ONE_MORE_BYTE (c);
d46c5b12 2473 if (c >= '(' && c <= '/')
4ed46869 2474 {
bf9cdd4e 2475 /* Designation sequence for a charset of dimension 1. */
df7492f9 2476 ONE_MORE_BYTE (c1);
d46c5b12 2477 if (c1 < ' ' || c1 >= 0x80
df7492f9 2478 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2479 /* Invalid designation sequence. Just ignore. */
2480 break;
bf9cdd4e
KH
2481 }
2482 else if (c == '$')
2483 {
2484 /* Designation sequence for a charset of dimension 2. */
df7492f9 2485 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2486 if (c >= '@' && c <= 'B')
2487 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2488 id = iso_charset_table[1][0][c];
bf9cdd4e 2489 else if (c >= '(' && c <= '/')
bcf26d6a 2490 {
df7492f9 2491 ONE_MORE_BYTE (c1);
d46c5b12 2492 if (c1 < ' ' || c1 >= 0x80
df7492f9 2493 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2494 /* Invalid designation sequence. Just ignore. */
2495 break;
bcf26d6a 2496 }
bf9cdd4e 2497 else
ff0dacd7 2498 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2499 break;
2500 }
ae9ff118 2501 else if (c == 'N' || c == 'O')
d46c5b12 2502 {
ae9ff118 2503 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2504 single_shifting = 1;
2505 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2506 break;
4ed46869 2507 }
ec6d2bb8
KH
2508 else if (c >= '0' && c <= '4')
2509 {
2510 /* ESC <Fp> for start/end composition. */
ff0dacd7 2511 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2512 break;
2513 }
bf9cdd4e 2514 else
df7492f9 2515 {
ff0dacd7 2516 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2517 break;
2518 }
d46c5b12
KH
2519
2520 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2521 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2522 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2523 id))
ff0dacd7 2524 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2525 else
ff0dacd7 2526 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2527 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2528 id))
ff0dacd7 2529 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2530 else
ff0dacd7 2531 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2532 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2533 id))
ff0dacd7 2534 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2535 else
ff0dacd7 2536 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2537 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2538 id))
ff0dacd7 2539 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2540 else
ff0dacd7 2541 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2542 break;
2543
4ed46869 2544 case ISO_CODE_SO:
d46c5b12 2545 case ISO_CODE_SI:
ff0dacd7 2546 /* Locking shift out/in. */
74383408
KH
2547 if (inhibit_iso_escape_detection)
2548 break;
f46869e4 2549 single_shifting = 0;
ff0dacd7
KH
2550 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2551 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2552 break;
ff0dacd7 2553
4ed46869 2554 case ISO_CODE_CSI:
ff0dacd7 2555 /* Control sequence introducer. */
f46869e4 2556 single_shifting = 0;
ff0dacd7
KH
2557 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2558 found |= CATEGORY_MASK_ISO_8_ELSE;
2559 goto check_extra_latin;
2560
2561
4ed46869
KH
2562 case ISO_CODE_SS2:
2563 case ISO_CODE_SS3:
ff0dacd7
KH
2564 /* Single shift. */
2565 if (inhibit_iso_escape_detection)
2566 break;
2567 single_shifting = 1;
2568 rejected |= CATEGORY_MASK_ISO_7BIT;
2569 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2570 & CODING_ISO_FLAG_SINGLE_SHIFT)
2571 found |= CATEGORY_MASK_ISO_8_1;
2572 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2573 & CODING_ISO_FLAG_SINGLE_SHIFT)
2574 found |= CATEGORY_MASK_ISO_8_2;
2575 goto check_extra_latin;
4ed46869
KH
2576
2577 default:
2578 if (c < 0x80)
f46869e4
KH
2579 {
2580 single_shifting = 0;
2581 break;
2582 }
ff0dacd7 2583 if (c >= 0xA0)
c4825358 2584 {
ff0dacd7
KH
2585 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2586 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2587 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2588 0xA0..0FF. If the byte length is even, we include
2589 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2590 only when we are not single shifting. */
2591 if (! single_shifting
2592 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2593 {
e17de821 2594 int i = 1;
b73bfc1c
KH
2595 while (src < src_end)
2596 {
df7492f9 2597 ONE_MORE_BYTE (c);
b73bfc1c
KH
2598 if (c < 0xA0)
2599 break;
2600 i++;
2601 }
2602
2603 if (i & 1 && src < src_end)
ff0dacd7 2604 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2605 else
ff0dacd7 2606 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2607 }
ff0dacd7 2608 break;
4ed46869 2609 }
ff0dacd7
KH
2610 check_extra_latin:
2611 single_shifting = 0;
2612 if (! VECTORP (Vlatin_extra_code_table)
2613 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2614 {
2615 rejected = CATEGORY_MASK_ISO;
2616 break;
2617 }
2618 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2619 & CODING_ISO_FLAG_LATIN_EXTRA)
2620 found |= CATEGORY_MASK_ISO_8_1;
2621 else
2622 rejected |= CATEGORY_MASK_ISO_8_1;
2623 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2624 & CODING_ISO_FLAG_LATIN_EXTRA)
2625 found |= CATEGORY_MASK_ISO_8_2;
2626 else
2627 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2628 }
2629 }
ff0dacd7
KH
2630 detect_info->rejected |= CATEGORY_MASK_ISO;
2631 return 0;
2632
df7492f9 2633 no_more_source:
ff0dacd7
KH
2634 detect_info->rejected |= rejected;
2635 detect_info->found |= (found & ~rejected);
df7492f9 2636 return 1;
4ed46869
KH
2637}
2638
4ed46869
KH
2639
2640/* Set designation state into CODING. */
df7492f9
KH
/* Record in CODING that the charset identified by DIM, CHARS_96 and
   the final character FINAL is designated to graphic register REG.

   If FINAL is out of range, no charset is registered for the
   combination, or the charset is not safe for CODING, the register is
   marked with -2 and control jumps to `invalid_code'.
   JISX0201-Roman and JISX0208-1978 are replaced by ASCII and JISX0208
   respectively when the corresponding flag of CODING is set.  All
   macro arguments are parenthesized so that any expression may be
   passed.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if ((final) < '0' || (final) >= 128                                 \
        || ((id = ISO_CHARSET_TABLE ((dim), (chars_96), (final))) < 0)  \
        || !SAFE_CHARSET_P (coding, id))                                \
      {                                                                 \
        CODING_ISO_DESIGNATION (coding, (reg)) = -2;                    \
        goto invalid_code;                                              \
      }                                                                 \
    prev = CODING_ISO_DESIGNATION (coding, (reg));                      \
    if (id == charset_jisx0201_roman)                                   \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
          id = charset_ascii;                                           \
      }                                                                 \
    else if (id == charset_jisx0208_1978)                               \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
          id = charset_jisx0208;                                        \
      }                                                                 \
    CODING_ISO_DESIGNATION (coding, (reg)) = id;                        \
    /* If there was an invalid designation to REG previously, and this  \
       designation is ASCII to REG, we should keep this designation     \
       sequence.  */                                                    \
    if (prev == -2 && id == charset_ascii)                              \
      goto invalid_code;                                                \
  } while (0)
2670
d46c5b12 2671
df7492f9
KH
/* If a composition is in progress, abandon it: leave the composing
   state and store the characters collected so far in `components'
   into CHARBUF as ordinary characters.  For the methods that
   interleave rules with characters only every second element (the
   characters) is stored.  Does nothing when no composition is in
   progress.  Jumps to `no_more_source' when CHARBUF has no room for
   the collected elements.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    int i;                                                      \
    if (composition_state == COMPOSING_NO)                      \
      break;                                                    \
    /* Make sure there is room for the elements stored in the   \
       table `components'.  */                                  \
    if (charbuf + component_idx > charbuf_end)                  \
      goto no_more_source;                                      \
    composition_state = COMPOSING_NO;                           \
    if (method == COMPOSITION_RELATIVE                          \
        || method == COMPOSITION_WITH_ALTCHARS)                 \
      {                                                         \
        for (i = 0; i < component_idx; i++)                     \
          *charbuf++ = components[i];                           \
        char_offset += component_idx;                           \
      }                                                         \
    else                                                        \
      {                                                         \
        for (i = 0; i < component_idx; i += 2)                  \
          *charbuf++ = components[i];                           \
        /* Advance by the number of characters actually stored  \
           by the loop above, also when COMPONENT_IDX is even   \
           (including 0).  */                                   \
        char_offset += (component_idx + 1) / 2;                 \
      }                                                         \
  } while (0)
2696
d46c5b12 2697
aa72b389
KH
2698/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2699 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2700 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2701 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2702 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2703 */
ec6d2bb8 2704
df7492f9
KH
/* Handle a composition start sequence whose final byte is C1 ('0',
   '2', '3' or '4').

   ESC 0 seen while in state COMPOSING_COMPONENT_RULE only marks the
   end of the alternate components and switches to composing
   characters.  Otherwise any composition in progress is finished
   first, and a new one is started only if the terminating ESC 1 is
   present in the rest of the source.  If it is missing, control jumps
   to `invalid_code' on the last block and to `no_more_source'
   otherwise.  */
#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if (c1 == '0'                                                       \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        unsigned char *p;                                               \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* `>=' rather than `==': when SRC is already at SRC_END the    \
           loop does not run and P is beyond SRC_END - 1, which must    \
           count as "not found" too.  */                                \
        if (p >= src_end - 1)                                           \
          {                                                             \
            if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
              goto invalid_code;                                        \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
                  : c1 == '2' ? COMPOSITION_WITH_RULE                   \
                  : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
2740
ec6d2bb8 2741
df7492f9
KH
2742/* Handle composition end sequence ESC 1. */
2743
/* Finish the composition in progress: store a composition annotation
   into CHARBUF, followed (for every method but the relative one) by
   the component elements, and then the composed characters
   themselves.  CHAR_OFFSET is advanced once per composed character
   stored.  Afterwards the text is flagged as annotated and the
   composing state is left.  */
#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *annotation_head = charbuf;                                     \
    int span_from = coding->produced_char + char_offset;                \
    int span_len, span_to;                                              \
    int k, limit;                                                       \
                                                                        \
    /* Number of characters covered by the composition.  */             \
    if (component_len > 0)                                              \
      span_len = component_idx - component_len;                         \
    else if (method == COMPOSITION_RELATIVE)                            \
      span_len = component_idx;                                         \
    else                                                                \
      span_len = (component_idx + 1) / 2;                               \
    span_to = span_from + span_len;                                     \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, span_from, span_to, method);         \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        /* Append the components to the annotation and store the        \
           (negative) distance from the annotation head to its end      \
           in the head element.  */                                     \
        limit = (component_len == 0 ? component_idx : component_len);   \
        for (k = 0; k < limit; k++)                                     \
          *charbuf++ = components[k];                                   \
        *annotation_head = annotation_head - charbuf;                   \
      }                                                                 \
    if (method == COMPOSITION_WITH_RULE)                                \
      {                                                                 \
        /* Characters and rules alternate; store characters only.  */   \
        k = 0;                                                          \
        while (k < component_idx)                                       \
          {                                                             \
            *charbuf++ = components[k];                                 \
            k += 2;                                                     \
            char_offset++;                                              \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        k = component_len;                                              \
        while (k < component_idx)                                       \
          {                                                             \
            *charbuf++ = components[k];                                 \
            k++;                                                        \
            char_offset++;                                              \
          }                                                             \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
2774
df7492f9 2775
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read into C2 with ONE_MORE_BYTE) and store the encoded composition
   rule back into C1.  C1 is set to 0 when the byte does not denote a
   rule in either format.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) >= 93)             /* not a rule */                        \
      (c1) = 0;                                                         \
    else if ((c1) >= 81)        /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int global_ref = (c1) / 9;                                      \
        int new_ref = (c1) % 9;                                         \
                                                                        \
        if (global_ref == 4)                                            \
          global_ref = 10;                                              \
        if (new_ref == 4)                                               \
          new_ref = 10;                                                 \
        (c1) = COMPOSITION_ENCODE_RULE (global_ref, new_ref);           \
      }                                                                 \
  } while (0)
88993dfd 2799
d46c5b12 2800
4ed46869
KH
2801/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2802
b73bfc1c 2803static void
df7492f9 2804decode_coding_iso_2022 (coding)
4ed46869 2805 struct coding_system *coding;
4ed46869 2806{
df7492f9
KH
2807 unsigned char *src = coding->source + coding->consumed;
2808 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2809 unsigned char *src_base;
df7492f9 2810 int *charbuf = coding->charbuf;
ff0dacd7
KH
2811 int *charbuf_end
2812 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2813 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
2814 int multibytep = coding->src_multibyte;
2815 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2816 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2817 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2818 struct charset *charset;
2819 int c;
2820 /* For handling composition sequence. */
2821#define COMPOSING_NO 0
2822#define COMPOSING_CHAR 1
2823#define COMPOSING_RULE 2
2824#define COMPOSING_COMPONENT_CHAR 3
2825#define COMPOSING_COMPONENT_RULE 4
2826
2827 int composition_state = COMPOSING_NO;
2828 enum composition_method method;
2829 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2830 int component_idx;
2831 int component_len;
2832 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
2833 int char_offset = coding->produced_char;
2834 int last_offset = char_offset;
2835 int last_id = charset_ascii;
df7492f9
KH
2836
2837 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2838 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2839
2840 while (1)
4ed46869 2841 {
b73bfc1c
KH
2842 int c1, c2;
2843
2844 src_base = src;
df7492f9
KH
2845 consumed_chars_base = consumed_chars;
2846
2847 if (charbuf >= charbuf_end)
2848 break;
2849
b73bfc1c 2850 ONE_MORE_BYTE (c1);
4ed46869 2851
98725083 2852 /* We produce at most one character. */
4ed46869
KH
2853 switch (iso_code_class [c1])
2854 {
2855 case ISO_0x20_or_0x7F:
df7492f9 2856 if (composition_state != COMPOSING_NO)
ec6d2bb8 2857 {
df7492f9
KH
2858 if (composition_state == COMPOSING_RULE
2859 || composition_state == COMPOSING_COMPONENT_RULE)
2860 {
2861 DECODE_COMPOSITION_RULE (c1);
2862 components[component_idx++] = c1;
2863 composition_state--;
2864 continue;
2865 }
ec6d2bb8 2866 }
df7492f9
KH
2867 if (charset_id_0 < 0
2868 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2869 /* This is SPACE or DEL. */
2870 charset = CHARSET_FROM_ID (charset_ascii);
2871 else
2872 charset = CHARSET_FROM_ID (charset_id_0);
2873 break;
4ed46869
KH
2874
2875 case ISO_graphic_plane_0:
781d7a48 2876 if (composition_state != COMPOSING_NO)
b73bfc1c 2877 {
781d7a48
KH
2878 if (composition_state == COMPOSING_RULE
2879 || composition_state == COMPOSING_COMPONENT_RULE)
2880 {
2881 DECODE_COMPOSITION_RULE (c1);
2882 components[component_idx++] = c1;
2883 composition_state--;
2884 continue;
2885 }
b73bfc1c 2886 }
df7492f9 2887 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2888 break;
2889
2890 case ISO_0xA0_or_0xFF:
df7492f9
KH
2891 if (charset_id_1 < 0
2892 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2893 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2894 goto invalid_code;
4ed46869
KH
2895 /* This is a graphic character, we fall down ... */
2896
2897 case ISO_graphic_plane_1:
df7492f9
KH
2898 if (charset_id_1 < 0)
2899 goto invalid_code;
2900 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2901 break;
2902
2903 case ISO_carriage_return:
df7492f9 2904 if (c1 == '\r')
4ed46869 2905 {
df7492f9 2906 if (EQ (eol_type, Qdos))
4ed46869 2907 {
df7492f9 2908 if (src == src_end)
98725083
KH
2909 {
2910 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2911 goto no_more_source;
2912 }
df7492f9
KH
2913 if (*src == '\n')
2914 ONE_MORE_BYTE (c1);
4ed46869 2915 }
df7492f9
KH
2916 else if (EQ (eol_type, Qmac))
2917 c1 = '\n';
4ed46869 2918 }
df7492f9
KH
2919 /* fall through */
2920
2921 case ISO_control_0:
2922 MAYBE_FINISH_COMPOSITION ();
2923 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2924 break;
2925
df7492f9
KH
2926 case ISO_control_1:
2927 MAYBE_FINISH_COMPOSITION ();
2928 goto invalid_code;
2929
4ed46869 2930 case ISO_shift_out:
df7492f9
KH
2931 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2932 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2933 goto invalid_code;
2934 CODING_ISO_INVOCATION (coding, 0) = 1;
2935 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2936 continue;
4ed46869
KH
2937
2938 case ISO_shift_in:
df7492f9
KH
2939 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2940 goto invalid_code;
2941 CODING_ISO_INVOCATION (coding, 0) = 0;
2942 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2943 continue;
4ed46869
KH
2944
2945 case ISO_single_shift_2_7:
2946 case ISO_single_shift_2:
df7492f9
KH
2947 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2948 goto invalid_code;
4ed46869
KH
2949 /* SS2 is handled as an escape sequence of ESC 'N' */
2950 c1 = 'N';
2951 goto label_escape_sequence;
2952
2953 case ISO_single_shift_3:
df7492f9
KH
2954 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2955 goto invalid_code;
4ed46869
KH
2956 /* SS2 is handled as an escape sequence of ESC 'O' */
2957 c1 = 'O';
2958 goto label_escape_sequence;
2959
2960 case ISO_control_sequence_introducer:
2961 /* CSI is handled as an escape sequence of ESC '[' ... */
2962 c1 = '[';
2963 goto label_escape_sequence;
2964
2965 case ISO_escape:
2966 ONE_MORE_BYTE (c1);
2967 label_escape_sequence:
df7492f9 2968 /* Escape sequences handled here are invocation,
4ed46869
KH
2969 designation, direction specification, and character
2970 composition specification. */
2971 switch (c1)
2972 {
2973 case '&': /* revision of following character set */
2974 ONE_MORE_BYTE (c1);
2975 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2976 goto invalid_code;
4ed46869
KH
2977 ONE_MORE_BYTE (c1);
2978 if (c1 != ISO_CODE_ESC)
df7492f9 2979 goto invalid_code;
4ed46869
KH
2980 ONE_MORE_BYTE (c1);
2981 goto label_escape_sequence;
2982
2983 case '$': /* designation of 2-byte character set */
df7492f9
KH
2984 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2985 goto invalid_code;
4ed46869
KH
2986 ONE_MORE_BYTE (c1);
2987 if (c1 >= '@' && c1 <= 'B')
2988 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2989 or JISX0208.1980 */
df7492f9 2990 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2991 }
2992 else if (c1 >= 0x28 && c1 <= 0x2B)
2993 { /* designation of DIMENSION2_CHARS94 character set */
2994 ONE_MORE_BYTE (c2);
df7492f9 2995 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2996 }
2997 else if (c1 >= 0x2C && c1 <= 0x2F)
2998 { /* designation of DIMENSION2_CHARS96 character set */
2999 ONE_MORE_BYTE (c2);
df7492f9 3000 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
3001 }
3002 else
df7492f9 3003 goto invalid_code;
b73bfc1c 3004 /* We must update these variables now. */
df7492f9
KH
3005 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3006 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3007 continue;
4ed46869
KH
3008
3009 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3010 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3011 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3012 goto invalid_code;
3013 CODING_ISO_INVOCATION (coding, 0) = 2;
3014 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3015 continue;
4ed46869
KH
3016
3017 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3018 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3019 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3020 goto invalid_code;
3021 CODING_ISO_INVOCATION (coding, 0) = 3;
3022 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3023 continue;
4ed46869
KH
3024
3025 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3026 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3027 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3028 goto invalid_code;
3029 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 3030 ONE_MORE_BYTE (c1);
e7046a18 3031 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3032 goto invalid_code;
4ed46869
KH
3033 break;
3034
3035 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3036 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3037 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3038 goto invalid_code;
3039 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 3040 ONE_MORE_BYTE (c1);
e7046a18 3041 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3042 goto invalid_code;
4ed46869
KH
3043 break;
3044
ec6d2bb8 3045 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3046 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3047 goto invalid_code;
ec6d2bb8 3048 DECODE_COMPOSITION_START (c1);
b73bfc1c 3049 continue;
4ed46869 3050
ec6d2bb8 3051 case '1': /* end composition */
df7492f9
KH
3052 if (composition_state == COMPOSING_NO)
3053 goto invalid_code;
3054 DECODE_COMPOSITION_END ();
b73bfc1c 3055 continue;
4ed46869
KH
3056
3057 case '[': /* specification of direction */
df7492f9
KH
3058 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3059 goto invalid_code;
4ed46869 3060 /* For the moment, nested direction is not supported.
d46c5b12 3061 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3062 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3063 ONE_MORE_BYTE (c1);
3064 switch (c1)
3065 {
3066 case ']': /* end of the current direction */
d46c5b12 3067 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3068
3069 case '0': /* end of the current direction */
3070 case '1': /* start of left-to-right direction */
3071 ONE_MORE_BYTE (c1);
3072 if (c1 == ']')
d46c5b12 3073 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3074 else
df7492f9 3075 goto invalid_code;
4ed46869
KH
3076 break;
3077
3078 case '2': /* start of right-to-left direction */
3079 ONE_MORE_BYTE (c1);
3080 if (c1 == ']')
d46c5b12 3081 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3082 else
df7492f9 3083 goto invalid_code;
4ed46869
KH
3084 break;
3085
3086 default:
df7492f9 3087 goto invalid_code;
4ed46869 3088 }
b73bfc1c 3089 continue;
4ed46869
KH
3090
3091 default:
df7492f9
KH
3092 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3093 goto invalid_code;
4ed46869
KH
3094 if (c1 >= 0x28 && c1 <= 0x2B)
3095 { /* designation of DIMENSION1_CHARS94 character set */
3096 ONE_MORE_BYTE (c2);
df7492f9 3097 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3098 }
3099 else if (c1 >= 0x2C && c1 <= 0x2F)
3100 { /* designation of DIMENSION1_CHARS96 character set */
3101 ONE_MORE_BYTE (c2);
df7492f9 3102 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3103 }
3104 else
df7492f9 3105 goto invalid_code;
b73bfc1c 3106 /* We must update these variables now. */
df7492f9
KH
3107 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3108 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3109 continue;
4ed46869 3110 }
b73bfc1c 3111 }
4ed46869 3112
ff0dacd7
KH
3113 if (charset->id != charset_ascii
3114 && last_id != charset->id)
3115 {
3116 if (last_id != charset_ascii)
3117 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3118 last_id = charset->id;
3119 last_offset = char_offset;
3120 }
3121
b73bfc1c 3122 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3123 Produce a decoded character while getting 2nd position code
3124 C2 if necessary. */
3125 c1 &= 0x7F;
3126 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3127 {
3128 ONE_MORE_BYTE (c2);
df7492f9 3129 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3130 /* C2 is not in a valid range. */
df7492f9
KH
3131 goto invalid_code;
3132 c1 = (c1 << 8) | (c2 & 0x7F);
3133 if (CHARSET_DIMENSION (charset) > 2)
3134 {
3135 ONE_MORE_BYTE (c2);
3136 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3137 /* C2 is not in a valid range. */
3138 goto invalid_code;
3139 c1 = (c1 << 8) | (c2 & 0x7F);
3140 }
3141 }
3142
3143 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3144 if (c < 0)
3145 {
3146 MAYBE_FINISH_COMPOSITION ();
3147 for (; src_base < src; src_base++, char_offset++)
3148 {
3149 if (ASCII_BYTE_P (*src_base))
3150 *charbuf++ = *src_base;
3151 else
3152 *charbuf++ = BYTE8_TO_CHAR (*src_base);
ff0dacd7 3153 char_offset++;
df7492f9
KH
3154 }
3155 }
3156 else if (composition_state == COMPOSING_NO)
3157 {
3158 *charbuf++ = c;
3159 char_offset++;
4ed46869 3160 }
df7492f9 3161 else
781d7a48
KH
3162 {
3163 components[component_idx++] = c;
3164 if (method == COMPOSITION_WITH_RULE
3165 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3166 && composition_state == COMPOSING_COMPONENT_CHAR))
3167 composition_state++;
3168 }
4ed46869
KH
3169 continue;
3170
df7492f9
KH
3171 invalid_code:
3172 MAYBE_FINISH_COMPOSITION ();
4ed46869 3173 src = src_base;
df7492f9
KH
3174 consumed_chars = consumed_chars_base;
3175 ONE_MORE_BYTE (c);
3176 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3177 char_offset++;
df7492f9 3178 coding->errors++;
4ed46869 3179 }
fb88bf2d 3180
df7492f9 3181 no_more_source:
ff0dacd7
KH
3182 if (last_id != charset_ascii)
3183 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3184 coding->consumed_char += consumed_chars_base;
3185 coding->consumed = src_base - coding->source;
3186 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3187}
3188
b73bfc1c 3189
f4dee582 3190/* ISO2022 encoding stuff. */
4ed46869
KH
3191
3192/*
f4dee582 3193 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3194 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3195 variant has the following specifications:
df7492f9 3196 1. Initial designation to G0 thru G3.
4ed46869
KH
3197 2. Allows short-form designation?
3198 3. ASCII should be designated to G0 before control characters?
3199 4. ASCII should be designated to G0 at end of line?
3200 5. 7-bit environment or 8-bit environment?
3201 6. Use locking-shift?
3202 7. Use Single-shift?
3203 And the following two are only for Japanese:
3204 8. Use ASCII in place of JIS0201-1976-Roman?
3205 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3206 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3207 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3208 details.
4ed46869
KH
3209*/
3210
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG through the EMIT_* macros, and record CHARSET as the
   designation of REG in CODING.  If CHARSET is a multi-byte 94-charset
   whose <final-char> is '@', 'A', or 'B', REG is 0, and CODING does
   not have CODING_ISO_FLAG_LONG_FORM set, produce the designation
   sequence of short-form (no intermediate character).  When CODING
   has CODING_ISO_FLAG_REVISION set and CHARSET has a non-negative
   revision, the sequence is preceded by ESC '&' <revision>.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_byte = CHARSET_ISO_FINAL (charset);             \
    char *inter_94 = "()*+";                                            \
    char *inter_96 = ",-./";                                            \
    int rev = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)   \
               ? CHARSET_ISO_REVISION (charset) : -1);                  \
                                                                        \
    if (rev >= 0)                                                       \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + rev);                                      \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        /* The intermediate character selects both the register and     \
           the 94/96 kind of the charset.  */                           \
        int inter = (CHARSET_ISO_CHARS_96 (charset)                     \
                     ? inter_96[reg] : inter_94[reg]);                  \
        EMIT_ONE_ASCII_BYTE (inter);                                    \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (inter_96[reg]);                          \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || final_byte < '@' || final_byte > 'B')               \
          EMIT_ONE_ASCII_BYTE (inter_94[reg]);                          \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_byte);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3258
df7492f9 3259
4ed46869
KH
/* The following two macros produce codes for ISO2022 single-shift
   functions (single-shift-2 and single-shift-3): the escape sequence
   ESC 'N' / ESC 'O' when CODING has CODING_ISO_FLAG_SEVEN_BITS set,
   the control character ISO_CODE_SS2 / ISO_CODE_SS3 otherwise.  Both
   set the single-shifting state of CODING.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3282
df7492f9 3283
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3), and record in
   CODING which graphic register is now invoked to graphic plane 0.

   The escape sequences must match what the decoder accepts:
   ESC 'n' for locking-shift-2 and ESC 'o' for locking-shift-3.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3314
df7492f9 3315
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_ROMAN set and CHARSET is ASCII, it is replaced
   by the charset of charset_jisx0201_roman.  C1 may be any
   expression; it is parenthesized wherever it is used.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3358
df7492f9 3359
f4dee582
RS
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS set and CHARSET is charset_jisx0208, it
   is replaced by the charset of charset_jisx0208_1978.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int cs_id = CHARSET_ID (charset);                                   \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
        && cs_id == charset_jisx0208)                                   \
      {                                                                 \
        cs_id = charset_jisx0208_1978;                                  \
        charset = CHARSET_FROM_ID (cs_id);                              \
      }                                                                 \
                                                                        \
    /* A pending single shift applies to this character only.  */       \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 0))                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not yet invoked to any graphic plane: invoke it       \
       (designating it to some graphic register first if needed),       \
       then go round the loop again to produce the character.  */       \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3402
05e6f5dc 3403
df7492f9
KH
/* Produce codes for the character C of the character set CHARSET.
   The code obtained from ENCODE_CHAR is emitted as a single
   position-code when CHARSET is of dimension 1; otherwise its bits
   above the low eight and its low eight bits are emitted as two
   position-codes.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    int position_code = ENCODE_CHAR ((charset), (c));                   \
                                                                        \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), position_code >> 8,   \
                                       position_code & 0xFF);           \
    else                                                                \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), position_code);       \
  } while (0)
bdd9fb48 3413
05e6f5dc 3414
4ed46869 3415/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3416 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3417 Return new DST. */
3418
3419unsigned char *
df7492f9
KH
3420encode_invocation_designation (charset, coding, dst, p_nchars)
3421 struct charset *charset;
4ed46869
KH
3422 struct coding_system *coding;
3423 unsigned char *dst;
df7492f9 3424 int *p_nchars;
4ed46869 3425{
df7492f9
KH
3426 int multibytep = coding->dst_multibyte;
3427 int produced_chars = *p_nchars;
4ed46869 3428 int reg; /* graphic register number */
df7492f9 3429 int id = CHARSET_ID (charset);
4ed46869
KH
3430
3431 /* At first, check designations. */
3432 for (reg = 0; reg < 4; reg++)
df7492f9 3433 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3434 break;
3435
3436 if (reg >= 4)
3437 {
3438 /* CHARSET is not yet designated to any graphic registers. */
3439 /* At first check the requested designation. */
df7492f9
KH
3440 reg = CODING_ISO_REQUEST (coding, id);
3441 if (reg < 0)
1ba9e4ab
KH
3442 /* Since CHARSET requests no special designation, designate it
3443 to graphic register 0. */
4ed46869
KH
3444 reg = 0;
3445
3446 ENCODE_DESIGNATION (charset, reg, coding);
3447 }
3448
df7492f9
KH
3449 if (CODING_ISO_INVOCATION (coding, 0) != reg
3450 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3451 {
3452 /* Since the graphic register REG is not invoked to any graphic
3453 planes, invoke it to graphic plane 0. */
3454 switch (reg)
3455 {
3456 case 0: /* graphic register 0 */
3457 ENCODE_SHIFT_IN;
3458 break;
3459
3460 case 1: /* graphic register 1 */
3461 ENCODE_SHIFT_OUT;
3462 break;
3463
3464 case 2: /* graphic register 2 */
df7492f9 3465 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3466 ENCODE_SINGLE_SHIFT_2;
3467 else
3468 ENCODE_LOCKING_SHIFT_2;
3469 break;
3470
3471 case 3: /* graphic register 3 */
df7492f9 3472 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3473 ENCODE_SINGLE_SHIFT_3;
3474 else
3475 ENCODE_LOCKING_SHIFT_3;
3476 break;
3477 }
3478 }
b73bfc1c 3479
df7492f9 3480 *p_nchars = produced_chars;
4ed46869
KH
3481 return dst;
3482}
3483
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER produces the 7-bit form ESC '['
   when CODING has CODING_ISO_FLAG_SEVEN_BITS set (whatever other
   flags are set), and the single byte ISO_CODE_CSI otherwise.  It is
   an object-like macro and must be used without an argument list.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


/* Produce CSI '2' ']': start of right-to-left direction.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


/* Produce CSI '0' ']': end of the current direction.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3507
4ed46869
KH
3508
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state: shift in if graphic plane 0
   does not invoke graphic register 0, then redesignate every graphic
   register that has an initial charset but currently holds a
   different one.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        /* Nothing to do for a register without an initial charset      \
           or one that still holds it.  */                              \
        if (initial_id < 0                                              \
            || CODING_ISO_DESIGNATION (coding, reg) == initial_id)      \
          continue;                                                     \
        charset = CHARSET_FROM_ID (initial_id);                         \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
3527
df7492f9 3528
bdd9fb48 3529/* Produce designation sequences of charsets in the line started from
b73bfc1c 3530 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3531
3532 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3533 find all the necessary designations. */
3534
b73bfc1c 3535static unsigned char *
df7492f9 3536encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3537 struct coding_system *coding;
df7492f9
KH
3538 int *charbuf, *charbuf_end;
3539 unsigned char *dst;
e0e989f6 3540{
df7492f9 3541 struct charset *charset;
bdd9fb48
KH
3542 /* Table of charsets to be designated to each graphic register. */
3543 int r[4];
df7492f9
KH
3544 int c, found = 0, reg;
3545 int produced_chars = 0;
3546 int multibytep = coding->dst_multibyte;
3547 Lisp_Object attrs;
3548 Lisp_Object charset_list;
3549
3550 attrs = CODING_ID_ATTRS (coding->id);
3551 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3552 if (EQ (charset_list, Qiso_2022))
3553 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3554
3555 for (reg = 0; reg < 4; reg++)
3556 r[reg] = -1;
3557
b73bfc1c 3558 while (found < 4)
e0e989f6 3559 {
df7492f9
KH
3560 int id;
3561
3562 c = *charbuf++;
b73bfc1c
KH
3563 if (c == '\n')
3564 break;
df7492f9
KH
3565 charset = char_charset (c, charset_list, NULL);
3566 id = CHARSET_ID (charset);
3567 reg = CODING_ISO_REQUEST (coding, id);
3568 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3569 {
3570 found++;
df7492f9 3571 r[reg] = id;
bdd9fb48 3572 }
bdd9fb48
KH
3573 }
3574
3575 if (found)
3576 {
3577 for (reg = 0; reg < 4; reg++)
3578 if (r[reg] >= 0
df7492f9
KH
3579 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3580 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3581 }
b73bfc1c
KH
3582
3583 return dst;
e0e989f6
KH
3584}
3585
4ed46869
KH
3586/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3587
df7492f9
KH
3588static int
3589encode_coding_iso_2022 (coding)
4ed46869 3590 struct coding_system *coding;
4ed46869 3591{
df7492f9
KH
3592 int multibytep = coding->dst_multibyte;
3593 int *charbuf = coding->charbuf;
3594 int *charbuf_end = charbuf + coding->charbuf_used;
3595 unsigned char *dst = coding->destination + coding->produced;
3596 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3597 int safe_room = 16;
3598 int bol_designation
3599 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3600 && CODING_ISO_BOL (coding));
3601 int produced_chars = 0;
3602 Lisp_Object attrs, eol_type, charset_list;
3603 int ascii_compatible;
b73bfc1c 3604 int c;
ff0dacd7 3605 int preferred_charset_id = -1;
05e6f5dc 3606
df7492f9 3607 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4 3608 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3609 /* Charset list may have been changed. */
3610 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
004068e4
KH
3611 coding->safe_charsets
3612 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3613
df7492f9 3614 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3615
df7492f9 3616 while (charbuf < charbuf_end)
4ed46869 3617 {
df7492f9 3618 ASSURE_DESTINATION (safe_room);
b73bfc1c 3619
df7492f9 3620 if (bol_designation)
b73bfc1c 3621 {
df7492f9 3622 unsigned char *dst_prev = dst;
4ed46869 3623
bdd9fb48 3624 /* We have to produce designation sequences if any now. */
df7492f9
KH
3625 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3626 bol_designation = 0;
3627 /* We are sure that designation sequences are all ASCII bytes. */
3628 produced_chars += dst - dst_prev;
4ed46869 3629 }
ec6d2bb8 3630
df7492f9 3631 c = *charbuf++;
4ed46869 3632
ff0dacd7
KH
3633 if (c < 0)
3634 {
3635 /* Handle an annotation. */
3636 switch (*charbuf)
3637 {
3638 case CODING_ANNOTATE_COMPOSITION_MASK:
3639 /* Not yet implemented. */
3640 break;
3641 case CODING_ANNOTATE_CHARSET_MASK:
3642 preferred_charset_id = charbuf[3];
3643 if (preferred_charset_id >= 0
3644 && NILP (Fmemq (make_number (preferred_charset_id),
3645 charset_list)))
3646 preferred_charset_id = -1;
3647 break;
3648 default:
3649 abort ();
3650 }
3651 charbuf += -c - 1;
3652 continue;
3653 }
3654
b73bfc1c
KH
3655 /* Now encode the character C. */
3656 if (c < 0x20 || c == 0x7F)
3657 {
df7492f9
KH
3658 if (c == '\n'
3659 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3660 {
df7492f9
KH
3661 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3662 ENCODE_RESET_PLANE_AND_REGISTER ();
3663 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3664 {
df7492f9
KH
3665 int i;
3666
3667 for (i = 0; i < 4; i++)
3668 CODING_ISO_DESIGNATION (coding, i)
3669 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3670 }
df7492f9
KH
3671 bol_designation
3672 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3673 }
df7492f9
KH
3674 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3675 ENCODE_RESET_PLANE_AND_REGISTER ();
3676 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3677 }
df7492f9 3678 else if (ASCII_CHAR_P (c))
88993dfd 3679 {
df7492f9
KH
3680 if (ascii_compatible)
3681 EMIT_ONE_ASCII_BYTE (c);
3682 else
bf16eb23
KH
3683 {
3684 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3685 ENCODE_ISO_CHARACTER (charset, c);
3686 }
88993dfd 3687 }
16eafb5d
KH
3688 else if (CHAR_BYTE8_P (c))
3689 {
3690 c = CHAR_TO_BYTE8 (c);
3691 EMIT_ONE_BYTE (c);
3692 }
b73bfc1c 3693 else
df7492f9 3694 {
ff0dacd7 3695 struct charset *charset;
b73bfc1c 3696
ff0dacd7
KH
3697 if (preferred_charset_id >= 0)
3698 {
3699 charset = CHARSET_FROM_ID (preferred_charset_id);
3700 if (! CHAR_CHARSET_P (c, charset))
3701 charset = char_charset (c, charset_list, NULL);
3702 }
3703 else
3704 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3705 if (!charset)
3706 {
41cbe562
KH
3707 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3708 {
3709 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3710 charset = CHARSET_FROM_ID (charset_ascii);
3711 }
3712 else
3713 {
3714 c = coding->default_char;
3715 charset = char_charset (c, charset_list, NULL);
3716 }
df7492f9
KH
3717 }
3718 ENCODE_ISO_CHARACTER (charset, c);
3719 }
84fbb8a0 3720 }
b73bfc1c 3721
df7492f9
KH
3722 if (coding->mode & CODING_MODE_LAST_BLOCK
3723 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3724 {
3725 ASSURE_DESTINATION (safe_room);
3726 ENCODE_RESET_PLANE_AND_REGISTER ();
3727 }
3728 coding->result = CODING_RESULT_SUCCESS;
3729 CODING_ISO_BOL (coding) = bol_designation;
3730 coding->produced_char += produced_chars;
3731 coding->produced = dst - coding->destination;
3732 return 0;
4ed46869
KH
3733}
3734
3735\f
/*** 8. SJIS and BIG5 handlers ***/
4ed46869 3737
df7492f9 3738/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3739 quite widely. So, for the moment, Emacs supports them in the bare
3740 C code. But, in the future, they may be supported only by CCL. */
3741
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.
4ed46869
KH
3748
3749 --- CODE RANGE of SJIS ---
3750 (character set) (range)
3751 ASCII 0x00 .. 0x7F
df7492f9 3752 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3753 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3754 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3755 -------------------------------
3756
3757*/
3758
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.
4ed46869
KH
3762
3763 --- CODE RANGE of BIG5 ---
3764 (character set) (range)
3765 ASCII 0x00 .. 0x7F
3766 Big5 (1st byte) 0xA1 .. 0xFE
3767 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3768 --------------------------
3769
df7492f9 3770 */
4ed46869
KH
3771
3772/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3773 Check if a text is encoded in SJIS. If it is, return
df7492f9 3774 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3775
0a28aafb 3776static int
ff0dacd7 3777detect_coding_sjis (coding, detect_info)
df7492f9 3778 struct coding_system *coding;
ff0dacd7 3779 struct coding_detection_info *detect_info;
4ed46869 3780{
df7492f9
KH
3781 unsigned char *src = coding->source, *src_base = src;
3782 unsigned char *src_end = coding->source + coding->src_bytes;
3783 int multibytep = coding->src_multibyte;
3784 int consumed_chars = 0;
3785 int found = 0;
b73bfc1c 3786 int c;
89528eb3 3787 int incomplete;
df7492f9 3788
ff0dacd7 3789 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3790 /* A coding system of this category is always ASCII compatible. */
3791 src += coding->head_ascii;
4ed46869 3792
b73bfc1c 3793 while (1)
4ed46869 3794 {
89528eb3 3795 incomplete = 0;
df7492f9 3796 ONE_MORE_BYTE (c);
89528eb3 3797 incomplete = 1;
682169fe
KH
3798 if (c < 0x80)
3799 continue;
df7492f9 3800 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3801 {
df7492f9 3802 ONE_MORE_BYTE (c);
682169fe 3803 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3804 break;
ff0dacd7 3805 found = CATEGORY_MASK_SJIS;
4ed46869 3806 }
df7492f9 3807 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3808 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3809 else
3810 break;
4ed46869 3811 }
ff0dacd7 3812 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3813 return 0;
3814
3815 no_more_source:
89528eb3
KH
3816 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3817 {
ff0dacd7 3818 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3
KH
3819 return 0;
3820 }
ff0dacd7
KH
3821 detect_info->found |= found;
3822 return 1;
4ed46869
KH
3823}
3824
3825/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3826 Check if a text is encoded in BIG5. If it is, return
df7492f9 3827 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3828
0a28aafb 3829static int
ff0dacd7 3830detect_coding_big5 (coding, detect_info)
df7492f9 3831 struct coding_system *coding;
ff0dacd7 3832 struct coding_detection_info *detect_info;
4ed46869 3833{
df7492f9
KH
3834 unsigned char *src = coding->source, *src_base = src;
3835 unsigned char *src_end = coding->source + coding->src_bytes;
3836 int multibytep = coding->src_multibyte;
3837 int consumed_chars = 0;
3838 int found = 0;
b73bfc1c 3839 int c;
89528eb3 3840 int incomplete;
fa42c37f 3841
ff0dacd7 3842 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
3843 /* A coding system of this category is always ASCII compatible. */
3844 src += coding->head_ascii;
fa42c37f 3845
b73bfc1c 3846 while (1)
fa42c37f 3847 {
89528eb3 3848 incomplete = 0;
df7492f9 3849 ONE_MORE_BYTE (c);
89528eb3 3850 incomplete = 1;
df7492f9 3851 if (c < 0x80)
fa42c37f 3852 continue;
df7492f9 3853 if (c >= 0xA1)
fa42c37f 3854 {
df7492f9
KH
3855 ONE_MORE_BYTE (c);
3856 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3857 return 0;
ff0dacd7 3858 found = CATEGORY_MASK_BIG5;
fa42c37f 3859 }
df7492f9
KH
3860 else
3861 break;
fa42c37f 3862 }
ff0dacd7 3863 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 3864 return 0;
df7492f9
KH
3865
3866 no_more_source:
89528eb3
KH
3867 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3868 {
ff0dacd7 3869 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
3870 return 0;
3871 }
ff0dacd7
KH
3872 detect_info->found |= found;
3873 return 1;
fa42c37f
KH
3874}
3875
4ed46869
KH
3876/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3877 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3878
b73bfc1c 3879static void
df7492f9 3880decode_coding_sjis (coding)
4ed46869 3881 struct coding_system *coding;
4ed46869 3882{
df7492f9
KH
3883 unsigned char *src = coding->source + coding->consumed;
3884 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3885 unsigned char *src_base;
df7492f9 3886 int *charbuf = coding->charbuf;
ff0dacd7 3887 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
3888 int consumed_chars = 0, consumed_chars_base;
3889 int multibytep = coding->src_multibyte;
3890 struct charset *charset_roman, *charset_kanji, *charset_kana;
3891 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
3892 int char_offset = coding->produced_char;
3893 int last_offset = char_offset;
3894 int last_id = charset_ascii;
a5d301df 3895
df7492f9
KH
3896 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3897
3898 val = charset_list;
3899 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3
KH
3900 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3901 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3902
b73bfc1c 3903 while (1)
4ed46869 3904 {
df7492f9 3905 int c, c1;
b73bfc1c
KH
3906
3907 src_base = src;
df7492f9
KH
3908 consumed_chars_base = consumed_chars;
3909
3910 if (charbuf >= charbuf_end)
3911 break;
3912
3913 ONE_MORE_BYTE (c);
b73bfc1c 3914
df7492f9 3915 if (c == '\r')
4ed46869 3916 {
df7492f9 3917 if (EQ (eol_type, Qdos))
4ed46869 3918 {
df7492f9 3919 if (src == src_end)
98725083
KH
3920 {
3921 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3922 goto no_more_source;
3923 }
df7492f9
KH
3924 if (*src == '\n')
3925 ONE_MORE_BYTE (c);
4ed46869 3926 }
df7492f9
KH
3927 else if (EQ (eol_type, Qmac))
3928 c = '\n';
4ed46869 3929 }
54f78171 3930 else
df7492f9
KH
3931 {
3932 struct charset *charset;
3933
3934 if (c < 0x80)
3935 charset = charset_roman;
3936 else
4ed46869 3937 {
df7492f9
KH
3938 if (c >= 0xF0)
3939 goto invalid_code;
3940 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3941 {
54f78171 3942 /* SJIS -> JISX0208 */
df7492f9
KH
3943 ONE_MORE_BYTE (c1);
3944 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3945 goto invalid_code;
3946 c = (c << 8) | c1;
3947 SJIS_TO_JIS (c);
3948 charset = charset_kanji;
5e34de15 3949 }
fb88bf2d 3950 else
89528eb3
KH
3951 {
3952 /* SJIS -> JISX0201-Kana */
3953 c &= 0x7F;
3954 charset = charset_kana;
3955 }
df7492f9 3956 }
ff0dacd7
KH
3957 if (charset->id != charset_ascii
3958 && last_id != charset->id)
3959 {
3960 if (last_id != charset_ascii)
3961 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3962 last_id = charset->id;
3963 last_offset = char_offset;
3964 }
df7492f9
KH
3965 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3966 }
3967 *charbuf++ = c;
ff0dacd7 3968 char_offset++;
df7492f9
KH
3969 continue;
3970
3971 invalid_code:
3972 src = src_base;
3973 consumed_chars = consumed_chars_base;
3974 ONE_MORE_BYTE (c);
3975 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3976 char_offset++;
df7492f9
KH
3977 coding->errors++;
3978 }
3979
3980 no_more_source:
ff0dacd7
KH
3981 if (last_id != charset_ascii)
3982 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3983 coding->consumed_char += consumed_chars_base;
3984 coding->consumed = src_base - coding->source;
3985 coding->charbuf_used = charbuf - coding->charbuf;
3986}
3987
3988static void
3989decode_coding_big5 (coding)
3990 struct coding_system *coding;
3991{
3992 unsigned char *src = coding->source + coding->consumed;
3993 unsigned char *src_end = coding->source + coding->src_bytes;
3994 unsigned char *src_base;
3995 int *charbuf = coding->charbuf;
ff0dacd7 3996 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
3997 int consumed_chars = 0, consumed_chars_base;
3998 int multibytep = coding->src_multibyte;
3999 struct charset *charset_roman, *charset_big5;
4000 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
4001 int char_offset = coding->produced_char;
4002 int last_offset = char_offset;
4003 int last_id = charset_ascii;
df7492f9
KH
4004
4005 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4006 val = charset_list;
4007 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4008 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4009
4010 while (1)
4011 {
4012 int c, c1;
4013
4014 src_base = src;
4015 consumed_chars_base = consumed_chars;
4016
4017 if (charbuf >= charbuf_end)
4018 break;
4019
4020 ONE_MORE_BYTE (c);
4021
4022 if (c == '\r')
4023 {
4024 if (EQ (eol_type, Qdos))
4025 {
4026 if (src == src_end)
98725083
KH
4027 {
4028 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4029 goto no_more_source;
4030 }
df7492f9
KH
4031 if (*src == '\n')
4032 ONE_MORE_BYTE (c);
4ed46869 4033 }
df7492f9
KH
4034 else if (EQ (eol_type, Qmac))
4035 c = '\n';
4036 }
4037 else
4038 {
4039 struct charset *charset;
4040 if (c < 0x80)
4041 charset = charset_roman;
fb88bf2d 4042 else
fb88bf2d 4043 {
54f78171 4044 /* BIG5 -> Big5 */
df7492f9
KH
4045 if (c < 0xA1 || c > 0xFE)
4046 goto invalid_code;
4047 ONE_MORE_BYTE (c1);
4048 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4049 goto invalid_code;
4050 c = c << 8 | c1;
4051 charset = charset_big5;
4ed46869 4052 }
ff0dacd7
KH
4053 if (charset->id != charset_ascii
4054 && last_id != charset->id)
4055 {
4056 if (last_id != charset_ascii)
4057 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4058 last_id = charset->id;
4059 last_offset = char_offset;
4060 }
df7492f9 4061 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 4062 }
4ed46869 4063
df7492f9 4064 *charbuf++ = c;
ff0dacd7 4065 char_offset++;
fb88bf2d
KH
4066 continue;
4067
df7492f9 4068 invalid_code:
4ed46869 4069 src = src_base;
df7492f9
KH
4070 consumed_chars = consumed_chars_base;
4071 ONE_MORE_BYTE (c);
4072 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4073 char_offset++;
df7492f9 4074 coding->errors++;
fb88bf2d 4075 }
d46c5b12 4076
df7492f9 4077 no_more_source:
ff0dacd7
KH
4078 if (last_id != charset_ascii)
4079 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4080 coding->consumed_char += consumed_chars_base;
4081 coding->consumed = src_base - coding->source;
4082 coding->charbuf_used = charbuf - coding->charbuf;
4083}
4084
4085/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4086 This function can encode charsets `ascii', `katakana-jisx0201',
4087 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4088 are sure that all these charsets are registered as official charset
4089 (i.e. do not have extended leading-codes). Characters of other
4090 charsets are produced without any encoding. If SJIS_P is 1, encode
4091 SJIS text, else encode BIG5 text. */
4092
4093static int
4094encode_coding_sjis (coding)
4095 struct coding_system *coding;
4096{
4097 int multibytep = coding->dst_multibyte;
4098 int *charbuf = coding->charbuf;
4099 int *charbuf_end = charbuf + coding->charbuf_used;
4100 unsigned char *dst = coding->destination + coding->produced;
4101 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4102 int safe_room = 4;
4103 int produced_chars = 0;
4104 Lisp_Object attrs, eol_type, charset_list, val;
4105 int ascii_compatible;
4106 struct charset *charset_roman, *charset_kanji, *charset_kana;
4107 int c;
4108
4109 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4110 val = charset_list;
4111 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4112 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4113 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4114
4115 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4116
4117 while (charbuf < charbuf_end)
4118 {
4119 ASSURE_DESTINATION (safe_room);
4120 c = *charbuf++;
4121 /* Now encode the character C. */
4122 if (ASCII_CHAR_P (c) && ascii_compatible)
4123 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4124 else if (CHAR_BYTE8_P (c))
4125 {
4126 c = CHAR_TO_BYTE8 (c);
4127 EMIT_ONE_BYTE (c);
4128 }
df7492f9
KH
4129 else
4130 {
4131 unsigned code;
4132 struct charset *charset = char_charset (c, charset_list, &code);
4133
4134 if (!charset)
4135 {
41cbe562
KH
4136 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4137 {
4138 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4139 charset = CHARSET_FROM_ID (charset_ascii);
4140 }
4141 else
4142 {
4143 c = coding->default_char;
4144 charset = char_charset (c, charset_list, &code);
4145 }
df7492f9
KH
4146 }
4147 if (code == CHARSET_INVALID_CODE (charset))
4148 abort ();
4149 if (charset == charset_kanji)
4150 {
4151 int c1, c2;
4152 JIS_TO_SJIS (code);
4153 c1 = code >> 8, c2 = code & 0xFF;
4154 EMIT_TWO_BYTES (c1, c2);
4155 }
4156 else if (charset == charset_kana)
4157 EMIT_ONE_BYTE (code | 0x80);
4158 else
4159 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4160 }
4161 }
4162 coding->result = CODING_RESULT_SUCCESS;
4163 coding->produced_char += produced_chars;
4164 coding->produced = dst - coding->destination;
4165 return 0;
4166}
4167
4168static int
4169encode_coding_big5 (coding)
4170 struct coding_system *coding;
4171{
4172 int multibytep = coding->dst_multibyte;
4173 int *charbuf = coding->charbuf;
4174 int *charbuf_end = charbuf + coding->charbuf_used;
4175 unsigned char *dst = coding->destination + coding->produced;
4176 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4177 int safe_room = 4;
4178 int produced_chars = 0;
4179 Lisp_Object attrs, eol_type, charset_list, val;
4180 int ascii_compatible;
4181 struct charset *charset_roman, *charset_big5;
4182 int c;
4183
4184 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4185 val = charset_list;
4186 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4187 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4188 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4189
4190 while (charbuf < charbuf_end)
4191 {
4192 ASSURE_DESTINATION (safe_room);
4193 c = *charbuf++;
4194 /* Now encode the character C. */
4195 if (ASCII_CHAR_P (c) && ascii_compatible)
4196 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4197 else if (CHAR_BYTE8_P (c))
4198 {
4199 c = CHAR_TO_BYTE8 (c);
4200 EMIT_ONE_BYTE (c);
4201 }
df7492f9
KH
4202 else
4203 {
4204 unsigned code;
4205 struct charset *charset = char_charset (c, charset_list, &code);
4206
4207 if (! charset)
4208 {
41cbe562
KH
4209 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4210 {
4211 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4212 charset = CHARSET_FROM_ID (charset_ascii);
4213 }
4214 else
4215 {
4216 c = coding->default_char;
4217 charset = char_charset (c, charset_list, &code);
4218 }
df7492f9
KH
4219 }
4220 if (code == CHARSET_INVALID_CODE (charset))
4221 abort ();
4222 if (charset == charset_big5)
4223 {
4224 int c1, c2;
4225
4226 c1 = code >> 8, c2 = code & 0xFF;
4227 EMIT_TWO_BYTES (c1, c2);
4228 }
4229 else
4230 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4231 }
4232 }
4233 coding->result = CODING_RESULT_SUCCESS;
4234 coding->produced_char += produced_chars;
4235 coding->produced = dst - coding->destination;
4236 return 0;
4237}
4238
4239\f
/*** 9. CCL handlers ***/
4241
4242/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4243 Check if a text is encoded in a coding system of which
4244 encoder/decoder are written in CCL program. If it is, return
4245 CATEGORY_MASK_CCL, else return 0. */
4246
4247static int
ff0dacd7 4248detect_coding_ccl (coding, detect_info)
df7492f9 4249 struct coding_system *coding;
ff0dacd7 4250 struct coding_detection_info *detect_info;
df7492f9
KH
4251{
4252 unsigned char *src = coding->source, *src_base = src;
4253 unsigned char *src_end = coding->source + coding->src_bytes;
4254 int multibytep = coding->src_multibyte;
4255 int consumed_chars = 0;
4256 int found = 0;
4257 unsigned char *valids = CODING_CCL_VALIDS (coding);
4258 int head_ascii = coding->head_ascii;
4259 Lisp_Object attrs;
4260
ff0dacd7
KH
4261 detect_info->checked |= CATEGORY_MASK_CCL;
4262
df7492f9
KH
4263 coding = &coding_categories[coding_category_ccl];
4264 attrs = CODING_ID_ATTRS (coding->id);
4265 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4266 src += head_ascii;
4267
4268 while (1)
4269 {
4270 int c;
4271 ONE_MORE_BYTE (c);
4272 if (! valids[c])
4273 break;
ff0dacd7
KH
4274 if ((valids[c] > 1))
4275 found = CATEGORY_MASK_CCL;
df7492f9 4276 }
ff0dacd7 4277 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4278 return 0;
4279
4280 no_more_source:
ff0dacd7
KH
4281 detect_info->found |= found;
4282 return 1;
df7492f9
KH
4283}
4284
4285static void
4286decode_coding_ccl (coding)
4287 struct coding_system *coding;
4288{
7c78e542 4289 const unsigned char *src = coding->source + coding->consumed;
df7492f9
KH
4290 unsigned char *src_end = coding->source + coding->src_bytes;
4291 int *charbuf = coding->charbuf;
4292 int *charbuf_end = charbuf + coding->charbuf_size;
4293 int consumed_chars = 0;
4294 int multibytep = coding->src_multibyte;
4295 struct ccl_program ccl;
4296 int source_charbuf[1024];
4297 int source_byteidx[1024];
8dcbea82 4298 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9 4299
8dcbea82 4300 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4301 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4302
4303 while (src < src_end)
4304 {
7c78e542 4305 const unsigned char *p = src;
df7492f9
KH
4306 int *source, *source_end;
4307 int i = 0;
4308
4309 if (multibytep)
4310 while (i < 1024 && p < src_end)
4311 {
4312 source_byteidx[i] = p - src;
4313 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4314 }
4315 else
4316 while (i < 1024 && p < src_end)
4317 source_charbuf[i++] = *p++;
4318
4319 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4320 ccl.last_block = 1;
4321
4322 source = source_charbuf;
4323 source_end = source + i;
4324 while (source < source_end)
4325 {
4326 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4327 source_end - source, charbuf_end - charbuf,
4328 charset_list);
df7492f9
KH
4329 source += ccl.consumed;
4330 charbuf += ccl.produced;
4331 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4332 break;
4333 }
4334 if (source < source_end)
4335 src += source_byteidx[source - source_charbuf];
4336 else
4337 src = p;
4338 consumed_chars += source - source_charbuf;
4339
4340 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4341 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4342 break;
4343 }
4344
4345 switch (ccl.status)
4346 {
4347 case CCL_STAT_SUSPEND_BY_SRC:
4348 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4349 break;
4350 case CCL_STAT_SUSPEND_BY_DST:
4351 break;
4352 case CCL_STAT_QUIT:
4353 case CCL_STAT_INVALID_CMD:
4354 coding->result = CODING_RESULT_INTERRUPT;
4355 break;
4356 default:
4357 coding->result = CODING_RESULT_SUCCESS;
4358 break;
4359 }
4360 coding->consumed_char += consumed_chars;
4361 coding->consumed = src - coding->source;
4362 coding->charbuf_used = charbuf - coding->charbuf;
4363}
4364
4365static int
4366encode_coding_ccl (coding)
4367 struct coding_system *coding;
4368{
4369 struct ccl_program ccl;
4370 int multibytep = coding->dst_multibyte;
4371 int *charbuf = coding->charbuf;
4372 int *charbuf_end = charbuf + coding->charbuf_used;
4373 unsigned char *dst = coding->destination + coding->produced;
4374 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4375 unsigned char *adjusted_dst_end = dst_end - 1;
4376 int destination_charbuf[1024];
4377 int i, produced_chars = 0;
8dcbea82 4378 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4379
8dcbea82 4380 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4381 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4382
4383 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4384 ccl.dst_multibyte = coding->dst_multibyte;
4385
4386 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4387 {
4388 int dst_bytes = dst_end - dst;
4389 if (dst_bytes > 1024)
4390 dst_bytes = 1024;
4391
4392 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4393 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4394 charbuf += ccl.consumed;
4395 if (multibytep)
4396 for (i = 0; i < ccl.produced; i++)
4397 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4398 else
4399 {
4400 for (i = 0; i < ccl.produced; i++)
4401 *dst++ = destination_charbuf[i] & 0xFF;
4402 produced_chars += ccl.produced;
4403 }
4404 }
4405
4406 switch (ccl.status)
4407 {
4408 case CCL_STAT_SUSPEND_BY_SRC:
4409 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4410 break;
4411 case CCL_STAT_SUSPEND_BY_DST:
4412 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4413 break;
4414 case CCL_STAT_QUIT:
4415 case CCL_STAT_INVALID_CMD:
4416 coding->result = CODING_RESULT_INTERRUPT;
4417 break;
4418 default:
4419 coding->result = CODING_RESULT_SUCCESS;
4420 break;
4421 }
4422
4423 coding->produced_char += produced_chars;
4424 coding->produced = dst - coding->destination;
4425 return 0;
4ed46869
KH
4426}
4427
df7492f9
KH
4428
4429\f
4430/*** 10, 11. no-conversion handlers ***/
4431
4432/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4433
b73bfc1c 4434static void
df7492f9 4435decode_coding_raw_text (coding)
4ed46869 4436 struct coding_system *coding;
4ed46869 4437{
df7492f9 4438 coding->chars_at_source = 1;
2c78b7e1
KH
4439 coding->consumed_char = 0;
4440 coding->consumed = 0;
df7492f9
KH
4441 coding->result = CODING_RESULT_SUCCESS;
4442}
4ed46869 4443
df7492f9
KH
4444static int
4445encode_coding_raw_text (coding)
4446 struct coding_system *coding;
4447{
4448 int multibytep = coding->dst_multibyte;
4449 int *charbuf = coding->charbuf;
4450 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4451 unsigned char *dst = coding->destination + coding->produced;
4452 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4453 int produced_chars = 0;
4454 int c;
a5d301df 4455
df7492f9 4456 if (multibytep)
b73bfc1c 4457 {
df7492f9 4458 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4459
df7492f9
KH
4460 if (coding->src_multibyte)
4461 while (charbuf < charbuf_end)
4462 {
4463 ASSURE_DESTINATION (safe_room);
4464 c = *charbuf++;
4465 if (ASCII_CHAR_P (c))
4466 EMIT_ONE_ASCII_BYTE (c);
4467 else if (CHAR_BYTE8_P (c))
4468 {
4469 c = CHAR_TO_BYTE8 (c);
4470 EMIT_ONE_BYTE (c);
4471 }
4472 else
4473 {
4474 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4475
df7492f9
KH
4476 CHAR_STRING_ADVANCE (c, p1);
4477 while (p0 < p1)
9d123124
KH
4478 {
4479 EMIT_ONE_BYTE (*p0);
4480 p0++;
4481 }
df7492f9
KH
4482 }
4483 }
b73bfc1c 4484 else
df7492f9
KH
4485 while (charbuf < charbuf_end)
4486 {
4487 ASSURE_DESTINATION (safe_room);
4488 c = *charbuf++;
4489 EMIT_ONE_BYTE (c);
4490 }
4491 }
4492 else
4493 {
4494 if (coding->src_multibyte)
b73bfc1c 4495 {
df7492f9
KH
4496 int safe_room = MAX_MULTIBYTE_LENGTH;
4497
4498 while (charbuf < charbuf_end)
b73bfc1c 4499 {
df7492f9
KH
4500 ASSURE_DESTINATION (safe_room);
4501 c = *charbuf++;
4502 if (ASCII_CHAR_P (c))
4503 *dst++ = c;
4504 else if (CHAR_BYTE8_P (c))
4505 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4506 else
df7492f9
KH
4507 CHAR_STRING_ADVANCE (c, dst);
4508 produced_chars++;
b73bfc1c 4509 }
4ed46869 4510 }
df7492f9
KH
4511 else
4512 {
4513 ASSURE_DESTINATION (charbuf_end - charbuf);
4514 while (charbuf < charbuf_end && dst < dst_end)
4515 *dst++ = *charbuf++;
4516 produced_chars = dst - (coding->destination + coding->dst_bytes);
4517 }
4ed46869 4518 }
df7492f9
KH
4519 coding->result = CODING_RESULT_SUCCESS;
4520 coding->produced_char += produced_chars;
4521 coding->produced = dst - coding->destination;
4522 return 0;
4ed46869
KH
4523}
4524
ff0dacd7
KH
4525/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4526 Check if a text is encoded in a charset-based coding system. If it
4527 is, return 1, else return 0. */
4528
0a28aafb 4529static int
ff0dacd7 4530detect_coding_charset (coding, detect_info)
df7492f9 4531 struct coding_system *coding;
ff0dacd7 4532 struct coding_detection_info *detect_info;
1397dc18 4533{
df7492f9
KH
4534 unsigned char *src = coding->source, *src_base = src;
4535 unsigned char *src_end = coding->source + coding->src_bytes;
4536 int multibytep = coding->src_multibyte;
4537 int consumed_chars = 0;
4538 Lisp_Object attrs, valids;
584948ac 4539 int found = 0;
1397dc18 4540
ff0dacd7
KH
4541 detect_info->checked |= CATEGORY_MASK_CHARSET;
4542
df7492f9
KH
4543 coding = &coding_categories[coding_category_charset];
4544 attrs = CODING_ID_ATTRS (coding->id);
4545 valids = AREF (attrs, coding_attr_charset_valids);
4546
4547 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4548 src += coding->head_ascii;
1397dc18 4549
b73bfc1c 4550 while (1)
1397dc18 4551 {
df7492f9 4552 int c;
1397dc18 4553
df7492f9
KH
4554 ONE_MORE_BYTE (c);
4555 if (NILP (AREF (valids, c)))
4556 break;
584948ac 4557 if (c >= 0x80)
ff0dacd7 4558 found = CATEGORY_MASK_CHARSET;
df7492f9 4559 }
ff0dacd7 4560 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4561 return 0;
4ed46869 4562
df7492f9 4563 no_more_source:
ff0dacd7
KH
4564 detect_info->found |= found;
4565 return 1;
df7492f9 4566}
4ed46869 4567
b73bfc1c 4568static void
df7492f9 4569decode_coding_charset (coding)
4ed46869 4570 struct coding_system *coding;
4ed46869 4571{
df7492f9
KH
4572 unsigned char *src = coding->source + coding->consumed;
4573 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4574 unsigned char *src_base;
df7492f9 4575 int *charbuf = coding->charbuf;
ff0dacd7 4576 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4577 int consumed_chars = 0, consumed_chars_base;
4578 int multibytep = coding->src_multibyte;
4eb6d3f1 4579 Lisp_Object attrs, eol_type, charset_list, valids;
ff0dacd7
KH
4580 int char_offset = coding->produced_char;
4581 int last_offset = char_offset;
4582 int last_id = charset_ascii;
df7492f9
KH
4583
4584 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4585 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4586
df7492f9 4587 while (1)
4ed46869 4588 {
4eb6d3f1 4589 int c;
df7492f9
KH
4590
4591 src_base = src;
4592 consumed_chars_base = consumed_chars;
b73bfc1c 4593
df7492f9
KH
4594 if (charbuf >= charbuf_end)
4595 break;
4596
4eb6d3f1 4597 ONE_MORE_BYTE (c);
df7492f9 4598 if (c == '\r')
d46c5b12 4599 {
c7c66a95
KH
4600 /* Here we assume that no charset maps '\r' to something
4601 else. */
df7492f9 4602 if (EQ (eol_type, Qdos))
b73bfc1c 4603 {
98725083
KH
4604 if (src == src_end)
4605 {
4606 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4607 goto no_more_source;
4608 }
4609 if (*src == '\n')
df7492f9 4610 ONE_MORE_BYTE (c);
b73bfc1c 4611 }
df7492f9 4612 else if (EQ (eol_type, Qmac))
b73bfc1c 4613 c = '\n';
d46c5b12 4614 }
df7492f9 4615 else
d46c5b12 4616 {
4eb6d3f1
KH
4617 Lisp_Object val;
4618 struct charset *charset;
c7c66a95 4619 int dim;
acb2a965
KH
4620 int len = 1;
4621 unsigned code = c;
4eb6d3f1
KH
4622
4623 val = AREF (valids, c);
4624 if (NILP (val))
4625 goto invalid_code;
c7c66a95 4626 if (INTEGERP (val))
4eb6d3f1 4627 {
c7c66a95
KH
4628 charset = CHARSET_FROM_ID (XFASTINT (val));
4629 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4630 while (len < dim)
4eb6d3f1 4631 {
acb2a965
KH
4632 ONE_MORE_BYTE (c);
4633 code = (code << 8) | c;
f9d71dcd 4634 len++;
4eb6d3f1 4635 }
c7c66a95
KH
4636 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4637 charset, code, c);
4638 }
4639 else
4640 {
4641 /* VAL is a list of charset IDs. It is assured that the
4642 list is sorted by charset dimensions (smaller one
4643 comes first). */
c7c66a95
KH
4644 while (CONSP (val))
4645 {
4646 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4647 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4648 while (len < dim)
c7c66a95 4649 {
acb2a965
KH
4650 ONE_MORE_BYTE (c);
4651 code = (code << 8) | c;
f9d71dcd 4652 len++;
c7c66a95 4653 }
c7c66a95
KH
4654 CODING_DECODE_CHAR (coding, src, src_base,
4655 src_end, charset, code, c);
4656 if (c >= 0)
4657 break;
4658 val = XCDR (val);
4659 }
4eb6d3f1 4660 }
df7492f9
KH
4661 if (c < 0)
4662 goto invalid_code;
ff0dacd7
KH
4663 if (charset->id != charset_ascii
4664 && last_id != charset->id)
4665 {
4666 if (last_id != charset_ascii)
4667 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4668 last_id = charset->id;
4669 last_offset = char_offset;
4670 }
d46c5b12 4671 }
df7492f9 4672 *charbuf++ = c;
ff0dacd7 4673 char_offset++;
df7492f9
KH
4674 continue;
4675
4676 invalid_code:
4677 src = src_base;
4678 consumed_chars = consumed_chars_base;
4679 ONE_MORE_BYTE (c);
4680 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4681 char_offset++;
df7492f9 4682 coding->errors++;
4ed46869
KH
4683 }
4684
df7492f9 4685 no_more_source:
ff0dacd7
KH
4686 if (last_id != charset_ascii)
4687 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4688 coding->consumed_char += consumed_chars_base;
4689 coding->consumed = src_base - coding->source;
4690 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4691}
4692
df7492f9
KH
4693static int
4694encode_coding_charset (coding)
4ed46869 4695 struct coding_system *coding;
4ed46869 4696{
df7492f9
KH
4697 int multibytep = coding->dst_multibyte;
4698 int *charbuf = coding->charbuf;
4699 int *charbuf_end = charbuf + coding->charbuf_used;
4700 unsigned char *dst = coding->destination + coding->produced;
4701 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4702 int safe_room = MAX_MULTIBYTE_LENGTH;
4703 int produced_chars = 0;
df7492f9
KH
4704 Lisp_Object attrs, eol_type, charset_list;
4705 int ascii_compatible;
b73bfc1c 4706 int c;
b73bfc1c 4707
df7492f9 4708 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4709 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4710
df7492f9 4711 while (charbuf < charbuf_end)
4ed46869 4712 {
4eb6d3f1 4713 struct charset *charset;
df7492f9
KH
4714 unsigned code;
4715
4716 ASSURE_DESTINATION (safe_room);
4717 c = *charbuf++;
4718 if (ascii_compatible && ASCII_CHAR_P (c))
4719 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4720 else if (CHAR_BYTE8_P (c))
4721 {
4722 c = CHAR_TO_BYTE8 (c);
4723 EMIT_ONE_BYTE (c);
4724 }
d46c5b12 4725 else
4eb6d3f1
KH
4726 {
4727 charset = char_charset (c, charset_list, &code);
4728 if (charset)
4729 {
4730 if (CHARSET_DIMENSION (charset) == 1)
4731 EMIT_ONE_BYTE (code);
4732 else if (CHARSET_DIMENSION (charset) == 2)
4733 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4734 else if (CHARSET_DIMENSION (charset) == 3)
4735 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4736 else
4737 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4738 (code >> 8) & 0xFF, code & 0xFF);
4739 }
4740 else
41cbe562
KH
4741 {
4742 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4743 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4744 else
4745 c = coding->default_char;
4746 EMIT_ONE_BYTE (c);
4747 }
4eb6d3f1 4748 }
4ed46869
KH
4749 }
4750
df7492f9
KH
4751 coding->result = CODING_RESULT_SUCCESS;
4752 coding->produced_char += produced_chars;
4753 coding->produced = dst - coding->destination;
4754 return 0;
4ed46869
KH
4755}
4756
4757\f
1397dc18 4758/*** 10. C library functions ***/
4ed46869 4759
df7492f9
KH
4760/* Setup coding context CODING from information about CODING_SYSTEM.
4761 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4762 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4763
4764void
df7492f9
KH
4765setup_coding_system (coding_system, coding)
4766 Lisp_Object coding_system;
ec6d2bb8
KH
4767 struct coding_system *coding;
4768{
df7492f9
KH
4769 Lisp_Object attrs;
4770 Lisp_Object eol_type;
4771 Lisp_Object coding_type;
4772 Lisp_Object val;
ec6d2bb8 4773
df7492f9
KH
4774 if (NILP (coding_system))
4775 coding_system = Qno_conversion;
4776
4777 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4778
4779 attrs = CODING_ID_ATTRS (coding->id);
4780 eol_type = CODING_ID_EOL_TYPE (coding->id);
4781
4782 coding->mode = 0;
4783 coding->head_ascii = -1;
4784 coding->common_flags
4785 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4786
4787 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4788 coding->max_charset_id = XSTRING (val)->size - 1;
4789 coding->safe_charsets = (char *) XSTRING (val)->data;
4790 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4791
4792 coding_type = CODING_ATTR_TYPE (attrs);
4793 if (EQ (coding_type, Qundecided))
4794 {
4795 coding->detector = NULL;
4796 coding->decoder = decode_coding_raw_text;
4797 coding->encoder = encode_coding_raw_text;
4798 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4799 }
4800 else if (EQ (coding_type, Qiso_2022))
4801 {
4802 int i;
4803 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
ff0dacd7 4804 enum coding_category category = XINT (CODING_ATTR_CATEGORY (attrs));
df7492f9
KH
4805
4806 /* Invoke graphic register 0 to plane 0. */
4807 CODING_ISO_INVOCATION (coding, 0) = 0;
4808 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4809 CODING_ISO_INVOCATION (coding, 1)
4810 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4811 /* Setup the initial status of designation. */
4812 for (i = 0; i < 4; i++)
4813 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4814 /* Not single shifting initially. */
4815 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4816 /* Beginning of buffer should also be regarded as bol. */
4817 CODING_ISO_BOL (coding) = 1;
4818 coding->detector = detect_coding_iso_2022;
4819 coding->decoder = decode_coding_iso_2022;
4820 coding->encoder = encode_coding_iso_2022;
4821 if (flags & CODING_ISO_FLAG_SAFE)
4822 coding->mode |= CODING_MODE_SAFE_ENCODING;
4823 coding->common_flags
4824 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4825 | CODING_REQUIRE_FLUSHING_MASK);
4826 if (flags & CODING_ISO_FLAG_COMPOSITION)
4827 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4828 if (flags & CODING_ISO_FLAG_DESIGNATION)
4829 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4830 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4831 {
4832 setup_iso_safe_charsets (attrs);
4833 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4834 coding->max_charset_id = XSTRING (val)->size - 1;
4835 coding->safe_charsets = (char *) XSTRING (val)->data;
4836 }
4837 CODING_ISO_FLAGS (coding) = flags;
4838 }
4839 else if (EQ (coding_type, Qcharset))
4840 {
4841 coding->detector = detect_coding_charset;
4842 coding->decoder = decode_coding_charset;
4843 coding->encoder = encode_coding_charset;
4844 coding->common_flags
4845 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4846 }
4847 else if (EQ (coding_type, Qutf_8))
4848 {
4849 coding->detector = detect_coding_utf_8;
4850 coding->decoder = decode_coding_utf_8;
4851 coding->encoder = encode_coding_utf_8;
4852 coding->common_flags
4853 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4854 }
4855 else if (EQ (coding_type, Qutf_16))
4856 {
4857 val = AREF (attrs, coding_attr_utf_16_bom);
4858 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4859 : EQ (val, Qt) ? utf_16_with_bom
4860 : utf_16_without_bom);
4861 val = AREF (attrs, coding_attr_utf_16_endian);
4862 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4863 : utf_16_little_endian);
e19c3639 4864 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4865 coding->detector = detect_coding_utf_16;
4866 coding->decoder = decode_coding_utf_16;
4867 coding->encoder = encode_coding_utf_16;
4868 coding->common_flags
4869 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4870 }
4871 else if (EQ (coding_type, Qccl))
4872 {
4873 coding->detector = detect_coding_ccl;
4874 coding->decoder = decode_coding_ccl;
4875 coding->encoder = encode_coding_ccl;
4876 coding->common_flags
4877 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4878 | CODING_REQUIRE_FLUSHING_MASK);
4879 }
4880 else if (EQ (coding_type, Qemacs_mule))
4881 {
4882 coding->detector = detect_coding_emacs_mule;
4883 coding->decoder = decode_coding_emacs_mule;
4884 coding->encoder = encode_coding_emacs_mule;
4885 coding->common_flags
4886 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4887 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4888 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4889 {
4890 Lisp_Object tail, safe_charsets;
4891 int max_charset_id = 0;
4892
4893 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4894 tail = XCDR (tail))
4895 if (max_charset_id < XFASTINT (XCAR (tail)))
4896 max_charset_id = XFASTINT (XCAR (tail));
4897 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4898 make_number (255));
4899 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4900 tail = XCDR (tail))
4901 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4902 coding->max_charset_id = max_charset_id;
4903 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4904 }
4905 }
4906 else if (EQ (coding_type, Qshift_jis))
4907 {
4908 coding->detector = detect_coding_sjis;
4909 coding->decoder = decode_coding_sjis;
4910 coding->encoder = encode_coding_sjis;
4911 coding->common_flags
4912 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4913 }
4914 else if (EQ (coding_type, Qbig5))
4915 {
4916 coding->detector = detect_coding_big5;
4917 coding->decoder = decode_coding_big5;
4918 coding->encoder = encode_coding_big5;
4919 coding->common_flags
4920 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4921 }
4922 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4923 {
df7492f9
KH
4924 coding->detector = NULL;
4925 coding->decoder = decode_coding_raw_text;
4926 coding->encoder = encode_coding_raw_text;
4927 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4928 }
df7492f9
KH
4929
4930 return;
ec6d2bb8
KH
4931}
4932
df7492f9
KH
4933/* Return raw-text or one of its subsidiaries that has the same
4934 eol_type as CODING-SYSTEM. */
ec6d2bb8 4935
df7492f9
KH
4936Lisp_Object
4937raw_text_coding_system (coding_system)
4938 Lisp_Object coding_system;
ec6d2bb8 4939{
0be8721c 4940 Lisp_Object spec, attrs;
df7492f9
KH
4941 Lisp_Object eol_type, raw_text_eol_type;
4942
4943 spec = CODING_SYSTEM_SPEC (coding_system);
4944 attrs = AREF (spec, 0);
4945
4946 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4947 return coding_system;
ec6d2bb8 4948
df7492f9
KH
4949 eol_type = AREF (spec, 2);
4950 if (VECTORP (eol_type))
4951 return Qraw_text;
4952 spec = CODING_SYSTEM_SPEC (Qraw_text);
4953 raw_text_eol_type = AREF (spec, 2);
4954 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4955 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4956 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4957}
4958
54f78171 4959
df7492f9
KH
4960/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4961 does, return one of the subsidiary that has the same eol-spec as
4962 PARENT. Otherwise, return CODING_SYSTEM. */
4963
4964Lisp_Object
4965coding_inherit_eol_type (coding_system, parent)
b74e4686 4966 Lisp_Object coding_system, parent;
54f78171 4967{
df7492f9 4968 Lisp_Object spec, attrs, eol_type;
54f78171 4969
df7492f9
KH
4970 spec = CODING_SYSTEM_SPEC (coding_system);
4971 attrs = AREF (spec, 0);
4972 eol_type = AREF (spec, 2);
4973 if (VECTORP (eol_type))
4974 {
4975 Lisp_Object parent_spec;
df7492f9
KH
4976 Lisp_Object parent_eol_type;
4977
4978 parent_spec
4979 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4980 parent_eol_type = AREF (parent_spec, 2);
4981 if (EQ (parent_eol_type, Qunix))
4982 coding_system = AREF (eol_type, 0);
4983 else if (EQ (parent_eol_type, Qdos))
4984 coding_system = AREF (eol_type, 1);
4985 else if (EQ (parent_eol_type, Qmac))
4986 coding_system = AREF (eol_type, 2);
54f78171 4987 }
df7492f9 4988 return coding_system;
54f78171
KH
4989}
4990
4ed46869
KH
4991/* Emacs has a mechanism to automatically detect a coding system if it
4992 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4993 it's impossible to distinguish some coding systems accurately
4994 because they use the same range of codes. So, at first, coding
4995 systems are categorized into 7, those are:
4996
0ef69138 4997 o coding-category-emacs-mule
4ed46869
KH
4998
4999 The category for a coding system which has the same code range
5000 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5001 symbol) `emacs-mule' by default.
4ed46869
KH
5002
5003 o coding-category-sjis
5004
5005 The category for a coding system which has the same code range
5006 as SJIS. Assigned the coding-system (Lisp
7717c392 5007 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5008
5009 o coding-category-iso-7
5010
5011 The category for a coding system which has the same code range
7717c392 5012 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5013 shift and single shift functions. This can encode/decode all
5014 charsets. Assigned the coding-system (Lisp symbol)
5015 `iso-2022-7bit' by default.
5016
5017 o coding-category-iso-7-tight
5018
5019 Same as coding-category-iso-7 except that this can
5020 encode/decode only the specified charsets.
4ed46869
KH
5021
5022 o coding-category-iso-8-1
5023
5024 The category for a coding system which has the same code range
5025 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5026 for DIMENSION1 charset. This doesn't use any locking shift
5027 and single shift functions. Assigned the coding-system (Lisp
5028 symbol) `iso-latin-1' by default.
4ed46869
KH
5029
5030 o coding-category-iso-8-2
5031
5032 The category for a coding system which has the same code range
5033 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5034 for DIMENSION2 charset. This doesn't use any locking shift
5035 and single shift functions. Assigned the coding-system (Lisp
5036 symbol) `japanese-iso-8bit' by default.
4ed46869 5037
7717c392 5038 o coding-category-iso-7-else
4ed46869
KH
5039
5040 The category for a coding system which has the same code range
df7492f9 5041 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5042 single shift functions. Assigned the coding-system (Lisp
5043 symbol) `iso-2022-7bit-lock' by default.
5044
5045 o coding-category-iso-8-else
5046
5047 The category for a coding system which has the same code range
df7492f9 5048 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5049 single shift functions. Assigned the coding-system (Lisp
5050 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5051
5052 o coding-category-big5
5053
5054 The category for a coding system which has the same code range
5055 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5056 `cn-big5' by default.
4ed46869 5057
fa42c37f
KH
5058 o coding-category-utf-8
5059
5060 The category for a coding system which has the same code range
5061 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5062 symbol) `utf-8' by default.
5063
5064 o coding-category-utf-16-be
5065
5066 The category for a coding system in which a text has an
5067 Unicode signature (cf. Unicode Standard) in the order of BIG
5068 endian at the head. Assigned the coding-system (Lisp symbol)
5069 `utf-16-be' by default.
5070
5071 o coding-category-utf-16-le
5072
5073 The category for a coding system in which a text has an
5074 Unicode signature (cf. Unicode Standard) in the order of
5075 LITTLE endian at the head. Assigned the coding-system (Lisp
5076 symbol) `utf-16-le' by default.
5077
1397dc18
KH
5078 o coding-category-ccl
5079
5080 The category for a coding system of which encoder/decoder is
5081 written in CCL programs. The default value is nil, i.e., no
5082 coding system is assigned.
5083
4ed46869
KH
5084 o coding-category-binary
5085
5086 The category for a coding system not categorized in any of the
5087 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5088 `no-conversion' by default.
4ed46869
KH
5089
5090 Each of them is a Lisp symbol and the value is an actual
df7492f9 5091 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5092 What Emacs does actually is to detect a category of coding system.
5093 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5094 decide only one possible category, it selects a category of the
4ed46869
KH
5095 highest priority. Priorities of categories are also specified by a
5096 user in a Lisp variable `coding-category-list'.
5097
5098*/
5099
df7492f9
KH
/* Kinds of end-of-line found while scanning text.  The non-zero
   values are distinct bits so that they can be OR-ed together.  */
#define EOL_SEEN_NONE	0x0	/* no end-of-line seen */
#define EOL_SEEN_LF	0x1	/* a bare LF */
#define EOL_SEEN_CR	0x2	/* a CR not followed by LF */
#define EOL_SEEN_CRLF	0x4	/* a CR LF sequence */
4ed46869 5104
ff0dacd7
KH
5105/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5106 SOURCE is encoded. If CATEGORY is one of
5107 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5108 two-byte, else they are encoded by one-byte.
5109
5110 Return one of EOL_SEEN_XXX. */
4ed46869 5111
bc4bc72a
RS
5112#define MAX_EOL_CHECK_COUNT 3
5113
d46c5b12 5114static int
89528eb3 5115detect_eol (source, src_bytes, category)
d46c5b12 5116 unsigned char *source;
df7492f9 5117 EMACS_INT src_bytes;
89528eb3 5118 enum coding_category category;
4ed46869 5119{
d46c5b12 5120 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5121 unsigned char c;
df7492f9
KH
5122 int total = 0;
5123 int eol_seen = EOL_SEEN_NONE;
4ed46869 5124
89528eb3 5125 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5126 {
df7492f9 5127 int msb, lsb;
fa42c37f 5128
89528eb3
KH
5129 msb = category == (coding_category_utf_16_le
5130 | coding_category_utf_16_le_nosig);
df7492f9 5131 lsb = 1 - msb;
fa42c37f 5132
df7492f9 5133 while (src + 1 < src_end)
fa42c37f 5134 {
df7492f9
KH
5135 c = src[lsb];
5136 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5137 {
df7492f9
KH
5138 int this_eol;
5139
5140 if (c == '\n')
5141 this_eol = EOL_SEEN_LF;
5142 else if (src + 3 >= src_end
5143 || src[msb + 2] != 0
5144 || src[lsb + 2] != '\n')
5145 this_eol = EOL_SEEN_CR;
fa42c37f 5146 else
df7492f9
KH
5147 this_eol = EOL_SEEN_CRLF;
5148
5149 if (eol_seen == EOL_SEEN_NONE)
5150 /* This is the first end-of-line. */
5151 eol_seen = this_eol;
5152 else if (eol_seen != this_eol)
fa42c37f 5153 {
df7492f9
KH
5154 /* The found type is different from what found before. */
5155 eol_seen = EOL_SEEN_LF;
5156 break;
fa42c37f 5157 }
df7492f9
KH
5158 if (++total == MAX_EOL_CHECK_COUNT)
5159 break;
fa42c37f 5160 }
df7492f9 5161 src += 2;
fa42c37f 5162 }
df7492f9 5163 }
d46c5b12 5164 else
27901516 5165 {
df7492f9 5166 while (src < src_end)
27901516 5167 {
df7492f9
KH
5168 c = *src++;
5169 if (c == '\n' || c == '\r')
5170 {
5171 int this_eol;
d46c5b12 5172
df7492f9
KH
5173 if (c == '\n')
5174 this_eol = EOL_SEEN_LF;
5175 else if (src >= src_end || *src != '\n')
5176 this_eol = EOL_SEEN_CR;
5177 else
5178 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5179
df7492f9
KH
5180 if (eol_seen == EOL_SEEN_NONE)
5181 /* This is the first end-of-line. */
5182 eol_seen = this_eol;
5183 else if (eol_seen != this_eol)
5184 {
5185 /* The found type is different from what found before. */
5186 eol_seen = EOL_SEEN_LF;
5187 break;
5188 }
5189 if (++total == MAX_EOL_CHECK_COUNT)
5190 break;
5191 }
5192 }
73be902c 5193 }
df7492f9 5194 return eol_seen;
73be902c
KH
5195}
5196
df7492f9 5197
73be902c 5198static void
df7492f9
KH
5199adjust_coding_eol_type (coding, eol_seen)
5200 struct coding_system *coding;
5201 int eol_seen;
73be902c 5202{
0be8721c 5203 Lisp_Object eol_type;
df7492f9
KH
5204
5205 eol_type = CODING_ID_EOL_TYPE (coding->id);
5206 if (eol_seen & EOL_SEEN_LF)
5207 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6f197c07 5208 else if (eol_seen & EOL_SEEN_CRLF)
df7492f9 5209 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6f197c07 5210 else if (eol_seen & EOL_SEEN_CR)
df7492f9 5211 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
5212}
5213
df7492f9
KH
5214/* Detect how a text specified in CODING is encoded. If a coding
5215 system is detected, update fields of CODING by the detected coding
5216 system. */
5217
5218void
5219detect_coding (coding)
d46c5b12 5220 struct coding_system *coding;
d46c5b12 5221{
df7492f9
KH
5222 unsigned char *src, *src_end;
5223 Lisp_Object attrs, coding_type;
d46c5b12 5224
df7492f9
KH
5225 coding->consumed = coding->consumed_char = 0;
5226 coding->produced = coding->produced_char = 0;
5227 coding_set_source (coding);
1c3478b0 5228
df7492f9 5229 src_end = coding->source + coding->src_bytes;
1c3478b0 5230
df7492f9
KH
5231 /* If we have not yet decided the text encoding type, detect it
5232 now. */
5233 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5234 {
df7492f9
KH
5235 int c, i;
5236
5237 for (src = coding->source; src < src_end; src++)
5238 {
5239 c = *src;
5240 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5241 || c == ISO_CODE_SI
5242 || c == ISO_CODE_SO)))
5243 break;
5244 }
5245 coding->head_ascii = src - (coding->source + coding->consumed);
5246
5247 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5248 {
ff0dacd7
KH
5249 struct coding_detection_info detect_info;
5250 enum coding_category category;
5251 struct coding_system *this;
df7492f9 5252
ff0dacd7 5253 detect_info.checked = detect_info.found = detect_info.rejected = 0;
df7492f9 5254 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5255 {
ff0dacd7
KH
5256 category = coding_priorities[i];
5257 this = coding_categories + category;
df7492f9 5258 if (this->id < 0)
1c3478b0 5259 {
df7492f9 5260 /* No coding system of this category is defined. */
ff0dacd7 5261 detect_info.rejected |= (1 << category);
df7492f9 5262 }
ff0dacd7 5263 else if (category >= coding_category_raw_text)
89528eb3 5264 continue;
ff0dacd7 5265 else if (detect_info.checked & (1 << category))
df7492f9 5266 {
ff0dacd7
KH
5267 if (detect_info.found & (1 << category))
5268 break;
1c3478b0 5269 }
ff0dacd7
KH
5270 else if ((*(this->detector)) (coding, &detect_info)
5271 && detect_info.found & (1 << category))
5272 break;
1c3478b0 5273 }
ff0dacd7
KH
5274 if (i < coding_category_raw_text)
5275 setup_coding_system (CODING_ID_NAME (this->id), coding);
5276 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5277 setup_coding_system (Qraw_text, coding);
ff0dacd7 5278 else if (detect_info.rejected)
df7492f9 5279 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5280 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5281 {
5282 this = coding_categories + coding_priorities[i];
5283 setup_coding_system (CODING_ID_NAME (this->id), coding);
5284 break;
5285 }
1c3478b0 5286 }
b73bfc1c 5287 }
69f76525 5288
df7492f9
KH
5289 attrs = CODING_ID_ATTRS (coding->id);
5290 coding_type = CODING_ATTR_TYPE (attrs);
5291
5292 /* If we have not yet decided the EOL type, detect it now. But, the
5293 detection is impossible for a CCL based coding system, in which
5294 case, we detct the EOL type after decoding. */
5295 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5296 && ! EQ (coding_type, Qccl))
d46c5b12 5297 {
89528eb3
KH
5298 int eol_seen = detect_eol (coding->source, coding->src_bytes,
5299 XINT (CODING_ATTR_CATEGORY (attrs)));
df7492f9
KH
5300
5301 if (eol_seen != EOL_SEEN_NONE)
5302 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5303 }
4ed46869
KH
5304}
5305
aaaf0b1e
KH
5306
5307static void
df7492f9 5308decode_eol (coding)
aaaf0b1e 5309 struct coding_system *coding;
aaaf0b1e 5310{
df7492f9 5311 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5312 {
df7492f9
KH
5313 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5314 unsigned char *pend = p + coding->produced;
5315 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5316
df7492f9 5317 for (; p < pend; p++)
aaaf0b1e 5318 {
df7492f9
KH
5319 if (*p == '\n')
5320 eol_seen |= EOL_SEEN_LF;
5321 else if (*p == '\r')
aaaf0b1e 5322 {
df7492f9 5323 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5324 {
df7492f9
KH
5325 eol_seen |= EOL_SEEN_CRLF;
5326 p++;
aaaf0b1e 5327 }
aaaf0b1e 5328 else
df7492f9 5329 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5330 }
aaaf0b1e 5331 }
df7492f9
KH
5332 if (eol_seen != EOL_SEEN_NONE)
5333 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5334 }
aaaf0b1e 5335
df7492f9
KH
5336 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5337 {
5338 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5339 unsigned char *pend = p + coding->produced;
5340
5341 for (; p < pend; p++)
5342 if (*p == '\r')
5343 *p = '\n';
5344 }
5345 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5346 {
5347 unsigned char *p, *pbeg, *pend;
5348 Lisp_Object undo_list;
5349
5350 move_gap_both (coding->dst_pos + coding->produced_char,
5351 coding->dst_pos_byte + coding->produced);
5352 undo_list = current_buffer->undo_list;
5353 current_buffer->undo_list = Qt;
c197f191 5354 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5355 current_buffer->undo_list = undo_list;
5356 pbeg = GPT_ADDR;
5357 pend = pbeg + coding->produced;
5358
5359 for (p = pend - 1; p >= pbeg; p--)
5360 if (*p == '\r')
5361 {
5362 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5363 pend--;
5364 }
5365 coding->produced_char -= coding->produced - (pend - pbeg);
5366 coding->produced = pend - pbeg;
5367 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5368 }
5369}
5370
df7492f9
KH
5371static void
5372translate_chars (coding, table)
4ed46869 5373 struct coding_system *coding;
df7492f9 5374 Lisp_Object table;
4ed46869 5375{
df7492f9
KH
5376 int *charbuf = coding->charbuf;
5377 int *charbuf_end = charbuf + coding->charbuf_used;
5378 int c;
5379
5380 if (coding->chars_at_source)
5381 return;
4ed46869 5382
df7492f9 5383 while (charbuf < charbuf_end)
8844fa83 5384 {
df7492f9
KH
5385 c = *charbuf;
5386 if (c < 0)
5387 charbuf += c;
5388 else
5389 *charbuf++ = translate_char (table, c);
8844fa83 5390 }
df7492f9 5391}
4ed46869 5392
df7492f9
KH
5393static int
5394produce_chars (coding)
5395 struct coding_system *coding;
5396{
5397 unsigned char *dst = coding->destination + coding->produced;
5398 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5399 int produced;
5400 int produced_chars = 0;
b73bfc1c 5401
df7492f9 5402 if (! coding->chars_at_source)
4ed46869 5403 {
df7492f9
KH
5404 /* Characters are in coding->charbuf. */
5405 int *buf = coding->charbuf;
5406 int *buf_end = buf + coding->charbuf_used;
5407 unsigned char *adjusted_dst_end;
4ed46869 5408
df7492f9
KH
5409 if (BUFFERP (coding->src_object)
5410 && EQ (coding->src_object, coding->dst_object))
5411 dst_end = coding->source + coding->consumed;
5412 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5413
df7492f9
KH
5414 while (buf < buf_end)
5415 {
5416 int c = *buf++;
5417
5418 if (dst >= adjusted_dst_end)
5419 {
5420 dst = alloc_destination (coding,
5421 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5422 dst);
5423 dst_end = coding->destination + coding->dst_bytes;
5424 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5425 }
5426 if (c >= 0)
5427 {
5428 if (coding->dst_multibyte
5429 || ! CHAR_BYTE8_P (c))
5430 CHAR_STRING_ADVANCE (c, dst);
5431 else
5432 *dst++ = CHAR_TO_BYTE8 (c);
5433 produced_chars++;
5434 }
5435 else
e1c23804 5436 /* This is an annotation datum. */
df7492f9
KH
5437 buf -= c + 1;
5438 }
5439 }
5440 else
5441 {
df7492f9
KH
5442 unsigned char *src = coding->source;
5443 unsigned char *src_end = src + coding->src_bytes;
5444 Lisp_Object eol_type;
b73bfc1c 5445
df7492f9 5446 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5447
df7492f9 5448 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5449 {
df7492f9
KH
5450 if (coding->src_multibyte)
5451 {
71c81426 5452 int multibytep = 1;
df7492f9 5453 int consumed_chars;
d46c5b12 5454
df7492f9
KH
5455 while (1)
5456 {
5457 unsigned char *src_base = src;
5458 int c;
b73bfc1c 5459
df7492f9
KH
5460 ONE_MORE_BYTE (c);
5461 if (c == '\r')
5462 {
5463 if (EQ (eol_type, Qdos))
5464 {
98725083
KH
5465 if (src == src_end)
5466 {
5467 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5468 goto no_more_source;
5469 }
5470 if (*src == '\n')
df7492f9
KH
5471 c = *src++;
5472 }
5473 else if (EQ (eol_type, Qmac))
5474 c = '\n';
5475 }
5476 if (dst == dst_end)
5477 {
2c78b7e1 5478 coding->consumed = src - coding->source;
b73bfc1c 5479
2c78b7e1
KH
5480 if (EQ (coding->src_object, coding->dst_object))
5481 dst_end = src;
5482 if (dst == dst_end)
5483 {
5484 dst = alloc_destination (coding, src_end - src + 1,
5485 dst);
5486 dst_end = coding->destination + coding->dst_bytes;
5487 coding_set_source (coding);
5488 src = coding->source + coding->consumed;
5489 src_end = coding->source + coding->src_bytes;
5490 }
df7492f9
KH
5491 }
5492 *dst++ = c;
5493 produced_chars++;
5494 }
5495 no_more_source:
5496 ;
5497 }
5498 else
5499 while (src < src_end)
5500 {
71c81426 5501 int multibytep = 1;
df7492f9 5502 int c = *src++;
b73bfc1c 5503
df7492f9
KH
5504 if (c == '\r')
5505 {
5506 if (EQ (eol_type, Qdos))
5507 {
5508 if (src < src_end
5509 && *src == '\n')
5510 c = *src++;
5511 }
5512 else if (EQ (eol_type, Qmac))
5513 c = '\n';
5514 }
5515 if (dst >= dst_end - 1)
5516 {
2c78b7e1 5517 coding->consumed = src - coding->source;
df7492f9 5518
2c78b7e1
KH
5519 if (EQ (coding->src_object, coding->dst_object))
5520 dst_end = src;
5521 if (dst >= dst_end - 1)
5522 {
5523 dst = alloc_destination (coding, src_end - src + 2,
5524 dst);
5525 dst_end = coding->destination + coding->dst_bytes;
5526 coding_set_source (coding);
5527 src = coding->source + coding->consumed;
5528 src_end = coding->source + coding->src_bytes;
5529 }
df7492f9
KH
5530 }
5531 EMIT_ONE_BYTE (c);
5532 }
d46c5b12 5533 }
df7492f9
KH
5534 else
5535 {
5536 if (!EQ (coding->src_object, coding->dst_object))
5537 {
5538 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5539
df7492f9
KH
5540 if (require > 0)
5541 {
5542 EMACS_INT offset = src - coding->source;
5543
5544 dst = alloc_destination (coding, require, dst);
5545 coding_set_source (coding);
5546 src = coding->source + offset;
5547 src_end = coding->source + coding->src_bytes;
5548 }
5549 }
5550 produced_chars = coding->src_chars;
5551 while (src < src_end)
5552 {
5553 int c = *src++;
5554
5555 if (c == '\r')
5556 {
5557 if (EQ (eol_type, Qdos))
5558 {
5559 if (src < src_end
5560 && *src == '\n')
5561 c = *src++;
5562 produced_chars--;
5563 }
5564 else if (EQ (eol_type, Qmac))
5565 c = '\n';
5566 }
5567 *dst++ = c;
5568 }
5569 }
2c78b7e1
KH
5570 coding->consumed = coding->src_bytes;
5571 coding->consumed_char = coding->src_chars;
b73bfc1c 5572 }
4ed46869 5573
df7492f9
KH
5574 produced = dst - (coding->destination + coding->produced);
5575 if (BUFFERP (coding->dst_object))
5576 insert_from_gap (produced_chars, produced);
5577 coding->produced += produced;
5578 coding->produced_char += produced_chars;
5579 return produced_chars;
b73bfc1c 5580}
52d41803 5581
ff0dacd7
KH
5582/* Compose text in CODING->object according to the annotation data at
5583 CHARBUF. CHARBUF is an array:
5584 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5585 */
4ed46869 5586
df7492f9
KH
5587static INLINE void
5588produce_composition (coding, charbuf)
4ed46869 5589 struct coding_system *coding;
df7492f9 5590 int *charbuf;
4ed46869 5591{
df7492f9 5592 int len;
ff0dacd7 5593 EMACS_INT from, to;
df7492f9 5594 enum composition_method method;
df7492f9
KH
5595 Lisp_Object components;
5596
df7492f9 5597 len = -charbuf[0];
ff0dacd7
KH
5598 from = coding->dst_pos + charbuf[2];
5599 to = coding->dst_pos + charbuf[3];
5600 method = (enum composition_method) (charbuf[4]);
df7492f9
KH
5601
5602 if (method == COMPOSITION_RELATIVE)
5603 components = Qnil;
5604 else
d46c5b12 5605 {
df7492f9
KH
5606 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5607 int i;
4ed46869 5608
df7492f9
KH
5609 len -= 5;
5610 charbuf += 5;
5611 for (i = 0; i < len; i++)
5612 args[i] = make_number (charbuf[i]);
5613 components = (method == COMPOSITION_WITH_ALTCHARS
5614 ? Fstring (len, args) : Fvector (len, args));
5615 }
ff0dacd7 5616 compose_text (from, to, components, Qnil, coding->dst_object);
df7492f9 5617}
b73bfc1c 5618
d46c5b12 5619
ff0dacd7
KH
5620/* Put `charset' property on text in CODING->object according to
5621 the annotation data at CHARBUF. CHARBUF is an array:
5622 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5623 */
b73bfc1c 5624
ff0dacd7
KH
5625static INLINE void
5626produce_charset (coding, charbuf)
5627 struct coding_system *coding;
5628 int *charbuf;
5629{
5630 EMACS_INT from = coding->dst_pos + charbuf[2];
5631 EMACS_INT to = coding->dst_pos + charbuf[3];
5632 struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
b73bfc1c 5633
ff0dacd7
KH
5634 Fput_text_property (make_number (from), make_number (to),
5635 Qcharset, CHARSET_NAME (charset),
5636 coding->dst_object);
4ed46869
KH
5637}
5638
ff0dacd7 5639
df7492f9
KH
/* Number of ints first requested for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the character work buffer of CODING (a pointer to struct
   coding_system) with alloca and record its size in
   CODING->charbuf_size.  The request starts at CHARBUF_SIZE ints and
   is halved while alloca yields a null pointer and the size is still
   above 1024.  If no buffer is obtained, CODING->result is set to
   CODING_RESULT_INSUFFICIENT_MEM and the *enclosing function*
   returns that value, so this macro may only be used in functions
   returning int.

   The memory lives in the stack frame of the enclosing function.

   NOTE(review): common alloca implementations do not report failure
   by returning a null pointer, so the retry loop and the failure
   branch are probably never taken; confirm for the supported
   platforms.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)				\
  do {									\
    int size = CHARBUF_SIZE;						\
									\
    (coding)->charbuf = NULL;						\
    while (size > 1024)							\
      {									\
	(coding)->charbuf = (int *) alloca (sizeof (int) * size);	\
	if ((coding)->charbuf)						\
	  break;							\
	size >>= 1;							\
      }									\
    if (! (coding)->charbuf)						\
      {									\
	(coding)->result = CODING_RESULT_INSUFFICIENT_MEM;		\
	return (coding)->result;					\
      }									\
    (coding)->charbuf_size = size;					\
  } while (0)
4ed46869 5661
d46c5b12
KH
5662
5663static void
df7492f9 5664produce_annotation (coding)
d46c5b12 5665 struct coding_system *coding;
d46c5b12 5666{
df7492f9
KH
5667 int *charbuf = coding->charbuf;
5668 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5669
ff0dacd7
KH
5670 if (NILP (coding->dst_object))
5671 return;
5672
df7492f9 5673 while (charbuf < charbuf_end)
d46c5b12 5674 {
df7492f9
KH
5675 if (*charbuf >= 0)
5676 charbuf++;
d46c5b12 5677 else
d46c5b12 5678 {
df7492f9 5679 int len = -*charbuf;
ff0dacd7 5680 switch (charbuf[1])
df7492f9
KH
5681 {
5682 case CODING_ANNOTATE_COMPOSITION_MASK:
5683 produce_composition (coding, charbuf);
5684 break;
ff0dacd7
KH
5685 case CODING_ANNOTATE_CHARSET_MASK:
5686 produce_charset (coding, charbuf);
5687 break;
df7492f9
KH
5688 default:
5689 abort ();
5690 }
5691 charbuf += len;
d46c5b12 5692 }
df7492f9
KH
5693 }
5694}
d46c5b12 5695
df7492f9
KH
5696/* Decode the data at CODING->src_object into CODING->dst_object.
5697 CODING->src_object is a buffer, a string, or nil.
5698 CODING->dst_object is a buffer.
de79a6a5 5699
df7492f9
KH
5700 If CODING->src_object is a buffer, it must be the current buffer.
5701 In this case, if CODING->src_pos is positive, it is a position of
5702 the source text in the buffer, otherwise, the source text is in the
5703 gap area of the buffer, and CODING->src_pos specifies the offset of
5704 the text from GPT (which must be the same as PT). If this is the
5705 same buffer as CODING->dst_object, CODING->src_pos must be
5706 negative.
b73bfc1c 5707
df7492f9
KH
5708 If CODING->src_object is a string, CODING->src_pos in an index to
5709 that string.
d46c5b12 5710
df7492f9
KH
5711 If CODING->src_object is nil, CODING->source must already point to
5712 the non-relocatable memory area. In this case, CODING->src_pos is
5713 an offset from CODING->source.
d46c5b12 5714
df7492f9
KH
5715 The decoded data is inserted at the current point of the buffer
5716 CODING->dst_object.
5717*/
5718
5719static int
5720decode_coding (coding)
d46c5b12 5721 struct coding_system *coding;
d46c5b12 5722{
df7492f9 5723 Lisp_Object attrs;
d46c5b12 5724
df7492f9
KH
5725 if (BUFFERP (coding->src_object)
5726 && coding->src_pos > 0
5727 && coding->src_pos < GPT
5728 && coding->src_pos + coding->src_chars > GPT)
5729 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5730
df7492f9 5731 if (BUFFERP (coding->dst_object))
88993dfd 5732 {
df7492f9
KH
5733 if (current_buffer != XBUFFER (coding->dst_object))
5734 set_buffer_internal (XBUFFER (coding->dst_object));
5735 if (GPT != PT)
5736 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5737 }
5738
df7492f9
KH
5739 coding->consumed = coding->consumed_char = 0;
5740 coding->produced = coding->produced_char = 0;
5741 coding->chars_at_source = 0;
5742 coding->result = CODING_RESULT_SUCCESS;
5743 coding->errors = 0;
5744
5745 ALLOC_CONVERSION_WORK_AREA (coding);
5746
5747 attrs = CODING_ID_ATTRS (coding->id);
5748
5749 do
d46c5b12 5750 {
df7492f9
KH
5751 coding_set_source (coding);
5752 coding->annotated = 0;
5753 (*(coding->decoder)) (coding);
5754 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
da4109a9
KH
5755 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5756 else if (!NILP (Vstandard_translation_table_for_decode))
5757 translate_chars (coding, Vstandard_translation_table_for_decode);
df7492f9
KH
5758 coding_set_destination (coding);
5759 produce_chars (coding);
5760 if (coding->annotated)
5761 produce_annotation (coding);
d46c5b12 5762 }
df7492f9
KH
5763 while (coding->consumed < coding->src_bytes
5764 && ! coding->result);
d46c5b12 5765
df7492f9
KH
5766 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5767 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5768 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5769 decode_eol (coding);
d46c5b12 5770
df7492f9
KH
5771 coding->carryover_bytes = 0;
5772 if (coding->consumed < coding->src_bytes)
d46c5b12 5773 {
df7492f9
KH
5774 int nbytes = coding->src_bytes - coding->consumed;
5775 unsigned char *src;
5776
5777 coding_set_source (coding);
5778 coding_set_destination (coding);
5779 src = coding->source + coding->consumed;
5780
5781 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5782 {
df7492f9
KH
5783 /* Flush out unprocessed data as binary chars. We are sure
5784 that the number of data is less than the size of
5785 coding->charbuf. */
df7492f9 5786 while (nbytes-- > 0)
d46c5b12 5787 {
df7492f9 5788 int c = *src++;
98725083
KH
5789
5790 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
d46c5b12 5791 }
df7492f9 5792 produce_chars (coding);
d46c5b12 5793 }
d46c5b12 5794 else
df7492f9
KH
5795 {
5796 /* Record unprocessed bytes in coding->carryover. We are
5797 sure that the number of data is less than the size of
5798 coding->carryover. */
5799 unsigned char *p = coding->carryover;
5800
5801 coding->carryover_bytes = nbytes;
5802 while (nbytes-- > 0)
5803 *p++ = *src++;
5804 }
5805 coding->consumed = coding->src_bytes;
5806 }
b73bfc1c 5807
df7492f9 5808 return coding->result;
d46c5b12
KH
5809}
5810
ff0dacd7 5811
e1c23804 5812/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
5813 ending before LIMIT of CODING->src_object (buffer or string), store
5814 the data in BUF, set *STOP to a starting position of the next
5815 composition (if any) or to LIMIT, and return the address of the
5816 next element of BUF.
5817
5818 If such an annotation is not found, set *STOP to a starting
5819 position of a composition after POS (if any) or to LIMIT, and
5820 return BUF. */
5821
5822static INLINE int *
5823handle_composition_annotation (pos, limit, coding, buf, stop)
5824 EMACS_INT pos, limit;
5825 struct coding_system *coding;
5826 int *buf;
5827 EMACS_INT *stop;
5828{
5829 EMACS_INT start, end;
5830 Lisp_Object prop;
5831
5832 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5833 || end > limit)
5834 *stop = limit;
5835 else if (start > pos)
5836 *stop = start;
5837 else
5838 {
5839 if (start == pos)
5840 {
5841 /* We found a composition. Store the corresponding
5842 annotation data in BUF. */
5843 int *head = buf;
5844 enum composition_method method = COMPOSITION_METHOD (prop);
5845 int nchars = COMPOSITION_LENGTH (prop);
5846
5847 ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5848 if (method != COMPOSITION_RELATIVE)
5849 {
5850 Lisp_Object components;
5851 int len, i, i_byte;
5852
5853 components = COMPOSITION_COMPONENTS (prop);
5854 if (VECTORP (components))
5855 {
5856 len = XVECTOR (components)->size;
5857 for (i = 0; i < len; i++)
5858 *buf++ = XINT (AREF (components, i));
5859 }
5860 else if (STRINGP (components))
5861 {
5862 len = XSTRING (components)->size;
5863 i = i_byte = 0;
5864 while (i < len)
5865 {
5866 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5867 buf++;
5868 }
5869 }
5870 else if (INTEGERP (components))
5871 {
5872 len = 1;
5873 *buf++ = XINT (components);
5874 }
5875 else if (CONSP (components))
5876 {
5877 for (len = 0; CONSP (components);
5878 len++, components = XCDR (components))
5879 *buf++ = XINT (XCAR (components));
5880 }
5881 else
5882 abort ();
5883 *head -= len;
5884 }
5885 }
5886
5887 if (find_composition (end, limit, &start, &end, &prop,
5888 coding->src_object)
5889 && end <= limit)
5890 *stop = start;
5891 else
5892 *stop = limit;
5893 }
5894 return buf;
5895}
5896
5897
e1c23804 5898/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
5899 CODING->src_object (buffer of string), store the data in BUF, set
5900 *STOP to the position where the value of `charset' property changes
5901 (limiting by LIMIT), and return the address of the next element of
5902 BUF.
5903
5904 If the property value is nil, set *STOP to the position where the
5905 property value is non-nil (limiting by LIMIT), and return BUF. */
5906
5907static INLINE int *
5908handle_charset_annotation (pos, limit, coding, buf, stop)
5909 EMACS_INT pos, limit;
5910 struct coding_system *coding;
5911 int *buf;
5912 EMACS_INT *stop;
5913{
5914 Lisp_Object val, next;
5915 int id;
5916
5917 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5918 if (! NILP (val) && CHARSETP (val))
5919 id = XINT (CHARSET_SYMBOL_ID (val));
5920 else
5921 id = -1;
5922 ADD_CHARSET_DATA (buf, 0, 0, id);
5923 next = Fnext_single_property_change (make_number (pos), Qcharset,
5924 coding->src_object,
5925 make_number (limit));
5926 *stop = XINT (next);
5927 return buf;
5928}
5929
5930
df7492f9
KH
5931static void
5932consume_chars (coding)
5933 struct coding_system *coding;
5934{
5935 int *buf = coding->charbuf;
ff0dacd7 5936 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 5937 const unsigned char *src = coding->source + coding->consumed;
ff0dacd7
KH
5938 EMACS_INT pos = coding->src_pos + coding->consumed_char;
5939 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
5940 int multibytep = coding->src_multibyte;
5941 Lisp_Object eol_type;
5942 int c;
ff0dacd7
KH
5943 EMACS_INT stop, stop_composition, stop_charset;
5944 int id;
88993dfd 5945
df7492f9
KH
5946 eol_type = CODING_ID_EOL_TYPE (coding->id);
5947 if (VECTORP (eol_type))
5948 eol_type = Qunix;
88993dfd 5949
df7492f9
KH
5950 /* Note: composition handling is not yet implemented. */
5951 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5952
ff0dacd7
KH
5953 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
5954 stop = stop_composition = pos;
5955 else
5956 stop = stop_composition = end_pos;
5957 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
5958 stop = stop_charset = pos;
df7492f9 5959 else
ff0dacd7 5960 stop_charset = end_pos;
ec6d2bb8 5961
ff0dacd7
KH
5962 /* Compensate for CRLF and annotation. */
5963 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 5964 while (buf < buf_end)
ec6d2bb8 5965 {
df7492f9 5966 if (pos == stop)
ec6d2bb8 5967 {
df7492f9 5968 int *p;
ec6d2bb8 5969
df7492f9
KH
5970 if (pos == end_pos)
5971 break;
ff0dacd7
KH
5972 if (pos == stop_composition)
5973 buf = handle_composition_annotation (pos, end_pos, coding,
5974 buf, &stop_composition);
5975 if (pos == stop_charset)
5976 buf = handle_charset_annotation (pos, end_pos, coding,
5977 buf, &stop_charset);
5978 stop = (stop_composition < stop_charset
5979 ? stop_composition : stop_charset);
df7492f9
KH
5980 }
5981
5982 if (! multibytep)
5983 c = *src++;
5984 else
5985 c = STRING_CHAR_ADVANCE (src);
5986 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5987 c = '\n';
5988 if (! EQ (eol_type, Qunix))
5989 {
5990 if (c == '\n')
5991 {
5992 if (EQ (eol_type, Qdos))
5993 *buf++ = '\r';
5994 else
5995 c = '\r';
ec6d2bb8 5996 }
ec6d2bb8 5997 }
df7492f9
KH
5998 *buf++ = c;
5999 pos++;
ec6d2bb8 6000 }
ec6d2bb8 6001
df7492f9
KH
6002 coding->consumed = src - coding->source;
6003 coding->consumed_char = pos - coding->src_pos;
6004 coding->charbuf_used = buf - coding->charbuf;
6005 coding->chars_at_source = 0;
ec6d2bb8
KH
6006}
6007
ec6d2bb8 6008
df7492f9
KH
6009/* Encode the text at CODING->src_object into CODING->dst_object.
6010 CODING->src_object is a buffer or a string.
6011 CODING->dst_object is a buffer or nil.
6012
6013 If CODING->src_object is a buffer, it must be the current buffer.
6014 In this case, if CODING->src_pos is positive, it is a position of
6015 the source text in the buffer, otherwise. the source text is in the
6016 gap area of the buffer, and coding->src_pos specifies the offset of
6017 the text from GPT (which must be the same as PT). If this is the
6018 same buffer as CODING->dst_object, CODING->src_pos must be
6019 negative and CODING should not have `pre-write-conversion'.
6020
6021 If CODING->src_object is a string, CODING should not have
6022 `pre-write-conversion'.
6023
6024 If CODING->dst_object is a buffer, the encoded data is inserted at
6025 the current point of that buffer.
6026
6027 If CODING->dst_object is nil, the encoded data is placed at the
6028 memory area specified by CODING->destination. */
6029
6030static int
6031encode_coding (coding)
ec6d2bb8 6032 struct coding_system *coding;
ec6d2bb8 6033{
df7492f9 6034 Lisp_Object attrs;
ec6d2bb8 6035
df7492f9 6036 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 6037
df7492f9 6038 if (BUFFERP (coding->dst_object))
ec6d2bb8 6039 {
df7492f9
KH
6040 set_buffer_internal (XBUFFER (coding->dst_object));
6041 coding->dst_multibyte
6042 = ! NILP (current_buffer->enable_multibyte_characters);
6043 }
ec6d2bb8 6044
df7492f9
KH
6045 coding->consumed = coding->consumed_char = 0;
6046 coding->produced = coding->produced_char = 0;
6047 coding->result = CODING_RESULT_SUCCESS;
6048 coding->errors = 0;
ec6d2bb8 6049
df7492f9 6050 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 6051
df7492f9
KH
6052 do {
6053 coding_set_source (coding);
6054 consume_chars (coding);
6055
6056 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
da4109a9
KH
6057 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6058 else if (!NILP (Vstandard_translation_table_for_encode))
6059 translate_chars (coding, Vstandard_translation_table_for_encode);
df7492f9
KH
6060
6061 coding_set_destination (coding);
6062 (*(coding->encoder)) (coding);
6063 } while (coding->consumed_char < coding->src_chars);
6064
6065 if (BUFFERP (coding->dst_object))
6066 insert_from_gap (coding->produced_char, coding->produced);
6067
6068 return (coding->result);
ec6d2bb8
KH
6069}
6070
df7492f9 6071/* Work buffer */
fb88bf2d 6072
df7492f9
KH
6073/* List of currently used working buffer. */
6074Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 6075
df7492f9
KH
6076/* A working buffer used by the top level conversion. */
6077Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 6078
4ed46869 6079
df7492f9
KH
6080/* Return a working buffer that can be freely used by the following
6081 code conversion. MULTIBYTEP specifies the multibyteness of the
6082 buffer. */
b73bfc1c 6083
df7492f9
KH
6084Lisp_Object
6085make_conversion_work_buffer (multibytep)
6086 int multibytep;
6087{
6088 struct buffer *current = current_buffer;
6089 Lisp_Object buf;
d46c5b12 6090
df7492f9 6091 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 6092 {
df7492f9
KH
6093 if (NILP (Vcode_conversion_reused_work_buf))
6094 Vcode_conversion_reused_work_buf
6095 = Fget_buffer_create (build_string (" *code-conversion-work*"));
6096 Vcode_conversion_work_buf_list
6097 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 6098 }
df7492f9 6099 else
d46c5b12 6100 {
c197f191 6101 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 6102 char str[128];
e077cc80 6103
df7492f9
KH
6104 sprintf (str, " *code-conversion-work*<%d>", depth);
6105 Vcode_conversion_work_buf_list
6106 = Fcons (Fget_buffer_create (build_string (str)),
6107 Vcode_conversion_work_buf_list);
d46c5b12 6108 }
d46c5b12 6109
df7492f9
KH
6110 buf = XCAR (Vcode_conversion_work_buf_list);
6111 set_buffer_internal (XBUFFER (buf));
6112 current_buffer->undo_list = Qt;
6113 Ferase_buffer ();
9d123124 6114 Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
df7492f9
KH
6115 set_buffer_internal (current);
6116 return buf;
6117}
d46c5b12 6118
df7492f9 6119static struct coding_system *saved_coding;
d46c5b12 6120
df7492f9
KH
6121Lisp_Object
6122code_conversion_restore (info)
6123 Lisp_Object info;
6124{
c197f191 6125 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 6126 Lisp_Object buf;
d46c5b12 6127
df7492f9 6128 if (depth > 0)
d46c5b12 6129 {
df7492f9
KH
6130 buf = XCAR (Vcode_conversion_work_buf_list);
6131 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6132 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
6133 Fkill_buffer (buf);
6134 }
d46c5b12 6135
c197f191 6136 if (EQ (saved_coding->dst_object, Qt)
df7492f9
KH
6137 && saved_coding->destination)
6138 xfree (saved_coding->destination);
b843d1ae 6139
df7492f9
KH
6140 return save_excursion_restore (info);
6141}
d46c5b12 6142
12410ef1 6143
df7492f9
KH
6144int
6145decode_coding_gap (coding, chars, bytes)
6146 struct coding_system *coding;
6147 EMACS_INT chars, bytes;
6148{
6149 int count = specpdl_ptr - specpdl;
fb88bf2d 6150
df7492f9
KH
6151 saved_coding = coding;
6152 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 6153
df7492f9
KH
6154 coding->src_object = Fcurrent_buffer ();
6155 coding->src_chars = chars;
6156 coding->src_bytes = bytes;
6157 coding->src_pos = -chars;
6158 coding->src_pos_byte = -bytes;
6159 coding->src_multibyte = chars < bytes;
6160 coding->dst_object = coding->src_object;
6161 coding->dst_pos = PT;
6162 coding->dst_pos_byte = PT_BYTE;
71c81426 6163 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6164 coding->mode |= CODING_MODE_LAST_BLOCK;
4956c225 6165
df7492f9
KH
6166 if (CODING_REQUIRE_DETECTION (coding))
6167 detect_coding (coding);
6168
6169 decode_coding (coding);
d46c5b12 6170
df7492f9
KH
6171 unbind_to (count, Qnil);
6172 return coding->result;
6173}
d46c5b12 6174
df7492f9
KH
6175int
6176encode_coding_gap (coding, chars, bytes)
6177 struct coding_system *coding;
6178 EMACS_INT chars, bytes;
6179{
6180 int count = specpdl_ptr - specpdl;
6181 Lisp_Object buffer;
d46c5b12 6182
df7492f9
KH
6183 saved_coding = coding;
6184 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 6185
df7492f9
KH
6186 buffer = Fcurrent_buffer ();
6187 coding->src_object = buffer;
6188 coding->src_chars = chars;
6189 coding->src_bytes = bytes;
6190 coding->src_pos = -chars;
6191 coding->src_pos_byte = -bytes;
6192 coding->src_multibyte = chars < bytes;
6193 coding->dst_object = coding->src_object;
6194 coding->dst_pos = PT;
6195 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 6196
df7492f9 6197 encode_coding (coding);
f2558efd 6198
df7492f9
KH
6199 unbind_to (count, Qnil);
6200 return coding->result;
6201}
b73bfc1c 6202
d46c5b12 6203
df7492f9
KH
6204/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6205 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 6206
df7492f9 6207 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 6208
df7492f9
KH
6209 If it is a buffer, the text is at point of the buffer. FROM and TO
6210 are positions in the buffer.
ec6d2bb8 6211
df7492f9
KH
6212 If it is a string, the text is at the beginning of the string.
6213 FROM and TO are indices to the string.
ec6d2bb8 6214
df7492f9
KH
6215 If it is nil, the text is at coding->source. FROM and TO are
6216 indices to coding->source.
ec6d2bb8 6217
df7492f9 6218 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 6219
df7492f9
KH
6220 If it is a buffer, the decoded text is inserted at point of the
6221 buffer. If the buffer is the same as SRC_OBJECT, the source text
6222 is deleted.
d46c5b12 6223
df7492f9
KH
6224 If it is Qt, a string is made from the decoded text, and
6225 set in CODING->dst_object.
d46c5b12 6226
df7492f9 6227 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6228 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6229 CODING->destination by xmalloc. If the decoded text is longer than
6230 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6231 */
d46c5b12 6232
df7492f9
KH
6233void
6234decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6235 dst_object)
6236 struct coding_system *coding;
6237 Lisp_Object src_object;
6238 EMACS_INT from, from_byte, to, to_byte;
6239 Lisp_Object dst_object;
6240{
6241 int count = specpdl_ptr - specpdl;
6242 unsigned char *destination;
6243 EMACS_INT dst_bytes;
6244 EMACS_INT chars = to - from;
6245 EMACS_INT bytes = to_byte - from_byte;
6246 Lisp_Object attrs;
d46c5b12 6247
df7492f9
KH
6248 saved_coding = coding;
6249 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 6250
df7492f9
KH
6251 if (NILP (dst_object))
6252 {
6253 destination = coding->destination;
6254 dst_bytes = coding->dst_bytes;
6255 }
93dec019 6256
df7492f9
KH
6257 coding->src_object = src_object;
6258 coding->src_chars = chars;
6259 coding->src_bytes = bytes;
6260 coding->src_multibyte = chars < bytes;
70ad9fc4 6261
df7492f9
KH
6262 if (STRINGP (src_object))
6263 {
6264 coding->src_pos = from;
6265 coding->src_pos_byte = from_byte;
6266 }
6267 else if (BUFFERP (src_object))
6268 {
6269 set_buffer_internal (XBUFFER (src_object));
6270 if (from != GPT)
6271 move_gap_both (from, from_byte);
6272 if (EQ (src_object, dst_object))
fb88bf2d 6273 {
df7492f9
KH
6274 TEMP_SET_PT_BOTH (from, from_byte);
6275 del_range_both (from, from_byte, to, to_byte, 1);
6276 coding->src_pos = -chars;
6277 coding->src_pos_byte = -bytes;
fb88bf2d 6278 }
df7492f9 6279 else
fb88bf2d 6280 {
df7492f9
KH
6281 coding->src_pos = from;
6282 coding->src_pos_byte = from_byte;
fb88bf2d 6283 }
d46c5b12 6284 }
fb88bf2d 6285
df7492f9
KH
6286 if (CODING_REQUIRE_DETECTION (coding))
6287 detect_coding (coding);
6288 attrs = CODING_ID_ATTRS (coding->id);
6289
2cb26057
KH
6290 if (EQ (dst_object, Qt)
6291 || (! NILP (CODING_ATTR_POST_READ (attrs))
6292 && NILP (dst_object)))
b73bfc1c 6293 {
df7492f9
KH
6294 coding->dst_object = make_conversion_work_buffer (1);
6295 coding->dst_pos = BEG;
6296 coding->dst_pos_byte = BEG_BYTE;
6297 coding->dst_multibyte = 1;
b73bfc1c 6298 }
df7492f9 6299 else if (BUFFERP (dst_object))
12410ef1 6300 {
df7492f9
KH
6301 coding->dst_object = dst_object;
6302 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6303 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6304 coding->dst_multibyte
6305 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 6306 }
72d1a715 6307 else
df7492f9
KH
6308 {
6309 coding->dst_object = Qnil;
6310 coding->dst_multibyte = 1;
6311 }
6312
6313 decode_coding (coding);
4ed46869 6314
df7492f9
KH
6315 if (BUFFERP (coding->dst_object))
6316 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6317
df7492f9 6318 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6319 {
df7492f9
KH
6320 struct gcpro gcpro1, gcpro2;
6321 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6322 Lisp_Object val;
4ed46869 6323
c0cc7f7f 6324 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6325 GCPRO2 (coding->src_object, coding->dst_object);
6326 val = call1 (CODING_ATTR_POST_READ (attrs),
6327 make_number (coding->produced_char));
6328 UNGCPRO;
6329 CHECK_NATNUM (val);
6330 coding->produced_char += Z - prev_Z;
6331 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6332 }
4ed46869 6333
df7492f9 6334 if (EQ (dst_object, Qt))
ec6d2bb8 6335 {
df7492f9
KH
6336 coding->dst_object = Fbuffer_string ();
6337 }
6338 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6339 {
6340 set_buffer_internal (XBUFFER (coding->dst_object));
6341 if (dst_bytes < coding->produced)
6342 {
6343 destination
6344 = (unsigned char *) xrealloc (destination, coding->produced);
6345 if (! destination)
6346 {
6347 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6348 unbind_to (count, Qnil);
6349 return;
6350 }
6351 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6352 move_gap_both (BEGV, BEGV_BYTE);
6353 bcopy (BEGV_ADDR, destination, coding->produced);
6354 coding->destination = destination;
6355 }
ec6d2bb8 6356 }
2b4f9037 6357
df7492f9 6358 unbind_to (count, Qnil);
d46c5b12
KH
6359}
6360
df7492f9
KH
6361
6362void
6363encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6364 dst_object)
b73bfc1c 6365 struct coding_system *coding;
df7492f9
KH
6366 Lisp_Object src_object;
6367 EMACS_INT from, from_byte, to, to_byte;
6368 Lisp_Object dst_object;
b73bfc1c
KH
6369{
6370 int count = specpdl_ptr - specpdl;
df7492f9
KH
6371 EMACS_INT chars = to - from;
6372 EMACS_INT bytes = to_byte - from_byte;
6373 Lisp_Object attrs;
6374
6375 saved_coding = coding;
6376 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6377
6378 coding->src_object = src_object;
6379 coding->src_chars = chars;
6380 coding->src_bytes = bytes;
6381 coding->src_multibyte = chars < bytes;
6382
6383 attrs = CODING_ID_ATTRS (coding->id);
6384
6385 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6386 {
df7492f9
KH
6387 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6388 set_buffer_internal (XBUFFER (coding->src_object));
6389 if (STRINGP (src_object))
6390 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6391 else if (BUFFERP (src_object))
6392 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6393 else
6394 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6395
6396 if (EQ (src_object, dst_object))
6397 {
6398 set_buffer_internal (XBUFFER (src_object));
6399 del_range_both (from, from_byte, to, to_byte, 1);
6400 set_buffer_internal (XBUFFER (coding->src_object));
6401 }
6402
ac87bbef
KH
6403 call2 (CODING_ATTR_PRE_WRITE (attrs),
6404 make_number (BEG), make_number (Z));
6405 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6406 if (BEG != GPT)
6407 move_gap_both (BEG, BEG_BYTE);
6408 coding->src_chars = Z - BEG;
6409 coding->src_bytes = Z_BYTE - BEG_BYTE;
6410 coding->src_pos = BEG;
6411 coding->src_pos_byte = BEG_BYTE;
6412 coding->src_multibyte = Z < Z_BYTE;
6413 }
6414 else if (STRINGP (src_object))
6415 {
6416 coding->src_pos = from;
6417 coding->src_pos_byte = from_byte;
6418 }
6419 else if (BUFFERP (src_object))
d46c5b12 6420 {
df7492f9 6421 set_buffer_internal (XBUFFER (src_object));
df7492f9 6422 if (EQ (src_object, dst_object))
d46c5b12 6423 {
ff0dacd7
KH
6424 coding->src_object = del_range_1 (from, to, 1, 1);
6425 coding->src_pos = 0;
6426 coding->src_pos_byte = 0;
d46c5b12 6427 }
df7492f9 6428 else
d46c5b12 6429 {
ff0dacd7
KH
6430 if (from < GPT && to >= GPT)
6431 move_gap_both (from, from_byte);
df7492f9
KH
6432 coding->src_pos = from;
6433 coding->src_pos_byte = from_byte;
d46c5b12
KH
6434 }
6435 }
4ed46869 6436
df7492f9 6437 if (BUFFERP (dst_object))
d46c5b12 6438 {
df7492f9 6439 coding->dst_object = dst_object;
28f67a95
KH
6440 if (EQ (src_object, dst_object))
6441 {
6442 coding->dst_pos = from;
6443 coding->dst_pos_byte = from_byte;
6444 }
6445 else
6446 {
6447 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6448 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6449 }
df7492f9
KH
6450 coding->dst_multibyte
6451 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6452 }
df7492f9 6453 else if (EQ (dst_object, Qt))
4956c225 6454 {
df7492f9 6455 coding->dst_object = Qnil;
df7492f9 6456 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6457 if (coding->dst_bytes == 0)
6458 coding->dst_bytes = 1;
6459 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6460 coding->dst_multibyte = 0;
4956c225 6461 }
df7492f9 6462 else
78108bcd 6463 {
df7492f9
KH
6464 coding->dst_object = Qnil;
6465 coding->dst_multibyte = 0;
78108bcd
KH
6466 }
6467
df7492f9 6468 encode_coding (coding);
4ed46869 6469
df7492f9 6470 if (EQ (dst_object, Qt))
4ed46869 6471 {
df7492f9
KH
6472 if (BUFFERP (coding->dst_object))
6473 coding->dst_object = Fbuffer_string ();
6474 else
73be902c 6475 {
df7492f9
KH
6476 coding->dst_object
6477 = make_unibyte_string ((char *) coding->destination,
6478 coding->produced);
6479 xfree (coding->destination);
73be902c 6480 }
4ed46869 6481 }
d46c5b12 6482
df7492f9 6483 unbind_to (count, Qnil);
b73bfc1c
KH
6484}
6485
df7492f9 6486
b73bfc1c 6487Lisp_Object
df7492f9 6488preferred_coding_system ()
b73bfc1c 6489{
df7492f9 6490 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6491
df7492f9 6492 return CODING_ID_NAME (id);
4ed46869
KH
6493}
6494
6495\f
6496#ifdef emacs
1397dc18 6497/*** 11. Emacs Lisp library functions ***/
4ed46869 6498
4ed46869 6499DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6500 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6501See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6502about coding-system objects. */)
6503 (obj)
4ed46869
KH
6504 Lisp_Object obj;
6505{
df7492f9 6506 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6507}
6508
9d991de8
RS
6509DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6510 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6511 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6512 (prompt)
4ed46869
KH
6513 Lisp_Object prompt;
6514{
e0e989f6 6515 Lisp_Object val;
9d991de8
RS
6516 do
6517 {
4608c386
KH
6518 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6519 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6520 }
6521 while (XSTRING (val)->size == 0);
e0e989f6 6522 return (Fintern (val, Qnil));
4ed46869
KH
6523}
6524
9b787f3e 6525DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6526 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6527If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6528 (prompt, default_coding_system)
9b787f3e 6529 Lisp_Object prompt, default_coding_system;
4ed46869 6530{
f44d27ce 6531 Lisp_Object val;
9b787f3e
RS
6532 if (SYMBOLP (default_coding_system))
6533 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6534 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6535 Qt, Qnil, Qcoding_system_history,
6536 default_coding_system, Qnil);
e0e989f6 6537 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6538}
6539
6540DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6541 1, 1, 0,
48b0f3ae 6542 doc: /* Check validity of CODING-SYSTEM.
b054002f 6543If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
df7492f9 6544 (coding_system)
4ed46869
KH
6545 Lisp_Object coding_system;
6546{
b7826503 6547 CHECK_SYMBOL (coding_system);
4ed46869
KH
6548 if (!NILP (Fcoding_system_p (coding_system)))
6549 return coding_system;
6550 while (1)
02ba4723 6551 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6552}
df7492f9 6553
3a73fa5d 6554\f
89528eb3
KH
6555/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6556 HIGHEST is nonzero, return the coding system of the highest
6557 priority among the detected coding systems. Otherwise return a
6558 list of detected coding systems sorted by their priorities. If
6559 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6560 multibyte form but contains only ASCII and eight-bit chars.
6561 Otherwise, the bytes are raw bytes.
6562
6563 CODING-SYSTEM controls the detection as below:
6564
6565 If it is nil, detect both text-format and eol-format. If the
6566 text-format part of CODING-SYSTEM is already specified
6567 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6568 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6569 detect only text-format. */
6570
d46c5b12 6571Lisp_Object
df7492f9 6572detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6573 unsigned char *src;
6574 int src_bytes, highest;
0a28aafb 6575 int multibytep;
df7492f9 6576 Lisp_Object coding_system;
4ed46869 6577{
df7492f9
KH
6578 unsigned char *src_end = src + src_bytes;
6579 int mask = CATEGORY_MASK_ANY;
df7492f9
KH
6580 Lisp_Object attrs, eol_type;
6581 Lisp_Object val;
6582 struct coding_system coding;
89528eb3 6583 int id;
ff0dacd7 6584 struct coding_detection_info detect_info;
df7492f9
KH
6585
6586 if (NILP (coding_system))
6587 coding_system = Qundecided;
6588 setup_coding_system (coding_system, &coding);
6589 attrs = CODING_ID_ATTRS (coding.id);
6590 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 6591 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 6592
df7492f9
KH
6593 coding.source = src;
6594 coding.src_bytes = src_bytes;
6595 coding.src_multibyte = multibytep;
6596 coding.consumed = 0;
89528eb3 6597 coding.mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6598
ff0dacd7
KH
6599 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6600
89528eb3
KH
6601 /* At first, detect text-format if necessary. */
6602 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
4ed46869 6603 {
ff0dacd7
KH
6604 enum coding_category category;
6605 struct coding_system *this;
6606 int c, i;
6607
df7492f9 6608 for (; src < src_end; src++)
4ed46869 6609 {
df7492f9 6610 c = *src;
89528eb3
KH
6611 if (c & 0x80
6612 || (c < 0x20 && (c == ISO_CODE_ESC
6613 || c == ISO_CODE_SI
584948ac 6614 || c == ISO_CODE_SO)))
d46c5b12 6615 break;
4ed46869 6616 }
df7492f9
KH
6617 coding.head_ascii = src - coding.source;
6618
6619 if (src < src_end)
6620 for (i = 0; i < coding_category_raw_text; i++)
6621 {
ff0dacd7
KH
6622 category = coding_priorities[i];
6623 this = coding_categories + category;
df7492f9 6624
df7492f9
KH
6625 if (this->id < 0)
6626 {
6627 /* No coding system of this category is defined. */
ff0dacd7 6628 detect_info.rejected |= (1 << category);
df7492f9 6629 }
ff0dacd7 6630 else if (category >= coding_category_raw_text)
89528eb3 6631 continue;
ff0dacd7
KH
6632 else if (detect_info.checked & (1 << category))
6633 {
6634 if (highest
6635 && (detect_info.found & (1 << category)))
6636 break;
6637 }
df7492f9
KH
6638 else
6639 {
ff0dacd7 6640 if ((*(this->detector)) (&coding, &detect_info)
89528eb3 6641 && highest
ff0dacd7
KH
6642 && (detect_info.found & (1 << category)))
6643 break;
df7492f9
KH
6644 }
6645 }
4ed46869 6646
ff0dacd7
KH
6647
6648 if (detect_info.rejected == CATEGORY_MASK_ANY)
89528eb3 6649 {
ff0dacd7 6650 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
6651 id = coding_categories[coding_category_raw_text].id;
6652 val = Fcons (make_number (id), Qnil);
6653 }
ff0dacd7 6654 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 6655 {
ff0dacd7 6656 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
6657 id = coding_categories[coding_category_undecided].id;
6658 val = Fcons (make_number (id), Qnil);
6659 }
6660 else if (highest)
6661 {
ff0dacd7
KH
6662 if (detect_info.found)
6663 {
6664 detect_info.found = 1 << category;
6665 val = Fcons (make_number (this->id), Qnil);
6666 }
6667 else
6668 for (i = 0; i < coding_category_raw_text; i++)
6669 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6670 {
6671 detect_info.found = 1 << coding_priorities[i];
6672 id = coding_categories[coding_priorities[i]].id;
6673 val = Fcons (make_number (id), Qnil);
6674 break;
6675 }
6676 }
89528eb3
KH
6677 else
6678 {
ff0dacd7
KH
6679 int mask = detect_info.rejected | detect_info.found;
6680 int found = 0;
89528eb3 6681 val = Qnil;
ff0dacd7 6682
89528eb3 6683 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
6684 {
6685 category = coding_priorities[i];
6686 if (! (mask & (1 << category)))
6687 {
6688 found |= 1 << category;
6689 id = coding_categories[category].id;
6690 val = Fcons (make_number (id), val);
6691 }
6692 }
6693 for (i = coding_category_raw_text - 1; i >= 0; i--)
6694 {
6695 category = coding_priorities[i];
6696 if (detect_info.found & (1 << category))
6697 {
6698 id = coding_categories[category].id;
6699 val = Fcons (make_number (id), val);
6700 }
6701 }
6702 detect_info.found |= found;
89528eb3
KH
6703 }
6704 }
df7492f9
KH
6705 else
6706 {
ff0dacd7 6707 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 6708 val = Fcons (make_number (coding.id), Qnil);
4ed46869 6709 }
df7492f9 6710
89528eb3 6711 /* Then, detect eol-format if necessary. */
df7492f9 6712 {
89528eb3 6713 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
6714 Lisp_Object tail;
6715
89528eb3
KH
6716 if (VECTORP (eol_type))
6717 {
ff0dacd7 6718 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
6719 normal_eol = detect_eol (coding.source, src_bytes,
6720 coding_category_raw_text);
ff0dacd7
KH
6721 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6722 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
6723 utf_16_be_eol = detect_eol (coding.source, src_bytes,
6724 coding_category_utf_16_be);
ff0dacd7
KH
6725 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6726 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
6727 utf_16_le_eol = detect_eol (coding.source, src_bytes,
6728 coding_category_utf_16_le);
6729 }
6730 else
6731 {
6732 if (EQ (eol_type, Qunix))
6733 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6734 else if (EQ (eol_type, Qdos))
6735 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6736 else
6737 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6738 }
6739
df7492f9
KH
6740 for (tail = val; CONSP (tail); tail = XCDR (tail))
6741 {
89528eb3 6742 enum coding_category category;
df7492f9 6743 int this_eol;
89528eb3
KH
6744
6745 id = XINT (XCAR (tail));
6746 attrs = CODING_ID_ATTRS (id);
6747 category = XINT (CODING_ATTR_CATEGORY (attrs));
6748 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
6749 if (VECTORP (eol_type))
6750 {
89528eb3
KH
6751 if (category == coding_category_utf_16_be
6752 || category == coding_category_utf_16_be_nosig)
6753 this_eol = utf_16_be_eol;
6754 else if (category == coding_category_utf_16_le
6755 || category == coding_category_utf_16_le_nosig)
6756 this_eol = utf_16_le_eol;
df7492f9 6757 else
89528eb3
KH
6758 this_eol = normal_eol;
6759
df7492f9
KH
6760 if (this_eol == EOL_SEEN_LF)
6761 XSETCAR (tail, AREF (eol_type, 0));
6762 else if (this_eol == EOL_SEEN_CRLF)
6763 XSETCAR (tail, AREF (eol_type, 1));
6764 else if (this_eol == EOL_SEEN_CR)
6765 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
6766 else
6767 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 6768 }
89528eb3
KH
6769 else
6770 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
6771 }
6772 }
6773
03699b14 6774 return (highest ? XCAR (val) : val);
93dec019 6775}
4ed46869 6776
df7492f9 6777
d46c5b12
KH
6778DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6779 2, 3, 0,
48b0f3ae
PJ
6780 doc: /* Detect coding system of the text in the region between START and END.
6781Return a list of possible coding systems ordered by priority.
6782
6783If only ASCII characters are found, it returns a list of single element
6784`undecided' or its subsidiary coding system according to a detected
6785end-of-line format.
6786
6787If optional argument HIGHEST is non-nil, return the coding system of
6788highest priority. */)
6789 (start, end, highest)
d46c5b12
KH
6790 Lisp_Object start, end, highest;
6791{
6792 int from, to;
6793 int from_byte, to_byte;
6289dd10 6794
b7826503
PJ
6795 CHECK_NUMBER_COERCE_MARKER (start);
6796 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6797
d46c5b12
KH
6798 validate_region (&start, &end);
6799 from = XINT (start), to = XINT (end);
6800 from_byte = CHAR_TO_BYTE (from);
6801 to_byte = CHAR_TO_BYTE (to);
6289dd10 6802
d46c5b12
KH
6803 if (from < GPT && to >= GPT)
6804 move_gap_both (to, to_byte);
c210f766 6805
d46c5b12 6806 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6807 to_byte - from_byte,
0a28aafb
KH
6808 !NILP (highest),
6809 !NILP (current_buffer
df7492f9
KH
6810 ->enable_multibyte_characters),
6811 Qnil);
d46c5b12 6812}
6289dd10 6813
d46c5b12
KH
6814DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6815 1, 2, 0,
48b0f3ae
PJ
6816 doc: /* Detect coding system of the text in STRING.
6817Return a list of possible coding systems ordered by priority.
6818
6819If only ASCII characters are found, it returns a list of single element
6820`undecided' or its subsidiary coding system according to a detected
6821end-of-line format.
6822
6823If optional argument HIGHEST is non-nil, return the coding system of
6824highest priority. */)
6825 (string, highest)
d46c5b12
KH
6826 Lisp_Object string, highest;
6827{
b7826503 6828 CHECK_STRING (string);
4ed46869 6829
d46c5b12 6830 return detect_coding_system (XSTRING (string)->data,
df7492f9 6831 STRING_BYTES (XSTRING (string)),
0a28aafb 6832 !NILP (highest),
df7492f9
KH
6833 STRING_MULTIBYTE (string),
6834 Qnil);
4ed46869
KH
6835}
6836
05e6f5dc 6837
df7492f9
KH
6838static INLINE int
6839char_encodable_p (c, attrs)
6840 int c;
6841 Lisp_Object attrs;
05e6f5dc 6842{
df7492f9 6843 Lisp_Object tail;
df7492f9 6844 struct charset *charset;
05e6f5dc 6845
df7492f9
KH
6846 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6847 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6848 {
df7492f9
KH
6849 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6850 if (CHAR_CHARSET_P (c, charset))
6851 break;
05e6f5dc 6852 }
df7492f9 6853 return (! NILP (tail));
05e6f5dc
KH
6854}
6855
6856
df7492f9
KH
6857/* Return a list of coding systems that safely encode the text between
6858 START and END. If EXCLUDE is non-nil, it is a list of coding
6859 systems not to check. The returned list doesn't contain any such
48468dac 6860 coding systems. In any case, if the text contains only ASCII or is
df7492f9
KH
6861 unibyte, return t. */
6862
6863DEFUN ("find-coding-systems-region-internal",
6864 Ffind_coding_systems_region_internal,
6865 Sfind_coding_systems_region_internal, 2, 3, 0,
6866 doc: /* Internal use only. */)
6867 (start, end, exclude)
6868 Lisp_Object start, end, exclude;
6869{
6870 Lisp_Object coding_attrs_list, safe_codings;
6871 EMACS_INT start_byte, end_byte;
7c78e542 6872 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
6873 int c;
6874 Lisp_Object tail, elt;
05e6f5dc 6875
df7492f9
KH
6876 if (STRINGP (start))
6877 {
6878 if (!STRING_MULTIBYTE (start)
48468dac 6879 || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
df7492f9
KH
6880 return Qt;
6881 start_byte = 0;
6882 end_byte = STRING_BYTES (XSTRING (start));
6883 }
6884 else
6885 {
6886 CHECK_NUMBER_COERCE_MARKER (start);
6887 CHECK_NUMBER_COERCE_MARKER (end);
6888 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6889 args_out_of_range (start, end);
6890 if (NILP (current_buffer->enable_multibyte_characters))
6891 return Qt;
6892 start_byte = CHAR_TO_BYTE (XINT (start));
6893 end_byte = CHAR_TO_BYTE (XINT (end));
6894 if (XINT (end) - XINT (start) == end_byte - start_byte)
6895 return Qt;
05e6f5dc 6896
e1c23804 6897 if (XINT (start) < GPT && XINT (end) > GPT)
df7492f9 6898 {
e1c23804
DL
6899 if ((GPT - XINT (start)) < (XINT (end) - GPT))
6900 move_gap_both (XINT (start), start_byte);
df7492f9 6901 else
e1c23804 6902 move_gap_both (XINT (end), end_byte);
df7492f9
KH
6903 }
6904 }
05e6f5dc 6905
df7492f9
KH
6906 coding_attrs_list = Qnil;
6907 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6908 if (NILP (exclude)
6909 || NILP (Fmemq (XCAR (tail), exclude)))
6910 {
6911 Lisp_Object attrs;
05e6f5dc 6912
df7492f9
KH
6913 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6914 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6915 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6916 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6917 }
6918
6919 if (STRINGP (start))
6920 p = pbeg = XSTRING (start)->data;
6921 else
6922 p = pbeg = BYTE_POS_ADDR (start_byte);
6923 pend = p + (end_byte - start_byte);
6924
6925 while (p < pend && ASCII_BYTE_P (*p)) p++;
6926 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6927
6928 while (p < pend)
6929 {
df7492f9
KH
6930 if (ASCII_BYTE_P (*p))
6931 p++;
6932 else
6933 {
6934 c = STRING_CHAR_ADVANCE (p);
6935
6936 charset_map_loaded = 0;
6937 for (tail = coding_attrs_list; CONSP (tail);)
6938 {
6939 elt = XCAR (tail);
6940 if (NILP (elt))
6941 tail = XCDR (tail);
6942 else if (char_encodable_p (c, elt))
6943 tail = XCDR (tail);
6944 else if (CONSP (XCDR (tail)))
6945 {
6946 XSETCAR (tail, XCAR (XCDR (tail)));
6947 XSETCDR (tail, XCDR (XCDR (tail)));
6948 }
6949 else
6950 {
6951 XSETCAR (tail, Qnil);
6952 tail = XCDR (tail);
6953 }
6954 }
6955 if (charset_map_loaded)
6956 {
6957 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6958
df7492f9
KH
6959 if (STRINGP (start))
6960 pbeg = XSTRING (start)->data;
6961 else
6962 pbeg = BYTE_POS_ADDR (start_byte);
6963 p = pbeg + p_offset;
6964 pend = pbeg + pend_offset;
6965 }
6966 }
05e6f5dc 6967 }
df7492f9
KH
6968
6969 safe_codings = Qnil;
6970 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6971 if (! NILP (XCAR (tail)))
6972 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6973
05e6f5dc
KH
6974 return safe_codings;
6975}
6976
6977
df7492f9
KH
6978DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6979 Scheck_coding_systems_region, 3, 3, 0,
6980 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6981
df7492f9
KH
6982START and END are buffer positions specifying the region.
6983CODING-SYSTEM-LIST is a list of coding systems to check.
6984
6985The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6986CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6987whole region, POS0, POS1, ... are buffer positions where non-encodable
6988characters are found.
6989
6990If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6991value is nil.
6992
6993START may be a string. In that case, check if the string is
6994encodable, and the value contains indices to the string instead of
6995buffer positions. END is ignored. */)
6996 (start, end, coding_system_list)
6997 Lisp_Object start, end, coding_system_list;
05e6f5dc 6998{
df7492f9
KH
6999 Lisp_Object list;
7000 EMACS_INT start_byte, end_byte;
7001 int pos;
7c78e542 7002 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7003 int c;
7004 Lisp_Object tail, elt;
05e6f5dc
KH
7005
7006 if (STRINGP (start))
7007 {
df7492f9
KH
7008 if (!STRING_MULTIBYTE (start)
7009 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7010 return Qnil;
7011 start_byte = 0;
7012 end_byte = STRING_BYTES (XSTRING (start));
7013 pos = 0;
05e6f5dc
KH
7014 }
7015 else
7016 {
b7826503
PJ
7017 CHECK_NUMBER_COERCE_MARKER (start);
7018 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7019 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7020 args_out_of_range (start, end);
7021 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7022 return Qnil;
7023 start_byte = CHAR_TO_BYTE (XINT (start));
7024 end_byte = CHAR_TO_BYTE (XINT (end));
7025 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7026 return Qt;
df7492f9 7027
e1c23804 7028 if (XINT (start) < GPT && XINT (end) > GPT)
df7492f9 7029 {
e1c23804
DL
7030 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7031 move_gap_both (XINT (start), start_byte);
df7492f9 7032 else
e1c23804 7033 move_gap_both (XINT (end), end_byte);
df7492f9 7034 }
e1c23804 7035 pos = XINT (start);
df7492f9
KH
7036 }
7037
7038 list = Qnil;
7039 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7040 {
7041 elt = XCAR (tail);
7042 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7043 Qnil)),
7044 list);
05e6f5dc
KH
7045 }
7046
df7492f9
KH
7047 if (STRINGP (start))
7048 p = pbeg = XSTRING (start)->data;
7049 else
7050 p = pbeg = BYTE_POS_ADDR (start_byte);
7051 pend = p + (end_byte - start_byte);
7052
7053 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7054 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7055
7056 while (p < pend)
05e6f5dc 7057 {
df7492f9
KH
7058 if (ASCII_BYTE_P (*p))
7059 p++;
7060 else
05e6f5dc 7061 {
df7492f9
KH
7062 c = STRING_CHAR_ADVANCE (p);
7063
7064 charset_map_loaded = 0;
7065 for (tail = list; CONSP (tail); tail = XCDR (tail))
7066 {
7067 elt = XCDR (XCAR (tail));
7068 if (! char_encodable_p (c, XCAR (elt)))
7069 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7070 }
7071 if (charset_map_loaded)
7072 {
7073 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7074
7075 if (STRINGP (start))
7076 pbeg = XSTRING (start)->data;
7077 else
7078 pbeg = BYTE_POS_ADDR (start_byte);
7079 p = pbeg + p_offset;
7080 pend = pbeg + pend_offset;
7081 }
05e6f5dc 7082 }
df7492f9 7083 pos++;
05e6f5dc
KH
7084 }
7085
df7492f9
KH
7086 tail = list;
7087 list = Qnil;
7088 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 7089 {
df7492f9
KH
7090 elt = XCAR (tail);
7091 if (CONSP (XCDR (XCDR (elt))))
7092 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7093 list);
05e6f5dc 7094 }
df7492f9
KH
7095
7096 return list;
05e6f5dc
KH
7097}
7098
7099
df7492f9 7100
4031e2bf 7101Lisp_Object
df7492f9
KH
7102code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7103 Lisp_Object start, end, coding_system, dst_object;
7104 int encodep, norecord;
3a73fa5d
RS
7105{
7106 struct coding_system coding;
df7492f9
KH
7107 EMACS_INT from, from_byte, to, to_byte;
7108 Lisp_Object src_object;
3a73fa5d 7109
b7826503
PJ
7110 CHECK_NUMBER_COERCE_MARKER (start);
7111 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7112 if (NILP (coding_system))
7113 coding_system = Qno_conversion;
7114 else
7115 CHECK_CODING_SYSTEM (coding_system);
7116 src_object = Fcurrent_buffer ();
7117 if (NILP (dst_object))
7118 dst_object = src_object;
7119 else if (! EQ (dst_object, Qt))
7120 CHECK_BUFFER (dst_object);
3a73fa5d 7121
d46c5b12
KH
7122 validate_region (&start, &end);
7123 from = XFASTINT (start);
df7492f9 7124 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7125 to = XFASTINT (end);
df7492f9 7126 to_byte = CHAR_TO_BYTE (to);
d46c5b12 7127
df7492f9
KH
7128 setup_coding_system (coding_system, &coding);
7129 coding.mode |= CODING_MODE_LAST_BLOCK;
7130
7131 if (encodep)
7132 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7133 dst_object);
7134 else
7135 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7136 dst_object);
7137 if (! norecord)
7138 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 7139
df7492f9
KH
7140 if (coding.result != CODING_RESULT_SUCCESS)
7141 error ("Code conversion error: %d", coding.result);
3a73fa5d 7142
df7492f9
KH
7143 return (BUFFERP (dst_object)
7144 ? make_number (coding.produced_char)
7145 : coding.dst_object);
4031e2bf
KH
7146}
7147
df7492f9 7148
4031e2bf 7149DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7150 3, 4, "r\nzCoding system: ",
48b0f3ae 7151 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7152When called from a program, takes four arguments:
7153 START, END, CODING-SYSTEM, and DESTINATION.
7154START and END are buffer positions.
7155
7156Optional 4th arguments DESTINATION specifies where the decoded text goes.
7157If nil, the region between START and END is replace by the decoded text.
7158If buffer, the decoded text is inserted in the buffer.
7159If t, the decoded text is returned.
7160
48b0f3ae
PJ
7161This function sets `last-coding-system-used' to the precise coding system
7162used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7163not fully specified.)
7164It returns the length of the decoded text. */)
df7492f9
KH
7165 (start, end, coding_system, destination)
7166 Lisp_Object start, end, coding_system, destination;
4031e2bf 7167{
df7492f9 7168 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
7169}
7170
7171DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7172 3, 4, "r\nzCoding system: ",
7173 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7174When called from a program, takes three arguments:
7175START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
7176
7177Optional 4th arguments DESTINATION specifies where the encoded text goes.
7178If nil, the region between START and END is replace by the encoded text.
7179If buffer, the encoded text is inserted in the buffer.
7180If t, the encoded text is returned.
7181
48b0f3ae
PJ
7182This function sets `last-coding-system-used' to the precise coding system
7183used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7184not fully specified.)
7185It returns the length of the encoded text. */)
df7492f9
KH
7186 (start, end, coding_system, destination)
7187 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7188{
df7492f9 7189 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 7190}
3a73fa5d 7191
4031e2bf 7192Lisp_Object
df7492f9
KH
7193code_convert_string (string, coding_system, dst_object,
7194 encodep, nocopy, norecord)
7195 Lisp_Object string, coding_system, dst_object;
7196 int encodep, nocopy, norecord;
4031e2bf
KH
7197{
7198 struct coding_system coding;
df7492f9 7199 EMACS_INT chars, bytes;
3a73fa5d 7200
b7826503 7201 CHECK_STRING (string);
d46c5b12 7202 if (NILP (coding_system))
df7492f9
KH
7203 {
7204 if (! norecord)
7205 Vlast_coding_system_used = Qno_conversion;
7206 if (NILP (dst_object))
7207 return (nocopy ? Fcopy_sequence (string) : string);
7208 }
4ed46869 7209
df7492f9
KH
7210 if (NILP (coding_system))
7211 coding_system = Qno_conversion;
7212 else
7213 CHECK_CODING_SYSTEM (coding_system);
7214 if (NILP (dst_object))
7215 dst_object = Qt;
7216 else if (! EQ (dst_object, Qt))
7217 CHECK_BUFFER (dst_object);
5f1cd180 7218
df7492f9 7219 setup_coding_system (coding_system, &coding);
d46c5b12 7220 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
7221 chars = XSTRING (string)->size;
7222 bytes = STRING_BYTES (XSTRING (string));
7223 if (encodep)
7224 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7225 else
7226 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7227 if (! norecord)
7228 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7229
df7492f9
KH
7230 if (coding.result != CODING_RESULT_SUCCESS)
7231 error ("Code conversion error: %d", coding.result);
4ed46869 7232
df7492f9
KH
7233 return (BUFFERP (dst_object)
7234 ? make_number (coding.produced_char)
7235 : coding.dst_object);
4ed46869
KH
7236}
7237
4031e2bf 7238
ecec61c1 7239/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
7240 Do not set Vlast_coding_system_used.
7241
7242 This function is called only from macros DECODE_FILE and
7243 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
7244
7245Lisp_Object
7246code_convert_string_norecord (string, coding_system, encodep)
7247 Lisp_Object string, coding_system;
7248 int encodep;
7249{
0be8721c 7250 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 7251}
ecec61c1 7252
ecec61c1 7253
df7492f9
KH
7254DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7255 2, 4, 0,
7256 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7257
7258Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7259if the decoding operation is trivial.
ecec61c1 7260
df7492f9 7261Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 7262inserted in BUFFER instead of returned as a string. In this case,
df7492f9 7263the return value is BUFFER.
ecec61c1 7264
df7492f9
KH
7265This function sets `last-coding-system-used' to the precise coding system
7266used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7267not fully specified. */)
7268 (string, coding_system, nocopy, buffer)
7269 Lisp_Object string, coding_system, nocopy, buffer;
7270{
7271 return code_convert_string (string, coding_system, buffer,
7272 0, ! NILP (nocopy), 0);
7273}
7274
7275DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7276 2, 4, 0,
7277 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7278
7279Optional third arg NOCOPY non-nil means it is OK to return STRING
7280itself if the encoding operation is trivial.
7281
7282Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 7283inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
7284the return value is BUFFER.
7285
7286This function sets `last-coding-system-used' to the precise coding system
7287used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7288not fully specified.) */)
7289 (string, coding_system, nocopy, buffer)
7290 Lisp_Object string, coding_system, nocopy, buffer;
7291{
7292 return code_convert_string (string, coding_system, buffer,
c197f191 7293 1, ! NILP (nocopy), 1);
ecec61c1 7294}
df7492f9 7295
3a73fa5d 7296\f
4ed46869 7297DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7298 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7299Return the corresponding character. */)
7300 (code)
4ed46869
KH
7301 Lisp_Object code;
7302{
df7492f9
KH
7303 Lisp_Object spec, attrs, val;
7304 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7305 int c;
7306
7307 CHECK_NATNUM (code);
7308 c = XFASTINT (code);
7309 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7310 attrs = AREF (spec, 0);
4ed46869 7311
df7492f9
KH
7312 if (ASCII_BYTE_P (c)
7313 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7314 return code;
7315
7316 val = CODING_ATTR_CHARSET_LIST (attrs);
7317 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7318 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7319 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
7320
7321 if (c <= 0x7F)
7322 charset = charset_roman;
7323 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7324 {
df7492f9
KH
7325 charset = charset_kana;
7326 c -= 0x80;
55ab7be3
KH
7327 }
7328 else
7329 {
004068e4 7330 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7331
7332 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7333 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7334 error ("Invalid code: %d", code);
7335 SJIS_TO_JIS (c);
7336 charset = charset_kanji;
55ab7be3 7337 }
df7492f9
KH
7338 c = DECODE_CHAR (charset, c);
7339 if (c < 0)
7340 error ("Invalid code: %d", code);
7341 return make_number (c);
4ed46869
KH
7342}
7343
df7492f9 7344
4ed46869 7345DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7346 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7347Return the corresponding code in SJIS. */)
7348 (ch)
df7492f9 7349 Lisp_Object ch;
4ed46869 7350{
df7492f9
KH
7351 Lisp_Object spec, attrs, charset_list;
7352 int c;
7353 struct charset *charset;
7354 unsigned code;
4ed46869 7355
df7492f9
KH
7356 CHECK_CHARACTER (ch);
7357 c = XFASTINT (ch);
7358 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7359 attrs = AREF (spec, 0);
7360
7361 if (ASCII_CHAR_P (c)
7362 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7363 return ch;
7364
7365 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7366 charset = char_charset (c, charset_list, &code);
7367 if (code == CHARSET_INVALID_CODE (charset))
7368 error ("Can't encode by shift_jis encoding: %d", c);
7369 JIS_TO_SJIS (code);
7370
7371 return make_number (code);
4ed46869
KH
7372}
7373
7374DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7375 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7376Return the corresponding character. */)
7377 (code)
4ed46869
KH
7378 Lisp_Object code;
7379{
df7492f9
KH
7380 Lisp_Object spec, attrs, val;
7381 struct charset *charset_roman, *charset_big5, *charset;
7382 int c;
4ed46869 7383
df7492f9
KH
7384 CHECK_NATNUM (code);
7385 c = XFASTINT (code);
7386 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7387 attrs = AREF (spec, 0);
7388
7389 if (ASCII_BYTE_P (c)
7390 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7391 return code;
7392
7393 val = CODING_ATTR_CHARSET_LIST (attrs);
7394 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7395 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7396
7397 if (c <= 0x7F)
7398 charset = charset_roman;
c28a9453
KH
7399 else
7400 {
df7492f9
KH
7401 int b1 = c >> 8, b2 = c & 0x7F;
7402 if (b1 < 0xA1 || b1 > 0xFE
7403 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7404 error ("Invalid code: %d", code);
7405 charset = charset_big5;
c28a9453 7406 }
df7492f9
KH
7407 c = DECODE_CHAR (charset, (unsigned )c);
7408 if (c < 0)
7409 error ("Invalid code: %d", code);
7410 return make_number (c);
4ed46869
KH
7411}
7412
7413DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7414 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7415Return the corresponding character code in Big5. */)
7416 (ch)
4ed46869
KH
7417 Lisp_Object ch;
7418{
df7492f9
KH
7419 Lisp_Object spec, attrs, charset_list;
7420 struct charset *charset;
7421 int c;
7422 unsigned code;
7423
7424 CHECK_CHARACTER (ch);
7425 c = XFASTINT (ch);
7426 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7427 attrs = AREF (spec, 0);
7428 if (ASCII_CHAR_P (c)
7429 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7430 return ch;
7431
7432 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7433 charset = char_charset (c, charset_list, &code);
7434 if (code == CHARSET_INVALID_CODE (charset))
7435 error ("Can't encode by Big5 encoding: %d", c);
7436
7437 return make_number (code);
4ed46869 7438}
df7492f9 7439
3a73fa5d 7440\f
1ba9e4ab
KH
7441DEFUN ("set-terminal-coding-system-internal",
7442 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7443 Sset_terminal_coding_system_internal, 1, 1, 0,
7444 doc: /* Internal use only. */)
7445 (coding_system)
b74e4686 7446 Lisp_Object coding_system;
4ed46869 7447{
b7826503 7448 CHECK_SYMBOL (coding_system);
df7492f9
KH
7449 setup_coding_system (Fcheck_coding_system (coding_system),
7450 &terminal_coding);
7451
70c22245 7452 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7453 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7454 /* Characer composition should be disabled. */
7455 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7456 terminal_coding.src_multibyte = 1;
7457 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7458 return Qnil;
7459}
7460
c4825358
KH
7461DEFUN ("set-safe-terminal-coding-system-internal",
7462 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7463 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7464 doc: /* Internal use only. */)
48b0f3ae 7465 (coding_system)
b74e4686 7466 Lisp_Object coding_system;
c4825358 7467{
b7826503 7468 CHECK_SYMBOL (coding_system);
c4825358
KH
7469 setup_coding_system (Fcheck_coding_system (coding_system),
7470 &safe_terminal_coding);
df7492f9
KH
7471 /* Characer composition should be disabled. */
7472 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7473 safe_terminal_coding.src_multibyte = 1;
7474 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7475 return Qnil;
7476}
7477
4ed46869
KH
7478DEFUN ("terminal-coding-system",
7479 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7480 doc: /* Return coding system specified for terminal output. */)
7481 ()
4ed46869 7482{
df7492f9 7483 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7484}
7485
1ba9e4ab
KH
7486DEFUN ("set-keyboard-coding-system-internal",
7487 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7488 Sset_keyboard_coding_system_internal, 1, 1, 0,
7489 doc: /* Internal use only. */)
7490 (coding_system)
4ed46869
KH
7491 Lisp_Object coding_system;
7492{
b7826503 7493 CHECK_SYMBOL (coding_system);
df7492f9
KH
7494 setup_coding_system (Fcheck_coding_system (coding_system),
7495 &keyboard_coding);
7496 /* Characer composition should be disabled. */
7497 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7498 return Qnil;
7499}
7500
7501DEFUN ("keyboard-coding-system",
7502 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7503 doc: /* Return coding system specified for decoding keyboard input. */)
7504 ()
4ed46869 7505{
df7492f9 7506 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7507}
7508
7509\f
a5d301df
KH
7510DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7511 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7512 doc: /* Choose a coding system for an operation based on the target name.
7513The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7514DECODING-SYSTEM is the coding system to use for decoding
7515\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7516for encoding (in case OPERATION does encoding).
7517
7518The first argument OPERATION specifies an I/O primitive:
7519 For file I/O, `insert-file-contents' or `write-region'.
7520 For process I/O, `call-process', `call-process-region', or `start-process'.
7521 For network I/O, `open-network-stream'.
7522
7523The remaining arguments should be the same arguments that were passed
7524to the primitive. Depending on which primitive, one of those arguments
7525is selected as the TARGET. For example, if OPERATION does file I/O,
7526whichever argument specifies the file name is TARGET.
7527
7528TARGET has a meaning which depends on OPERATION:
7529 For file I/O, TARGET is a file name.
7530 For process I/O, TARGET is a process name.
7531 For network I/O, TARGET is a service name or a port number
7532
7533This function looks up what specified for TARGET in,
7534`file-coding-system-alist', `process-coding-system-alist',
7535or `network-coding-system-alist' depending on OPERATION.
7536They may specify a coding system, a cons of coding systems,
7537or a function symbol to call.
7538In the last case, we call the function with one argument,
7539which is a list of all the arguments given to this function.
7540
7541usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7542 (nargs, args)
4ed46869
KH
7543 int nargs;
7544 Lisp_Object *args;
7545{
7546 Lisp_Object operation, target_idx, target, val;
7547 register Lisp_Object chain;
7548
7549 if (nargs < 2)
7550 error ("Too few arguments");
7551 operation = args[0];
7552 if (!SYMBOLP (operation)
7553 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7554 error ("Invalid first arguement");
4ed46869
KH
7555 if (nargs < 1 + XINT (target_idx))
7556 error ("Too few arguments for operation: %s",
7557 XSYMBOL (operation)->name->data);
7558 target = args[XINT (target_idx) + 1];
7559 if (!(STRINGP (target)
7560 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7561 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7562
2e34157c
RS
7563 chain = ((EQ (operation, Qinsert_file_contents)
7564 || EQ (operation, Qwrite_region))
02ba4723 7565 ? Vfile_coding_system_alist
2e34157c 7566 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7567 ? Vnetwork_coding_system_alist
7568 : Vprocess_coding_system_alist));
4ed46869
KH
7569 if (NILP (chain))
7570 return Qnil;
7571
03699b14 7572 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7573 {
f44d27ce 7574 Lisp_Object elt;
4ed46869 7575
df7492f9 7576 elt = XCAR (chain);
4ed46869
KH
7577 if (CONSP (elt)
7578 && ((STRINGP (target)
03699b14
KR
7579 && STRINGP (XCAR (elt))
7580 && fast_string_match (XCAR (elt), target) >= 0)
7581 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7582 {
03699b14 7583 val = XCDR (elt);
b19fd4c5
KH
7584 /* Here, if VAL is both a valid coding system and a valid
7585 function symbol, we return VAL as a coding system. */
02ba4723
KH
7586 if (CONSP (val))
7587 return val;
7588 if (! SYMBOLP (val))
7589 return Qnil;
7590 if (! NILP (Fcoding_system_p (val)))
7591 return Fcons (val, val);
b19fd4c5
KH
7592 if (! NILP (Ffboundp (val)))
7593 {
7594 val = call1 (val, Flist (nargs, args));
7595 if (CONSP (val))
7596 return val;
7597 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7598 return Fcons (val, val);
7599 }
02ba4723
KH
7600 return Qnil;
7601 }
4ed46869
KH
7602 }
7603 return Qnil;
7604}
7605
df7492f9 7606DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7607 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7608 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7609usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7610 (nargs, args)
7611 int nargs;
7612 Lisp_Object *args;
7613{
7614 int i, j;
7615 int changed[coding_category_max];
7616 enum coding_category priorities[coding_category_max];
7617
7618 bzero (changed, sizeof changed);
7619
7620 for (i = j = 0; i < nargs; i++)
7621 {
7622 enum coding_category category;
7623 Lisp_Object spec, attrs;
7624
7625 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7626 attrs = AREF (spec, 0);
7627 category = XINT (CODING_ATTR_CATEGORY (attrs));
7628 if (changed[category])
7629 /* Ignore this coding system because a coding system of the
7630 same category already had a higher priority. */
7631 continue;
7632 changed[category] = 1;
7633 priorities[j++] = category;
7634 if (coding_categories[category].id >= 0
7635 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7636 setup_coding_system (args[i], &coding_categories[category]);
7637 }
7638
7639 /* Now we have decided top J priorities. Reflect the order of the
7640 original priorities to the remaining priorities. */
7641
7642 for (i = j, j = 0; i < coding_category_max; i++, j++)
7643 {
7644 while (j < coding_category_max
7645 && changed[coding_priorities[j]])
7646 j++;
7647 if (j == coding_category_max)
7648 abort ();
7649 priorities[i] = coding_priorities[j];
7650 }
7651
7652 bcopy (priorities, coding_priorities, sizeof priorities);
7653 return Qnil;
7654}
7655
7656DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7657 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7658 doc: /* Return a list of coding systems ordered by their priorities.
7659HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7660 (highestp)
7661 Lisp_Object highestp;
d46c5b12
KH
7662{
7663 int i;
df7492f9 7664 Lisp_Object val;
d46c5b12 7665
df7492f9 7666 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7667 {
df7492f9
KH
7668 enum coding_category category = coding_priorities[i];
7669 int id = coding_categories[category].id;
7670 Lisp_Object attrs;
7671
7672 if (id < 0)
7673 continue;
7674 attrs = CODING_ID_ATTRS (id);
7675 if (! NILP (highestp))
7676 return CODING_ATTR_BASE_NAME (attrs);
7677 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7678 }
7679 return Fnreverse (val);
7680}
7681
f0064e1f
DL
7682static char *suffixes[] = { "-unix", "-dos", "-mac" };
7683
df7492f9
KH
7684static Lisp_Object
7685make_subsidiaries (base)
7686 Lisp_Object base;
7687{
7688 Lisp_Object subsidiaries;
df7492f9
KH
7689 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7690 char *buf = (char *) alloca (base_name_len + 6);
7691 int i;
7692
7693 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7694 subsidiaries = Fmake_vector (make_number (3), Qnil);
7695 for (i = 0; i < 3; i++)
7696 {
7697 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7698 ASET (subsidiaries, i, intern (buf));
7699 }
7700 return subsidiaries;
7701}
7702
7703
/* Define a coding system from the positional arguments in ARGS.

   The function validates the common arguments and stores them in a
   fresh attribute vector ATTRS, then handles the arguments specific
   to the coding type and decides the coding category.  Finally it
   registers the coding system in Vcoding_system_hash_table,
   Vcoding_system_list and Vcoding_system_alist; when the eol-type
   argument is nil, three subsidiary coding systems sharing ATTRS are
   registered as well.

   Signals wrong-number-of-arguments when ARGS is too short for the
   requested coding type, and an error for invalid argument values.  */
7704DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7705 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7706 doc: /* For internal use only.
7707usage: (define-coding-system-internal ...) */)
df7492f9
KH
7708 (nargs, args)
7709 int nargs;
7710 Lisp_Object *args;
7711{
7712 Lisp_Object name;
7713 Lisp_Object spec_vec; /* [ ATTRS ALIASES EOL_TYPE ] */
7714 Lisp_Object attrs; /* Vector of attributes. */
7715 Lisp_Object eol_type;
7716 Lisp_Object aliases;
7717 Lisp_Object coding_type, charset_list, safe_charsets;
7718 enum coding_category category;
7719 Lisp_Object tail, val;
7720 int max_charset_id = 0;
7721 int i;
7722
7723 if (nargs < coding_arg_max)
7724 goto short_args;
7725
7726 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7727
7728 name = args[coding_arg_name];
7729 CHECK_SYMBOL (name);
7730 CODING_ATTR_BASE_NAME (attrs) = name;
7731
 /* The mnemonic may be either a string or a character. */
7732 val = args[coding_arg_mnemonic];
7733 if (! STRINGP (val))
7734 CHECK_CHARACTER (val);
7735 CODING_ATTR_MNEMONIC (attrs) = val;
7736
7737 coding_type = args[coding_arg_coding_type];
7738 CHECK_SYMBOL (coding_type);
7739 CODING_ATTR_TYPE (attrs) = coding_type;
7740
 /* Resolve the charset list and record the largest charset ID in
    MAX_CHARSET_ID. A symbol selects a predefined list (only valid
    when it matches the coding type); otherwise each element of a copy
    of the given list is replaced by the ID of the charset it names. */
7741 charset_list = args[coding_arg_charset_list];
7742 if (SYMBOLP (charset_list))
7743 {
7744 if (EQ (charset_list, Qiso_2022))
7745 {
7746 if (! EQ (coding_type, Qiso_2022))
7747 error ("Invalid charset-list");
7748 charset_list = Viso_2022_charset_list;
7749 }
7750 else if (EQ (charset_list, Qemacs_mule))
7751 {
7752 if (! EQ (coding_type, Qemacs_mule))
7753 error ("Invalid charset-list");
7754 charset_list = Vemacs_mule_charset_list;
7755 }
7756 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7757 if (max_charset_id < XFASTINT (XCAR (tail)))
7758 max_charset_id = XFASTINT (XCAR (tail));
7759 }
7760 else
7761 {
7762 charset_list = Fcopy_sequence (charset_list);
7763 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7764 {
7765 struct charset *charset;
7766
7767 val = Fcar (tail);
7768 CHECK_CHARSET_GET_CHARSET (val, charset);
7769 if (EQ (coding_type, Qiso_2022)
7770 ? CHARSET_ISO_FINAL (charset) < 0
7771 : EQ (coding_type, Qemacs_mule)
7772 ? CHARSET_EMACS_MULE_ID (charset) < 0
7773 : 0)
7774 error ("Can't handle charset `%s'",
7775 XSYMBOL (CHARSET_NAME (charset))->name->data);
7776
7777 XCAR (tail) = make_number (charset->id);
7778 if (max_charset_id < charset->id)
7779 max_charset_id = charset->id;
7780 }
7781 }
7782 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7783
 /* SAFE_CHARSETS is a string indexed by charset ID: the byte is 0
    for every charset in CHARSET_LIST and 255 for all other IDs. */
7784 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7785 make_number (255));
7786 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7787 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7788 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7789
584948ac
KH
7790 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7791
df7492f9
KH
7792 val = args[coding_arg_decode_translation_table];
7793 if (! NILP (val))
7794 CHECK_CHAR_TABLE (val);
7795 CODING_ATTR_DECODE_TBL (attrs) = val;
7796
7797 val = args[coding_arg_encode_translation_table];
7798 if (! NILP (val))
7799 CHECK_CHAR_TABLE (val);
7800 CODING_ATTR_ENCODE_TBL (attrs) = val;
7801
7802 val = args[coding_arg_post_read_conversion];
7803 CHECK_SYMBOL (val);
7804 CODING_ATTR_POST_READ (attrs) = val;
7805
7806 val = args[coding_arg_pre_write_conversion];
7807 CHECK_SYMBOL (val);
7808 CODING_ATTR_PRE_WRITE (attrs) = val;
7809
 /* A nil default character falls back to a space. */
7810 val = args[coding_arg_default_char];
7811 if (NILP (val))
7812 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7813 else
7814 {
7815 CHECK_CHARACTER (val);
7816 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7817 }
7818
7819 val = args[coding_arg_plist];
7820 CHECK_LIST (val);
7821 CODING_ATTR_PLIST (attrs) = val;
7822
 /* From here on, handle the arguments specific to CODING_TYPE and
    decide CATEGORY. */
7823 if (EQ (coding_type, Qcharset))
7824 {
c7c66a95
KH
7825 /* Generate a lisp vector of 256 elements. Each element is nil,
7826 integer, or a list of charset IDs.
7827
7828 If Nth element is nil, the byte code N is invalid in this
7829 coding system.
7830
7831 If Nth element is a number NUM, N is the first byte of a
7832 charset whose ID is NUM.
7833
7834 If Nth element is a list of charset IDs, N is the first byte
7835 of one of them. The list is sorted by dimensions of the
7836 charsets. A charset of smaller dimension comes first.
7837 */
df7492f9
KH
7838 val = Fmake_vector (make_number (256), Qnil);
7839
7840 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7841 {
c7c66a95
KH
7842 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7843 int dim = CHARSET_DIMENSION (charset);
7844 int idx = (dim - 1) * 4;
7845
584948ac
KH
7846 if (CHARSET_ASCII_COMPATIBLE_P (charset))
7847 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7848
15d143f7
KH
7849 for (i = charset->code_space[idx];
7850 i <= charset->code_space[idx + 1]; i++)
7851 {
c7c66a95
KH
7852 Lisp_Object tmp, tmp2;
7853 int dim2;
7854
7855 tmp = AREF (val, i);
7856 if (NILP (tmp))
7857 tmp = XCAR (tail);
7858 else if (NUMBERP (tmp))
7859 {
7860 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7861 if (dim < dim2)
c7c66a95 7862 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7863 else
7864 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7865 }
15d143f7 7866 else
c7c66a95
KH
7867 {
 /* Find the first list element of larger dimension; the new
    charset is inserted in front of it, or appended if there
    is none. */
7868 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7869 {
7870 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7871 if (dim < dim2)
7872 break;
7873 }
7874 if (NILP (tmp2))
7875 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7876 else
7877 {
7878 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7879 XSETCAR (tmp2, XCAR (tail));
7880 }
7881 }
7882 ASET (val, i, tmp);
15d143f7 7883 }
df7492f9
KH
7884 }
7885 ASET (attrs, coding_attr_charset_valids, val);
7886 category = coding_category_charset;
7887 }
7888 else if (EQ (coding_type, Qccl))
7889 {
7890 Lisp_Object valids;
7891
7892 if (nargs < coding_arg_ccl_max)
7893 goto short_args;
7894
7895 val = args[coding_arg_ccl_decoder];
7896 CHECK_CCL_PROGRAM (val);
7897 if (VECTORP (val))
7898 val = Fcopy_sequence (val);
7899 ASET (attrs, coding_attr_ccl_decoder, val);
7900
7901 val = args[coding_arg_ccl_encoder];
7902 CHECK_CCL_PROGRAM (val);
7903 if (VECTORP (val))
7904 val = Fcopy_sequence (val);
7905 ASET (attrs, coding_attr_ccl_encoder, val);
7906
 /* VALIDS is a 256-byte string with 1 at every byte value covered
    by the given list; list elements are either a single integer
    or a cons (FROM . TO), all within 0..255. */
7907 val = args[coding_arg_ccl_valids];
7908 valids = Fmake_string (make_number (256), make_number (0));
7909 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7910 {
8dcbea82
KH
7911 int from, to;
7912
df7492f9
KH
7913 val = Fcar (tail);
7914 if (INTEGERP (val))
8dcbea82
KH
7915 {
7916 from = to = XINT (val);
7917 if (from < 0 || from > 255)
7918 args_out_of_range_3 (val, make_number (0), make_number (255));
7919 }
df7492f9
KH
7920 else
7921 {
df7492f9
KH
7922 CHECK_CONS (val);
7923 CHECK_NUMBER (XCAR (val));
7924 CHECK_NUMBER (XCDR (val));
7925 from = XINT (XCAR (val));
8dcbea82
KH
7926 if (from < 0 || from > 255)
7927 args_out_of_range_3 (XCAR (val),
7928 make_number (0), make_number (255));
df7492f9 7929 to = XINT (XCDR (val));
8dcbea82
KH
7930 if (to < from || to > 255)
7931 args_out_of_range_3 (XCDR (val),
7932 XCAR (val), make_number (255));
df7492f9 7933 }
8dcbea82
KH
7934 for (i = from; i <= to; i++)
7935 XSTRING (valids)->data[i] = 1;
df7492f9
KH
7936 }
7937 ASET (attrs, coding_attr_ccl_valids, valids);
7938
7939 category = coding_category_ccl;
7940 }
7941 else if (EQ (coding_type, Qutf_16))
7942 {
7943 Lisp_Object bom, endian;
7944
584948ac
KH
7945 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
7946
df7492f9
KH
7947 if (nargs < coding_arg_utf16_max)
7948 goto short_args;
7949
 /* BOM is nil, t, or a cons of two coding systems. */
7950 bom = args[coding_arg_utf16_bom];
7951 if (! NILP (bom) && ! EQ (bom, Qt))
7952 {
7953 CHECK_CONS (bom);
7954 CHECK_CODING_SYSTEM (XCAR (bom));
7955 CHECK_CODING_SYSTEM (XCDR (bom));
7956 }
7957 ASET (attrs, coding_attr_utf_16_bom, bom);
7958
7959 endian = args[coding_arg_utf16_endian];
7960 ASET (attrs, coding_attr_utf_16_endian, endian);
7961
 /* A cons BOM selects the auto category; otherwise a nil BOM picks
    a "nosig" category, and a nil ENDIAN picks the "be" variant. */
7962 category = (CONSP (bom)
7963 ? coding_category_utf_16_auto
7964 : NILP (bom)
7965 ? (NILP (endian)
7966 ? coding_category_utf_16_be_nosig
7967 : coding_category_utf_16_le_nosig)
7968 : (NILP (endian)
7969 ? coding_category_utf_16_be
7970 : coding_category_utf_16_le));
7971 }
7972 else if (EQ (coding_type, Qiso_2022))
7973 {
7974 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7975 int i, id;
1397dc18 7976
df7492f9
KH
7977 if (nargs < coding_arg_iso2022_max)
7978 goto short_args;
7979
 /* Replace each of the first four elements of a copy of INITIAL
    by a charset ID, or by -1 where the element is nil. */
7980 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7981 CHECK_VECTOR (initial);
7982 for (i = 0; i < 4; i++)
7983 {
7984 val = Faref (initial, make_number (i));
7985 if (! NILP (val))
7986 {
584948ac
KH
7987 struct charset *charset;
7988
7989 CHECK_CHARSET_GET_CHARSET (val, charset);
7990 ASET (initial, i, make_number (CHARSET_ID (charset)));
7991 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
7992 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
7993 }
7994 else
7995 ASET (initial, i, make_number (-1));
7996 }
7997
7998 reg_usage = args[coding_arg_iso2022_reg_usage];
7999 CHECK_CONS (reg_usage);
8000 CHECK_NATNUM (XCAR (reg_usage));
8001 CHECK_NATNUM (XCDR (reg_usage));
8002
 /* Each REQUEST element is (CHARSET . REGISTER) with REGISTER < 4;
    the car is replaced by the charset's ID. Note that only the
    list spine is copied, so the element conses are modified in
    place. */
8003 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8004 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8005 {
df7492f9
KH
8006 int id;
8007
8008 val = Fcar (tail);
8009 CHECK_CONS (val);
8010 CHECK_CHARSET_GET_ID (XCAR (val), id);
8011 CHECK_NATNUM (XCDR (val));
8012 if (XINT (XCDR (val)) >= 4)
8013 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8014 XCAR (val) = make_number (id);
1397dc18 8015 }
df7492f9
KH
8016
8017 flags = args[coding_arg_iso2022_flags];
8018 CHECK_NATNUM (flags);
8019 i = XINT (flags);
8020 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8021 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8022
8023 ASET (attrs, coding_attr_iso_initial, initial);
8024 ASET (attrs, coding_attr_iso_usage, reg_usage);
8025 ASET (attrs, coding_attr_iso_request, request);
8026 ASET (attrs, coding_attr_iso_flags, flags);
8027 setup_iso_safe_charsets (attrs);
8028
 /* I still holds the flags as given by the caller, i.e. without
    CODING_ISO_FLAG_FULL_SUPPORT merged in above. */
8029 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8030 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8031 | CODING_ISO_FLAG_SINGLE_SHIFT))
8032 ? coding_category_iso_7_else
8033 : EQ (args[coding_arg_charset_list], Qiso_2022)
8034 ? coding_category_iso_7
8035 : coding_category_iso_7_tight);
8036 else
8037 {
8038 int id = XINT (AREF (initial, 1));
8039
c6fb6e98 8040 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8041 || EQ (args[coding_arg_charset_list], Qiso_2022)
8042 || id < 0)
8043 ? coding_category_iso_8_else
8044 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8045 ? coding_category_iso_8_1
8046 : coding_category_iso_8_2);
8047 }
0ce7886f
KH
8048 if (category != coding_category_iso_8_1
8049 && category != coding_category_iso_8_2)
8050 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8051 }
8052 else if (EQ (coding_type, Qemacs_mule))
8053 {
8054 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8055 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8056 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8057 category = coding_category_emacs_mule;
8058 }
8059 else if (EQ (coding_type, Qshift_jis))
8060 {
8061
8062 struct charset *charset;
8063
 /* Exactly three charsets are required, of dimension 1, 1 and 2
    in that order. */
8064 if (XINT (Flength (charset_list)) != 3)
8065 error ("There should be just three charsets");
8066
8067 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8068 if (CHARSET_DIMENSION (charset) != 1)
8069 error ("Dimension of charset %s is not one",
8070 XSYMBOL (CHARSET_NAME (charset))->name->data);
584948ac
KH
8071 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8072 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8073
8074 charset_list = XCDR (charset_list);
8075 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8076 if (CHARSET_DIMENSION (charset) != 1)
8077 error ("Dimension of charset %s is not one",
8078 XSYMBOL (CHARSET_NAME (charset))->name->data);
8079
8080 charset_list = XCDR (charset_list);
8081 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8082 if (CHARSET_DIMENSION (charset) != 2)
8083 error ("Dimension of charset %s is not two",
8084 XSYMBOL (CHARSET_NAME (charset))->name->data);
8085
8086 category = coding_category_sjis;
8087 Vsjis_coding_system = name;
8088 }
8089 else if (EQ (coding_type, Qbig5))
8090 {
8091 struct charset *charset;
8092
 /* Exactly two charsets are required, of dimension 1 and 2 in
    that order. */
8093 if (XINT (Flength (charset_list)) != 2)
8094 error ("There should be just two charsets");
8095
8096 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8097 if (CHARSET_DIMENSION (charset) != 1)
8098 error ("Dimension of charset %s is not one",
8099 XSYMBOL (CHARSET_NAME (charset))->name->data);
584948ac
KH
8100 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8101 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8102
8103 charset_list = XCDR (charset_list);
8104 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8105 if (CHARSET_DIMENSION (charset) != 2)
8106 error ("Dimension of charset %s is not two",
8107 XSYMBOL (CHARSET_NAME (charset))->name->data);
8108
8109 category = coding_category_big5;
8110 Vbig5_coding_system = name;
8111 }
8112 else if (EQ (coding_type, Qraw_text))
584948ac
KH
8113 {
8114 category = coding_category_raw_text;
8115 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8116 }
df7492f9 8117 else if (EQ (coding_type, Qutf_8))
584948ac
KH
8118 {
8119 category = coding_category_utf_8;
8120 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8121 }
df7492f9
KH
8122 else if (EQ (coding_type, Qundecided))
8123 category = coding_category_undecided;
8124 else
8125 error ("Invalid coding system type: %s",
8126 XSYMBOL (coding_type)->name->data);
8127
8128 CODING_ATTR_CATEGORY (attrs) = make_number (category);
8129
 /* The eol-type argument must be nil or one of the symbols compared
    below. */
8130 eol_type = args[coding_arg_eol_type];
8131 if (! NILP (eol_type)
8132 && ! EQ (eol_type, Qunix)
8133 && ! EQ (eol_type, Qdos)
8134 && ! EQ (eol_type, Qmac))
8135 error ("Invalid eol-type");
8136
8137 aliases = Fcons (name, Qnil);
8138
 /* With a nil eol-type, EOL_TYPE becomes the vector of subsidiary
    names and each subsidiary is registered with its own spec vector
    that shares ATTRS with the base coding system. */
8139 if (NILP (eol_type))
8140 {
8141 eol_type = make_subsidiaries (name);
8142 for (i = 0; i < 3; i++)
1397dc18 8143 {
df7492f9
KH
8144 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8145
8146 this_name = AREF (eol_type, i);
8147 this_aliases = Fcons (this_name, Qnil);
8148 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8149 this_spec = Fmake_vector (make_number (3), attrs);
8150 ASET (this_spec, 1, this_aliases);
8151 ASET (this_spec, 2, this_eol_type);
8152 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8153 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8154 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8155 Vcoding_system_alist);
1397dc18 8156 }
d46c5b12 8157 }
1397dc18 8158
df7492f9
KH
 /* Fmake_vector fills all three slots with ATTRS; slots 1 and 2 are
    then overwritten with the alias list and the eol-type. */
8159 spec_vec = Fmake_vector (make_number (3), attrs);
8160 ASET (spec_vec, 1, aliases);
8161 ASET (spec_vec, 2, eol_type);
8162
8163 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8164 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8165 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8166 Vcoding_system_alist);
8167
 /* Set up the category's coding system from NAME when the category
    has none yet (negative ID) or when its current one has this same
    name. */
8168 {
8169 int id = coding_categories[category].id;
8170
8171 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8172 setup_coding_system (name, &coding_categories[category]);
8173 }
8174
d46c5b12 8175 return Qnil;
df7492f9
KH
8176
 /* Reached when ARGS is shorter than the coding type requires. */
8177 short_args:
8178 return Fsignal (Qwrong_number_of_arguments,
8179 Fcons (intern ("define-coding-system-internal"),
8180 make_number (nargs)));
d46c5b12
KH
8181}
8182
da7db224 8183/* Fixme: should this record the alias relationships for
e1c23804 8184 diagnostics? Should it update coding-system-list? */
df7492f9
KH
8185DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8186 Sdefine_coding_system_alias, 2, 2, 0,
8187 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8188 (alias, coding_system)
8189 Lisp_Object alias, coding_system;
66cfb530 8190{
df7492f9 8191 Lisp_Object spec, aliases, eol_type;
84d60297 8192
df7492f9
KH
8193 CHECK_SYMBOL (alias);
8194 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8195 aliases = AREF (spec, 1);
8196 while (!NILP (XCDR (aliases)))
8197 aliases = XCDR (aliases);
8198 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 8199
df7492f9
KH
8200 eol_type = AREF (spec, 2);
8201 if (VECTORP (eol_type))
66cfb530 8202 {
df7492f9
KH
8203 Lisp_Object subsidiaries;
8204 int i;
8205
8206 subsidiaries = make_subsidiaries (alias);
8207 for (i = 0; i < 3; i++)
8208 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8209 AREF (eol_type, i));
8210
8211 ASET (spec, 2, subsidiaries);
66cfb530 8212 }
df7492f9
KH
8213
8214 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
8215 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8216 Vcoding_system_alist);
66cfb530
KH
8217
8218 return Qnil;
8219}
8220
df7492f9
KH
8221DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8222 1, 1, 0,
8223 doc: /* Return the base of CODING-SYSTEM.
da7db224 8224Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
8225 (coding_system)
8226 Lisp_Object coding_system;
8227{
8228 Lisp_Object spec, attrs;
8229
8230 if (NILP (coding_system))
8231 return (Qno_conversion);
8232 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8233 attrs = AREF (spec, 0);
8234 return CODING_ATTR_BASE_NAME (attrs);
8235}
8236
8237DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8238 1, 1, 0,
8239 doc: "Return the property list of CODING-SYSTEM.")
8240 (coding_system)
8241 Lisp_Object coding_system;
8242{
8243 Lisp_Object spec, attrs;
8244
8245 if (NILP (coding_system))
8246 coding_system = Qno_conversion;
8247 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8248 attrs = AREF (spec, 0);
8249 return CODING_ATTR_PLIST (attrs);
8250}
8251
8252
8253DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8254 1, 1, 0,
da7db224 8255 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
8256 (coding_system)
8257 Lisp_Object coding_system;
8258{
8259 Lisp_Object spec;
8260
8261 if (NILP (coding_system))
8262 coding_system = Qno_conversion;
8263 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 8264 return AREF (spec, 1);
df7492f9
KH
8265}
8266
8267DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8268 Scoding_system_eol_type, 1, 1, 0,
8269 doc: /* Return eol-type of CODING-SYSTEM.
8270An eol-type is integer 0, 1, 2, or a vector of coding systems.
8271
8272Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8273and CR respectively.
8274
8275A vector value indicates that a format of end-of-line should be
8276detected automatically. Nth element of the vector is the subsidiary
8277coding system whose eol-type is N. */)
8278 (coding_system)
8279 Lisp_Object coding_system;
8280{
8281 Lisp_Object spec, eol_type;
8282 int n;
8283
8284 if (NILP (coding_system))
8285 coding_system = Qno_conversion;
8286 if (! CODING_SYSTEM_P (coding_system))
8287 return Qnil;
8288 spec = CODING_SYSTEM_SPEC (coding_system);
8289 eol_type = AREF (spec, 2);
8290 if (VECTORP (eol_type))
8291 return Fcopy_sequence (eol_type);
8292 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8293 return make_number (n);
8294}
8295
4ed46869
KH
8296#endif /* emacs */
8297
8298\f
1397dc18 8299/*** 12. Postamble ***/
4ed46869 8300
dfcf069d 8301void
4ed46869
KH
8302init_coding_once ()
8303{
8304 int i;
8305
df7492f9
KH
8306 for (i = 0; i < coding_category_max; i++)
8307 {
8308 coding_categories[i].id = -1;
8309 coding_priorities[i] = i;
8310 }
4ed46869
KH
8311
8312 /* ISO2022 specific initialize routine. */
8313 for (i = 0; i < 0x20; i++)
b73bfc1c 8314 iso_code_class[i] = ISO_control_0;
4ed46869
KH
8315 for (i = 0x21; i < 0x7F; i++)
8316 iso_code_class[i] = ISO_graphic_plane_0;
8317 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 8318 iso_code_class[i] = ISO_control_1;
4ed46869
KH
8319 for (i = 0xA1; i < 0xFF; i++)
8320 iso_code_class[i] = ISO_graphic_plane_1;
8321 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8322 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8323 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8324 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8325 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8326 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8327 iso_code_class[ISO_CODE_ESC] = ISO_escape;
8328 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8329 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8330 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8331
b843d1ae 8332 inhibit_pre_post_conversion = 0;
df7492f9
KH
8333
8334 for (i = 0; i < 256; i++)
8335 {
8336 emacs_mule_bytes[i] = 1;
8337 }
7c78e542
KH
8338 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8339 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8340 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8341 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
8342}
8343
8344#ifdef emacs
8345
dfcf069d 8346void
e0e989f6
KH
8347syms_of_coding ()
8348{
df7492f9
KH
8349 staticpro (&Vcoding_system_hash_table);
8350 Vcoding_system_hash_table = Fmakehash (Qeq);
8351
8352 staticpro (&Vsjis_coding_system);
8353 Vsjis_coding_system = Qnil;
8354
8355 staticpro (&Vbig5_coding_system);
8356 Vbig5_coding_system = Qnil;
8357
8358 staticpro (&Vcode_conversion_work_buf_list);
8359 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 8360
df7492f9
KH
8361 staticpro (&Vcode_conversion_reused_work_buf);
8362 Vcode_conversion_reused_work_buf = Qnil;
8363
8364 DEFSYM (Qcharset, "charset");
8365 DEFSYM (Qtarget_idx, "target-idx");
8366 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
8367 Fset (Qcoding_system_history, Qnil);
8368
9ce27fde 8369 /* Target FILENAME is the first argument. */
e0e989f6 8370 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 8371 /* Target FILENAME is the third argument. */
e0e989f6
KH
8372 Fput (Qwrite_region, Qtarget_idx, make_number (2));
8373
df7492f9 8374 DEFSYM (Qcall_process, "call-process");
9ce27fde 8375 /* Target PROGRAM is the first argument. */
e0e989f6
KH
8376 Fput (Qcall_process, Qtarget_idx, make_number (0));
8377
df7492f9 8378 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 8379 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8380 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8381
df7492f9 8382 DEFSYM (Qstart_process, "start-process");
9ce27fde 8383 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8384 Fput (Qstart_process, Qtarget_idx, make_number (2));
8385
df7492f9 8386 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 8387 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
8388 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8389
df7492f9
KH
8390 DEFSYM (Qcoding_system, "coding-system");
8391 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 8392
df7492f9
KH
8393 DEFSYM (Qeol_type, "eol-type");
8394 DEFSYM (Qunix, "unix");
8395 DEFSYM (Qdos, "dos");
4ed46869 8396
df7492f9
KH
8397 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8398 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8399 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8400 DEFSYM (Qdefault_char, "default-char");
8401 DEFSYM (Qundecided, "undecided");
8402 DEFSYM (Qno_conversion, "no-conversion");
8403 DEFSYM (Qraw_text, "raw-text");
4ed46869 8404
df7492f9 8405 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 8406
df7492f9 8407 DEFSYM (Qutf_8, "utf-8");
27901516 8408
df7492f9 8409 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
8410 DEFSYM (Qsignature, "signature");
8411 DEFSYM (Qendian, "endian");
8412 DEFSYM (Qbig, "big");
8413 DEFSYM (Qlittle, "little");
27901516 8414
df7492f9
KH
8415 DEFSYM (Qshift_jis, "shift-jis");
8416 DEFSYM (Qbig5, "big5");
4ed46869 8417
df7492f9 8418 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 8419
df7492f9 8420 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
8421 Fput (Qcoding_system_error, Qerror_conditions,
8422 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8423 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 8424 build_string ("Invalid coding system"));
4ed46869 8425
df7492f9
KH
8426 /* Intern this now in case it isn't already done.
8427 Setting this variable twice is harmless.
8428 But don't staticpro it here--that is done in alloc.c. */
8429 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 8430
df7492f9 8431 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8432 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8433 DEFSYM (Qtranslation_table_id, "translation-table-id");
8434 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8435 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 8436
df7492f9 8437 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 8438
df7492f9 8439 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 8440
df7492f9
KH
8441 Vcoding_category_table
8442 = Fmake_vector (make_number (coding_category_max), Qnil);
8443 staticpro (&Vcoding_category_table);
8444 /* Followings are target of code detection. */
8445 ASET (Vcoding_category_table, coding_category_iso_7,
8446 intern ("coding-category-iso-7"));
8447 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8448 intern ("coding-category-iso-7-tight"));
8449 ASET (Vcoding_category_table, coding_category_iso_8_1,
8450 intern ("coding-category-iso-8-1"));
8451 ASET (Vcoding_category_table, coding_category_iso_8_2,
8452 intern ("coding-category-iso-8-2"));
8453 ASET (Vcoding_category_table, coding_category_iso_7_else,
8454 intern ("coding-category-iso-7-else"));
8455 ASET (Vcoding_category_table, coding_category_iso_8_else,
8456 intern ("coding-category-iso-8-else"));
8457 ASET (Vcoding_category_table, coding_category_utf_8,
8458 intern ("coding-category-utf-8"));
8459 ASET (Vcoding_category_table, coding_category_utf_16_be,
8460 intern ("coding-category-utf-16-be"));
8461 ASET (Vcoding_category_table, coding_category_utf_16_le,
8462 intern ("coding-category-utf-16-le"));
8463 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8464 intern ("coding-category-utf-16-be-nosig"));
8465 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8466 intern ("coding-category-utf-16-le-nosig"));
8467 ASET (Vcoding_category_table, coding_category_charset,
8468 intern ("coding-category-charset"));
8469 ASET (Vcoding_category_table, coding_category_sjis,
8470 intern ("coding-category-sjis"));
8471 ASET (Vcoding_category_table, coding_category_big5,
8472 intern ("coding-category-big5"));
8473 ASET (Vcoding_category_table, coding_category_ccl,
8474 intern ("coding-category-ccl"));
8475 ASET (Vcoding_category_table, coding_category_emacs_mule,
8476 intern ("coding-category-emacs-mule"));
8477 /* The following are NOT targets of code detection. */
8478 ASET (Vcoding_category_table, coding_category_raw_text,
8479 intern ("coding-category-raw-text"));
8480 ASET (Vcoding_category_table, coding_category_undecided,
8481 intern ("coding-category-undecided"));
70c22245 8482
4ed46869
KH
8483 defsubr (&Scoding_system_p);
8484 defsubr (&Sread_coding_system);
8485 defsubr (&Sread_non_nil_coding_system);
8486 defsubr (&Scheck_coding_system);
8487 defsubr (&Sdetect_coding_region);
d46c5b12 8488 defsubr (&Sdetect_coding_string);
05e6f5dc 8489 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8490 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8491 defsubr (&Sdecode_coding_region);
8492 defsubr (&Sencode_coding_region);
8493 defsubr (&Sdecode_coding_string);
8494 defsubr (&Sencode_coding_string);
8495 defsubr (&Sdecode_sjis_char);
8496 defsubr (&Sencode_sjis_char);
8497 defsubr (&Sdecode_big5_char);
8498 defsubr (&Sencode_big5_char);
1ba9e4ab 8499 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8500 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8501 defsubr (&Sterminal_coding_system);
1ba9e4ab 8502 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8503 defsubr (&Skeyboard_coding_system);
a5d301df 8504 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8505 defsubr (&Sset_coding_system_priority);
8506 defsubr (&Sdefine_coding_system_internal);
8507 defsubr (&Sdefine_coding_system_alias);
8508 defsubr (&Scoding_system_base);
8509 defsubr (&Scoding_system_plist);
8510 defsubr (&Scoding_system_aliases);
8511 defsubr (&Scoding_system_eol_type);
8512 defsubr (&Scoding_system_priority_list);
4ed46869 8513
4608c386 8514 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8515 doc: /* List of coding systems.
8516
8517Do not alter the value of this variable manually. This variable should be
df7492f9 8518updated by the functions `define-coding-system' and
48b0f3ae 8519`define-coding-system-alias'. */);
4608c386
KH
8520 Vcoding_system_list = Qnil;
8521
8522 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8523 doc: /* Alist of coding system names.
8524Each element is one element list of coding system name.
8525This variable is given to `completing-read' as TABLE argument.
8526
8527Do not alter the value of this variable manually. This variable should be
8528updated by the functions `make-coding-system' and
8529`define-coding-system-alias'. */);
4608c386
KH
8530 Vcoding_system_alist = Qnil;
8531
4ed46869 8532 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8533 doc: /* List of coding-categories (symbols) ordered by priority.
8534
8535On detecting a coding system, Emacs tries code detection algorithms
8536associated with each coding-category one by one in this order. When
8537one algorithm agrees with a byte sequence of source text, the coding
8538system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8539 {
8540 int i;
8541
8542 Vcoding_category_list = Qnil;
df7492f9 8543 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8544 Vcoding_category_list
d46c5b12
KH
8545 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8546 Vcoding_category_list);
4ed46869
KH
8547 }
8548
8549 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8550 doc: /* Specify the coding system for read operations.
8551It is useful to bind this variable with `let', but do not set it globally.
8552If the value is a coding system, it is used for decoding on read operation.
8553If not, an appropriate element is used from one of the coding system alists:
8554There are three such tables, `file-coding-system-alist',
8555`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8556 Vcoding_system_for_read = Qnil;
8557
8558 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8559 doc: /* Specify the coding system for write operations.
8560Programs bind this variable with `let', but you should not set it globally.
8561If the value is a coding system, it is used for encoding of output,
8562when writing it to a file and when sending it to a file or subprocess.
8563
8564If this does not specify a coding system, an appropriate element
8565is used from one of the coding system alists:
8566There are three such tables, `file-coding-system-alist',
8567`process-coding-system-alist', and `network-coding-system-alist'.
8568For output to files, if the above procedure does not specify a coding system,
8569the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8570 Vcoding_system_for_write = Qnil;
8571
8572 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8573 doc: /*
8574Coding system used in the latest file or process I/O. */);
4ed46869
KH
8575 Vlast_coding_system_used = Qnil;
8576
9ce27fde 8577 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8578 doc: /*
8579*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8580See info node `Coding Systems' and info node `Text and Binary' concerning
8581such conversion. */);
9ce27fde
KH
8582 inhibit_eol_conversion = 0;
8583
ed29121d 8584 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8585 doc: /*
8586Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8587Bind it to t if the process output is to be treated as if it were a file
8588read from some filesystem. */);
ed29121d
EZ
8589 inherit_process_coding_system = 0;
8590
02ba4723 8591 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8592 doc: /*
8593Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8594The format is ((PATTERN . VAL) ...),
8595where PATTERN is a regular expression matching a file name,
8596VAL is a coding system, a cons of coding systems, or a function symbol.
8597If VAL is a coding system, it is used for both decoding and encoding
8598the file contents.
8599If VAL is a cons of coding systems, the car part is used for decoding,
8600and the cdr part is used for encoding.
8601If VAL is a function symbol, the function must return a coding system
0192762c
DL
8602or a cons of coding systems which are used as above. The function gets
8603the arguments with which `find-operation-coding-system' was called.
48b0f3ae
PJ
8604
8605See also the function `find-operation-coding-system'
8606and the variable `auto-coding-alist'. */);
02ba4723
KH
8607 Vfile_coding_system_alist = Qnil;
8608
8609 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8610 doc: /*
8611Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8612The format is ((PATTERN . VAL) ...),
8613where PATTERN is a regular expression matching a program name,
8614VAL is a coding system, a cons of coding systems, or a function symbol.
8615If VAL is a coding system, it is used for both decoding what received
8616from the program and encoding what sent to the program.
8617If VAL is a cons of coding systems, the car part is used for decoding,
8618and the cdr part is used for encoding.
8619If VAL is a function symbol, the function must return a coding system
8620or a cons of coding systems which are used as above.
8621
8622See also the function `find-operation-coding-system'. */);
02ba4723
KH
8623 Vprocess_coding_system_alist = Qnil;
8624
8625 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8626 doc: /*
8627Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8628The format is ((PATTERN . VAL) ...),
8629where PATTERN is a regular expression matching a network service name
8630or is a port number to connect to,
8631VAL is a coding system, a cons of coding systems, or a function symbol.
8632If VAL is a coding system, it is used for both decoding what received
8633from the network stream and encoding what sent to the network stream.
8634If VAL is a cons of coding systems, the car part is used for decoding,
8635and the cdr part is used for encoding.
8636If VAL is a function symbol, the function must return a coding system
8637or a cons of coding systems which are used as above.
8638
8639See also the function `find-operation-coding-system'. */);
02ba4723 8640 Vnetwork_coding_system_alist = Qnil;
4ed46869 8641
68c45bf0 8642 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8643 doc: /* Coding system to use with system messages.
8644Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8645 Vlocale_coding_system = Qnil;
8646
005f0d35 8647 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8648 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8649 doc: /*
8650*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8651 eol_mnemonic_unix = build_string (":");
4ed46869 8652
7722baf9 8653 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8654 doc: /*
8655*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8656 eol_mnemonic_dos = build_string ("\\");
4ed46869 8657
7722baf9 8658 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8659 doc: /*
8660*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8661 eol_mnemonic_mac = build_string ("/");
4ed46869 8662
7722baf9 8663 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8664 doc: /*
8665*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8666 eol_mnemonic_undecided = build_string (":");
4ed46869 8667
84fbb8a0 8668 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8669 doc: /*
8670*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8671 Venable_character_translation = Qt;
bdd9fb48 8672
f967223b 8673 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8674 &Vstandard_translation_table_for_decode,
8675 doc: /* Table for translating characters while decoding. */);
f967223b 8676 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8677
f967223b 8678 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8679 &Vstandard_translation_table_for_encode,
8680 doc: /* Table for translating characters while encoding. */);
f967223b 8681 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8682
df7492f9 8683 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8684 doc: /* Alist of charsets vs revision numbers.
8685While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8686designate it with the escape sequence identifying revision (cdr part
8687of the element). */);
8688 Vcharset_revision_table = Qnil;
02ba4723
KH
8689
8690 DEFVAR_LISP ("default-process-coding-system",
8691 &Vdefault_process_coding_system,
48b0f3ae
PJ
8692 doc: /* Cons of coding systems used for process I/O by default.
8693The car part is used for decoding a process output,
8694the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8695 Vdefault_process_coding_system = Qnil;
c4825358 8696
3f003981 8697 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8698 doc: /*
8699Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8700This is a vector of length 256.
8701If Nth element is non-nil, the existence of code N in a file
8702\(or output of subprocess) doesn't prevent it to be detected as
8703a coding system of ISO 2022 variant which has a flag
8704`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8705or reading output of a subprocess.
8706Only 128th through 159th elements have a meaning. */);
3f003981 8707 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8708
8709 DEFVAR_LISP ("select-safe-coding-system-function",
8710 &Vselect_safe_coding_system_function,
df7492f9
KH
8711 doc: /*
8712Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8713
8714If set, this function is called to force a user to select a proper
8715coding system which can encode the text in the case that a default
8716coding system used in each operation can't encode the text.
8717
8718The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8719 Vselect_safe_coding_system_function = Qnil;
8720
22ab2303 8721 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8722 &inhibit_iso_escape_detection,
df7492f9
KH
8723 doc: /*
8724If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8725
8726By default, on reading a file, Emacs tries to detect how the text is
8727encoded. This code detection is sensitive to escape sequences. If
8728the sequence is valid as ISO2022, the code is determined as one of
8729the ISO2022 encodings, and the file is decoded by the corresponding
8730coding system (e.g. `iso-2022-7bit').
8731
8732However, there may be a case that you want to read escape sequences in
8733a file as is. In such a case, you can set this variable to non-nil.
8734Then, as the code detection ignores any escape sequences, no file is
8735detected as encoded in some ISO2022 encoding. The result is that all
8736escape sequences become visible in a buffer.
8737
8738The default value is nil, and it is strongly recommended not to change
8739it. That is because many Emacs Lisp source files that contain
8740non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8741in Emacs's distribution, and they won't be decoded correctly on
8742reading if you suppress escape sequence detection.
8743
8744The other way to read escape sequences in a file without decoding is
8745to explicitly specify some coding system that doesn't use ISO2022's
8746escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8747 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8748
8749 {
8750 Lisp_Object args[coding_arg_max];
8751 Lisp_Object plist[14];
8752 int i;
8753
8754 for (i = 0; i < coding_arg_max; i++)
8755 args[i] = Qnil;
8756
8757 plist[0] = intern (":name");
8758 plist[1] = args[coding_arg_name] = Qno_conversion;
8759 plist[2] = intern (":mnemonic");
8760 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8761 plist[4] = intern (":coding-type");
8762 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8763 plist[6] = intern (":ascii-compatible-p");
8764 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8765 plist[8] = intern (":default-char");
8766 plist[9] = args[coding_arg_default_char] = make_number (0);
8767 plist[10] = intern (":docstring");
8768 plist[11] = build_string ("Do no conversion.\n\
8769\n\
8770When you visit a file with this coding, the file is read into a\n\
8771unibyte buffer as is, thus each byte of a file is treated as a\n\
8772character.");
8773 plist[12] = intern (":eol-type");
8774 plist[13] = args[coding_arg_eol_type] = Qunix;
8775 args[coding_arg_plist] = Flist (14, plist);
8776 Fdefine_coding_system_internal (coding_arg_max, args);
8777 }
8778
8779 setup_coding_system (Qno_conversion, &keyboard_coding);
8780 setup_coding_system (Qno_conversion, &terminal_coding);
8781 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8782}
8783
68c45bf0
PE
8784char *
8785emacs_strerror (error_number)
8786 int error_number;
8787{
8788 char *str;
8789
ca9c0567 8790 synchronize_system_messages_locale ();
68c45bf0
PE
8791 str = strerror (error_number);
8792
8793 if (! NILP (Vlocale_coding_system))
8794 {
8795 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8796 Vlocale_coding_system,
8797 0);
8798 str = (char *) XSTRING (dec)->data;
8799 }
8800
8801 return str;
8802}
8803
4ed46869 8804#endif /* emacs */