(BASE_FONTSET_P): Check FONTSET_BASE, not
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
5a936b46 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on the operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we set up a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
4ed46869 154#if 0
df7492f9 155static int
ff0dacd7 156detect_coding_XXX (coding, detect_info)
df7492f9 157 struct coding_system *coding;
ff0dacd7 158 struct coding_detection_info *detect_info;
4ed46869 159{
df7492f9
KH
160 unsigned char *src = coding->source;
161 unsigned char *src_end = coding->source + coding->src_bytes;
162 int multibytep = coding->src_multibyte;
ff0dacd7 163 int consumed_chars = 0;
df7492f9
KH
164 int found = 0;
165 ...;
166
167 while (1)
168 {
169 /* Get one byte from the source. If the souce is exausted, jump
170 to no_more_source:. */
171 ONE_MORE_BYTE (c);
ff0dacd7
KH
172
173 if (! __C_conforms_to_XXX___ (c))
174 break;
175 if (! __C_strongly_suggests_XXX__ (c))
176 found = CATEGORY_MASK_XXX;
df7492f9 177 }
ff0dacd7
KH
178 /* The byte sequence is invalid for XXX. */
179 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 180 return 0;
ff0dacd7 181
df7492f9 182 no_more_source:
ff0dacd7
KH
183 /* The source exausted successfully. */
184 detect_info->found |= found;
df7492f9 185 return 1;
4ed46869
KH
186}
187#endif
188
189/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
df7492f9
KH
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
d46c5b12 195
df7492f9
KH
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
d46c5b12 200
df7492f9 201 Below is the template of these functions. */
d46c5b12 202
4ed46869 203#if 0
b73bfc1c 204static void
df7492f9 205decode_coding_XXXX (coding)
4ed46869 206 struct coding_system *coding;
4ed46869 207{
df7492f9
KH
208 unsigned char *src = coding->source + coding->consumed;
209 unsigned char *src_end = coding->source + coding->src_bytes;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base;
214 /* A buffer to produce decoded characters. */
215 int *charbuf = coding->charbuf;
216 int *charbuf_end = charbuf + coding->charbuf_size;
217 int multibytep = coding->src_multibyte;
218
219 while (1)
220 {
221 src_base = src;
222 if (charbuf < charbuf_end)
223 /* No more room to produce a decoded character. */
224 break;
225 ONE_MORE_BYTE (c);
226 /* Decode it. */
227 }
228
229 no_more_source:
230 if (src_base < src_end
231 && coding->mode & CODING_MODE_LAST_BLOCK)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base < src_end && charbuf < charbuf_end)
235 *charbuf++ = *src_base++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding->consumed = coding->consumed_char = src_base - coding->source;
239 /* Remember how many characters we produced. */
240 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
241}
242#endif
243
244/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
df7492f9
KH
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
d46c5b12 250
df7492f9
KH
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 255
df7492f9
KH
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce encoded text until it
258 reaches the head of the not-yet-encoded source text.
d46c5b12 259
df7492f9 260 Below is a template of these functions. */
4ed46869 261#if 0
b73bfc1c 262static void
df7492f9 263encode_coding_XXX (coding)
4ed46869 264 struct coding_system *coding;
4ed46869 265{
df7492f9
KH
266 int multibytep = coding->dst_multibyte;
267 int *charbuf = coding->charbuf;
268 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
269 unsigned char *dst = coding->destination + coding->produced;
270 unsigned char *dst_end = coding->destination + coding->dst_bytes;
271 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
272 int produced_chars = 0;
273
274 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
275 {
276 int c = *charbuf;
277 /* Encode C into DST, and increment DST. */
278 }
279 label_no_more_destination:
280 /* How many chars and bytes we produced. */
281 coding->produced_char += produced_chars;
282 coding->produced = dst - coding->destination;
4ed46869
KH
283}
284#endif
285
4ed46869
KH
286\f
287/*** 1. Preamble ***/
288
68c45bf0 289#include <config.h>
4ed46869
KH
290#include <stdio.h>
291
4ed46869
KH
292#include "lisp.h"
293#include "buffer.h"
df7492f9 294#include "character.h"
4ed46869
KH
295#include "charset.h"
296#include "ccl.h"
df7492f9 297#include "composite.h"
4ed46869
KH
298#include "coding.h"
299#include "window.h"
300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
304Lisp_Object Qunix, Qdos;
305extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
306Lisp_Object Qbuffer_file_coding_system;
307Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 308Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
df7492f9 310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 311Lisp_Object Qbig, Qlittle;
bb0115a2 312Lisp_Object Qcoding_system_history;
1397dc18 313Lisp_Object Qvalid_codes;
4ed46869
KH
314
315extern Lisp_Object Qinsert_file_contents, Qwrite_region;
316Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
317Lisp_Object Qstart_process, Qopen_network_stream;
318Lisp_Object Qtarget_idx;
319
d46c5b12
KH
320Lisp_Object Vselect_safe_coding_system_function;
321
7722baf9
EZ
322/* Mnemonic string for each format of end-of-line. */
323Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
324/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 325 decided. */
7722baf9 326Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
327
328#ifdef emacs
329
4608c386
KH
330Lisp_Object Vcoding_system_list, Vcoding_system_alist;
331
332Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 333
d46c5b12
KH
334/* Coding system emacs-mule and raw-text are for converting only
335 end-of-line format. */
336Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 337
4ed46869
KH
338/* Coding-systems are handed between Emacs Lisp programs and C internal
339 routines by the following three variables. */
340/* Coding-system for reading files and receiving data from process. */
341Lisp_Object Vcoding_system_for_read;
342/* Coding-system for writing files and sending data to process. */
343Lisp_Object Vcoding_system_for_write;
344/* Coding-system actually used in the latest I/O. */
345Lisp_Object Vlast_coding_system_used;
346
c4825358 347/* A vector of length 256 which contains information about special
94487c4e 348 Latin codes (especially for dealing with Microsoft codes). */
3f003981 349Lisp_Object Vlatin_extra_code_table;
c4825358 350
9ce27fde
KH
351/* Flag to inhibit code conversion of end-of-line format. */
352int inhibit_eol_conversion;
353
74383408
KH
354/* Flag to inhibit ISO2022 escape sequence detection. */
355int inhibit_iso_escape_detection;
356
ed29121d
EZ
357/* Flag to make buffer-file-coding-system inherit from process-coding. */
358int inherit_process_coding_system;
359
c4825358 360/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
361struct coding_system terminal_coding;
362
c4825358
KH
363/* Coding system to be used to encode text for terminal display when
364 terminal coding system is nil. */
365struct coding_system safe_terminal_coding;
366
367/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
368struct coding_system keyboard_coding;
369
02ba4723
KH
370Lisp_Object Vfile_coding_system_alist;
371Lisp_Object Vprocess_coding_system_alist;
372Lisp_Object Vnetwork_coding_system_alist;
4ed46869 373
68c45bf0
PE
374Lisp_Object Vlocale_coding_system;
375
4ed46869
KH
376#endif /* emacs */
377
f967223b
KH
378/* Flag to tell if we look up translation table on character code
379 conversion. */
84fbb8a0 380Lisp_Object Venable_character_translation;
f967223b
KH
381/* Standard translation table to look up on decoding (reading). */
382Lisp_Object Vstandard_translation_table_for_decode;
383/* Standard translation table to look up on encoding (writing). */
384Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 385
f967223b
KH
386Lisp_Object Qtranslation_table;
387Lisp_Object Qtranslation_table_id;
388Lisp_Object Qtranslation_table_for_decode;
389Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
390
391/* Alist of charsets vs revision number. */
df7492f9 392static Lisp_Object Vcharset_revision_table;
4ed46869 393
02ba4723
KH
394/* Default coding systems used for process I/O. */
395Lisp_Object Vdefault_process_coding_system;
396
b843d1ae
KH
397/* Global flag to tell that we can't call post-read-conversion and
398 pre-write-conversion functions. Usually the value is zero, but it
399 is set to 1 temporarily while such functions are running. This is
400 to avoid infinite recursive call. */
401static int inhibit_pre_post_conversion;
402
df7492f9
KH
403/* Two special coding systems. */
404Lisp_Object Vsjis_coding_system;
405Lisp_Object Vbig5_coding_system;
406
407
ff0dacd7
KH
408static int detect_coding_utf_8 P_ ((struct coding_system *,
409 struct coding_detection_info *info));
df7492f9
KH
410static void decode_coding_utf_8 P_ ((struct coding_system *));
411static int encode_coding_utf_8 P_ ((struct coding_system *));
412
ff0dacd7
KH
413static int detect_coding_utf_16 P_ ((struct coding_system *,
414 struct coding_detection_info *info));
df7492f9
KH
415static void decode_coding_utf_16 P_ ((struct coding_system *));
416static int encode_coding_utf_16 P_ ((struct coding_system *));
417
ff0dacd7
KH
418static int detect_coding_iso_2022 P_ ((struct coding_system *,
419 struct coding_detection_info *info));
df7492f9
KH
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
ff0dacd7
KH
423static int detect_coding_emacs_mule P_ ((struct coding_system *,
424 struct coding_detection_info *info));
df7492f9
KH
425static void decode_coding_emacs_mule P_ ((struct coding_system *));
426static int encode_coding_emacs_mule P_ ((struct coding_system *));
427
ff0dacd7
KH
428static int detect_coding_sjis P_ ((struct coding_system *,
429 struct coding_detection_info *info));
df7492f9
KH
430static void decode_coding_sjis P_ ((struct coding_system *));
431static int encode_coding_sjis P_ ((struct coding_system *));
432
ff0dacd7
KH
433static int detect_coding_big5 P_ ((struct coding_system *,
434 struct coding_detection_info *info));
df7492f9
KH
435static void decode_coding_big5 P_ ((struct coding_system *));
436static int encode_coding_big5 P_ ((struct coding_system *));
437
ff0dacd7
KH
438static int detect_coding_ccl P_ ((struct coding_system *,
439 struct coding_detection_info *info));
df7492f9
KH
440static void decode_coding_ccl P_ ((struct coding_system *));
441static int encode_coding_ccl P_ ((struct coding_system *));
442
443static void decode_coding_raw_text P_ ((struct coding_system *));
444static int encode_coding_raw_text P_ ((struct coding_system *));
445
446
447/* ISO2022 section */
448
449#define CODING_ISO_INITIAL(coding, reg) \
450 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
451 coding_attr_iso_initial), \
452 reg)))
453
454
/* Return the entry for CHARSET_ID in CODING's `safe_charsets' table,
   or -1 if CHARSET_ID is greater than CODING's `max_charset_id'.
   CHARSET_ID is parenthesized in the comparison so that an expression
   argument (e.g. a conditional) is compared as a whole.  Note that
   CHARSET_ID is evaluated twice.  */
#define CODING_ISO_REQUEST(coding, charset_id)		\
  (((charset_id) <= (coding)->max_charset_id)		\
   ? (coding)->safe_charsets[charset_id]		\
   : -1)
459
460
461#define CODING_ISO_FLAGS(coding) \
462 ((coding)->spec.iso_2022.flags)
463#define CODING_ISO_DESIGNATION(coding, reg) \
464 ((coding)->spec.iso_2022.current_designation[reg])
465#define CODING_ISO_INVOCATION(coding, plane) \
466 ((coding)->spec.iso_2022.current_invocation[plane])
467#define CODING_ISO_SINGLE_SHIFTING(coding) \
468 ((coding)->spec.iso_2022.single_shifting)
469#define CODING_ISO_BOL(coding) \
470 ((coding)->spec.iso_2022.bol)
471#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
472 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
473
474/* Control characters of ISO2022. */
475 /* code */ /* function */
476#define ISO_CODE_LF 0x0A /* line-feed */
477#define ISO_CODE_CR 0x0D /* carriage-return */
478#define ISO_CODE_SO 0x0E /* shift-out */
479#define ISO_CODE_SI 0x0F /* shift-in */
480#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
481#define ISO_CODE_ESC 0x1B /* escape */
482#define ISO_CODE_SS2 0x8E /* single-shift-2 */
483#define ISO_CODE_SS3 0x8F /* single-shift-3 */
484#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
485
486/* Every 1-byte code of ISO2022 is classified into one of the
487 following classes. */
488enum iso_code_class_type
489 {
490 ISO_control_0, /* Control codes in the range
491 0x00..0x1F and 0x7F, except for the
492 following 5 codes. */
493 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
494 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
495 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
496 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
497 ISO_escape, /* ISO_CODE_ESC (0x1B) */
498 ISO_control_1, /* Control codes in the range
499 0x80..0x9F, except for the
500 following 3 codes. */
501 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
502 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
503 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
504 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
505 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
506 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
507 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
508 };
05e6f5dc 509
df7492f9
KH
510/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
511 `iso-flags' attribute of an iso2022 coding system. */
93dec019 512
df7492f9
KH
513/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
514 instead of the correct short-form sequence (e.g. ESC $ A). */
515#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 516
df7492f9
KH
517/* If set, reset graphic planes and registers at end-of-line to the
518 initial state. */
519#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 520
df7492f9
KH
521/* If set, reset graphic planes and registers before any control
522 characters to the initial state. */
523#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 524
df7492f9
KH
525/* If set, encode by 7-bit environment. */
526#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 527
df7492f9
KH
528/* If set, use locking-shift function. */
529#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 530
df7492f9
KH
531/* If set, use single-shift function. Overwrite
532 CODING_ISO_FLAG_LOCKING_SHIFT. */
533#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 534
df7492f9
KH
535/* If set, use designation escape sequence. */
536#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 537
df7492f9
KH
538/* If set, produce revision number sequence. */
539#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 540
df7492f9
KH
541/* If set, produce ISO6429's direction specifying sequence. */
542#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 543
df7492f9
KH
544/* If set, assume designation states are reset at beginning of line on
545 output. */
546#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 547
df7492f9
KH
548/* If set, designation sequence should be placed at beginning of line
549 on output. */
550#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 551
df7492f9
KH
552/* If set, do not encode unsafe characters on output. */
553#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 554
df7492f9
KH
555/* If set, extra latin codes (128..159) are accepted as a valid code
556 on input. */
557#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 558
df7492f9 559#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 560
df7492f9 561#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 562
bf16eb23
KH
563#define CODING_ISO_FLAG_USE_ROMAN 0x8000
564
565#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
566
567#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 568
df7492f9
KH
569/* A character to be produced on output if encoding of the original
570 character is prohibited by CODING_ISO_FLAG_SAFE. */
571#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 572
aa72b389 573
df7492f9
KH
574/* UTF-16 section */
575#define CODING_UTF_16_BOM(coding) \
576 ((coding)->spec.utf_16.bom)
4ed46869 577
df7492f9
KH
578#define CODING_UTF_16_ENDIAN(coding) \
579 ((coding)->spec.utf_16.endian)
4ed46869 580
df7492f9
KH
581#define CODING_UTF_16_SURROGATE(coding) \
582 ((coding)->spec.utf_16.surrogate)
4ed46869 583
4ed46869 584
df7492f9
KH
585/* CCL section */
586#define CODING_CCL_DECODER(coding) \
587 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
588#define CODING_CCL_ENCODER(coding) \
589 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
590#define CODING_CCL_VALIDS(coding) \
591 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
592 ->data)
4ed46869 593
5a936b46 594/* Index for each coding category in `coding_categories' */
4ed46869 595
df7492f9
KH
596enum coding_category
597 {
598 coding_category_iso_7,
599 coding_category_iso_7_tight,
600 coding_category_iso_8_1,
601 coding_category_iso_8_2,
602 coding_category_iso_7_else,
603 coding_category_iso_8_else,
604 coding_category_utf_8,
605 coding_category_utf_16_auto,
606 coding_category_utf_16_be,
607 coding_category_utf_16_le,
608 coding_category_utf_16_be_nosig,
609 coding_category_utf_16_le_nosig,
610 coding_category_charset,
611 coding_category_sjis,
612 coding_category_big5,
613 coding_category_ccl,
614 coding_category_emacs_mule,
615 /* All above are targets of code detection. */
616 coding_category_raw_text,
617 coding_category_undecided,
618 coding_category_max
619 };
620
621/* Definitions of flag bits used in detect_coding_XXXX. */
622#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
623#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
624#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
625#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
626#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
627#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
628#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 629#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
630#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
631#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
632#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
633#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
634#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
635#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
636#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
637#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
638#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 639#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
640
641/* This value is returned if detect_coding_mask () finds nothing other
642 than ASCII characters. */
643#define CATEGORY_MASK_ANY \
644 (CATEGORY_MASK_ISO_7 \
645 | CATEGORY_MASK_ISO_7_TIGHT \
646 | CATEGORY_MASK_ISO_8_1 \
647 | CATEGORY_MASK_ISO_8_2 \
648 | CATEGORY_MASK_ISO_7_ELSE \
649 | CATEGORY_MASK_ISO_8_ELSE \
650 | CATEGORY_MASK_UTF_8 \
651 | CATEGORY_MASK_UTF_16_BE \
652 | CATEGORY_MASK_UTF_16_LE \
653 | CATEGORY_MASK_UTF_16_BE_NOSIG \
654 | CATEGORY_MASK_UTF_16_LE_NOSIG \
655 | CATEGORY_MASK_CHARSET \
656 | CATEGORY_MASK_SJIS \
657 | CATEGORY_MASK_BIG5 \
658 | CATEGORY_MASK_CCL \
659 | CATEGORY_MASK_EMACS_MULE)
660
661
662#define CATEGORY_MASK_ISO_7BIT \
663 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
664
665#define CATEGORY_MASK_ISO_8BIT \
666 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
667
668#define CATEGORY_MASK_ISO_ELSE \
669 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
670
671#define CATEGORY_MASK_ISO_ESCAPE \
672 (CATEGORY_MASK_ISO_7 \
673 | CATEGORY_MASK_ISO_7_TIGHT \
674 | CATEGORY_MASK_ISO_7_ELSE \
675 | CATEGORY_MASK_ISO_8_ELSE)
676
677#define CATEGORY_MASK_ISO \
678 ( CATEGORY_MASK_ISO_7BIT \
679 | CATEGORY_MASK_ISO_8BIT \
680 | CATEGORY_MASK_ISO_ELSE)
681
682#define CATEGORY_MASK_UTF_16 \
683 (CATEGORY_MASK_UTF_16_BE \
684 | CATEGORY_MASK_UTF_16_LE \
685 | CATEGORY_MASK_UTF_16_BE_NOSIG \
686 | CATEGORY_MASK_UTF_16_LE_NOSIG)
687
688
689/* List of symbols `coding-category-xxx' ordered by priority. This
690 variable is exposed to Emacs Lisp. */
691static Lisp_Object Vcoding_category_list;
692
693/* Table of coding categories (Lisp symbols). This variable is for
694 internal use only. */
695static Lisp_Object Vcoding_category_table;
696
697/* Table of coding-categories ordered by priority. */
698static enum coding_category coding_priorities[coding_category_max];
699
700/* Nth element is a coding context for the coding system bound to the
701 Nth coding category. */
702static struct coding_system coding_categories[coding_category_max];
703
704static int detected_mask[coding_category_raw_text] =
705 { CATEGORY_MASK_ISO,
706 CATEGORY_MASK_ISO,
707 CATEGORY_MASK_ISO,
708 CATEGORY_MASK_ISO,
709 CATEGORY_MASK_ISO,
710 CATEGORY_MASK_ISO,
711 CATEGORY_MASK_UTF_8,
712 CATEGORY_MASK_UTF_16,
713 CATEGORY_MASK_UTF_16,
714 CATEGORY_MASK_UTF_16,
715 CATEGORY_MASK_UTF_16,
716 CATEGORY_MASK_UTF_16,
717 CATEGORY_MASK_CHARSET,
718 CATEGORY_MASK_SJIS,
719 CATEGORY_MASK_BIG5,
720 CATEGORY_MASK_CCL,
721 CATEGORY_MASK_EMACS_MULE
722 };
723
724/*** Commonly used macros and functions ***/
725
726#ifndef min
727#define min(a, b) ((a) < (b) ? (a) : (b))
728#endif
729#ifndef max
730#define max(a, b) ((a) > (b) ? (a) : (b))
731#endif
4ed46869 732
df7492f9
KH
/* Fetch commonly needed information about CODING into the lvalues
   ATTRS, EOL_TYPE and CHARSET_LIST: the attributes and the eol-type
   registered for CODING's id, and the charset list taken from those
   attributes.  When the registered eol-type is a vector, EOL_TYPE is
   set to Qunix instead.  CODING is parenthesized so that any pointer
   expression may be passed.  */
#define CODING_GET_INFO(coding, attrs, eol_type, charset_list)	\
  do {								\
    attrs = CODING_ID_ATTRS ((coding)->id);			\
    eol_type = CODING_ID_EOL_TYPE ((coding)->id);		\
    if (VECTORP (eol_type))					\
      eol_type = Qunix;						\
    charset_list = CODING_ATTR_CHARSET_LIST (attrs);		\
  } while (0)
4ed46869 741
4ed46869 742
df7492f9
KH
743/* Safely get one byte from the source text pointed by SRC which ends
744 at SRC_END, and set C to that byte. If there are not enough bytes
745 in the source, it jumps to `no_more_source'. The caller
746 should declare and set these variables appropriately in advance:
747 src, src_base, src_end, multibytep, consumed_chars, coding
748*/
aa72b389 749
df7492f9 750#define ONE_MORE_BYTE(c) \
aa72b389 751 do { \
df7492f9
KH
752 if (src == src_end) \
753 { \
754 if (src_base < src) \
755 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
756 goto no_more_source; \
757 } \
758 c = *src++; \
759 if (multibytep && (c & 0x80)) \
760 { \
761 if ((c & 0xFE) != 0xC0) \
762 error ("Undecodable char found"); \
763 c = ((c & 1) << 6) | *src++; \
764 } \
765 consumed_chars++; \
aa72b389
KH
766 } while (0)
767
aa72b389 768
df7492f9
KH
769#define ONE_MORE_BYTE_NO_CHECK(c) \
770 do { \
771 c = *src++; \
772 if (multibytep && (c & 0x80)) \
773 { \
774 if ((c & 0xFE) != 0xC0) \
775 error ("Undecodable char found"); \
776 c = ((c & 1) << 6) | *src++; \
777 } \
781d7a48 778 consumed_chars++; \
aa72b389
KH
779 } while (0)
780
aa72b389 781
df7492f9
KH
782/* Store a byte C in the place pointed by DST and increment DST to the
783 next free point, and increment PRODUCED_CHARS. The caller should
784 assure that C is 0..127, and declare and set the variable `dst'
785 appropriately in advance.
786*/
aa72b389
KH
787
788
df7492f9
KH
789#define EMIT_ONE_ASCII_BYTE(c) \
790 do { \
791 produced_chars++; \
792 *dst++ = (c); \
793 } while (0)
aa72b389 794
aa72b389 795
df7492f9 796/* Like EMIT_ONE_ASCII_BYTE but store two bytes: C1 and C2. */
aa72b389 797
df7492f9
KH
798#define EMIT_TWO_ASCII_BYTES(c1, c2) \
799 do { \
800 produced_chars += 2; \
801 *dst++ = (c1), *dst++ = (c2); \
802 } while (0)
aa72b389 803
df7492f9
KH
804
805/* Store a byte C in the place pointed by DST and increment DST to the
806 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
807 nonzero, store in an appropriate multibyte form. The caller should
808 declare and set the variables `dst' and `multibytep' appropriately
809 in advance. */
810
811#define EMIT_ONE_BYTE(c) \
812 do { \
813 produced_chars++; \
814 if (multibytep) \
815 { \
816 int ch = (c); \
817 if (ch >= 0x80) \
818 ch = BYTE8_TO_CHAR (ch); \
819 CHAR_STRING_ADVANCE (ch, dst); \
820 } \
821 else \
822 *dst++ = (c); \
aa72b389
KH
823 } while (0)
824
825
/* Like EMIT_ONE_BYTE, but emit two bytes, C1 and then C2, and add two
   to PRODUCED_CHARS.  C1 is fully stored before C2 is evaluated.  */

#define EMIT_TWO_BYTES(c1, c2) \
  do { \
    produced_chars += 2; \
    if (! multibytep) \
      { \
        *dst++ = (c1); \
        *dst++ = (c2); \
      } \
    else \
      { \
        int ch = (c1); \
        \
        ch = (ch >= 0x80 ? BYTE8_TO_CHAR (ch) : ch); \
        CHAR_STRING_ADVANCE (ch, dst); \
        ch = (c2); \
        ch = (ch >= 0x80 ? BYTE8_TO_CHAR (ch) : ch); \
        CHAR_STRING_ADVANCE (ch, dst); \
      } \
  } while (0)
850
851
df7492f9
KH
/* Emit the three bytes C1, C2 and C3, in that order, with the same
   conventions as EMIT_ONE_BYTE.  */

#define EMIT_THREE_BYTES(c1, c2, c3) \
  do { \
    EMIT_TWO_BYTES (c1, c2); \
    EMIT_ONE_BYTE (c3); \
  } while (0)
aa72b389 857
aa72b389 858
df7492f9
KH
/* Emit the four bytes C1, C2, C3 and C4, in that order, with the same
   conventions as EMIT_ONE_BYTE.  */

#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
  do { \
    EMIT_ONE_BYTE (c1); \
    EMIT_ONE_BYTE (c2); \
    EMIT_TWO_BYTES (c3, c4); \
  } while (0)
aa72b389 864
aa72b389 865
df7492f9
KH
/* Set C to DECODE_CHAR (CHARSET, CODE).  The flag charset_map_loaded
   is cleared first; if it is nonzero afterwards, coding->source is
   refreshed with coding_set_source and the pointers SRC, SRC_BASE
   and SRC_END are shifted by however far coding->source moved.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do { \
    charset_map_loaded = 0; \
    c = DECODE_CHAR (charset, code); \
    if (charset_map_loaded != 0) \
      { \
        unsigned char *old_source = coding->source; \
        EMACS_INT shift; \
        \
        coding_set_source (coding); \
        shift = coding->source - old_source; \
        src += shift; \
        src_base += shift; \
        src_end += shift; \
      } \
  } while (0)
aa72b389 882
aa72b389 883
df7492f9
KH
/* Make sure BYTES more bytes can be stored at DST.  If DST + BYTES
   reaches DST_END, enlarge the destination with alloc_destination by
   the number of elements still pending in CHARBUF plus BYTES, then
   reload DST and DST_END.  The caller should declare and set the
   variables `coding', `dst', `dst_end', `charbuf' and `charbuf_end'
   appropriately in advance.  */

#define ASSURE_DESTINATION(bytes) \
  do { \
    if (! (dst + (bytes) < dst_end)) \
      { \
        int wanted = charbuf_end - charbuf + (bytes); \
        \
        dst = alloc_destination (coding, wanted, dst); \
        dst_end = coding->destination + coding->dst_bytes; \
      } \
  } while (0)
b1887814 894
df7492f9
KH
895
896
897static void
898coding_set_source (coding)
899 struct coding_system *coding;
900{
901 if (BUFFERP (coding->src_object))
902 {
2cb26057
KH
903 struct buffer *buf = XBUFFER (coding->src_object);
904
df7492f9 905 if (coding->src_pos < 0)
2cb26057 906 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 907 else
2cb26057 908 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 909 }
df7492f9 910 else if (STRINGP (coding->src_object))
aa72b389 911 {
df7492f9
KH
912 coding->source = (XSTRING (coding->src_object)->data
913 + coding->src_pos_byte);
914 }
915 else
916 /* Otherwise, the source is C string and is never relocated
917 automatically. Thus we don't have to update anything. */
918 ;
919}
920
921static void
922coding_set_destination (coding)
923 struct coding_system *coding;
924{
925 if (BUFFERP (coding->dst_object))
926 {
df7492f9 927 if (coding->src_pos < 0)
28f67a95
KH
928 {
929 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
930 coding->dst_bytes = (GAP_END_ADDR
931 - (coding->src_bytes - coding->consumed)
932 - coding->destination);
933 }
df7492f9 934 else
28f67a95
KH
935 {
936 /* We are sure that coding->dst_pos_byte is before the gap
937 of the buffer. */
938 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
939 + coding->dst_pos_byte - 1);
940 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
941 - coding->destination);
942 }
df7492f9
KH
943 }
944 else
945 /* Otherwise, the destination is C string and is never relocated
946 automatically. Thus we don't have to update anything. */
947 ;
948}
949
950
951static void
952coding_alloc_by_realloc (coding, bytes)
953 struct coding_system *coding;
954 EMACS_INT bytes;
955{
956 coding->destination = (unsigned char *) xrealloc (coding->destination,
957 coding->dst_bytes + bytes);
958 coding->dst_bytes += bytes;
959}
960
961static void
962coding_alloc_by_making_gap (coding, bytes)
963 struct coding_system *coding;
964 EMACS_INT bytes;
965{
2c78b7e1
KH
966 if (BUFFERP (coding->dst_object)
967 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
968 {
969 EMACS_INT add = coding->src_bytes - coding->consumed;
970
971 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
972 make_gap (bytes);
973 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
974 }
975 else
976 {
2c78b7e1
KH
977 Lisp_Object this_buffer;
978
979 this_buffer = Fcurrent_buffer ();
df7492f9
KH
980 set_buffer_internal (XBUFFER (coding->dst_object));
981 make_gap (bytes);
982 set_buffer_internal (XBUFFER (this_buffer));
983 }
984}
985
986
987static unsigned char *
988alloc_destination (coding, nbytes, dst)
989 struct coding_system *coding;
990 int nbytes;
991 unsigned char *dst;
992{
993 EMACS_INT offset = dst - coding->destination;
994
995 if (BUFFERP (coding->dst_object))
996 coding_alloc_by_making_gap (coding, nbytes);
997 else
998 coding_alloc_by_realloc (coding, nbytes);
999 coding->result = CODING_RESULT_SUCCESS;
1000 coding_set_destination (coding);
1001 dst = coding->destination + offset;
1002 return dst;
1003}
aa72b389 1004
ff0dacd7
KH
1005/** Macros for annotations. */
1006
1007/* Maximum length of annotation data (sum of annotations for
1008 composition and charset): a composition annotation of 5 elements
   (the length ADD_COMPOSITION_DATA records) plus up to
   MAX_COMPOSITION_COMPONENTS * 2 - 1 components, and a charset
   annotation of 5 elements (the length ADD_CHARSET_DATA records). */
1009#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1010
1011/* An annotation data is stored in the array coding->charbuf in this
1012 format:
1013 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1014 LENGTH is the number of elements in the annotation.
1015 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1016 FROM and TO specify the range of text annotated. They are relative
1017 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1018
1019 The format of the following elements depend on ANNOTATION_MASK.
1020
1021 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1022 follows:
1023 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1024 METHOD is one of enum composition_method.
1025 Optional COMPOSITION-COMPONENTS are characters and composition
1026 rules.
1027
1028 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1029 follows. */
1030
/* Store the four leading elements of an annotation at BUF and
   advance BUF past them: -LEN, MASK, FROM and TO, in that order.
   Also set coding->annotated to 1.  BUF must be a modifiable lvalue
   of pointer type, and the caller should have the variable `coding'
   in scope.

   The expansion deliberately has no trailing semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is exactly one statement and can be
   used as the body of an `if' that has an `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
  do { \
    *(buf)++ = -(len); \
    *(buf)++ = (mask); \
    *(buf)++ = (from); \
    *(buf)++ = (to); \
    coding->annotated = 1; \
  } while (0)
1039
/* Store a composition annotation at BUF and advance BUF past it: the
   four leading elements written by ADD_ANNOTATION_DATA (with length
   5 and mask CODING_ANNOTATE_COMPOSITION_MASK) followed by METHOD.
   Callers that append composition components afterwards adjust the
   stored length themselves.

   BUF and METHOD are parenthesized in the expansion, as in
   ADD_ANNOTATION_DATA, so that an argument such as `*pp' advances
   the intended pointer.  */

#define ADD_COMPOSITION_DATA(buf, from, to, method) \
  do { \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
    *(buf)++ = (method); \
  } while (0)
1045
1046
/* Store a charset annotation at BUF and advance BUF past it: the
   four leading elements written by ADD_ANNOTATION_DATA (with length
   5 and mask CODING_ANNOTATE_CHARSET_MASK) followed by the charset
   ID.

   BUF and ID are parenthesized in the expansion, as in
   ADD_ANNOTATION_DATA, so that an argument such as `*pp' advances
   the intended pointer.  */

#define ADD_CHARSET_DATA(buf, from, to, id) \
  do { \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
    *(buf)++ = (id); \
  } while (0)
1052
df7492f9
KH
1053\f
1054/*** 2. Emacs' internal format (emacs-utf-8) ***/
1055
1056
1057
1058\f
1059/*** 3. UTF-8 ***/
1060
1061/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1062 Check if a text is encoded in UTF-8. If it is, return 1, else
1063 return 0. */
df7492f9
KH
1064
/* Classification of a single UTF-8 octet C by its leading bits:

     0xxxxxxx  one-octet (ASCII) character
     10xxxxxx  trailing octet of a multi-octet sequence
     110xxxxx  leading octet of a 2-octet sequence
     1110xxxx  leading octet of a 3-octet sequence
     11110xxx  leading octet of a 4-octet sequence
     111110xx  leading octet of a 5-octet sequence  */

#define UTF_8_OCTET_MATCH_P(c, mask, bits) (((c) & (mask)) == (bits))

#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c) UTF_8_OCTET_MATCH_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xFC, 0xF8)
1071
/* Scan the source of CODING and decide whether it can be UTF-8.

   CATEGORY_MASK_UTF_8 is always added to DETECT_INFO->checked.  The
   first coding->head_ascii bytes are skipped.  Each remaining
   sequence must be a one-octet character, or a 2- to 5-octet leading
   byte followed by the matching number of trailing octets.

   Return 0 and add CATEGORY_MASK_UTF_8 to DETECT_INFO->rejected if a
   malformed sequence is seen, or if the source ends inside a
   sequence while CODING_MODE_LAST_BLOCK is set.  Otherwise return 1,
   adding CATEGORY_MASK_UTF_8 to DETECT_INFO->found only if at least
   one multi-octet sequence was seen.

   NOTE(review): the label `no_more_source' has no visible `goto';
   presumably ONE_MORE_BYTE (defined outside this excerpt) jumps
   there when the source is exhausted -- confirm.  */
1072static int
ff0dacd7 1073detect_coding_utf_8 (coding, detect_info)
df7492f9 1074 struct coding_system *coding;
ff0dacd7 1075 struct coding_detection_info *detect_info;
df7492f9
KH
1076{
1077 unsigned char *src = coding->source, *src_base = src;
1078 unsigned char *src_end = coding->source + coding->src_bytes;
1079 int multibytep = coding->src_multibyte;
1080 int consumed_chars = 0;
1081 int found = 0;
89528eb3 1082 int incomplete;
df7492f9 1083
ff0dacd7 1084 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1085 /* A coding system of this category is always ASCII compatible. */
1086 src += coding->head_ascii;
1087
1088 while (1)
1089 {
1090 int c, c1, c2, c3, c4;
1091
/* INCOMPLETE is 0 only between sequences, and 1 once the leading
   byte of a sequence has been read.  */
89528eb3 1092 incomplete = 0;
df7492f9
KH
1093 ONE_MORE_BYTE (c);
1094 if (UTF_8_1_OCTET_P (c))
1095 continue;
89528eb3 1096 incomplete = 1;
df7492f9
KH
1097 ONE_MORE_BYTE (c1);
1098 if (! UTF_8_EXTRA_OCTET_P (c1))
1099 break;
1100 if (UTF_8_2_OCTET_LEADING_P (c))
1101 {
ff0dacd7 1102 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1103 continue;
1104 }
1105 ONE_MORE_BYTE (c2);
1106 if (! UTF_8_EXTRA_OCTET_P (c2))
1107 break;
1108 if (UTF_8_3_OCTET_LEADING_P (c))
1109 {
ff0dacd7 1110 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1111 continue;
1112 }
1113 ONE_MORE_BYTE (c3);
1114 if (! UTF_8_EXTRA_OCTET_P (c3))
1115 break;
1116 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1117 {
ff0dacd7 1118 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1119 continue;
1120 }
1121 ONE_MORE_BYTE (c4);
1122 if (! UTF_8_EXTRA_OCTET_P (c4))
1123 break;
1124 if (UTF_8_5_OCTET_LEADING_P (c))
1125 {
ff0dacd7 1126 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1127 continue;
1128 }
/* Neither a valid leading byte nor a sequence of at most 5 octets.  */
1129 break;
1130 }
/* Reached only by `break': a malformed sequence was seen.  */
ff0dacd7 1131 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1132 return 0;
1133
1134 no_more_source:
/* A sequence cut short is fatal only when no more input follows.  */
89528eb3
KH
1135 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1136 {
ff0dacd7 1137 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3
KH
1138 return 0;
1139 }
ff0dacd7
KH
1140 detect_info->found |= found;
1141 return 1;
df7492f9
KH
1142}
1143
1144
/* Decode the UTF-8 source of CODING, starting at coding->consumed,
   into coding->charbuf, one character code per element, until
   charbuf is full or the source runs out.

   A carriage return is handled by EOL_TYPE: with Qdos, a following
   '\n' replaces it (CODING_RESULT_INSUFFICIENT_SRC is recorded if the
   '\r' is the last source byte); with Qmac, it becomes '\n'.

   Sequences of 2 to 5 octets are accepted.  Overlong forms, the
   surrogate range 0xD800..0xDFFF, and 5-octet values above MAX_CHAR
   are rejected.  For a rejected sequence only its first byte is
   consumed: it is stored as itself if ASCII, else through
   BYTE8_TO_CHAR, and coding->errors is incremented.

   On exit coding->consumed, coding->consumed_char and
   coding->charbuf_used reflect the last completely decoded
   character.

   NOTE(review): ONE_MORE_BYTE is defined outside this excerpt;
   presumably it jumps to `no_more_source' when SRC reaches SRC_END
   -- confirm.  */
1145static void
1146decode_coding_utf_8 (coding)
1147 struct coding_system *coding;
1148{
1149 unsigned char *src = coding->source + coding->consumed;
1150 unsigned char *src_end = coding->source + coding->src_bytes;
1151 unsigned char *src_base;
1152 int *charbuf = coding->charbuf;
1153 int *charbuf_end = charbuf + coding->charbuf_size;
1154 int consumed_chars = 0, consumed_chars_base;
1155 int multibytep = coding->src_multibyte;
1156 Lisp_Object attr, eol_type, charset_list;
1157
1158 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1159
1160 while (1)
1161 {
1162 int c, c1, c2, c3, c4, c5;
1163
/* Remember where this character starts, so that an invalid or
   truncated sequence can be rewound.  */
1164 src_base = src;
1165 consumed_chars_base = consumed_chars;
1166
1167 if (charbuf >= charbuf_end)
1168 break;
1169
1170 ONE_MORE_BYTE (c1);
1171 if (UTF_8_1_OCTET_P(c1))
1172 {
1173 c = c1;
1174 if (c == '\r')
aa72b389 1175 {
df7492f9
KH
1176 if (EQ (eol_type, Qdos))
1177 {
1178 if (src == src_end)
98725083
KH
1179 {
1180 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1181 goto no_more_source;
1182 }
df7492f9
KH
1183 if (*src == '\n')
1184 ONE_MORE_BYTE (c);
1185 }
1186 else if (EQ (eol_type, Qmac))
1187 c = '\n';
aa72b389 1188 }
aa72b389 1189 }
df7492f9 1190 else
aa72b389 1191 {
df7492f9
KH
1192 ONE_MORE_BYTE (c2);
1193 if (! UTF_8_EXTRA_OCTET_P (c2))
1194 goto invalid_code;
1195 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1196 {
1197 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1198 /* Reject overlong sequences here and below. Encoders
1199 producing them are incorrect, they can be misleading,
1200 and they mess up read/write invariance. */
1201 if (c < 128)
1202 goto invalid_code;
1203 }
df7492f9 1204 else
aa72b389 1205 {
df7492f9
KH
1206 ONE_MORE_BYTE (c3);
1207 if (! UTF_8_EXTRA_OCTET_P (c3))
1208 goto invalid_code;
1209 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1210 {
1211 c = (((c1 & 0xF) << 12)
1212 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1213 if (c < 0x800
1214 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1215 goto invalid_code;
1216 }
df7492f9
KH
1217 else
1218 {
1219 ONE_MORE_BYTE (c4);
1220 if (! UTF_8_EXTRA_OCTET_P (c4))
1221 goto invalid_code;
1222 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1223 {
df7492f9
KH
1224 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1225 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1226 if (c < 0x10000)
1227 goto invalid_code;
1228 }
df7492f9
KH
1229 else
1230 {
1231 ONE_MORE_BYTE (c5);
1232 if (! UTF_8_EXTRA_OCTET_P (c5))
1233 goto invalid_code;
1234 if (UTF_8_5_OCTET_LEADING_P (c1))
1235 {
1236 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1237 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1238 | (c5 & 0x3F));
b0edb2c5 1239 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1240 goto invalid_code;
1241 }
1242 else
1243 goto invalid_code;
1244 }
1245 }
aa72b389 1246 }
aa72b389 1247 }
df7492f9
KH
1248
1249 *charbuf++ = c;
1250 continue;
1251
1252 invalid_code:
/* Rewind to the start of the sequence and pass through just its
   first byte; the following bytes are decoded afresh.  */
1253 src = src_base;
1254 consumed_chars = consumed_chars_base;
1255 ONE_MORE_BYTE (c);
1256 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1257 coding->errors++;
aa72b389
KH
1258 }
1259
df7492f9
KH
1260 no_more_source:
/* Report progress up to the start of the last incomplete
   character only.  */
1261 coding->consumed_char += consumed_chars_base;
1262 coding->consumed = src_base - coding->source;
1263 coding->charbuf_used = charbuf - coding->charbuf;
1264}
1265
1266
1267static int
1268encode_coding_utf_8 (coding)
1269 struct coding_system *coding;
1270{
1271 int multibytep = coding->dst_multibyte;
1272 int *charbuf = coding->charbuf;
1273 int *charbuf_end = charbuf + coding->charbuf_used;
1274 unsigned char *dst = coding->destination + coding->produced;
1275 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1276 int produced_chars = 0;
df7492f9
KH
1277 int c;
1278
1279 if (multibytep)
aa72b389 1280 {
df7492f9
KH
1281 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1282
1283 while (charbuf < charbuf_end)
aa72b389 1284 {
df7492f9
KH
1285 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1286
1287 ASSURE_DESTINATION (safe_room);
1288 c = *charbuf++;
28f67a95
KH
1289 if (CHAR_BYTE8_P (c))
1290 {
1291 c = CHAR_TO_BYTE8 (c);
1292 EMIT_ONE_BYTE (c);
1293 }
1294 else
1295 {
1296 CHAR_STRING_ADVANCE (c, pend);
1297 for (p = str; p < pend; p++)
1298 EMIT_ONE_BYTE (*p);
1299 }
aa72b389 1300 }
aa72b389 1301 }
df7492f9
KH
1302 else
1303 {
1304 int safe_room = MAX_MULTIBYTE_LENGTH;
1305
1306 while (charbuf < charbuf_end)
1307 {
1308 ASSURE_DESTINATION (safe_room);
1309 c = *charbuf++;
1310 dst += CHAR_STRING (c, dst);
1311 produced_chars++;
1312 }
1313 }
1314 coding->result = CODING_RESULT_SUCCESS;
1315 coding->produced_char += produced_chars;
1316 coding->produced = dst - coding->destination;
1317 return 0;
aa72b389
KH
1318}
1319
4ed46869 1320
df7492f9 1321/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1322 Check if a text is encoded in one of UTF-16 based coding systems.
1323 If it is, return 1, else return 0. */
df7492f9
KH
1324
/* Classification of a 16-bit UTF-16 code unit VAL.

   High surrogates are 0xD800..0xDBFF and low surrogates are
   0xDC00..0xDFFF; both are recognized by their top six bits.
   UTF_16_INVALID_P holds for 0xFFFE, for 0xFFFF, and for any low
   surrogate.  */

#define UTF_16_SURROGATE_BITS(val) ((val) & 0xFC00)

#define UTF_16_HIGH_SURROGATE_P(val) \
  (UTF_16_SURROGATE_BITS (val) == 0xD800)

#define UTF_16_LOW_SURROGATE_P(val) \
  (UTF_16_SURROGATE_BITS (val) == 0xDC00)

#define UTF_16_INVALID_P(val) \
  ((0xFFFE == (val)) \
   || (0xFFFF == (val)) \
   || UTF_16_LOW_SURROGATE_P (val))
1335
1336
1337static int
ff0dacd7 1338detect_coding_utf_16 (coding, detect_info)
b73bfc1c 1339 struct coding_system *coding;
ff0dacd7 1340 struct coding_detection_info *detect_info;
b73bfc1c 1341{
df7492f9
KH
1342 unsigned char *src = coding->source, *src_base = src;
1343 unsigned char *src_end = coding->source + coding->src_bytes;
1344 int multibytep = coding->src_multibyte;
1345 int consumed_chars = 0;
1346 int c1, c2;
1347
ff0dacd7 1348 detect_info->checked |= CATEGORY_MASK_UTF_16;
89528eb3 1349
ff0dacd7
KH
1350 if (coding->mode & CODING_MODE_LAST_BLOCK
1351 && (coding->src_bytes & 1))
1352 {
1353 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1354 return 0;
1355 }
df7492f9
KH
1356 ONE_MORE_BYTE (c1);
1357 ONE_MORE_BYTE (c2);
4ed46869 1358
df7492f9 1359 if ((c1 == 0xFF) && (c2 == 0xFE))
ff0dacd7 1360 {
b49a1807
KH
1361 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1362 | CATEGORY_MASK_UTF_16_AUTO);
ff0dacd7
KH
1363 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1364 }
df7492f9 1365 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1366 {
b49a1807
KH
1367 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1368 | CATEGORY_MASK_UTF_16_AUTO);
ff0dacd7
KH
1369 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1370 }
df7492f9 1371 no_more_source:
ff0dacd7 1372 return 1;
df7492f9 1373}
ec6d2bb8 1374
df7492f9
KH
1375static void
1376decode_coding_utf_16 (coding)
1377 struct coding_system *coding;
1378{
1379 unsigned char *src = coding->source + coding->consumed;
1380 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1381 unsigned char *src_base;
df7492f9
KH
1382 int *charbuf = coding->charbuf;
1383 int *charbuf_end = charbuf + coding->charbuf_size;
1384 int consumed_chars = 0, consumed_chars_base;
1385 int multibytep = coding->src_multibyte;
1386 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1387 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1388 int surrogate = CODING_UTF_16_SURROGATE (coding);
1389 Lisp_Object attr, eol_type, charset_list;
1390
1391 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1392
b49a1807 1393 if (bom == utf_16_with_bom)
df7492f9
KH
1394 {
1395 int c, c1, c2;
4af310db 1396
df7492f9
KH
1397 src_base = src;
1398 ONE_MORE_BYTE (c1);
1399 ONE_MORE_BYTE (c2);
e19c3639 1400 c = (c1 << 8) | c2;
b49a1807
KH
1401
1402 if (endian == utf_16_big_endian
1403 ? c != 0xFEFF : c != 0xFFFE)
4af310db 1404 {
b49a1807
KH
1405 /* The first two bytes are not BOM. Treat them as bytes
1406 for a normal character. */
1407 src = src_base;
1408 coding->errors++;
4af310db 1409 }
b49a1807
KH
1410 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1411 }
1412 else if (bom == utf_16_detect_bom)
1413 {
1414 /* We have already tried to detect BOM and failed in
1415 detect_coding. */
1416 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9
KH
1417 }
1418
1419 while (1)
1420 {
1421 int c, c1, c2;
1422
1423 src_base = src;
1424 consumed_chars_base = consumed_chars;
1425
1426 if (charbuf + 2 >= charbuf_end)
1427 break;
1428
1429 ONE_MORE_BYTE (c1);
1430 ONE_MORE_BYTE (c2);
1431 c = (endian == utf_16_big_endian
e19c3639 1432 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1433 if (surrogate)
aa72b389 1434 {
df7492f9 1435 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1436 {
df7492f9
KH
1437 if (endian == utf_16_big_endian)
1438 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1439 else
1440 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1441 *charbuf++ = c1;
1442 *charbuf++ = c2;
1443 coding->errors++;
1444 if (UTF_16_HIGH_SURROGATE_P (c))
1445 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1446 else
1447 *charbuf++ = c;
aa72b389 1448 }
df7492f9
KH
1449 else
1450 {
1451 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1452 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1453 *charbuf++ = c;
1454 }
1455 }
1456 else
1457 {
1458 if (UTF_16_HIGH_SURROGATE_P (c))
1459 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1460 else
1461 *charbuf++ = c;
1462 }
1463 }
1464
1465 no_more_source:
1466 coding->consumed_char += consumed_chars_base;
1467 coding->consumed = src_base - coding->source;
1468 coding->charbuf_used = charbuf - coding->charbuf;
1469}
1470
1471static int
1472encode_coding_utf_16 (coding)
1473 struct coding_system *coding;
1474{
1475 int multibytep = coding->dst_multibyte;
1476 int *charbuf = coding->charbuf;
1477 int *charbuf_end = charbuf + coding->charbuf_used;
1478 unsigned char *dst = coding->destination + coding->produced;
1479 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1480 int safe_room = 8;
1481 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1482 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1483 int produced_chars = 0;
1484 Lisp_Object attrs, eol_type, charset_list;
1485 int c;
1486
1487 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1488
b49a1807 1489 if (bom != utf_16_without_bom)
df7492f9
KH
1490 {
1491 ASSURE_DESTINATION (safe_room);
1492 if (big_endian)
df7492f9 1493 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1494 else
1495 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1496 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1497 }
1498
1499 while (charbuf < charbuf_end)
1500 {
1501 ASSURE_DESTINATION (safe_room);
1502 c = *charbuf++;
e19c3639
KH
1503 if (c >= MAX_UNICODE_CHAR)
1504 c = coding->default_char;
df7492f9
KH
1505
1506 if (c < 0x10000)
1507 {
1508 if (big_endian)
1509 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1510 else
1511 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1512 }
1513 else
1514 {
1515 int c1, c2;
1516
1517 c -= 0x10000;
1518 c1 = (c >> 10) + 0xD800;
1519 c2 = (c & 0x3FF) + 0xDC00;
1520 if (big_endian)
1521 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1522 else
1523 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1524 }
1525 }
1526 coding->result = CODING_RESULT_SUCCESS;
1527 coding->produced = dst - coding->destination;
1528 coding->produced_char += produced_chars;
1529 return 0;
1530}
1531
1532\f
1533/*** 6. Old Emacs' internal format (emacs-mule) ***/
1534
1535/* Emacs' internal format for representation of multiple character
1536 sets is a kind of multi-byte encoding, i.e. characters are
1537 represented by variable-length sequences of one-byte codes.
1538
1539 ASCII characters and control characters (e.g. `tab', `newline') are
1540 represented by one-byte sequences which are their ASCII codes, in
1541 the range 0x00 through 0x7F.
1542
1543 8-bit characters of the range 0x80..0x9F are represented by
1544 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1545 code + 0x20).
1546
1547 8-bit characters of the range 0xA0..0xFF are represented by
1548 one-byte sequences which are their 8-bit code.
1549
1550 The other characters are represented by a sequence of `base
1551 leading-code', optional `extended leading-code', and one or two
1552 `position-code's. The length of the sequence is determined by the
1553 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1554 whereas extended leading-code and position-code take the range 0xA0
1555 through 0xFF. See `charset.h' for more details about leading-code
1556 and position-code.
1557
1558 --- CODE RANGE of Emacs' internal format ---
1559 character set range
1560 ------------- -----
1561 ascii 0x00..0x7F
1562 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1563 eight-bit-graphic 0xA0..0xBF
1564 ELSE 0x81..0x9D + [0xA0..0xFF]+
1565 ---------------------------------------------
1566
1567 As this is the internal character representation, the format is
1568 usually not used externally (i.e. in a file or in a data sent to a
1569 process). But, it is possible to have a text externally in this
1570 format (i.e. by encoding by the coding system `emacs-mule').
1571
1572 In that case, a sequence of one-byte codes has a slightly different
1573 form.
1574
1575 At first, all characters in eight-bit-control are represented by
1576 one-byte sequences which are their 8-bit code.
1577
1578 Next, character composition data are represented by the byte
1579 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1580 where,
1581 METHOD is 0xF2 plus one of composition method (enum
1582 composition_method),
1583
1584 BYTES is 0xA0 plus a byte length of this composition data,
1585
1586 CHARS is 0xA0 plus a number of characters composed by this
1587 data,
1588
1589 COMPONENTs are characters of multibyte form or composition
1590 rules encoded by two-byte of ASCII codes.
1591
1592 In addition, for backward compatibility, the following formats are
1593 also recognized as composition data on decoding.
1594
1595 0x80 MSEQ ...
1596 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1597
1598 Here,
1599 MSEQ is a multibyte form but in these special format:
1600 ASCII: 0xA0 ASCII_CODE+0x80,
1601 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1602 RULE is a one byte code of the range 0xA0..0xF0 that
1603 represents a composition rule.
1604 */
1605
/* Table indexed by the first byte of an emacs-mule sequence.
   emacs_mule_char switches on the entry (1, 2, 3 or 4; it aborts on
   any other value) to choose how the sequence is laid out, and
   detect_coding_emacs_mule compares the entry with the byte length
   of each sequence it scans.  */
1606char emacs_mule_bytes[256];
1607
/* Decode the emacs-mule byte sequence at SRC, which lies in the
   source of CODING, and return the character.

   On success, store the byte length of the sequence in *NBYTES and
   the number of consumed characters in *NCHARS, and, if ID is not
   NULL, the id of the character's charset in *ID.

   Return -1 if the sequence names no known charset or DECODE_CHAR
   yields a negative value.  Return -2 from the `no_more_source'
   label.

   NOTE(review): ONE_MORE_BYTE is defined outside this excerpt;
   presumably it jumps to `no_more_source' when the source is
   exhausted -- confirm.  */
df7492f9 1608int
ff0dacd7 1609emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1610 struct coding_system *coding;
781d7a48 1611 unsigned char *src;
ff0dacd7 1612 int *nbytes, *nchars, *id;
df7492f9 1613{
df7492f9
KH
1614 unsigned char *src_end = coding->source + coding->src_bytes;
1615 int multibytep = coding->src_multibyte;
1616 unsigned char *src_base = src;
1617 struct charset *charset;
1618 unsigned code;
1619 int c;
1620 int consumed_chars = 0;
1621
1622 ONE_MORE_BYTE (c);
/* The table entry for the first byte selects the sequence layout.  */
df7492f9
KH
1623 switch (emacs_mule_bytes[c])
1624 {
1625 case 2:
/* Leading byte (selects the charset), then one code byte.  */
1626 if (! (charset = emacs_mule_charset[c]))
1627 goto invalid_code;
1628 ONE_MORE_BYTE (c);
1629 code = c & 0x7F;
1630 break;
1631
1632 case 3:
7c78e542
KH
1633 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1634 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
b73bfc1c 1635 {
/* Private leading code: the next byte selects the charset, then
   one code byte.  */
df7492f9
KH
1636 ONE_MORE_BYTE (c);
1637 if (! (charset = emacs_mule_charset[c]))
1638 goto invalid_code;
1639 ONE_MORE_BYTE (c);
1640 code = c & 0x7F;
b73bfc1c
KH
1641 }
1642 else
1643 {
/* Leading byte selects the charset, then two code bytes.  */
df7492f9
KH
1644 if (! (charset = emacs_mule_charset[c]))
1645 goto invalid_code;
1646 ONE_MORE_BYTE (c);
781d7a48 1647 code = (c & 0x7F) << 8;
df7492f9
KH
1648 ONE_MORE_BYTE (c);
1649 code |= c & 0x7F;
1650 }
1651 break;
1652
1653 case 4:
/* The first byte is skipped; the next byte selects the charset,
   then two code bytes.  */
781d7a48 1654 ONE_MORE_BYTE (c);
df7492f9
KH
1655 if (! (charset = emacs_mule_charset[c]))
1656 goto invalid_code;
1657 ONE_MORE_BYTE (c);
781d7a48 1658 code = (c & 0x7F) << 8;
df7492f9
KH
1659 ONE_MORE_BYTE (c);
1660 code |= c & 0x7F;
1661 break;
1662
1663 case 1:
/* A single byte: ASCII, or else a member of charset_eight_bit.  */
1664 code = c;
9d123124
KH
1665 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1666 ? charset_ascii : charset_eight_bit);
df7492f9
KH
1667 break;
1668
1669 default:
1670 abort ();
1671 }
1672 c = DECODE_CHAR (charset, code);
1673 if (c < 0)
1674 goto invalid_code;
1675 *nbytes = src - src_base;
1676 *nchars = consumed_chars;
ff0dacd7
KH
1677 if (id)
1678 *id = charset->id;
df7492f9
KH
1679 return c;
1680
1681 no_more_source:
1682 return -2;
1683
1684 invalid_code:
1685 return -1;
1686}
1688
1689/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1690 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1691 else return 0. */
df7492f9
KH
1692
/* Scan the source of CODING and decide whether it can be emacs-mule.

   CATEGORY_MASK_EMACS_MULE is always added to DETECT_INFO->checked.
   The first coding->head_ascii bytes are skipped.

   Return 0 and add CATEGORY_MASK_EMACS_MULE to DETECT_INFO->rejected
   if the text contains ISO_CODE_ESC, ISO_CODE_SI or ISO_CODE_SO, a
   composition sequence that is too short, or a multibyte sequence
   whose length disagrees with emacs_mule_bytes; or if the source
   ends after a byte has been read in the current round while
   CODING_MODE_LAST_BLOCK is set.  Otherwise return 1, adding
   CATEGORY_MASK_EMACS_MULE to DETECT_INFO->found only if at least
   one composition or multibyte sequence was seen.

   NOTE(review): ONE_MORE_BYTE is defined outside this excerpt;
   presumably it jumps to `no_more_source' when the source is
   exhausted -- confirm.  */
1693static int
ff0dacd7 1694detect_coding_emacs_mule (coding, detect_info)
df7492f9 1695 struct coding_system *coding;
ff0dacd7 1696 struct coding_detection_info *detect_info;
df7492f9
KH
1697{
1698 unsigned char *src = coding->source, *src_base = src;
1699 unsigned char *src_end = coding->source + coding->src_bytes;
1700 int multibytep = coding->src_multibyte;
1701 int consumed_chars = 0;
1702 int c;
1703 int found = 0;
89528eb3 1704 int incomplete;
df7492f9 1705
ff0dacd7 1706 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1707 /* A coding system of this category is always ASCII compatible. */
1708 src += coding->head_ascii;
1709
1710 while (1)
1711 {
89528eb3 1712 incomplete = 0;
df7492f9 1713 ONE_MORE_BYTE (c);
89528eb3 1714 incomplete = 1;
df7492f9
KH
1715
1716 if (c == 0x80)
1717 {
1718 /* Perhaps the start of composite character. We simple skip
1719 it because analyzing it is too heavy for detecting. But,
1720 at least, we check that the composite character
1721 constitues of more than 4 bytes. */
1722 unsigned char *src_base;
1723
1724 repeat:
1725 src_base = src;
1726 do
1727 {
1728 ONE_MORE_BYTE (c);
1729 }
1730 while (c >= 0xA0);
1731
1732 if (src - src_base <= 4)
1733 break;
ff0dacd7 1734 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1735 if (c == 0x80)
1736 goto repeat;
b73bfc1c 1737 }
df7492f9
KH
1738
/* C is now the byte that ended the composition, if there was one,
   or else the byte read at the top of the loop.  */
1739 if (c < 0x80)
b73bfc1c 1740 {
df7492f9
KH
1741 if (c < 0x20
1742 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1743 break;
1744 }
1745 else
1746 {
1747 unsigned char *src_base = src - 1;
1748
/* Skip the bytes >= 0xA0 that follow; the byte that stops the scan
   has been consumed too and is included in the length test below.  */
1749 do
1750 {
1751 ONE_MORE_BYTE (c);
1752 }
1753 while (c >= 0xA0);
1754 if (src - src_base != emacs_mule_bytes[*src_base])
1755 break;
ff0dacd7 1756 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1757 }
1758 }
/* Reached only by `break': the text cannot be emacs-mule.  */
ff0dacd7 1759 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1760 return 0;
1761
1762 no_more_source:
89528eb3
KH
1763 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1764 {
ff0dacd7 1765 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1766 return 0;
1767 }
ff0dacd7
KH
1768 detect_info->found |= found;
1769 return 1;
4ed46869
KH
1770}
1771
b73bfc1c 1772
df7492f9
KH
1773/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1774
1775/* Decode one character of a composition sequence (Emacs 20/21
1776 style) at SRC with emacs_mule_char, store it at *BUF and increment
1777 BUF, then advance SRC and CONSUMED_CHARS past it. If SRC is at
1778 SRC_END, or emacs_mule_char returns -2, `break' out of the
1779 innermost loop enclosing the expansion instead. On any other
1780 negative result, jump to the label `invalid_code'.

   The expansion is an `if (1) { ... } else' rather than a
   `do ... while (0)', so that the `break' inside it leaves the loop
   at the expansion site, not a loop of the macro's own. */
1781
1782#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1783 if (1) \
1784 { \
1785 int c; \
1786 int nbytes, nchars; \
1787 \
1788 if (src == src_end) \
1789 break; \
ff0dacd7 1790 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
df7492f9
KH
1791 if (c < 0) \
1792 { \
1793 if (c == -2) \
1794 break; \
1795 goto invalid_code; \
1796 } \
1797 *buf++ = c; \
1798 src += nbytes; \
1799 consumed_chars += nchars; \
1800 } \
1801 else
1802
1803
1804/* Decode a composition rule represented as a component of composition
781d7a48
KH
1805 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1806 and increment BUF. If SRC points an invalid byte sequence, set C
1807 to -1. */
df7492f9 1808
781d7a48 1809#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
1810 do { \
1811 int c, gref, nref; \
1812 \
781d7a48 1813 if (src >= src_end) \
df7492f9
KH
1814 goto invalid_code; \
1815 ONE_MORE_BYTE_NO_CHECK (c); \
781d7a48 1816 c -= 0x20; \
df7492f9
KH
1817 if (c < 0 || c >= 81) \
1818 goto invalid_code; \
1819 \
1820 gref = c / 9, nref = c % 9; \
1821 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1822 } while (0)
1823
1824
781d7a48
KH
1825/* Decode a composition rule represented as a component of composition
1826 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1827 and increment BUF. If fewer than two bytes remain before SRC_END,
1828 or either byte minus 0x20 is outside 0..80, jump to the label
   `invalid_code'.

   The rule occupies two bytes: GREF plus 0x20, then NREF plus 0x20.
   They are encoded with COMPOSITION_ENCODE_RULE. */
1829
1830#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1831 do { \
1832 int gref, nref; \
1833 \
1834 if (src + 1>= src_end) \
1835 goto invalid_code; \
1836 ONE_MORE_BYTE_NO_CHECK (gref); \
1837 gref -= 0x20; \
1838 ONE_MORE_BYTE_NO_CHECK (nref); \
1839 nref -= 0x20; \
1840 if (gref < 0 || gref >= 81 \
1841 || nref < 0 || nref >= 81) \
1842 goto invalid_code; \
1843 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1844 } while (0)
1845
1846
/* Decode the header of an Emacs 21 style composition whose METHOD
   byte is already in C, and append a composition annotation to
   CHARBUF.

   The BYTES and CHARS bytes are read next; a BYTES value below 3
   jumps to `invalid_code'.  ADD_COMPOSITION_DATA then stores the
   annotation header.  For every method except COMPOSITION_RELATIVE,
   components are decoded into CHARBUF until CONSUMED_CHARS reaches
   CONSUMED_CHARS_BASE + BYTES: characters at even positions, and at
   odd positions either rules or (for COMPOSITION_WITH_ALTCHARS)
   characters.  The component count is finally subtracted from the
   first annotation element, which holds the negated length.

   The expansion site must provide `coding', `src', `src_end',
   `charbuf', `char_offset', `consumed_chars', `consumed_chars_base'
   and the label `invalid_code'.

   NOTE(review): FROM is computed from coding->produced here, whereas
   the Emacs 20 style macros use coding->produced_char -- confirm
   which is intended.  */
df7492f9 1847#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 1848 do { \
df7492f9 1849 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 1850 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
1851 the byte length of this composition information, CHARS is the \
1852 number of characters composed by this composition. */ \
781d7a48
KH
1853 enum composition_method method = c - 0xF2; \
1854 int *charbuf_base = charbuf; \
ff0dacd7 1855 int from, to; \
df7492f9
KH
1856 int consumed_chars_limit; \
1857 int nbytes, nchars; \
1858 \
1859 ONE_MORE_BYTE (c); \
1860 nbytes = c - 0xA0; \
1861 if (nbytes < 3) \
1862 goto invalid_code; \
1863 ONE_MORE_BYTE (c); \
1864 nchars = c - 0xA0; \
ff0dacd7
KH
1865 from = coding->produced + char_offset; \
1866 to = from + nchars; \
1867 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
df7492f9
KH
1868 consumed_chars_limit = consumed_chars_base + nbytes; \
1869 if (method != COMPOSITION_RELATIVE) \
aa72b389 1870 { \
df7492f9
KH
1871 int i = 0; \
1872 while (consumed_chars < consumed_chars_limit) \
aa72b389 1873 { \
df7492f9 1874 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 1875 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
1876 else \
1877 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 1878 i++; \
aa72b389 1879 } \
df7492f9
KH
1880 if (consumed_chars < consumed_chars_limit) \
1881 goto invalid_code; \
781d7a48 1882 charbuf_base[0] -= i; \
aa72b389
KH
1883 } \
1884 } while (0)
93dec019 1885
aa72b389 1886
df7492f9
KH
/* Decode an Emacs 20 style relative composition.

   SRC is rewound to SRC_BASE and the leading 0x80 byte is skipped.
   Up to MAX_COMPOSITION_COMPONENTS characters are then decoded into
   a local array; fewer than two jumps to `invalid_code'.  A
   COMPOSITION_RELATIVE annotation covering those characters is
   appended to CHARBUF, followed by the characters themselves.

   The expansion site must provide `coding', `src', `src_base',
   `src_end', `charbuf', `char_offset', `consumed_chars' and the
   label `invalid_code'.  */
1887#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1888 do { \
1889 /* Emacs 20 style format for relative composition. */ \
1890 /* Store multibyte form of characters to be composed. */ \
ff0dacd7 1891 enum composition_method method = COMPOSITION_RELATIVE; \
df7492f9
KH
1892 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1893 int *buf = components; \
1894 int i, j; \
ff0dacd7 1895 int from, to; \
df7492f9
KH
1896 \
1897 src = src_base; \
1898 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1899 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1900 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1901 if (i < 2) \
1902 goto invalid_code; \
ff0dacd7
KH
1903 from = coding->produced_char + char_offset; \
1904 to = from + i; \
1905 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
df7492f9
KH
1906 for (j = 0; j < i; j++) \
1907 *charbuf++ = components[j]; \
1908 } while (0)
1909
1910
/* Decode an Emacs 20 style rule-base composition.  Characters and
   rules are collected alternately (CHAR RULE CHAR ... CHAR) in a
   local array; then a composition annotation, the collected
   components, and finally the composed characters themselves are
   appended to CHARBUF.

   Control goes to `invalid_code' when the collected sequence is
   malformed, and to `no_more_source' when CHARBUF lacks room.  C is
   scratch storage for the byte readers.

   NOTE(review): as in the relative variant, CHAR_OFFSET is not
   advanced for the characters stored here -- confirm whether the
   callers expect that.  */
#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)			\
  do {									\
    enum composition_method method = COMPOSITION_WITH_RULE;		\
    int *charbuf_base = charbuf;					\
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];			\
    int *buf = components;						\
    int i, j;								\
    int ncomponents;							\
    int from, to;							\
									\
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);				\
    /* Start at 1: the first character is already stored, and		\
       COMPONENTS only has room for MAX characters plus MAX - 1		\
       rules.  */							\
    for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)			\
      {									\
	DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);			\
	DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);			\
      }									\
    /* I is now the number of characters; NCOMPONENTS counts		\
       characters and rules together and so must be odd.  */		\
    ncomponents = buf - components;					\
    if (i < 2 || ncomponents % 2 == 0)					\
      goto invalid_code;						\
    /* Room is needed for every component plus every character.  */	\
    if (charbuf + ncomponents + (ncomponents / 2) + 1 > charbuf_end)	\
      goto no_more_source;						\
    from = coding->produced_char + char_offset;				\
    to = from + i;							\
    ADD_COMPOSITION_DATA (charbuf, from, to, method);			\
    for (j = 0; j < ncomponents; j++)					\
      *charbuf++ = components[j];					\
    /* Extend the annotation length over the components, as the		\
       Emacs 21 format decoder does.  */				\
    charbuf_base[0] -= ncomponents;					\
    for (j = 0; j < ncomponents; j += 2)				\
      *charbuf++ = components[j];					\
  } while (0)
1939
aa72b389
KH
1940
1941static void
df7492f9 1942decode_coding_emacs_mule (coding)
aa72b389 1943 struct coding_system *coding;
aa72b389 1944{
df7492f9
KH
1945 unsigned char *src = coding->source + coding->consumed;
1946 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1947 unsigned char *src_base;
df7492f9 1948 int *charbuf = coding->charbuf;
ff0dacd7 1949 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 1950 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
1951 int multibytep = coding->src_multibyte;
1952 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
1953 int char_offset = coding->produced_char;
1954 int last_offset = char_offset;
1955 int last_id = charset_ascii;
aa72b389 1956
df7492f9 1957 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1958
aa72b389
KH
1959 while (1)
1960 {
df7492f9
KH
1961 int c;
1962
aa72b389 1963 src_base = src;
df7492f9
KH
1964 consumed_chars_base = consumed_chars;
1965
1966 if (charbuf >= charbuf_end)
1967 break;
aa72b389 1968
df7492f9
KH
1969 ONE_MORE_BYTE (c);
1970
1971 if (c < 0x80)
aa72b389 1972 {
df7492f9
KH
1973 if (c == '\r')
1974 {
1975 if (EQ (eol_type, Qdos))
1976 {
1977 if (src == src_end)
98725083
KH
1978 {
1979 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1980 goto no_more_source;
1981 }
df7492f9
KH
1982 if (*src == '\n')
1983 ONE_MORE_BYTE (c);
1984 }
1985 else if (EQ (eol_type, Qmac))
1986 c = '\n';
1987 }
1988 *charbuf++ = c;
1989 char_offset++;
aa72b389 1990 }
df7492f9
KH
1991 else if (c == 0x80)
1992 {
df7492f9 1993 ONE_MORE_BYTE (c);
781d7a48
KH
1994 if (c - 0xF2 >= COMPOSITION_RELATIVE
1995 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1996 DECODE_EMACS_MULE_21_COMPOSITION (c);
1997 else if (c < 0xC0)
1998 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1999 else if (c == 0xFF)
2000 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2001 else
2002 goto invalid_code;
2003 }
2004 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2005 {
2006 int nbytes, nchars;
ff0dacd7
KH
2007 int id;
2008
781d7a48
KH
2009 src = src_base;
2010 consumed_chars = consumed_chars_base;
ff0dacd7 2011 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2012 if (c < 0)
2013 {
2014 if (c == -2)
2015 break;
2016 goto invalid_code;
2017 }
ff0dacd7
KH
2018 if (last_id != id)
2019 {
2020 if (last_id != charset_ascii)
2021 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2022 last_id = id;
2023 last_offset = char_offset;
2024 }
df7492f9 2025 *charbuf++ = c;
781d7a48
KH
2026 src += nbytes;
2027 consumed_chars += nchars;
df7492f9
KH
2028 char_offset++;
2029 }
2030 continue;
2031
2032 invalid_code:
2033 src = src_base;
2034 consumed_chars = consumed_chars_base;
2035 ONE_MORE_BYTE (c);
2036 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2037 char_offset++;
df7492f9
KH
2038 coding->errors++;
2039 }
2040
2041 no_more_source:
ff0dacd7
KH
2042 if (last_id != charset_ascii)
2043 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
2044 coding->consumed_char += consumed_chars_base;
2045 coding->consumed = src_base - coding->source;
2046 coding->charbuf_used = charbuf - coding->charbuf;
2047}
2048
2049
/* Store in CODES[0] and CODES[1] the leading code(s) for the
   emacs-mule charset ID.  An ID below 0xA0 is its own single leading
   code, and CODES[1] is set to 0.  A larger ID is preceded by 0x9A,
   0x9B, 0x9C or 0x9D, selected by the range ID falls in, and is
   itself stored in CODES[1].

   ID and CODES may be evaluated more than once.  The expansion is a
   single statement without a trailing semicolon, so the macro can be
   used as the body of an `if' that has an `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)	\
  do {						\
    if ((id) < 0xA0)				\
      (codes)[0] = (id), (codes)[1] = 0;	\
    else if ((id) < 0xE0)			\
      (codes)[0] = 0x9A, (codes)[1] = (id);	\
    else if ((id) < 0xF0)			\
      (codes)[0] = 0x9B, (codes)[1] = (id);	\
    else if ((id) < 0xF5)			\
      (codes)[0] = 0x9C, (codes)[1] = (id);	\
    else					\
      (codes)[0] = 0x9D, (codes)[1] = (id);	\
  } while (0)
2063
aa72b389 2064
df7492f9
KH
2065static int
2066encode_coding_emacs_mule (coding)
2067 struct coding_system *coding;
2068{
2069 int multibytep = coding->dst_multibyte;
2070 int *charbuf = coding->charbuf;
2071 int *charbuf_end = charbuf + coding->charbuf_used;
2072 unsigned char *dst = coding->destination + coding->produced;
2073 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2074 int safe_room = 8;
df7492f9
KH
2075 int produced_chars = 0;
2076 Lisp_Object attrs, eol_type, charset_list;
2077 int c;
ff0dacd7 2078 int preferred_charset_id = -1;
df7492f9
KH
2079
2080 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2081
2082 while (charbuf < charbuf_end)
2083 {
2084 ASSURE_DESTINATION (safe_room);
2085 c = *charbuf++;
ff0dacd7
KH
2086
2087 if (c < 0)
2088 {
2089 /* Handle an annotation. */
2090 switch (*charbuf)
2091 {
2092 case CODING_ANNOTATE_COMPOSITION_MASK:
2093 /* Not yet implemented. */
2094 break;
2095 case CODING_ANNOTATE_CHARSET_MASK:
2096 preferred_charset_id = charbuf[3];
2097 if (preferred_charset_id >= 0
2098 && NILP (Fmemq (make_number (preferred_charset_id),
2099 charset_list)))
2100 preferred_charset_id = -1;
2101 break;
2102 default:
2103 abort ();
2104 }
2105 charbuf += -c - 1;
2106 continue;
2107 }
2108
df7492f9
KH
2109 if (ASCII_CHAR_P (c))
2110 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2111 else if (CHAR_BYTE8_P (c))
2112 {
2113 c = CHAR_TO_BYTE8 (c);
2114 EMIT_ONE_BYTE (c);
2115 }
df7492f9 2116 else
aa72b389 2117 {
df7492f9
KH
2118 struct charset *charset;
2119 unsigned code;
2120 int dimension;
2121 int emacs_mule_id;
2122 unsigned char leading_codes[2];
2123
ff0dacd7
KH
2124 if (preferred_charset_id >= 0)
2125 {
2126 charset = CHARSET_FROM_ID (preferred_charset_id);
2127 if (! CHAR_CHARSET_P (c, charset))
2128 charset = char_charset (c, charset_list, NULL);
2129 }
2130 else
2131 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2132 if (! charset)
2133 {
2134 c = coding->default_char;
2135 if (ASCII_CHAR_P (c))
2136 {
2137 EMIT_ONE_ASCII_BYTE (c);
2138 continue;
2139 }
2140 charset = char_charset (c, charset_list, &code);
2141 }
2142 dimension = CHARSET_DIMENSION (charset);
2143 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2144 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2145 EMIT_ONE_BYTE (leading_codes[0]);
2146 if (leading_codes[1])
2147 EMIT_ONE_BYTE (leading_codes[1]);
2148 if (dimension == 1)
2149 EMIT_ONE_BYTE (code);
aa72b389 2150 else
df7492f9
KH
2151 {
2152 EMIT_ONE_BYTE (code >> 8);
2153 EMIT_ONE_BYTE (code & 0xFF);
2154 }
aa72b389 2155 }
aa72b389 2156 }
df7492f9
KH
2157 coding->result = CODING_RESULT_SUCCESS;
2158 coding->produced_char += produced_chars;
2159 coding->produced = dst - coding->destination;
2160 return 0;
aa72b389 2161}
b73bfc1c 2162
4ed46869 2163\f
df7492f9 2164/*** 7. ISO2022 handlers ***/
4ed46869
KH
2165
2166/* The following note describes the coding system ISO2022 briefly.
39787efd 2167 Since the intention of this note is to help understand the
5a936b46 2168 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2169 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46
DL
2170 original document of ISO2022. This is equivalent to the standard
2171 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2172
2173 ISO2022 provides many mechanisms to encode several character sets
5a936b46 2174 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2175 is encoded using bytes less than 128. This may make the encoded
2176 text a little bit longer, but the text passes more easily through
5a936b46
DL
2177 several types of gateway, some of which strip off the MSB (Most
2178 Significant Bit).
b73bfc1c 2179
5a936b46
DL
2180 There are two kinds of character sets: control character sets and
2181 graphic character sets. The former contain control characters such
4ed46869 2182 as `newline' and `escape' to provide control functions (control
39787efd 2183 functions are also provided by escape sequences). The latter
5a936b46 2184 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2185 two control character sets and many graphic character sets.
2186
2187 Graphic character sets are classified into one of the following
39787efd
KH
2188 four classes, according to the number of bytes (DIMENSION) and
2189 number of characters in one dimension (CHARS) of the set:
2190 - DIMENSION1_CHARS94
2191 - DIMENSION1_CHARS96
2192 - DIMENSION2_CHARS94
2193 - DIMENSION2_CHARS96
2194
2195 In addition, each character set is assigned an identification tag,
5a936b46 2196 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2197 hereafter). The <F> of each character set is decided by ECMA(*)
2198 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2199 (0x30..0x3F are for private use only).
4ed46869
KH
2200
2201 Note (*): ECMA = European Computer Manufacturers Association
2202
5a936b46 2203 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2204 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2205 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2206 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2207 o DIMENSION2_CHARS96 -- none for the moment
2208
39787efd 2209 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2210 C0 [0x00..0x1F] -- control character plane 0
2211 GL [0x20..0x7F] -- graphic character plane 0
2212 C1 [0x80..0x9F] -- control character plane 1
2213 GR [0xA0..0xFF] -- graphic character plane 1
2214
2215 A control character set is directly designated and invoked to C0 or
39787efd
KH
2216 C1 by an escape sequence. The most common case is that:
2217 - ISO646's control character set is designated/invoked to C0, and
2218 - ISO6429's control character set is designated/invoked to C1,
2219 and usually these designations/invocations are omitted in encoded
2220 text. In a 7-bit environment, only C0 can be used, and a control
2221 character for C1 is encoded by an appropriate escape sequence to
2222 fit into the environment. All control characters for C1 are
2223 defined to have corresponding escape sequences.
4ed46869
KH
2224
2225 A graphic character set is at first designated to one of four
2226 graphic registers (G0 through G3), then these graphic registers are
2227 invoked to GL or GR. These designations and invocations can be
2228 done independently. The most common case is that G0 is invoked to
39787efd
KH
2229 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2230 these invocations and designations are omitted in encoded text.
2231 In a 7-bit environment, only GL can be used.
4ed46869 2232
39787efd
KH
2233 When a graphic character set of CHARS94 is invoked to GL, codes
2234 0x20 and 0x7F of the GL area work as control characters SPACE and
2235 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2236 be used.
4ed46869
KH
2237
2238 There are two ways of invocation: locking-shift and single-shift.
2239 With locking-shift, the invocation lasts until the next different
39787efd
KH
2240 invocation, whereas with single-shift, the invocation affects the
2241 following character only and doesn't affect the locking-shift
2242 state. Invocations are done by the following control characters or
2243 escape sequences:
4ed46869
KH
2244
2245 ----------------------------------------------------------------------
39787efd 2246 abbrev function cntrl escape seq description
4ed46869 2247 ----------------------------------------------------------------------
39787efd
KH
2248 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2249 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2250 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2251 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2252 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2253 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2254 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2255 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2256 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2257 ----------------------------------------------------------------------
39787efd
KH
2258 (*) These are not used by any known coding system.
2259
2260 Control characters for these functions are defined by macros
2261 ISO_CODE_XXX in `coding.h'.
4ed46869 2262
39787efd 2263 Designations are done by the following escape sequences:
4ed46869
KH
2264 ----------------------------------------------------------------------
2265 escape sequence description
2266 ----------------------------------------------------------------------
2267 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2268 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2269 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2270 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2271 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2272 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2273 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2274 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2275 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2276 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2277 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2278 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2279 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2280 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2281 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2282 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2283 ----------------------------------------------------------------------
2284
2285 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2286 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2287
2288 Note (*): Although these designations are not allowed in ISO2022,
2289 Emacs accepts them on decoding, and produces them on encoding
39787efd 2290 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2291 7-bit environment, non-locking-shift, and non-single-shift.
2292
2293 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2294 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2295
5a936b46 2296 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2297 same multilingual text in ISO2022. Actually, there exist many
2298 coding systems such as Compound Text (used in X11's inter client
5a936b46
DL
2299 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2300 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2301 localized platforms), and all of these are variants of ISO2022.
2302
2303 In addition to the above, Emacs handles two more kinds of escape
2304 sequences: ISO6429's direction specification and Emacs' private
2305 sequence for specifying character composition.
2306
39787efd 2307 ISO6429's direction specification takes the following form:
4ed46869
KH
2308 o CSI ']' -- end of the current direction
2309 o CSI '0' ']' -- end of the current direction
2310 o CSI '1' ']' -- start of left-to-right text
2311 o CSI '2' ']' -- start of right-to-left text
2312 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2313 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2314
2315 Character composition specification takes the following form:
ec6d2bb8
KH
2316 o ESC '0' -- start relative composition
2317 o ESC '1' -- end composition
2318 o ESC '2' -- start rule-base composition (*)
2319 o ESC '3' -- start relative composition with alternate chars (**)
2320 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2321 Since these are not standard escape sequences of any ISO standard,
5a936b46 2322 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2323
5a936b46
DL
2324 (*) This form is used only in Emacs 20.7 and older versions,
2325 but newer versions can safely decode it.
2326 (**) This form is used only in Emacs 21.1 and newer versions,
2327 and older versions can't decode it.
ec6d2bb8 2328
5a936b46 2329 Here's a list of example usages of these composition escape
b73bfc1c 2330 sequences (categorized by `enum composition_method').
ec6d2bb8 2331
b73bfc1c 2332 COMPOSITION_RELATIVE:
ec6d2bb8 2333 ESC 0 CHAR [ CHAR ] ESC 1
5a936b46 2334 COMPOSITION_WITH_RULE:
ec6d2bb8 2335 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2336 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2337 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2338 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2339 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2340
/* Table of 256 `enum iso_code_class_type' values; presumably indexed
   by byte value -- TODO confirm against the code that fills it.  */
2341enum iso_code_class_type iso_code_class[256];
2342
df7492f9
KH
/* Non-zero if the charset ID may be used with CODING, i.e. ID is a
   valid index into CODING->safe_charsets and the entry there is a
   graphic register number rather than the 255 filler that marks an
   unsupported charset.  The entry is examined as an unsigned byte so
   that the result does not depend on whether plain `char' is signed.
   CODING and ID are evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)				\
  ((id) >= 0							\
   && (id) <= (coding)->max_charset_id				\
   && (unsigned char) (coding)->safe_charsets[id] < 0x80)
2346
2347
/* Non-zero unless the initial designation recorded for graphic
   register 1 of coding category CATEGORY is negative.  */
#define SHIFT_OUT_OK(category)	\
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2350
2351static void
f0064e1f
DL
2352setup_iso_safe_charsets (attrs)
2353 Lisp_Object attrs;
df7492f9
KH
2354{
2355 Lisp_Object charset_list, safe_charsets;
2356 Lisp_Object request;
2357 Lisp_Object reg_usage;
2358 Lisp_Object tail;
2359 int reg94, reg96;
2360 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2361 int max_charset_id;
2362
2363 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2364 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2365 && ! EQ (charset_list, Viso_2022_charset_list))
2366 {
2367 CODING_ATTR_CHARSET_LIST (attrs)
2368 = charset_list = Viso_2022_charset_list;
2369 ASET (attrs, coding_attr_safe_charsets, Qnil);
2370 }
2371
2372 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2373 return;
2374
2375 max_charset_id = 0;
2376 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2377 {
2378 int id = XINT (XCAR (tail));
2379 if (max_charset_id < id)
2380 max_charset_id = id;
2381 }
2382
2383 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2384 make_number (255));
2385 request = AREF (attrs, coding_attr_iso_request);
2386 reg_usage = AREF (attrs, coding_attr_iso_usage);
2387 reg94 = XINT (XCAR (reg_usage));
2388 reg96 = XINT (XCDR (reg_usage));
2389
2390 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2391 {
2392 Lisp_Object id;
2393 Lisp_Object reg;
2394 struct charset *charset;
2395
2396 id = XCAR (tail);
2397 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2398 reg = Fcdr (Fassq (id, request));
df7492f9
KH
2399 if (! NILP (reg))
2400 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2401 else if (charset->iso_chars_96)
2402 {
2403 if (reg96 < 4)
2404 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2405 }
2406 else
2407 {
2408 if (reg94 < 4)
2409 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2410 }
2411 }
2412 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2413}
d46c5b12 2414
d46c5b12 2415
4ed46869 2416/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2417 Check if a text is encoded in one of ISO-2022 based codig systems.
2418 If it is, return 1, else return 0. */
4ed46869 2419
0a28aafb 2420static int
ff0dacd7 2421detect_coding_iso_2022 (coding, detect_info)
df7492f9 2422 struct coding_system *coding;
ff0dacd7 2423 struct coding_detection_info *detect_info;
4ed46869 2424{
df7492f9
KH
2425 unsigned char *src = coding->source, *src_base = src;
2426 unsigned char *src_end = coding->source + coding->src_bytes;
2427 int multibytep = coding->src_multibyte;
ff0dacd7 2428 int single_shifting = 0;
df7492f9
KH
2429 int id;
2430 int c, c1;
2431 int consumed_chars = 0;
2432 int i;
ff0dacd7
KH
2433 int rejected = 0;
2434 int found = 0;
2435
2436 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2437
2438 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2439 {
2440 struct coding_system *this = &(coding_categories[i]);
2441 Lisp_Object attrs, val;
2442
2443 attrs = CODING_ID_ATTRS (this->id);
2444 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2445 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2446 setup_iso_safe_charsets (attrs);
2447 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2448 this->max_charset_id = XSTRING (val)->size - 1;
2449 this->safe_charsets = (char *) XSTRING (val)->data;
2450 }
2451
2452 /* A coding system of this category is always ASCII compatible. */
2453 src += coding->head_ascii;
3f003981 2454
ff0dacd7 2455 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2456 {
df7492f9 2457 ONE_MORE_BYTE (c);
4ed46869
KH
2458 switch (c)
2459 {
2460 case ISO_CODE_ESC:
74383408
KH
2461 if (inhibit_iso_escape_detection)
2462 break;
f46869e4 2463 single_shifting = 0;
df7492f9 2464 ONE_MORE_BYTE (c);
d46c5b12 2465 if (c >= '(' && c <= '/')
4ed46869 2466 {
bf9cdd4e 2467 /* Designation sequence for a charset of dimension 1. */
df7492f9 2468 ONE_MORE_BYTE (c1);
d46c5b12 2469 if (c1 < ' ' || c1 >= 0x80
df7492f9 2470 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2471 /* Invalid designation sequence. Just ignore. */
2472 break;
bf9cdd4e
KH
2473 }
2474 else if (c == '$')
2475 {
2476 /* Designation sequence for a charset of dimension 2. */
df7492f9 2477 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2478 if (c >= '@' && c <= 'B')
2479 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2480 id = iso_charset_table[1][0][c];
bf9cdd4e 2481 else if (c >= '(' && c <= '/')
bcf26d6a 2482 {
df7492f9 2483 ONE_MORE_BYTE (c1);
d46c5b12 2484 if (c1 < ' ' || c1 >= 0x80
df7492f9 2485 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2486 /* Invalid designation sequence. Just ignore. */
2487 break;
bcf26d6a 2488 }
bf9cdd4e 2489 else
ff0dacd7 2490 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2491 break;
2492 }
ae9ff118 2493 else if (c == 'N' || c == 'O')
d46c5b12 2494 {
ae9ff118 2495 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2496 single_shifting = 1;
2497 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2498 break;
4ed46869 2499 }
ec6d2bb8
KH
2500 else if (c >= '0' && c <= '4')
2501 {
2502 /* ESC <Fp> for start/end composition. */
ff0dacd7 2503 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2504 break;
2505 }
bf9cdd4e 2506 else
df7492f9 2507 {
ff0dacd7 2508 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2509 break;
2510 }
d46c5b12
KH
2511
2512 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2513 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2514 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2515 id))
ff0dacd7 2516 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2517 else
ff0dacd7 2518 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2519 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2520 id))
ff0dacd7 2521 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2522 else
ff0dacd7 2523 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2524 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2525 id))
ff0dacd7 2526 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2527 else
ff0dacd7 2528 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2529 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2530 id))
ff0dacd7 2531 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2532 else
ff0dacd7 2533 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2534 break;
2535
4ed46869 2536 case ISO_CODE_SO:
d46c5b12 2537 case ISO_CODE_SI:
ff0dacd7 2538 /* Locking shift out/in. */
74383408
KH
2539 if (inhibit_iso_escape_detection)
2540 break;
f46869e4 2541 single_shifting = 0;
ff0dacd7
KH
2542 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2543 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2544 break;
ff0dacd7 2545
4ed46869 2546 case ISO_CODE_CSI:
ff0dacd7 2547 /* Control sequence introducer. */
f46869e4 2548 single_shifting = 0;
ff0dacd7
KH
2549 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2550 found |= CATEGORY_MASK_ISO_8_ELSE;
2551 goto check_extra_latin;
2552
2553
4ed46869
KH
2554 case ISO_CODE_SS2:
2555 case ISO_CODE_SS3:
ff0dacd7
KH
2556 /* Single shift. */
2557 if (inhibit_iso_escape_detection)
2558 break;
2559 single_shifting = 1;
2560 rejected |= CATEGORY_MASK_ISO_7BIT;
2561 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2562 & CODING_ISO_FLAG_SINGLE_SHIFT)
2563 found |= CATEGORY_MASK_ISO_8_1;
2564 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2565 & CODING_ISO_FLAG_SINGLE_SHIFT)
2566 found |= CATEGORY_MASK_ISO_8_2;
2567 goto check_extra_latin;
4ed46869
KH
2568
2569 default:
2570 if (c < 0x80)
f46869e4
KH
2571 {
2572 single_shifting = 0;
2573 break;
2574 }
ff0dacd7 2575 if (c >= 0xA0)
c4825358 2576 {
ff0dacd7
KH
2577 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2578 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2579 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2580 0xA0..0FF. If the byte length is even, we include
2581 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2582 only when we are not single shifting. */
2583 if (! single_shifting
2584 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2585 {
e17de821 2586 int i = 1;
b73bfc1c
KH
2587 while (src < src_end)
2588 {
df7492f9 2589 ONE_MORE_BYTE (c);
b73bfc1c
KH
2590 if (c < 0xA0)
2591 break;
2592 i++;
2593 }
2594
2595 if (i & 1 && src < src_end)
ff0dacd7 2596 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2597 else
ff0dacd7 2598 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2599 }
ff0dacd7 2600 break;
4ed46869 2601 }
ff0dacd7
KH
2602 check_extra_latin:
2603 single_shifting = 0;
2604 if (! VECTORP (Vlatin_extra_code_table)
2605 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2606 {
2607 rejected = CATEGORY_MASK_ISO;
2608 break;
2609 }
2610 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2611 & CODING_ISO_FLAG_LATIN_EXTRA)
2612 found |= CATEGORY_MASK_ISO_8_1;
2613 else
2614 rejected |= CATEGORY_MASK_ISO_8_1;
2615 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2616 & CODING_ISO_FLAG_LATIN_EXTRA)
2617 found |= CATEGORY_MASK_ISO_8_2;
2618 else
2619 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2620 }
2621 }
ff0dacd7
KH
2622 detect_info->rejected |= CATEGORY_MASK_ISO;
2623 return 0;
2624
df7492f9 2625 no_more_source:
ff0dacd7
KH
2626 detect_info->rejected |= rejected;
2627 detect_info->found |= (found & ~rejected);
df7492f9 2628 return 1;
4ed46869
KH
2629}
2630
4ed46869
KH
2631
/* Record in CODING that the charset selected by dimension DIM, the
   CHARS_96 flag and final byte FINAL is designated to graphic
   register REG.

   Control goes to `invalid_code', with the register marked -2, when
   FINAL is outside '0'..127, the charset table has no entry for it,
   or the charset is not safe for CODING.  JISX0201-Roman and
   JISX0208-1978 are replaced by ASCII and JISX0208 respectively when
   the corresponding coding flag is set.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)			\
  do {									\
    int desig_id = -1;							\
    int desig_prev;							\
									\
    if (! ((final) < '0' || (final) >= 128))				\
      desig_id = ISO_CHARSET_TABLE (dim, chars_96, final);		\
    if (desig_id < 0 || ! SAFE_CHARSET_P (coding, desig_id))		\
      {									\
	CODING_ISO_DESIGNATION (coding, reg) = -2;			\
	goto invalid_code;						\
      }									\
    desig_prev = CODING_ISO_DESIGNATION (coding, reg);			\
    if (desig_id == charset_jisx0201_roman				\
	&& (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN))	\
      desig_id = charset_ascii;						\
    else if (desig_id == charset_jisx0208_1978				\
	     && (CODING_ISO_FLAGS (coding)				\
		 & CODING_ISO_FLAG_USE_OLDJIS))				\
      desig_id = charset_jisx0208;					\
    CODING_ISO_DESIGNATION (coding, reg) = desig_id;			\
    /* An ASCII designation that follows an invalid designation to	\
       the same register is kept in the output as is.  */		\
    if (desig_prev == -2 && desig_id == charset_ascii)			\
      goto invalid_code;						\
  } while (0)
2662
d46c5b12 2663
df7492f9
KH
/* If a composition is in progress, abandon it: leave the composing
   state and append the characters collected so far in `components'
   to CHARBUF as ordinary characters.  For the relative and altchars
   methods every entry is a character; for the other methods only
   every second entry is copied.  Control goes to `no_more_source'
   when CHARBUF lacks room for the collected entries.  */
#define MAYBE_FINISH_COMPOSITION()					\
  do {									\
    if (composition_state != COMPOSING_NO)				\
      {									\
	int flush_step, flush_k;					\
									\
	if (charbuf + component_idx > charbuf_end)			\
	  goto no_more_source;						\
	composition_state = COMPOSING_NO;				\
	flush_step = ((method == COMPOSITION_RELATIVE			\
		       || method == COMPOSITION_WITH_ALTCHARS)		\
		      ? 1 : 2);						\
	for (flush_k = 0; flush_k < component_idx;			\
	     flush_k += flush_step)					\
	  *charbuf++ = components[flush_k];				\
	if (flush_step == 1)						\
	  char_offset += component_idx;					\
	else								\
	  char_offset += (component_idx / 2) + 1;			\
      }									\
  } while (0)
2688
d46c5b12 2689
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC.  An ESC 0 seen while alternate
   components are being collected only marks where the components
   end.  Otherwise any composition in progress is finished, and a new
   one is started only if the end sequence ESC 1 occurs in the rest of
   the source.  If it does not, control goes to `invalid_code' for
   the last block of input, or to `no_more_source' to wait for more.
   */

#define DECODE_COMPOSITION_START(c1)					\
  do {									\
    if ((c1) == '0'							\
	&& composition_state == COMPOSING_COMPONENT_RULE)		\
      {									\
	component_len = component_idx;					\
	composition_state = COMPOSING_CHAR;				\
      }									\
    else								\
      {									\
	unsigned char *p;						\
									\
	MAYBE_FINISH_COMPOSITION ();					\
	if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)		\
	  goto no_more_source;						\
	for (p = src; p < src_end - 1; p++)				\
	  if (*p == ISO_CODE_ESC && p[1] == '1')			\
	    break;							\
	/* Use `>=' rather than `==': when SRC is already at SRC_END	\
	   the loop does not run and P is beyond SRC_END - 1.  */	\
	if (p >= src_end - 1)						\
	  {								\
	    if (coding->mode & CODING_MODE_LAST_BLOCK)			\
	      goto invalid_code;					\
	    goto no_more_source;					\
	  }								\
									\
	/* This is surely the start of a composition.  */		\
	method = ((c1) == '0' ? COMPOSITION_RELATIVE			\
		  : (c1) == '2' ? COMPOSITION_WITH_RULE			\
		  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS		\
		  : COMPOSITION_WITH_RULE_ALTCHARS);			\
	composition_state = ((c1) <= '2' ? COMPOSING_CHAR		\
			     : COMPOSING_COMPONENT_CHAR);		\
	component_idx = component_len = 0;				\
      }									\
  } while (0)
2732
ec6d2bb8 2733
df7492f9
KH
/* Handle composition end sequence ESC 1.

   Append to CHARBUF a composition annotation for the characters
   collected in `components', then the characters themselves, and
   leave the composing state.  For every method except the relative
   one the component entries are stored right after the annotation
   and its length field is rewritten to cover them.  CHAR_OFFSET
   advances by one for each character stored.  */

#define DECODE_COMPOSITION_END()					\
  do {									\
    int end_nchars, end_k, end_first, end_step;				\
    int *end_header = charbuf;						\
    int end_from, end_to;						\
									\
    if (component_len > 0)						\
      end_nchars = component_idx - component_len;			\
    else if (method == COMPOSITION_RELATIVE)				\
      end_nchars = component_idx;					\
    else								\
      end_nchars = (component_idx + 1) / 2;				\
    end_from = coding->produced_char + char_offset;			\
    end_to = end_from + end_nchars;					\
									\
    ADD_COMPOSITION_DATA (charbuf, end_from, end_to, method);		\
    if (method != COMPOSITION_RELATIVE)					\
      {									\
	int end_limit = (component_len == 0				\
			 ? component_idx : component_len);		\
									\
	for (end_k = 0; end_k < end_limit; end_k++)			\
	  *charbuf++ = components[end_k];				\
	*end_header = end_header - charbuf;				\
      }									\
    /* With rules, characters sit at the even indices; otherwise	\
       they are the entries that follow the alternate components.  */	\
    if (method == COMPOSITION_WITH_RULE)				\
      end_first = 0, end_step = 2;					\
    else								\
      end_first = component_len, end_step = 1;				\
    for (end_k = end_first; end_k < component_idx; end_k += end_step)	\
      {									\
	*charbuf++ = components[end_k];					\
	char_offset++;							\
      }									\
    coding->annotated = 1;						\
    composition_state = COMPOSING_NO;					\
  } while (0)
2766
df7492f9 2767
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and maybe one more byte
   from SRC) and store the encoded composition rule back into C1.

   C1 must be an lvalue.  After subtracting 32, values below 81 are
   treated as the old one-byte format and values below 93 as the new
   two-byte format, whose second byte is read into the caller's C2 via
   ONE_MORE_BYTE.  Any other value sets C1 to 0.  */

#define DECODE_COMPOSITION_RULE(c1)					\
  do {									\
    (c1) -= 32;								\
    if ((c1) < 81)		/* old format (before ver.21) */	\
      {									\
	int gref = (c1) / 9;						\
	int nref = (c1) % 9;						\
	if (gref == 4) gref = 10;					\
	if (nref == 4) nref = 10;					\
	(c1) = COMPOSITION_ENCODE_RULE (gref, nref);			\
      }									\
    else if ((c1) < 93)		/* new format (after ver.21) */		\
      {									\
	ONE_MORE_BYTE (c2);						\
	(c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);		\
      }									\
    else								\
      (c1) = 0;								\
  } while (0)
88993dfd 2791
d46c5b12 2792
4ed46869
KH
2793/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2794
b73bfc1c 2795static void
df7492f9 2796decode_coding_iso_2022 (coding)
4ed46869 2797 struct coding_system *coding;
4ed46869 2798{
df7492f9
KH
2799 unsigned char *src = coding->source + coding->consumed;
2800 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2801 unsigned char *src_base;
df7492f9 2802 int *charbuf = coding->charbuf;
ff0dacd7
KH
2803 int *charbuf_end
2804 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2805 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
2806 int multibytep = coding->src_multibyte;
2807 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2808 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2809 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2810 struct charset *charset;
2811 int c;
2812 /* For handling composition sequence. */
2813#define COMPOSING_NO 0
2814#define COMPOSING_CHAR 1
2815#define COMPOSING_RULE 2
2816#define COMPOSING_COMPONENT_CHAR 3
2817#define COMPOSING_COMPONENT_RULE 4
2818
2819 int composition_state = COMPOSING_NO;
2820 enum composition_method method;
2821 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2822 int component_idx;
2823 int component_len;
2824 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
2825 int char_offset = coding->produced_char;
2826 int last_offset = char_offset;
2827 int last_id = charset_ascii;
df7492f9
KH
2828
2829 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2830 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2831
2832 while (1)
4ed46869 2833 {
b73bfc1c
KH
2834 int c1, c2;
2835
2836 src_base = src;
df7492f9
KH
2837 consumed_chars_base = consumed_chars;
2838
2839 if (charbuf >= charbuf_end)
2840 break;
2841
b73bfc1c 2842 ONE_MORE_BYTE (c1);
4ed46869 2843
98725083 2844 /* We produce at most one character. */
4ed46869
KH
2845 switch (iso_code_class [c1])
2846 {
2847 case ISO_0x20_or_0x7F:
df7492f9 2848 if (composition_state != COMPOSING_NO)
ec6d2bb8 2849 {
df7492f9
KH
2850 if (composition_state == COMPOSING_RULE
2851 || composition_state == COMPOSING_COMPONENT_RULE)
2852 {
2853 DECODE_COMPOSITION_RULE (c1);
2854 components[component_idx++] = c1;
2855 composition_state--;
2856 continue;
2857 }
ec6d2bb8 2858 }
df7492f9
KH
2859 if (charset_id_0 < 0
2860 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2861 /* This is SPACE or DEL. */
2862 charset = CHARSET_FROM_ID (charset_ascii);
2863 else
2864 charset = CHARSET_FROM_ID (charset_id_0);
2865 break;
4ed46869
KH
2866
2867 case ISO_graphic_plane_0:
781d7a48 2868 if (composition_state != COMPOSING_NO)
b73bfc1c 2869 {
781d7a48
KH
2870 if (composition_state == COMPOSING_RULE
2871 || composition_state == COMPOSING_COMPONENT_RULE)
2872 {
2873 DECODE_COMPOSITION_RULE (c1);
2874 components[component_idx++] = c1;
2875 composition_state--;
2876 continue;
2877 }
b73bfc1c 2878 }
df7492f9 2879 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2880 break;
2881
2882 case ISO_0xA0_or_0xFF:
df7492f9
KH
2883 if (charset_id_1 < 0
2884 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2885 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2886 goto invalid_code;
4ed46869
KH
2887 /* This is a graphic character, we fall down ... */
2888
2889 case ISO_graphic_plane_1:
df7492f9
KH
2890 if (charset_id_1 < 0)
2891 goto invalid_code;
2892 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2893 break;
2894
2895 case ISO_carriage_return:
df7492f9 2896 if (c1 == '\r')
4ed46869 2897 {
df7492f9 2898 if (EQ (eol_type, Qdos))
4ed46869 2899 {
df7492f9 2900 if (src == src_end)
98725083
KH
2901 {
2902 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2903 goto no_more_source;
2904 }
df7492f9
KH
2905 if (*src == '\n')
2906 ONE_MORE_BYTE (c1);
4ed46869 2907 }
df7492f9
KH
2908 else if (EQ (eol_type, Qmac))
2909 c1 = '\n';
4ed46869 2910 }
df7492f9
KH
2911 /* fall through */
2912
2913 case ISO_control_0:
2914 MAYBE_FINISH_COMPOSITION ();
2915 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2916 break;
2917
df7492f9
KH
2918 case ISO_control_1:
2919 MAYBE_FINISH_COMPOSITION ();
2920 goto invalid_code;
2921
4ed46869 2922 case ISO_shift_out:
df7492f9
KH
2923 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2924 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2925 goto invalid_code;
2926 CODING_ISO_INVOCATION (coding, 0) = 1;
2927 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2928 continue;
4ed46869
KH
2929
2930 case ISO_shift_in:
df7492f9
KH
2931 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2932 goto invalid_code;
2933 CODING_ISO_INVOCATION (coding, 0) = 0;
2934 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2935 continue;
4ed46869
KH
2936
2937 case ISO_single_shift_2_7:
2938 case ISO_single_shift_2:
df7492f9
KH
2939 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2940 goto invalid_code;
4ed46869
KH
2941 /* SS2 is handled as an escape sequence of ESC 'N' */
2942 c1 = 'N';
2943 goto label_escape_sequence;
2944
2945 case ISO_single_shift_3:
df7492f9
KH
2946 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2947 goto invalid_code;
4ed46869
KH
2948 /* SS2 is handled as an escape sequence of ESC 'O' */
2949 c1 = 'O';
2950 goto label_escape_sequence;
2951
2952 case ISO_control_sequence_introducer:
2953 /* CSI is handled as an escape sequence of ESC '[' ... */
2954 c1 = '[';
2955 goto label_escape_sequence;
2956
2957 case ISO_escape:
2958 ONE_MORE_BYTE (c1);
2959 label_escape_sequence:
df7492f9 2960 /* Escape sequences handled here are invocation,
4ed46869
KH
2961 designation, direction specification, and character
2962 composition specification. */
2963 switch (c1)
2964 {
2965 case '&': /* revision of following character set */
2966 ONE_MORE_BYTE (c1);
2967 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2968 goto invalid_code;
4ed46869
KH
2969 ONE_MORE_BYTE (c1);
2970 if (c1 != ISO_CODE_ESC)
df7492f9 2971 goto invalid_code;
4ed46869
KH
2972 ONE_MORE_BYTE (c1);
2973 goto label_escape_sequence;
2974
2975 case '$': /* designation of 2-byte character set */
df7492f9
KH
2976 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2977 goto invalid_code;
4ed46869
KH
2978 ONE_MORE_BYTE (c1);
2979 if (c1 >= '@' && c1 <= 'B')
2980 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2981 or JISX0208.1980 */
df7492f9 2982 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2983 }
2984 else if (c1 >= 0x28 && c1 <= 0x2B)
2985 { /* designation of DIMENSION2_CHARS94 character set */
2986 ONE_MORE_BYTE (c2);
df7492f9 2987 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2988 }
2989 else if (c1 >= 0x2C && c1 <= 0x2F)
2990 { /* designation of DIMENSION2_CHARS96 character set */
2991 ONE_MORE_BYTE (c2);
df7492f9 2992 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2993 }
2994 else
df7492f9 2995 goto invalid_code;
b73bfc1c 2996 /* We must update these variables now. */
df7492f9
KH
2997 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2998 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2999 continue;
4ed46869
KH
3000
3001 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3002 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3003 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3004 goto invalid_code;
3005 CODING_ISO_INVOCATION (coding, 0) = 2;
3006 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3007 continue;
4ed46869
KH
3008
3009 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3010 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3011 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3012 goto invalid_code;
3013 CODING_ISO_INVOCATION (coding, 0) = 3;
3014 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3015 continue;
4ed46869
KH
3016
3017 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3018 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3019 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3020 goto invalid_code;
3021 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 3022 ONE_MORE_BYTE (c1);
e7046a18 3023 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3024 goto invalid_code;
4ed46869
KH
3025 break;
3026
3027 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3028 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3029 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3030 goto invalid_code;
3031 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 3032 ONE_MORE_BYTE (c1);
e7046a18 3033 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3034 goto invalid_code;
4ed46869
KH
3035 break;
3036
ec6d2bb8 3037 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3038 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3039 goto invalid_code;
ec6d2bb8 3040 DECODE_COMPOSITION_START (c1);
b73bfc1c 3041 continue;
4ed46869 3042
ec6d2bb8 3043 case '1': /* end composition */
df7492f9
KH
3044 if (composition_state == COMPOSING_NO)
3045 goto invalid_code;
3046 DECODE_COMPOSITION_END ();
b73bfc1c 3047 continue;
4ed46869
KH
3048
3049 case '[': /* specification of direction */
df7492f9
KH
3050 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3051 goto invalid_code;
4ed46869 3052 /* For the moment, nested direction is not supported.
d46c5b12 3053 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3054 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3055 ONE_MORE_BYTE (c1);
3056 switch (c1)
3057 {
3058 case ']': /* end of the current direction */
d46c5b12 3059 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3060
3061 case '0': /* end of the current direction */
3062 case '1': /* start of left-to-right direction */
3063 ONE_MORE_BYTE (c1);
3064 if (c1 == ']')
d46c5b12 3065 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3066 else
df7492f9 3067 goto invalid_code;
4ed46869
KH
3068 break;
3069
3070 case '2': /* start of right-to-left direction */
3071 ONE_MORE_BYTE (c1);
3072 if (c1 == ']')
d46c5b12 3073 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3074 else
df7492f9 3075 goto invalid_code;
4ed46869
KH
3076 break;
3077
3078 default:
df7492f9 3079 goto invalid_code;
4ed46869 3080 }
b73bfc1c 3081 continue;
4ed46869
KH
3082
3083 default:
df7492f9
KH
3084 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3085 goto invalid_code;
4ed46869
KH
3086 if (c1 >= 0x28 && c1 <= 0x2B)
3087 { /* designation of DIMENSION1_CHARS94 character set */
3088 ONE_MORE_BYTE (c2);
df7492f9 3089 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3090 }
3091 else if (c1 >= 0x2C && c1 <= 0x2F)
3092 { /* designation of DIMENSION1_CHARS96 character set */
3093 ONE_MORE_BYTE (c2);
df7492f9 3094 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3095 }
3096 else
df7492f9 3097 goto invalid_code;
b73bfc1c 3098 /* We must update these variables now. */
df7492f9
KH
3099 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3100 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3101 continue;
4ed46869 3102 }
b73bfc1c 3103 }
4ed46869 3104
ff0dacd7
KH
3105 if (charset->id != charset_ascii
3106 && last_id != charset->id)
3107 {
3108 if (last_id != charset_ascii)
3109 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3110 last_id = charset->id;
3111 last_offset = char_offset;
3112 }
3113
b73bfc1c 3114 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3115 Produce a decoded character while getting 2nd position code
3116 C2 if necessary. */
3117 c1 &= 0x7F;
3118 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3119 {
3120 ONE_MORE_BYTE (c2);
df7492f9 3121 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3122 /* C2 is not in a valid range. */
df7492f9
KH
3123 goto invalid_code;
3124 c1 = (c1 << 8) | (c2 & 0x7F);
3125 if (CHARSET_DIMENSION (charset) > 2)
3126 {
3127 ONE_MORE_BYTE (c2);
3128 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3129 /* C2 is not in a valid range. */
3130 goto invalid_code;
3131 c1 = (c1 << 8) | (c2 & 0x7F);
3132 }
3133 }
3134
3135 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3136 if (c < 0)
3137 {
3138 MAYBE_FINISH_COMPOSITION ();
3139 for (; src_base < src; src_base++, char_offset++)
3140 {
3141 if (ASCII_BYTE_P (*src_base))
3142 *charbuf++ = *src_base;
3143 else
3144 *charbuf++ = BYTE8_TO_CHAR (*src_base);
ff0dacd7 3145 char_offset++;
df7492f9
KH
3146 }
3147 }
3148 else if (composition_state == COMPOSING_NO)
3149 {
3150 *charbuf++ = c;
3151 char_offset++;
4ed46869 3152 }
df7492f9 3153 else
781d7a48
KH
3154 {
3155 components[component_idx++] = c;
3156 if (method == COMPOSITION_WITH_RULE
3157 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3158 && composition_state == COMPOSING_COMPONENT_CHAR))
3159 composition_state++;
3160 }
4ed46869
KH
3161 continue;
3162
df7492f9
KH
3163 invalid_code:
3164 MAYBE_FINISH_COMPOSITION ();
4ed46869 3165 src = src_base;
df7492f9
KH
3166 consumed_chars = consumed_chars_base;
3167 ONE_MORE_BYTE (c);
3168 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3169 char_offset++;
df7492f9 3170 coding->errors++;
4ed46869 3171 }
fb88bf2d 3172
df7492f9 3173 no_more_source:
ff0dacd7
KH
3174 if (last_id != charset_ascii)
3175 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3176 coding->consumed_char += consumed_chars_base;
3177 coding->consumed = src_base - coding->source;
3178 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3179}
3180
b73bfc1c 3181
f4dee582 3182/* ISO2022 encoding stuff. */
4ed46869
KH
3183
3184/*
f4dee582 3185 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3186 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3187 variant has the following specifications:
df7492f9 3188 1. Initial designation to G0 thru G3.
4ed46869
KH
3189 2. Allows short-form designation?
3190 3. ASCII should be designated to G0 before control characters?
3191 4. ASCII should be designated to G0 at end of line?
3192 5. 7-bit environment or 8-bit environment?
3193 6. Use locking-shift?
3194 7. Use Single-shift?
3195 And the following two are only for Japanese:
3196 8. Use ASCII in place of JIS0201-1976-Roman?
3197 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3198 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3199 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3200 details.
4ed46869
KH
3201*/
3202
3203/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
3204 register REG at DST, and increment DST. If <final-char> of CHARSET is
3205 '@', 'A', or 'B' and the coding system CODING allows, produce
3206 designation sequence of short-form. */
4ed46869
KH
3207
#define ENCODE_DESIGNATION(charset, reg, coding)			\
  do {									\
    unsigned char final_char = CHARSET_ISO_FINAL (charset);		\
    char *intermediate_char_94 = "()*+";				\
    char *intermediate_char_96 = ",-./";				\
    int revision = -1;							\
    int c;								\
									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)		\
      revision = CHARSET_ISO_REVISION (charset);			\
									\
    /* Optional revision prefix: ESC '&' followed by '@' + revision.  */ \
    if (revision >= 0)							\
      {									\
	EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');			\
	EMIT_ONE_BYTE ('@' + revision);					\
      }									\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);					\
    if (CHARSET_DIMENSION (charset) != 1)				\
      {									\
	EMIT_ONE_ASCII_BYTE ('$');					\
	if (CHARSET_ISO_CHARS_96 (charset))				\
	  {								\
	    EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);		\
	  }								\
	else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM	\
		 || reg != 0						\
		 || final_char < '@' || final_char > 'B')		\
	  {								\
	    /* The intermediate byte is left out only for the		\
	       short form: register 0 with final '@', 'A' or 'B'.  */	\
	    EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);		\
	  }								\
      }									\
    else								\
      {									\
	if (CHARSET_ISO_CHARS_96 (charset))				\
	  c = intermediate_char_96[reg];				\
	else								\
	  c = intermediate_char_94[reg];				\
	EMIT_ONE_ASCII_BYTE (c);					\
      }									\
    EMIT_ONE_ASCII_BYTE (final_char);					\
									\
    /* Record that REG now holds CHARSET.  */				\
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);	\
  } while (0)
3250
df7492f9 3251
4ed46869
KH
3252/* The following two macros produce codes (control character or escape
3253 sequence) for ISO2022 single-shift functions (single-shift-2 and
3254 single-shift-3). */
3255
df7492f9
KH
#define ENCODE_SINGLE_SHIFT_2						\
  do {									\
    /* ESC 'N' when the seven-bits flag is set, else the SS2 byte.  */	\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)		\
      {									\
	EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');			\
      }									\
    else								\
      {									\
	EMIT_ONE_BYTE (ISO_CODE_SS2);					\
      }									\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
3264
df7492f9
KH
3265
#define ENCODE_SINGLE_SHIFT_3						\
  do {									\
    /* ESC 'O' when the seven-bits flag is set, else the SS3 byte.  */	\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)		\
      {									\
	EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');			\
      }									\
    else								\
      {									\
	EMIT_ONE_BYTE (ISO_CODE_SS3);					\
      }									\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
3274
df7492f9 3275
4ed46869
KH
3276/* The following four macros produce codes (control character or
3277 escape sequence) for ISO2022 locking-shift functions (shift-in,
3278 shift-out, locking-shift-2, and locking-shift-3). */
3279
df7492f9
KH
/* Emit SI and record graphic register 0 as invoked to plane 0.  */
#define ENCODE_SHIFT_IN					\
  do {							\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);			\
    CODING_ISO_INVOCATION (coding, 0) = 0;		\
  } while (0)
3285
df7492f9
KH
3286
/* Emit SO and record graphic register 1 as invoked to plane 0.  */
#define ENCODE_SHIFT_OUT				\
  do {							\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);			\
    CODING_ISO_INVOCATION (coding, 0) = 1;		\
  } while (0)
3292
df7492f9
KH
3293
/* Emit ESC 'n' and record graphic register 2 as invoked to plane 0.  */
#define ENCODE_LOCKING_SHIFT_2				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');		\
    CODING_ISO_INVOCATION (coding, 0) = 2;		\
  } while (0)
3299
df7492f9
KH
3300
/* Emit ESC 'o' and record graphic register 3 as invoked to plane 0.
   The final byte must be 'o': ESC 'n' is locking-shift-2, and the
   decoder treats ESC 'o' as locking-shift-3.  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
3306
df7492f9 3307
f4dee582
RS
3308/* Produce codes for a DIMENSION1 character whose character set is
3309 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
3310 sequences are also produced in advance if necessary. */
3311
6e85d753
KH
/* CHARSET must be an lvalue (it is reassigned when ASCII is replaced
   by JISX0201-Roman); C1 may be any integer expression.  The body
   loops: when CHARSET is not invoked to either graphic plane, the
   needed designation/invocation is emitted and the tests are
   repeated.  */
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			\
  do {									\
    int id = CHARSET_ID (charset);					\
									\
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)		\
	&& id == charset_ascii)						\
      {									\
	id = charset_jisx0201_roman;					\
	(charset) = CHARSET_FROM_ID (id);				\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
	  EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);				\
	else								\
	  EMIT_ONE_BYTE ((c1) | 0x80);					\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))		\
      {									\
	EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);				\
	break;								\
      }									\
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))		\
      {									\
	EMIT_ONE_BYTE ((c1) | 0x80);					\
	break;								\
      }									\
    else								\
      /* Since CHARSET is not yet invoked to any graphic planes, we	\
	 must invoke it, or, at first, designate it to some graphic	\
	 register.  Then repeat the loop to actually produce the	\
	 character.  */							\
      dst = encode_invocation_designation (charset, coding, dst,	\
					   &produced_chars);		\
  } while (1)
3350
df7492f9 3351
f4dee582
RS
3352/* Produce codes for a DIMENSION2 character whose character set is
3353 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3354 invocation codes are also produced in advance if necessary. */
3355
6e85d753
KH
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)		\
  do {									\
    int id = CHARSET_ID (charset);					\
									\
    /* Substitute JISX0208.1978 for JISX0208 when the coding system	\
       asks for the old JIS.  */					\
    if (id == charset_jisx0208						\
	&& (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))	\
      {									\
	id = charset_jisx0208_1978;					\
	charset = CHARSET_FROM_ID (id);					\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	/* A single shift was just emitted; it covers exactly this	\
	   character.  */						\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
	  EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
	else								\
	  EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))			\
      {									\
	EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
	break;								\
      }									\
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))			\
      {									\
	EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
	break;								\
      }									\
    /* CHARSET is not invoked to either graphic plane yet: emit the	\
       designation and/or invocation it needs, then go round the loop	\
       again to produce the character itself.  */			\
    dst = encode_invocation_designation (charset, coding, dst,		\
					 &produced_chars);		\
  } while (1)
3394
05e6f5dc 3395
df7492f9
KH
/* Encode character C of CHARSET, dispatching on the dimension of
   CHARSET.  For the two-byte form the code is split into its upper
   part (code >> 8) and its low 8 bits.  */
#define ENCODE_ISO_CHARACTER(charset, c)				   \
  do {									   \
    int code = ENCODE_CHAR ((charset),(c));				   \
									   \
    if (CHARSET_DIMENSION (charset) != 1)				   \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else								   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);		   \
  } while (0)
bdd9fb48 3405
05e6f5dc 3406
4ed46869 3407/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3408 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3409 Return new DST. */
3410
3411unsigned char *
df7492f9
KH
3412encode_invocation_designation (charset, coding, dst, p_nchars)
3413 struct charset *charset;
4ed46869
KH
3414 struct coding_system *coding;
3415 unsigned char *dst;
df7492f9 3416 int *p_nchars;
4ed46869 3417{
df7492f9
KH
3418 int multibytep = coding->dst_multibyte;
3419 int produced_chars = *p_nchars;
4ed46869 3420 int reg; /* graphic register number */
df7492f9 3421 int id = CHARSET_ID (charset);
4ed46869
KH
3422
3423 /* At first, check designations. */
3424 for (reg = 0; reg < 4; reg++)
df7492f9 3425 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3426 break;
3427
3428 if (reg >= 4)
3429 {
3430 /* CHARSET is not yet designated to any graphic registers. */
3431 /* At first check the requested designation. */
df7492f9
KH
3432 reg = CODING_ISO_REQUEST (coding, id);
3433 if (reg < 0)
1ba9e4ab
KH
3434 /* Since CHARSET requests no special designation, designate it
3435 to graphic register 0. */
4ed46869
KH
3436 reg = 0;
3437
3438 ENCODE_DESIGNATION (charset, reg, coding);
3439 }
3440
df7492f9
KH
3441 if (CODING_ISO_INVOCATION (coding, 0) != reg
3442 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3443 {
3444 /* Since the graphic register REG is not invoked to any graphic
3445 planes, invoke it to graphic plane 0. */
3446 switch (reg)
3447 {
3448 case 0: /* graphic register 0 */
3449 ENCODE_SHIFT_IN;
3450 break;
3451
3452 case 1: /* graphic register 1 */
3453 ENCODE_SHIFT_OUT;
3454 break;
3455
3456 case 2: /* graphic register 2 */
df7492f9 3457 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3458 ENCODE_SINGLE_SHIFT_2;
3459 else
3460 ENCODE_LOCKING_SHIFT_2;
3461 break;
3462
3463 case 3: /* graphic register 3 */
df7492f9 3464 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3465 ENCODE_SINGLE_SHIFT_3;
3466 else
3467 ENCODE_LOCKING_SHIFT_3;
3468 break;
3469 }
3470 }
b73bfc1c 3471
df7492f9 3472 *p_nchars = produced_chars;
4ed46869
KH
3473 return dst;
3474}
3475
df7492f9
KH
3476/* The following three macros produce codes for indicating direction
3477 of text. */
/* Emit a control sequence introducer: ESC '[' when the seven-bits
   flag is set, else the single CSI byte.  The flag is tested as a bit
   of the flags word, like every other CODING_ISO_FLAG_XXX test in
   this file; comparing the whole word for equality would only match
   when no other flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)		\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)
3485
ec6d2bb8 3486
df7492f9
KH
/* Emit the start-of-right-to-left sequence: CSI '2' ']'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list.  */
#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)
3492
ec6d2bb8 3493
/* Emit the end-of-direction (back to left-to-right) sequence:
   CSI '0' ']'.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like
   macro, so it is used without an argument list.  */
#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
3499
4ed46869
KH
3500
3501/* Produce codes for designation and invocation to reset the graphic
3502 planes and registers to initial state. */
df7492f9
KH
#define ENCODE_RESET_PLANE_AND_REGISTER()				\
  do {									\
    int reg;								\
    struct charset *charset;						\
									\
    /* Put graphic register 0 back on plane 0 first.  */		\
    if (CODING_ISO_INVOCATION (coding, 0) != 0)				\
      ENCODE_SHIFT_IN;							\
    /* Then re-designate every register whose current charset		\
       differs from its initial one; registers with no initial		\
       charset (negative id) are left alone.  */			\
    for (reg = 0; reg < 4; reg++)					\
      {									\
	if (CODING_ISO_INITIAL (coding, reg) < 0)			\
	  continue;							\
	if (CODING_ISO_DESIGNATION (coding, reg)			\
	    == CODING_ISO_INITIAL (coding, reg))			\
	  continue;							\
	charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));	\
	ENCODE_DESIGNATION (charset, reg, coding);			\
      }									\
  } while (0)
3519
df7492f9 3520
bdd9fb48 3521/* Produce designation sequences of charsets in the line started from
b73bfc1c 3522 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3523
3524 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3525 find all the necessary designations. */
3526
b73bfc1c 3527static unsigned char *
df7492f9 3528encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3529 struct coding_system *coding;
df7492f9
KH
3530 int *charbuf, *charbuf_end;
3531 unsigned char *dst;
e0e989f6 3532{
df7492f9 3533 struct charset *charset;
bdd9fb48
KH
3534 /* Table of charsets to be designated to each graphic register. */
3535 int r[4];
df7492f9
KH
3536 int c, found = 0, reg;
3537 int produced_chars = 0;
3538 int multibytep = coding->dst_multibyte;
3539 Lisp_Object attrs;
3540 Lisp_Object charset_list;
3541
3542 attrs = CODING_ID_ATTRS (coding->id);
3543 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3544 if (EQ (charset_list, Qiso_2022))
3545 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3546
3547 for (reg = 0; reg < 4; reg++)
3548 r[reg] = -1;
3549
b73bfc1c 3550 while (found < 4)
e0e989f6 3551 {
df7492f9
KH
3552 int id;
3553
3554 c = *charbuf++;
b73bfc1c
KH
3555 if (c == '\n')
3556 break;
df7492f9
KH
3557 charset = char_charset (c, charset_list, NULL);
3558 id = CHARSET_ID (charset);
3559 reg = CODING_ISO_REQUEST (coding, id);
3560 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3561 {
3562 found++;
df7492f9 3563 r[reg] = id;
bdd9fb48 3564 }
bdd9fb48
KH
3565 }
3566
3567 if (found)
3568 {
3569 for (reg = 0; reg < 4; reg++)
3570 if (r[reg] >= 0
df7492f9
KH
3571 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3572 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3573 }
b73bfc1c
KH
3574
3575 return dst;
e0e989f6
KH
3576}
3577
4ed46869
KH
3578/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3579
df7492f9
KH
3580static int
3581encode_coding_iso_2022 (coding)
4ed46869 3582 struct coding_system *coding;
4ed46869 3583{
df7492f9
KH
3584 int multibytep = coding->dst_multibyte;
3585 int *charbuf = coding->charbuf;
3586 int *charbuf_end = charbuf + coding->charbuf_used;
3587 unsigned char *dst = coding->destination + coding->produced;
3588 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3589 int safe_room = 16;
3590 int bol_designation
3591 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3592 && CODING_ISO_BOL (coding));
3593 int produced_chars = 0;
3594 Lisp_Object attrs, eol_type, charset_list;
3595 int ascii_compatible;
b73bfc1c 3596 int c;
ff0dacd7 3597 int preferred_charset_id = -1;
05e6f5dc 3598
df7492f9 3599 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4 3600 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3601 /* Charset list may have been changed. */
3602 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
004068e4
KH
3603 coding->safe_charsets
3604 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3605
df7492f9 3606 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3607
df7492f9 3608 while (charbuf < charbuf_end)
4ed46869 3609 {
df7492f9 3610 ASSURE_DESTINATION (safe_room);
b73bfc1c 3611
df7492f9 3612 if (bol_designation)
b73bfc1c 3613 {
df7492f9 3614 unsigned char *dst_prev = dst;
4ed46869 3615
bdd9fb48 3616 /* We have to produce designation sequences if any now. */
df7492f9
KH
3617 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3618 bol_designation = 0;
3619 /* We are sure that designation sequences are all ASCII bytes. */
3620 produced_chars += dst - dst_prev;
4ed46869 3621 }
ec6d2bb8 3622
df7492f9 3623 c = *charbuf++;
4ed46869 3624
ff0dacd7
KH
3625 if (c < 0)
3626 {
3627 /* Handle an annotation. */
3628 switch (*charbuf)
3629 {
3630 case CODING_ANNOTATE_COMPOSITION_MASK:
3631 /* Not yet implemented. */
3632 break;
3633 case CODING_ANNOTATE_CHARSET_MASK:
3634 preferred_charset_id = charbuf[3];
3635 if (preferred_charset_id >= 0
3636 && NILP (Fmemq (make_number (preferred_charset_id),
3637 charset_list)))
3638 preferred_charset_id = -1;
3639 break;
3640 default:
3641 abort ();
3642 }
3643 charbuf += -c - 1;
3644 continue;
3645 }
3646
b73bfc1c
KH
3647 /* Now encode the character C. */
3648 if (c < 0x20 || c == 0x7F)
3649 {
df7492f9
KH
3650 if (c == '\n'
3651 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3652 {
df7492f9
KH
3653 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3654 ENCODE_RESET_PLANE_AND_REGISTER ();
3655 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3656 {
df7492f9
KH
3657 int i;
3658
3659 for (i = 0; i < 4; i++)
3660 CODING_ISO_DESIGNATION (coding, i)
3661 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3662 }
df7492f9
KH
3663 bol_designation
3664 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3665 }
df7492f9
KH
3666 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3667 ENCODE_RESET_PLANE_AND_REGISTER ();
3668 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3669 }
df7492f9 3670 else if (ASCII_CHAR_P (c))
88993dfd 3671 {
df7492f9
KH
3672 if (ascii_compatible)
3673 EMIT_ONE_ASCII_BYTE (c);
3674 else
bf16eb23
KH
3675 {
3676 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3677 ENCODE_ISO_CHARACTER (charset, c);
3678 }
88993dfd 3679 }
16eafb5d
KH
3680 else if (CHAR_BYTE8_P (c))
3681 {
3682 c = CHAR_TO_BYTE8 (c);
3683 EMIT_ONE_BYTE (c);
3684 }
b73bfc1c 3685 else
df7492f9 3686 {
ff0dacd7 3687 struct charset *charset;
b73bfc1c 3688
ff0dacd7
KH
3689 if (preferred_charset_id >= 0)
3690 {
3691 charset = CHARSET_FROM_ID (preferred_charset_id);
3692 if (! CHAR_CHARSET_P (c, charset))
3693 charset = char_charset (c, charset_list, NULL);
3694 }
3695 else
3696 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3697 if (!charset)
3698 {
41cbe562
KH
3699 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3700 {
3701 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3702 charset = CHARSET_FROM_ID (charset_ascii);
3703 }
3704 else
3705 {
3706 c = coding->default_char;
3707 charset = char_charset (c, charset_list, NULL);
3708 }
df7492f9
KH
3709 }
3710 ENCODE_ISO_CHARACTER (charset, c);
3711 }
84fbb8a0 3712 }
b73bfc1c 3713
df7492f9
KH
3714 if (coding->mode & CODING_MODE_LAST_BLOCK
3715 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3716 {
3717 ASSURE_DESTINATION (safe_room);
3718 ENCODE_RESET_PLANE_AND_REGISTER ();
3719 }
3720 coding->result = CODING_RESULT_SUCCESS;
3721 CODING_ISO_BOL (coding) = bol_designation;
3722 coding->produced_char += produced_chars;
3723 coding->produced = dst - coding->destination;
3724 return 0;
4ed46869
KH
3725}
3726
3727\f
/*** 8. SJIS and BIG5 handlers ***/
4ed46869 3729
df7492f9 3730/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3731 quite widely. So, for the moment, Emacs supports them in the bare
3732 C code. But, in the future, they may be supported only by CCL. */
3733
3734/* SJIS is a coding system encoding three character sets: ASCII, right
3735 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3736 as is. A character of charset katakana-jisx0201 is encoded by
3737 "position-code + 0x80". A character of charset japanese-jisx0208
3738 is encoded in 2-byte but two position-codes are divided and shifted
   so that it fits in the range below.
4ed46869
KH
3740
3741 --- CODE RANGE of SJIS ---
3742 (character set) (range)
3743 ASCII 0x00 .. 0x7F
df7492f9 3744 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3745 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3746 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3747 -------------------------------
3748
3749*/
3750
3751/* BIG5 is a coding system encoding two character sets: ASCII and
3752 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3753 character set and is encoded in two-byte.
4ed46869
KH
3754
3755 --- CODE RANGE of BIG5 ---
3756 (character set) (range)
3757 ASCII 0x00 .. 0x7F
3758 Big5 (1st byte) 0xA1 .. 0xFE
3759 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3760 --------------------------
3761
df7492f9 3762 */
4ed46869
KH
3763
3764/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3765 Check if a text is encoded in SJIS. If it is, return
df7492f9 3766 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3767
0a28aafb 3768static int
ff0dacd7 3769detect_coding_sjis (coding, detect_info)
df7492f9 3770 struct coding_system *coding;
ff0dacd7 3771 struct coding_detection_info *detect_info;
4ed46869 3772{
df7492f9
KH
3773 unsigned char *src = coding->source, *src_base = src;
3774 unsigned char *src_end = coding->source + coding->src_bytes;
3775 int multibytep = coding->src_multibyte;
3776 int consumed_chars = 0;
3777 int found = 0;
b73bfc1c 3778 int c;
89528eb3 3779 int incomplete;
df7492f9 3780
ff0dacd7 3781 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3782 /* A coding system of this category is always ASCII compatible. */
3783 src += coding->head_ascii;
4ed46869 3784
b73bfc1c 3785 while (1)
4ed46869 3786 {
89528eb3 3787 incomplete = 0;
df7492f9 3788 ONE_MORE_BYTE (c);
89528eb3 3789 incomplete = 1;
682169fe
KH
3790 if (c < 0x80)
3791 continue;
df7492f9 3792 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3793 {
df7492f9 3794 ONE_MORE_BYTE (c);
682169fe 3795 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3796 break;
ff0dacd7 3797 found = CATEGORY_MASK_SJIS;
4ed46869 3798 }
df7492f9 3799 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3800 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3801 else
3802 break;
4ed46869 3803 }
ff0dacd7 3804 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3805 return 0;
3806
3807 no_more_source:
89528eb3
KH
3808 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3809 {
ff0dacd7 3810 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3
KH
3811 return 0;
3812 }
ff0dacd7
KH
3813 detect_info->found |= found;
3814 return 1;
4ed46869
KH
3815}
3816
3817/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3818 Check if a text is encoded in BIG5. If it is, return
df7492f9 3819 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3820
0a28aafb 3821static int
ff0dacd7 3822detect_coding_big5 (coding, detect_info)
df7492f9 3823 struct coding_system *coding;
ff0dacd7 3824 struct coding_detection_info *detect_info;
4ed46869 3825{
df7492f9
KH
3826 unsigned char *src = coding->source, *src_base = src;
3827 unsigned char *src_end = coding->source + coding->src_bytes;
3828 int multibytep = coding->src_multibyte;
3829 int consumed_chars = 0;
3830 int found = 0;
b73bfc1c 3831 int c;
89528eb3 3832 int incomplete;
fa42c37f 3833
ff0dacd7 3834 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
3835 /* A coding system of this category is always ASCII compatible. */
3836 src += coding->head_ascii;
fa42c37f 3837
b73bfc1c 3838 while (1)
fa42c37f 3839 {
89528eb3 3840 incomplete = 0;
df7492f9 3841 ONE_MORE_BYTE (c);
89528eb3 3842 incomplete = 1;
df7492f9 3843 if (c < 0x80)
fa42c37f 3844 continue;
df7492f9 3845 if (c >= 0xA1)
fa42c37f 3846 {
df7492f9
KH
3847 ONE_MORE_BYTE (c);
3848 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3849 return 0;
ff0dacd7 3850 found = CATEGORY_MASK_BIG5;
fa42c37f 3851 }
df7492f9
KH
3852 else
3853 break;
fa42c37f 3854 }
ff0dacd7 3855 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 3856 return 0;
df7492f9
KH
3857
3858 no_more_source:
89528eb3
KH
3859 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3860 {
ff0dacd7 3861 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
3862 return 0;
3863 }
ff0dacd7
KH
3864 detect_info->found |= found;
3865 return 1;
fa42c37f
KH
3866}
3867
4ed46869
KH
3868/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3869 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3870
b73bfc1c 3871static void
df7492f9 3872decode_coding_sjis (coding)
4ed46869 3873 struct coding_system *coding;
4ed46869 3874{
df7492f9
KH
3875 unsigned char *src = coding->source + coding->consumed;
3876 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3877 unsigned char *src_base;
df7492f9 3878 int *charbuf = coding->charbuf;
ff0dacd7 3879 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
3880 int consumed_chars = 0, consumed_chars_base;
3881 int multibytep = coding->src_multibyte;
3882 struct charset *charset_roman, *charset_kanji, *charset_kana;
3883 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
3884 int char_offset = coding->produced_char;
3885 int last_offset = char_offset;
3886 int last_id = charset_ascii;
a5d301df 3887
df7492f9
KH
3888 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3889
3890 val = charset_list;
3891 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3
KH
3892 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3893 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3894
b73bfc1c 3895 while (1)
4ed46869 3896 {
df7492f9 3897 int c, c1;
b73bfc1c
KH
3898
3899 src_base = src;
df7492f9
KH
3900 consumed_chars_base = consumed_chars;
3901
3902 if (charbuf >= charbuf_end)
3903 break;
3904
3905 ONE_MORE_BYTE (c);
b73bfc1c 3906
df7492f9 3907 if (c == '\r')
4ed46869 3908 {
df7492f9 3909 if (EQ (eol_type, Qdos))
4ed46869 3910 {
df7492f9 3911 if (src == src_end)
98725083
KH
3912 {
3913 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3914 goto no_more_source;
3915 }
df7492f9
KH
3916 if (*src == '\n')
3917 ONE_MORE_BYTE (c);
4ed46869 3918 }
df7492f9
KH
3919 else if (EQ (eol_type, Qmac))
3920 c = '\n';
4ed46869 3921 }
54f78171 3922 else
df7492f9
KH
3923 {
3924 struct charset *charset;
3925
3926 if (c < 0x80)
3927 charset = charset_roman;
3928 else
4ed46869 3929 {
df7492f9
KH
3930 if (c >= 0xF0)
3931 goto invalid_code;
3932 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3933 {
54f78171 3934 /* SJIS -> JISX0208 */
df7492f9
KH
3935 ONE_MORE_BYTE (c1);
3936 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3937 goto invalid_code;
3938 c = (c << 8) | c1;
3939 SJIS_TO_JIS (c);
3940 charset = charset_kanji;
5e34de15 3941 }
fb88bf2d 3942 else
89528eb3
KH
3943 {
3944 /* SJIS -> JISX0201-Kana */
3945 c &= 0x7F;
3946 charset = charset_kana;
3947 }
df7492f9 3948 }
ff0dacd7
KH
3949 if (charset->id != charset_ascii
3950 && last_id != charset->id)
3951 {
3952 if (last_id != charset_ascii)
3953 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3954 last_id = charset->id;
3955 last_offset = char_offset;
3956 }
df7492f9
KH
3957 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3958 }
3959 *charbuf++ = c;
ff0dacd7 3960 char_offset++;
df7492f9
KH
3961 continue;
3962
3963 invalid_code:
3964 src = src_base;
3965 consumed_chars = consumed_chars_base;
3966 ONE_MORE_BYTE (c);
3967 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3968 char_offset++;
df7492f9
KH
3969 coding->errors++;
3970 }
3971
3972 no_more_source:
ff0dacd7
KH
3973 if (last_id != charset_ascii)
3974 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3975 coding->consumed_char += consumed_chars_base;
3976 coding->consumed = src_base - coding->source;
3977 coding->charbuf_used = charbuf - coding->charbuf;
3978}
3979
3980static void
3981decode_coding_big5 (coding)
3982 struct coding_system *coding;
3983{
3984 unsigned char *src = coding->source + coding->consumed;
3985 unsigned char *src_end = coding->source + coding->src_bytes;
3986 unsigned char *src_base;
3987 int *charbuf = coding->charbuf;
ff0dacd7 3988 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
3989 int consumed_chars = 0, consumed_chars_base;
3990 int multibytep = coding->src_multibyte;
3991 struct charset *charset_roman, *charset_big5;
3992 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
3993 int char_offset = coding->produced_char;
3994 int last_offset = char_offset;
3995 int last_id = charset_ascii;
df7492f9
KH
3996
3997 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3998 val = charset_list;
3999 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4000 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4001
4002 while (1)
4003 {
4004 int c, c1;
4005
4006 src_base = src;
4007 consumed_chars_base = consumed_chars;
4008
4009 if (charbuf >= charbuf_end)
4010 break;
4011
4012 ONE_MORE_BYTE (c);
4013
4014 if (c == '\r')
4015 {
4016 if (EQ (eol_type, Qdos))
4017 {
4018 if (src == src_end)
98725083
KH
4019 {
4020 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4021 goto no_more_source;
4022 }
df7492f9
KH
4023 if (*src == '\n')
4024 ONE_MORE_BYTE (c);
4ed46869 4025 }
df7492f9
KH
4026 else if (EQ (eol_type, Qmac))
4027 c = '\n';
4028 }
4029 else
4030 {
4031 struct charset *charset;
4032 if (c < 0x80)
4033 charset = charset_roman;
fb88bf2d 4034 else
fb88bf2d 4035 {
54f78171 4036 /* BIG5 -> Big5 */
df7492f9
KH
4037 if (c < 0xA1 || c > 0xFE)
4038 goto invalid_code;
4039 ONE_MORE_BYTE (c1);
4040 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4041 goto invalid_code;
4042 c = c << 8 | c1;
4043 charset = charset_big5;
4ed46869 4044 }
ff0dacd7
KH
4045 if (charset->id != charset_ascii
4046 && last_id != charset->id)
4047 {
4048 if (last_id != charset_ascii)
4049 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4050 last_id = charset->id;
4051 last_offset = char_offset;
4052 }
df7492f9 4053 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 4054 }
4ed46869 4055
df7492f9 4056 *charbuf++ = c;
ff0dacd7 4057 char_offset++;
fb88bf2d
KH
4058 continue;
4059
df7492f9 4060 invalid_code:
4ed46869 4061 src = src_base;
df7492f9
KH
4062 consumed_chars = consumed_chars_base;
4063 ONE_MORE_BYTE (c);
4064 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4065 char_offset++;
df7492f9 4066 coding->errors++;
fb88bf2d 4067 }
d46c5b12 4068
df7492f9 4069 no_more_source:
ff0dacd7
KH
4070 if (last_id != charset_ascii)
4071 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4072 coding->consumed_char += consumed_chars_base;
4073 coding->consumed = src_base - coding->source;
4074 coding->charbuf_used = charbuf - coding->charbuf;
4075}
4076
4077/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4078 This function can encode charsets `ascii', `katakana-jisx0201',
4079 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4080 are sure that all these charsets are registered as official charset
4081 (i.e. do not have extended leading-codes). Characters of other
4082 charsets are produced without any encoding. If SJIS_P is 1, encode
4083 SJIS text, else encode BIG5 text. */
4084
4085static int
4086encode_coding_sjis (coding)
4087 struct coding_system *coding;
4088{
4089 int multibytep = coding->dst_multibyte;
4090 int *charbuf = coding->charbuf;
4091 int *charbuf_end = charbuf + coding->charbuf_used;
4092 unsigned char *dst = coding->destination + coding->produced;
4093 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4094 int safe_room = 4;
4095 int produced_chars = 0;
4096 Lisp_Object attrs, eol_type, charset_list, val;
4097 int ascii_compatible;
4098 struct charset *charset_roman, *charset_kanji, *charset_kana;
4099 int c;
4100
4101 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4102 val = charset_list;
4103 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4104 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4105 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4106
4107 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4108
4109 while (charbuf < charbuf_end)
4110 {
4111 ASSURE_DESTINATION (safe_room);
4112 c = *charbuf++;
4113 /* Now encode the character C. */
4114 if (ASCII_CHAR_P (c) && ascii_compatible)
4115 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4116 else if (CHAR_BYTE8_P (c))
4117 {
4118 c = CHAR_TO_BYTE8 (c);
4119 EMIT_ONE_BYTE (c);
4120 }
df7492f9
KH
4121 else
4122 {
4123 unsigned code;
4124 struct charset *charset = char_charset (c, charset_list, &code);
4125
4126 if (!charset)
4127 {
41cbe562
KH
4128 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4129 {
4130 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4131 charset = CHARSET_FROM_ID (charset_ascii);
4132 }
4133 else
4134 {
4135 c = coding->default_char;
4136 charset = char_charset (c, charset_list, &code);
4137 }
df7492f9
KH
4138 }
4139 if (code == CHARSET_INVALID_CODE (charset))
4140 abort ();
4141 if (charset == charset_kanji)
4142 {
4143 int c1, c2;
4144 JIS_TO_SJIS (code);
4145 c1 = code >> 8, c2 = code & 0xFF;
4146 EMIT_TWO_BYTES (c1, c2);
4147 }
4148 else if (charset == charset_kana)
4149 EMIT_ONE_BYTE (code | 0x80);
4150 else
4151 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4152 }
4153 }
4154 coding->result = CODING_RESULT_SUCCESS;
4155 coding->produced_char += produced_chars;
4156 coding->produced = dst - coding->destination;
4157 return 0;
4158}
4159
4160static int
4161encode_coding_big5 (coding)
4162 struct coding_system *coding;
4163{
4164 int multibytep = coding->dst_multibyte;
4165 int *charbuf = coding->charbuf;
4166 int *charbuf_end = charbuf + coding->charbuf_used;
4167 unsigned char *dst = coding->destination + coding->produced;
4168 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4169 int safe_room = 4;
4170 int produced_chars = 0;
4171 Lisp_Object attrs, eol_type, charset_list, val;
4172 int ascii_compatible;
4173 struct charset *charset_roman, *charset_big5;
4174 int c;
4175
4176 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4177 val = charset_list;
4178 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4179 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4180 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4181
4182 while (charbuf < charbuf_end)
4183 {
4184 ASSURE_DESTINATION (safe_room);
4185 c = *charbuf++;
4186 /* Now encode the character C. */
4187 if (ASCII_CHAR_P (c) && ascii_compatible)
4188 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4189 else if (CHAR_BYTE8_P (c))
4190 {
4191 c = CHAR_TO_BYTE8 (c);
4192 EMIT_ONE_BYTE (c);
4193 }
df7492f9
KH
4194 else
4195 {
4196 unsigned code;
4197 struct charset *charset = char_charset (c, charset_list, &code);
4198
4199 if (! charset)
4200 {
41cbe562
KH
4201 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4202 {
4203 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4204 charset = CHARSET_FROM_ID (charset_ascii);
4205 }
4206 else
4207 {
4208 c = coding->default_char;
4209 charset = char_charset (c, charset_list, &code);
4210 }
df7492f9
KH
4211 }
4212 if (code == CHARSET_INVALID_CODE (charset))
4213 abort ();
4214 if (charset == charset_big5)
4215 {
4216 int c1, c2;
4217
4218 c1 = code >> 8, c2 = code & 0xFF;
4219 EMIT_TWO_BYTES (c1, c2);
4220 }
4221 else
4222 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4223 }
4224 }
4225 coding->result = CODING_RESULT_SUCCESS;
4226 coding->produced_char += produced_chars;
4227 coding->produced = dst - coding->destination;
4228 return 0;
4229}
4230
4231\f
/*** 9. CCL handlers ***/
4233
4234/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4235 Check if a text is encoded in a coding system of which
4236 encoder/decoder are written in CCL program. If it is, return
4237 CATEGORY_MASK_CCL, else return 0. */
4238
4239static int
ff0dacd7 4240detect_coding_ccl (coding, detect_info)
df7492f9 4241 struct coding_system *coding;
ff0dacd7 4242 struct coding_detection_info *detect_info;
df7492f9
KH
4243{
4244 unsigned char *src = coding->source, *src_base = src;
4245 unsigned char *src_end = coding->source + coding->src_bytes;
4246 int multibytep = coding->src_multibyte;
4247 int consumed_chars = 0;
4248 int found = 0;
4249 unsigned char *valids = CODING_CCL_VALIDS (coding);
4250 int head_ascii = coding->head_ascii;
4251 Lisp_Object attrs;
4252
ff0dacd7
KH
4253 detect_info->checked |= CATEGORY_MASK_CCL;
4254
df7492f9
KH
4255 coding = &coding_categories[coding_category_ccl];
4256 attrs = CODING_ID_ATTRS (coding->id);
4257 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4258 src += head_ascii;
4259
4260 while (1)
4261 {
4262 int c;
4263 ONE_MORE_BYTE (c);
4264 if (! valids[c])
4265 break;
ff0dacd7
KH
4266 if ((valids[c] > 1))
4267 found = CATEGORY_MASK_CCL;
df7492f9 4268 }
ff0dacd7 4269 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4270 return 0;
4271
4272 no_more_source:
ff0dacd7
KH
4273 detect_info->found |= found;
4274 return 1;
df7492f9
KH
4275}
4276
4277static void
4278decode_coding_ccl (coding)
4279 struct coding_system *coding;
4280{
7c78e542 4281 const unsigned char *src = coding->source + coding->consumed;
df7492f9
KH
4282 unsigned char *src_end = coding->source + coding->src_bytes;
4283 int *charbuf = coding->charbuf;
4284 int *charbuf_end = charbuf + coding->charbuf_size;
4285 int consumed_chars = 0;
4286 int multibytep = coding->src_multibyte;
4287 struct ccl_program ccl;
4288 int source_charbuf[1024];
4289 int source_byteidx[1024];
8dcbea82 4290 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9 4291
8dcbea82 4292 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4293 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4294
4295 while (src < src_end)
4296 {
7c78e542 4297 const unsigned char *p = src;
df7492f9
KH
4298 int *source, *source_end;
4299 int i = 0;
4300
4301 if (multibytep)
4302 while (i < 1024 && p < src_end)
4303 {
4304 source_byteidx[i] = p - src;
4305 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4306 }
4307 else
4308 while (i < 1024 && p < src_end)
4309 source_charbuf[i++] = *p++;
4310
4311 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4312 ccl.last_block = 1;
4313
4314 source = source_charbuf;
4315 source_end = source + i;
4316 while (source < source_end)
4317 {
4318 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4319 source_end - source, charbuf_end - charbuf,
4320 charset_list);
df7492f9
KH
4321 source += ccl.consumed;
4322 charbuf += ccl.produced;
4323 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4324 break;
4325 }
4326 if (source < source_end)
4327 src += source_byteidx[source - source_charbuf];
4328 else
4329 src = p;
4330 consumed_chars += source - source_charbuf;
4331
4332 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4333 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4334 break;
4335 }
4336
4337 switch (ccl.status)
4338 {
4339 case CCL_STAT_SUSPEND_BY_SRC:
4340 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4341 break;
4342 case CCL_STAT_SUSPEND_BY_DST:
4343 break;
4344 case CCL_STAT_QUIT:
4345 case CCL_STAT_INVALID_CMD:
4346 coding->result = CODING_RESULT_INTERRUPT;
4347 break;
4348 default:
4349 coding->result = CODING_RESULT_SUCCESS;
4350 break;
4351 }
4352 coding->consumed_char += consumed_chars;
4353 coding->consumed = src - coding->source;
4354 coding->charbuf_used = charbuf - coding->charbuf;
4355}
4356
4357static int
4358encode_coding_ccl (coding)
4359 struct coding_system *coding;
4360{
4361 struct ccl_program ccl;
4362 int multibytep = coding->dst_multibyte;
4363 int *charbuf = coding->charbuf;
4364 int *charbuf_end = charbuf + coding->charbuf_used;
4365 unsigned char *dst = coding->destination + coding->produced;
4366 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4367 unsigned char *adjusted_dst_end = dst_end - 1;
4368 int destination_charbuf[1024];
4369 int i, produced_chars = 0;
8dcbea82 4370 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4371
8dcbea82 4372 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4373 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4374
4375 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4376 ccl.dst_multibyte = coding->dst_multibyte;
4377
4378 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4379 {
4380 int dst_bytes = dst_end - dst;
4381 if (dst_bytes > 1024)
4382 dst_bytes = 1024;
4383
4384 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4385 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4386 charbuf += ccl.consumed;
4387 if (multibytep)
4388 for (i = 0; i < ccl.produced; i++)
4389 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4390 else
4391 {
4392 for (i = 0; i < ccl.produced; i++)
4393 *dst++ = destination_charbuf[i] & 0xFF;
4394 produced_chars += ccl.produced;
4395 }
4396 }
4397
4398 switch (ccl.status)
4399 {
4400 case CCL_STAT_SUSPEND_BY_SRC:
4401 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4402 break;
4403 case CCL_STAT_SUSPEND_BY_DST:
4404 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4405 break;
4406 case CCL_STAT_QUIT:
4407 case CCL_STAT_INVALID_CMD:
4408 coding->result = CODING_RESULT_INTERRUPT;
4409 break;
4410 default:
4411 coding->result = CODING_RESULT_SUCCESS;
4412 break;
4413 }
4414
4415 coding->produced_char += produced_chars;
4416 coding->produced = dst - coding->destination;
4417 return 0;
4ed46869
KH
4418}
4419
df7492f9
KH
4420
4421\f
4422/*** 10, 11. no-conversion handlers ***/
4423
4424/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4425
b73bfc1c 4426static void
df7492f9 4427decode_coding_raw_text (coding)
4ed46869 4428 struct coding_system *coding;
4ed46869 4429{
df7492f9 4430 coding->chars_at_source = 1;
2c78b7e1
KH
4431 coding->consumed_char = 0;
4432 coding->consumed = 0;
df7492f9
KH
4433 coding->result = CODING_RESULT_SUCCESS;
4434}
4ed46869 4435
df7492f9
KH
4436static int
4437encode_coding_raw_text (coding)
4438 struct coding_system *coding;
4439{
4440 int multibytep = coding->dst_multibyte;
4441 int *charbuf = coding->charbuf;
4442 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4443 unsigned char *dst = coding->destination + coding->produced;
4444 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4445 int produced_chars = 0;
4446 int c;
a5d301df 4447
df7492f9 4448 if (multibytep)
b73bfc1c 4449 {
df7492f9 4450 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4451
df7492f9
KH
4452 if (coding->src_multibyte)
4453 while (charbuf < charbuf_end)
4454 {
4455 ASSURE_DESTINATION (safe_room);
4456 c = *charbuf++;
4457 if (ASCII_CHAR_P (c))
4458 EMIT_ONE_ASCII_BYTE (c);
4459 else if (CHAR_BYTE8_P (c))
4460 {
4461 c = CHAR_TO_BYTE8 (c);
4462 EMIT_ONE_BYTE (c);
4463 }
4464 else
4465 {
4466 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4467
df7492f9
KH
4468 CHAR_STRING_ADVANCE (c, p1);
4469 while (p0 < p1)
9d123124
KH
4470 {
4471 EMIT_ONE_BYTE (*p0);
4472 p0++;
4473 }
df7492f9
KH
4474 }
4475 }
b73bfc1c 4476 else
df7492f9
KH
4477 while (charbuf < charbuf_end)
4478 {
4479 ASSURE_DESTINATION (safe_room);
4480 c = *charbuf++;
4481 EMIT_ONE_BYTE (c);
4482 }
4483 }
4484 else
4485 {
4486 if (coding->src_multibyte)
b73bfc1c 4487 {
df7492f9
KH
4488 int safe_room = MAX_MULTIBYTE_LENGTH;
4489
4490 while (charbuf < charbuf_end)
b73bfc1c 4491 {
df7492f9
KH
4492 ASSURE_DESTINATION (safe_room);
4493 c = *charbuf++;
4494 if (ASCII_CHAR_P (c))
4495 *dst++ = c;
4496 else if (CHAR_BYTE8_P (c))
4497 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4498 else
df7492f9
KH
4499 CHAR_STRING_ADVANCE (c, dst);
4500 produced_chars++;
b73bfc1c 4501 }
4ed46869 4502 }
df7492f9
KH
4503 else
4504 {
4505 ASSURE_DESTINATION (charbuf_end - charbuf);
4506 while (charbuf < charbuf_end && dst < dst_end)
4507 *dst++ = *charbuf++;
4508 produced_chars = dst - (coding->destination + coding->dst_bytes);
4509 }
4ed46869 4510 }
df7492f9
KH
4511 coding->result = CODING_RESULT_SUCCESS;
4512 coding->produced_char += produced_chars;
4513 coding->produced = dst - coding->destination;
4514 return 0;
4ed46869
KH
4515}
4516
ff0dacd7
KH
4517/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4518 Check if a text is encoded in a charset-based coding system. If it
4519 is, return 1, else return 0. */
4520
0a28aafb 4521static int
ff0dacd7 4522detect_coding_charset (coding, detect_info)
df7492f9 4523 struct coding_system *coding;
ff0dacd7 4524 struct coding_detection_info *detect_info;
1397dc18 4525{
df7492f9
KH
4526 unsigned char *src = coding->source, *src_base = src;
4527 unsigned char *src_end = coding->source + coding->src_bytes;
4528 int multibytep = coding->src_multibyte;
4529 int consumed_chars = 0;
4530 Lisp_Object attrs, valids;
584948ac 4531 int found = 0;
1397dc18 4532
ff0dacd7
KH
4533 detect_info->checked |= CATEGORY_MASK_CHARSET;
4534
df7492f9
KH
4535 coding = &coding_categories[coding_category_charset];
4536 attrs = CODING_ID_ATTRS (coding->id);
4537 valids = AREF (attrs, coding_attr_charset_valids);
4538
4539 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4540 src += coding->head_ascii;
1397dc18 4541
b73bfc1c 4542 while (1)
1397dc18 4543 {
df7492f9 4544 int c;
1397dc18 4545
df7492f9
KH
4546 ONE_MORE_BYTE (c);
4547 if (NILP (AREF (valids, c)))
4548 break;
584948ac 4549 if (c >= 0x80)
ff0dacd7 4550 found = CATEGORY_MASK_CHARSET;
df7492f9 4551 }
ff0dacd7 4552 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4553 return 0;
4ed46869 4554
df7492f9 4555 no_more_source:
ff0dacd7
KH
4556 detect_info->found |= found;
4557 return 1;
df7492f9 4558}
4ed46869 4559
b73bfc1c 4560static void
df7492f9 4561decode_coding_charset (coding)
4ed46869 4562 struct coding_system *coding;
4ed46869 4563{
df7492f9
KH
4564 unsigned char *src = coding->source + coding->consumed;
4565 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4566 unsigned char *src_base;
df7492f9 4567 int *charbuf = coding->charbuf;
ff0dacd7 4568 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4569 int consumed_chars = 0, consumed_chars_base;
4570 int multibytep = coding->src_multibyte;
4eb6d3f1 4571 Lisp_Object attrs, eol_type, charset_list, valids;
ff0dacd7
KH
4572 int char_offset = coding->produced_char;
4573 int last_offset = char_offset;
4574 int last_id = charset_ascii;
df7492f9
KH
4575
4576 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4577 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4578
df7492f9 4579 while (1)
4ed46869 4580 {
4eb6d3f1 4581 int c;
df7492f9
KH
4582
4583 src_base = src;
4584 consumed_chars_base = consumed_chars;
b73bfc1c 4585
df7492f9
KH
4586 if (charbuf >= charbuf_end)
4587 break;
4588
4eb6d3f1 4589 ONE_MORE_BYTE (c);
df7492f9 4590 if (c == '\r')
d46c5b12 4591 {
c7c66a95
KH
4592 /* Here we assume that no charset maps '\r' to something
4593 else. */
df7492f9 4594 if (EQ (eol_type, Qdos))
b73bfc1c 4595 {
98725083
KH
4596 if (src == src_end)
4597 {
4598 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4599 goto no_more_source;
4600 }
4601 if (*src == '\n')
df7492f9 4602 ONE_MORE_BYTE (c);
b73bfc1c 4603 }
df7492f9 4604 else if (EQ (eol_type, Qmac))
b73bfc1c 4605 c = '\n';
d46c5b12 4606 }
df7492f9 4607 else
d46c5b12 4608 {
4eb6d3f1
KH
4609 Lisp_Object val;
4610 struct charset *charset;
c7c66a95 4611 int dim;
acb2a965
KH
4612 int len = 1;
4613 unsigned code = c;
4eb6d3f1
KH
4614
4615 val = AREF (valids, c);
4616 if (NILP (val))
4617 goto invalid_code;
c7c66a95 4618 if (INTEGERP (val))
4eb6d3f1 4619 {
c7c66a95
KH
4620 charset = CHARSET_FROM_ID (XFASTINT (val));
4621 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4622 while (len < dim)
4eb6d3f1 4623 {
acb2a965
KH
4624 ONE_MORE_BYTE (c);
4625 code = (code << 8) | c;
f9d71dcd 4626 len++;
4eb6d3f1 4627 }
c7c66a95
KH
4628 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4629 charset, code, c);
4630 }
4631 else
4632 {
4633 /* VAL is a list of charset IDs. It is assured that the
4634 list is sorted by charset dimensions (smaller one
4635 comes first). */
c7c66a95
KH
4636 while (CONSP (val))
4637 {
4638 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4639 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4640 while (len < dim)
c7c66a95 4641 {
acb2a965
KH
4642 ONE_MORE_BYTE (c);
4643 code = (code << 8) | c;
f9d71dcd 4644 len++;
c7c66a95 4645 }
c7c66a95
KH
4646 CODING_DECODE_CHAR (coding, src, src_base,
4647 src_end, charset, code, c);
4648 if (c >= 0)
4649 break;
4650 val = XCDR (val);
4651 }
4eb6d3f1 4652 }
df7492f9
KH
4653 if (c < 0)
4654 goto invalid_code;
ff0dacd7
KH
4655 if (charset->id != charset_ascii
4656 && last_id != charset->id)
4657 {
4658 if (last_id != charset_ascii)
4659 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4660 last_id = charset->id;
4661 last_offset = char_offset;
4662 }
d46c5b12 4663 }
df7492f9 4664 *charbuf++ = c;
ff0dacd7 4665 char_offset++;
df7492f9
KH
4666 continue;
4667
4668 invalid_code:
4669 src = src_base;
4670 consumed_chars = consumed_chars_base;
4671 ONE_MORE_BYTE (c);
4672 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4673 char_offset++;
df7492f9 4674 coding->errors++;
4ed46869
KH
4675 }
4676
df7492f9 4677 no_more_source:
ff0dacd7
KH
4678 if (last_id != charset_ascii)
4679 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4680 coding->consumed_char += consumed_chars_base;
4681 coding->consumed = src_base - coding->source;
4682 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4683}
4684
df7492f9
KH
4685static int
4686encode_coding_charset (coding)
4ed46869 4687 struct coding_system *coding;
4ed46869 4688{
df7492f9
KH
4689 int multibytep = coding->dst_multibyte;
4690 int *charbuf = coding->charbuf;
4691 int *charbuf_end = charbuf + coding->charbuf_used;
4692 unsigned char *dst = coding->destination + coding->produced;
4693 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4694 int safe_room = MAX_MULTIBYTE_LENGTH;
4695 int produced_chars = 0;
df7492f9
KH
4696 Lisp_Object attrs, eol_type, charset_list;
4697 int ascii_compatible;
b73bfc1c 4698 int c;
b73bfc1c 4699
df7492f9 4700 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4701 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4702
df7492f9 4703 while (charbuf < charbuf_end)
4ed46869 4704 {
4eb6d3f1 4705 struct charset *charset;
df7492f9
KH
4706 unsigned code;
4707
4708 ASSURE_DESTINATION (safe_room);
4709 c = *charbuf++;
4710 if (ascii_compatible && ASCII_CHAR_P (c))
4711 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4712 else if (CHAR_BYTE8_P (c))
4713 {
4714 c = CHAR_TO_BYTE8 (c);
4715 EMIT_ONE_BYTE (c);
4716 }
d46c5b12 4717 else
4eb6d3f1
KH
4718 {
4719 charset = char_charset (c, charset_list, &code);
4720 if (charset)
4721 {
4722 if (CHARSET_DIMENSION (charset) == 1)
4723 EMIT_ONE_BYTE (code);
4724 else if (CHARSET_DIMENSION (charset) == 2)
4725 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4726 else if (CHARSET_DIMENSION (charset) == 3)
4727 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4728 else
4729 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4730 (code >> 8) & 0xFF, code & 0xFF);
4731 }
4732 else
41cbe562
KH
4733 {
4734 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4735 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4736 else
4737 c = coding->default_char;
4738 EMIT_ONE_BYTE (c);
4739 }
4eb6d3f1 4740 }
4ed46869
KH
4741 }
4742
df7492f9
KH
4743 coding->result = CODING_RESULT_SUCCESS;
4744 coding->produced_char += produced_chars;
4745 coding->produced = dst - coding->destination;
4746 return 0;
4ed46869
KH
4747}
4748
4749\f
1397dc18 4750/*** 10. C library functions ***/
4ed46869 4751
df7492f9
KH
4752/* Setup coding context CODING from information about CODING_SYSTEM.
4753 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4754 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4755
4756void
df7492f9
KH
4757setup_coding_system (coding_system, coding)
4758 Lisp_Object coding_system;
ec6d2bb8
KH
4759 struct coding_system *coding;
4760{
df7492f9
KH
4761 Lisp_Object attrs;
4762 Lisp_Object eol_type;
4763 Lisp_Object coding_type;
4764 Lisp_Object val;
ec6d2bb8 4765
df7492f9
KH
4766 if (NILP (coding_system))
4767 coding_system = Qno_conversion;
4768
4769 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4770
4771 attrs = CODING_ID_ATTRS (coding->id);
4772 eol_type = CODING_ID_EOL_TYPE (coding->id);
4773
4774 coding->mode = 0;
4775 coding->head_ascii = -1;
4776 coding->common_flags
4777 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4778
4779 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4780 coding->max_charset_id = XSTRING (val)->size - 1;
4781 coding->safe_charsets = (char *) XSTRING (val)->data;
4782 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4783
4784 coding_type = CODING_ATTR_TYPE (attrs);
4785 if (EQ (coding_type, Qundecided))
4786 {
4787 coding->detector = NULL;
4788 coding->decoder = decode_coding_raw_text;
4789 coding->encoder = encode_coding_raw_text;
4790 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4791 }
4792 else if (EQ (coding_type, Qiso_2022))
4793 {
4794 int i;
4795 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
ff0dacd7 4796 enum coding_category category = XINT (CODING_ATTR_CATEGORY (attrs));
df7492f9
KH
4797
4798 /* Invoke graphic register 0 to plane 0. */
4799 CODING_ISO_INVOCATION (coding, 0) = 0;
4800 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4801 CODING_ISO_INVOCATION (coding, 1)
4802 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4803 /* Setup the initial status of designation. */
4804 for (i = 0; i < 4; i++)
4805 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4806 /* Not single shifting initially. */
4807 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4808 /* Beginning of buffer should also be regarded as bol. */
4809 CODING_ISO_BOL (coding) = 1;
4810 coding->detector = detect_coding_iso_2022;
4811 coding->decoder = decode_coding_iso_2022;
4812 coding->encoder = encode_coding_iso_2022;
4813 if (flags & CODING_ISO_FLAG_SAFE)
4814 coding->mode |= CODING_MODE_SAFE_ENCODING;
4815 coding->common_flags
4816 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4817 | CODING_REQUIRE_FLUSHING_MASK);
4818 if (flags & CODING_ISO_FLAG_COMPOSITION)
4819 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4820 if (flags & CODING_ISO_FLAG_DESIGNATION)
4821 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4822 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4823 {
4824 setup_iso_safe_charsets (attrs);
4825 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4826 coding->max_charset_id = XSTRING (val)->size - 1;
4827 coding->safe_charsets = (char *) XSTRING (val)->data;
4828 }
4829 CODING_ISO_FLAGS (coding) = flags;
4830 }
4831 else if (EQ (coding_type, Qcharset))
4832 {
4833 coding->detector = detect_coding_charset;
4834 coding->decoder = decode_coding_charset;
4835 coding->encoder = encode_coding_charset;
4836 coding->common_flags
4837 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4838 }
4839 else if (EQ (coding_type, Qutf_8))
4840 {
4841 coding->detector = detect_coding_utf_8;
4842 coding->decoder = decode_coding_utf_8;
4843 coding->encoder = encode_coding_utf_8;
4844 coding->common_flags
4845 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4846 }
4847 else if (EQ (coding_type, Qutf_16))
4848 {
4849 val = AREF (attrs, coding_attr_utf_16_bom);
4850 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4851 : EQ (val, Qt) ? utf_16_with_bom
4852 : utf_16_without_bom);
4853 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 4854 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 4855 : utf_16_little_endian);
e19c3639 4856 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4857 coding->detector = detect_coding_utf_16;
4858 coding->decoder = decode_coding_utf_16;
4859 coding->encoder = encode_coding_utf_16;
4860 coding->common_flags
4861 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
4862 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4863 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
df7492f9
KH
4864 }
4865 else if (EQ (coding_type, Qccl))
4866 {
4867 coding->detector = detect_coding_ccl;
4868 coding->decoder = decode_coding_ccl;
4869 coding->encoder = encode_coding_ccl;
4870 coding->common_flags
4871 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4872 | CODING_REQUIRE_FLUSHING_MASK);
4873 }
4874 else if (EQ (coding_type, Qemacs_mule))
4875 {
4876 coding->detector = detect_coding_emacs_mule;
4877 coding->decoder = decode_coding_emacs_mule;
4878 coding->encoder = encode_coding_emacs_mule;
4879 coding->common_flags
4880 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4881 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4882 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4883 {
4884 Lisp_Object tail, safe_charsets;
4885 int max_charset_id = 0;
4886
4887 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4888 tail = XCDR (tail))
4889 if (max_charset_id < XFASTINT (XCAR (tail)))
4890 max_charset_id = XFASTINT (XCAR (tail));
4891 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4892 make_number (255));
4893 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4894 tail = XCDR (tail))
4895 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4896 coding->max_charset_id = max_charset_id;
4897 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4898 }
4899 }
4900 else if (EQ (coding_type, Qshift_jis))
4901 {
4902 coding->detector = detect_coding_sjis;
4903 coding->decoder = decode_coding_sjis;
4904 coding->encoder = encode_coding_sjis;
4905 coding->common_flags
4906 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4907 }
4908 else if (EQ (coding_type, Qbig5))
4909 {
4910 coding->detector = detect_coding_big5;
4911 coding->decoder = decode_coding_big5;
4912 coding->encoder = encode_coding_big5;
4913 coding->common_flags
4914 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4915 }
4916 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4917 {
df7492f9
KH
4918 coding->detector = NULL;
4919 coding->decoder = decode_coding_raw_text;
4920 coding->encoder = encode_coding_raw_text;
4921 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4922 }
df7492f9
KH
4923
4924 return;
ec6d2bb8
KH
4925}
4926
df7492f9
KH
4927/* Return raw-text or one of its subsidiaries that has the same
4928 eol_type as CODING-SYSTEM. */
ec6d2bb8 4929
df7492f9
KH
4930Lisp_Object
4931raw_text_coding_system (coding_system)
4932 Lisp_Object coding_system;
ec6d2bb8 4933{
0be8721c 4934 Lisp_Object spec, attrs;
df7492f9
KH
4935 Lisp_Object eol_type, raw_text_eol_type;
4936
4937 spec = CODING_SYSTEM_SPEC (coding_system);
4938 attrs = AREF (spec, 0);
4939
4940 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4941 return coding_system;
ec6d2bb8 4942
df7492f9
KH
4943 eol_type = AREF (spec, 2);
4944 if (VECTORP (eol_type))
4945 return Qraw_text;
4946 spec = CODING_SYSTEM_SPEC (Qraw_text);
4947 raw_text_eol_type = AREF (spec, 2);
4948 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4949 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4950 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4951}
4952
54f78171 4953
df7492f9
KH
4954/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4955 does, return one of the subsidiary that has the same eol-spec as
4956 PARENT. Otherwise, return CODING_SYSTEM. */
4957
4958Lisp_Object
4959coding_inherit_eol_type (coding_system, parent)
b74e4686 4960 Lisp_Object coding_system, parent;
54f78171 4961{
df7492f9 4962 Lisp_Object spec, attrs, eol_type;
54f78171 4963
df7492f9
KH
4964 spec = CODING_SYSTEM_SPEC (coding_system);
4965 attrs = AREF (spec, 0);
4966 eol_type = AREF (spec, 2);
4967 if (VECTORP (eol_type))
4968 {
4969 Lisp_Object parent_spec;
df7492f9
KH
4970 Lisp_Object parent_eol_type;
4971
4972 parent_spec
4973 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4974 parent_eol_type = AREF (parent_spec, 2);
4975 if (EQ (parent_eol_type, Qunix))
4976 coding_system = AREF (eol_type, 0);
4977 else if (EQ (parent_eol_type, Qdos))
4978 coding_system = AREF (eol_type, 1);
4979 else if (EQ (parent_eol_type, Qmac))
4980 coding_system = AREF (eol_type, 2);
54f78171 4981 }
df7492f9 4982 return coding_system;
54f78171
KH
4983}
4984
4ed46869
KH
4985/* Emacs has a mechanism to automatically detect a coding system if it
4986 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4987 it's impossible to distinguish some coding systems accurately
4988 because they use the same range of codes. So, at first, coding
4989 systems are categorized into the following categories:
4990
0ef69138 4991 o coding-category-emacs-mule
4ed46869
KH
4992
4993 The category for a coding system which has the same code range
4994 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 4995 symbol) `emacs-mule' by default.
4ed46869
KH
4996
4997 o coding-category-sjis
4998
4999 The category for a coding system which has the same code range
5000 as SJIS. Assigned the coding-system (Lisp
7717c392 5001 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5002
5003 o coding-category-iso-7
5004
5005 The category for a coding system which has the same code range
7717c392 5006 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5007 shift and single shift functions. This can encode/decode all
5008 charsets. Assigned the coding-system (Lisp symbol)
5009 `iso-2022-7bit' by default.
5010
5011 o coding-category-iso-7-tight
5012
5013 Same as coding-category-iso-7 except that this can
5014 encode/decode only the specified charsets.
4ed46869
KH
5015
5016 o coding-category-iso-8-1
5017
5018 The category for a coding system which has the same code range
5019 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5020 for DIMENSION1 charset. This doesn't use any locking shift
5021 and single shift functions. Assigned the coding-system (Lisp
5022 symbol) `iso-latin-1' by default.
4ed46869
KH
5023
5024 o coding-category-iso-8-2
5025
5026 The category for a coding system which has the same code range
5027 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5028 for DIMENSION2 charset. This doesn't use any locking shift
5029 and single shift functions. Assigned the coding-system (Lisp
5030 symbol) `japanese-iso-8bit' by default.
4ed46869 5031
7717c392 5032 o coding-category-iso-7-else
4ed46869
KH
5033
5034 The category for a coding system which has the same code range
df7492f9 5035 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5036 single shift functions. Assigned the coding-system (Lisp
5037 symbol) `iso-2022-7bit-lock' by default.
5038
5039 o coding-category-iso-8-else
5040
5041 The category for a coding system which has the same code range
df7492f9 5042 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5043 single shift functions. Assigned the coding-system (Lisp
5044 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5045
5046 o coding-category-big5
5047
5048 The category for a coding system which has the same code range
5049 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5050 `cn-big5' by default.
4ed46869 5051
fa42c37f
KH
5052 o coding-category-utf-8
5053
5054 The category for a coding system which has the same code range
5055 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5056 symbol) `utf-8' by default.
5057
5058 o coding-category-utf-16-be
5059
5060 The category for a coding system in which a text has an
5061 Unicode signature (cf. Unicode Standard) in the order of BIG
5062 endian at the head. Assigned the coding-system (Lisp symbol)
5063 `utf-16-be' by default.
5064
5065 o coding-category-utf-16-le
5066
5067 The category for a coding system in which a text has an
5068 Unicode signature (cf. Unicode Standard) in the order of
5069 LITTLE endian at the head. Assigned the coding-system (Lisp
5070 symbol) `utf-16-le' by default.
5071
1397dc18
KH
5072 o coding-category-ccl
5073
5074 The category for a coding system of which encoder/decoder is
5075 written in CCL programs. The default value is nil, i.e., no
5076 coding system is assigned.
5077
4ed46869
KH
5078 o coding-category-binary
5079
5080 The category for a coding system not categorized in any of the
5081 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5082 `no-conversion' by default.
4ed46869
KH
5083
5084 Each of them is a Lisp symbol and the value is an actual
df7492f9 5085 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5086 What Emacs does actually is to detect a category of coding system.
5087 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5088 decide only one possible category, it selects a category of the
4ed46869
KH
5089 highest priority. Priorities of categories are also specified by a
5090 user in a Lisp variable `coding-category-list'.
5091
5092*/
5093
/* Kinds of end-of-line found while scanning text.  Each kind other
   than EOL_SEEN_NONE is a distinct bit, so that several kinds can be
   accumulated in one integer with `|' and tested with `&'.  */
#define EOL_SEEN_NONE   0
#define EOL_SEEN_LF     (1 << 0)
#define EOL_SEEN_CR     (1 << 1)
#define EOL_SEEN_CRLF   (1 << 2)
4ed46869 5098
ff0dacd7
KH
5099/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5100 SOURCE is encoded. If CATEGORY is one of
5101 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5102 two-byte, else they are encoded by one-byte.
5103
5104 Return one of EOL_SEEN_XXX. */
4ed46869 5105
bc4bc72a
RS
5106#define MAX_EOL_CHECK_COUNT 3
5107
d46c5b12 5108static int
89528eb3 5109detect_eol (source, src_bytes, category)
d46c5b12 5110 unsigned char *source;
df7492f9 5111 EMACS_INT src_bytes;
89528eb3 5112 enum coding_category category;
4ed46869 5113{
d46c5b12 5114 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5115 unsigned char c;
df7492f9
KH
5116 int total = 0;
5117 int eol_seen = EOL_SEEN_NONE;
4ed46869 5118
89528eb3 5119 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5120 {
df7492f9 5121 int msb, lsb;
fa42c37f 5122
89528eb3
KH
5123 msb = category == (coding_category_utf_16_le
5124 | coding_category_utf_16_le_nosig);
df7492f9 5125 lsb = 1 - msb;
fa42c37f 5126
df7492f9 5127 while (src + 1 < src_end)
fa42c37f 5128 {
df7492f9
KH
5129 c = src[lsb];
5130 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5131 {
df7492f9
KH
5132 int this_eol;
5133
5134 if (c == '\n')
5135 this_eol = EOL_SEEN_LF;
5136 else if (src + 3 >= src_end
5137 || src[msb + 2] != 0
5138 || src[lsb + 2] != '\n')
5139 this_eol = EOL_SEEN_CR;
fa42c37f 5140 else
df7492f9
KH
5141 this_eol = EOL_SEEN_CRLF;
5142
5143 if (eol_seen == EOL_SEEN_NONE)
5144 /* This is the first end-of-line. */
5145 eol_seen = this_eol;
5146 else if (eol_seen != this_eol)
fa42c37f 5147 {
df7492f9
KH
5148 /* The found type is different from what found before. */
5149 eol_seen = EOL_SEEN_LF;
5150 break;
fa42c37f 5151 }
df7492f9
KH
5152 if (++total == MAX_EOL_CHECK_COUNT)
5153 break;
fa42c37f 5154 }
df7492f9 5155 src += 2;
fa42c37f 5156 }
df7492f9 5157 }
d46c5b12 5158 else
27901516 5159 {
df7492f9 5160 while (src < src_end)
27901516 5161 {
df7492f9
KH
5162 c = *src++;
5163 if (c == '\n' || c == '\r')
5164 {
5165 int this_eol;
d46c5b12 5166
df7492f9
KH
5167 if (c == '\n')
5168 this_eol = EOL_SEEN_LF;
5169 else if (src >= src_end || *src != '\n')
5170 this_eol = EOL_SEEN_CR;
5171 else
5172 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5173
df7492f9
KH
5174 if (eol_seen == EOL_SEEN_NONE)
5175 /* This is the first end-of-line. */
5176 eol_seen = this_eol;
5177 else if (eol_seen != this_eol)
5178 {
5179 /* The found type is different from what found before. */
5180 eol_seen = EOL_SEEN_LF;
5181 break;
5182 }
5183 if (++total == MAX_EOL_CHECK_COUNT)
5184 break;
5185 }
5186 }
73be902c 5187 }
df7492f9 5188 return eol_seen;
73be902c
KH
5189}
5190
df7492f9 5191
73be902c 5192static void
df7492f9
KH
5193adjust_coding_eol_type (coding, eol_seen)
5194 struct coding_system *coding;
5195 int eol_seen;
73be902c 5196{
0be8721c 5197 Lisp_Object eol_type;
df7492f9
KH
5198
5199 eol_type = CODING_ID_EOL_TYPE (coding->id);
5200 if (eol_seen & EOL_SEEN_LF)
5201 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6f197c07 5202 else if (eol_seen & EOL_SEEN_CRLF)
df7492f9 5203 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6f197c07 5204 else if (eol_seen & EOL_SEEN_CR)
df7492f9 5205 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
5206}
5207
df7492f9
KH
5208/* Detect how a text specified in CODING is encoded. If a coding
5209 system is detected, update fields of CODING by the detected coding
5210 system. */
5211
5212void
5213detect_coding (coding)
d46c5b12 5214 struct coding_system *coding;
d46c5b12 5215{
df7492f9
KH
5216 unsigned char *src, *src_end;
5217 Lisp_Object attrs, coding_type;
d46c5b12 5218
df7492f9
KH
5219 coding->consumed = coding->consumed_char = 0;
5220 coding->produced = coding->produced_char = 0;
5221 coding_set_source (coding);
1c3478b0 5222
df7492f9 5223 src_end = coding->source + coding->src_bytes;
1c3478b0 5224
df7492f9
KH
5225 /* If we have not yet decided the text encoding type, detect it
5226 now. */
5227 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5228 {
df7492f9
KH
5229 int c, i;
5230
5231 for (src = coding->source; src < src_end; src++)
5232 {
5233 c = *src;
5234 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5235 || c == ISO_CODE_SI
5236 || c == ISO_CODE_SO)))
5237 break;
5238 }
5239 coding->head_ascii = src - (coding->source + coding->consumed);
5240
5241 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5242 {
ff0dacd7
KH
5243 struct coding_detection_info detect_info;
5244 enum coding_category category;
5245 struct coding_system *this;
df7492f9 5246
ff0dacd7 5247 detect_info.checked = detect_info.found = detect_info.rejected = 0;
df7492f9 5248 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5249 {
ff0dacd7
KH
5250 category = coding_priorities[i];
5251 this = coding_categories + category;
df7492f9 5252 if (this->id < 0)
1c3478b0 5253 {
df7492f9 5254 /* No coding system of this category is defined. */
ff0dacd7 5255 detect_info.rejected |= (1 << category);
df7492f9 5256 }
ff0dacd7 5257 else if (category >= coding_category_raw_text)
89528eb3 5258 continue;
ff0dacd7 5259 else if (detect_info.checked & (1 << category))
df7492f9 5260 {
ff0dacd7
KH
5261 if (detect_info.found & (1 << category))
5262 break;
1c3478b0 5263 }
ff0dacd7
KH
5264 else if ((*(this->detector)) (coding, &detect_info)
5265 && detect_info.found & (1 << category))
5266 break;
1c3478b0 5267 }
ff0dacd7
KH
5268 if (i < coding_category_raw_text)
5269 setup_coding_system (CODING_ID_NAME (this->id), coding);
5270 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5271 setup_coding_system (Qraw_text, coding);
ff0dacd7 5272 else if (detect_info.rejected)
df7492f9 5273 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5274 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5275 {
5276 this = coding_categories + coding_priorities[i];
5277 setup_coding_system (CODING_ID_NAME (this->id), coding);
5278 break;
5279 }
1c3478b0 5280 }
b73bfc1c 5281 }
b49a1807
KH
5282 else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
5283 {
5284 Lisp_Object coding_systems;
5285 struct coding_detection_info detect_info;
5286
5287 coding_systems
5288 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5289 detect_info.found = detect_info.rejected = 0;
5290 if (CONSP (coding_systems)
5291 && detect_coding_utf_16 (coding, &detect_info)
5292 && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5293 | CATEGORY_MASK_UTF_16_BE)))
5294 {
5295 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5296 setup_coding_system (XCAR (coding_systems), coding);
5297 else
5298 setup_coding_system (XCDR (coding_systems), coding);
5299 }
5300 }
69f76525 5301
df7492f9
KH
5302 attrs = CODING_ID_ATTRS (coding->id);
5303 coding_type = CODING_ATTR_TYPE (attrs);
5304
5305 /* If we have not yet decided the EOL type, detect it now. But, the
5306 detection is impossible for a CCL based coding system, in which
5307 case, we detct the EOL type after decoding. */
5308 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5309 && ! EQ (coding_type, Qccl))
d46c5b12 5310 {
89528eb3
KH
5311 int eol_seen = detect_eol (coding->source, coding->src_bytes,
5312 XINT (CODING_ATTR_CATEGORY (attrs)));
df7492f9
KH
5313
5314 if (eol_seen != EOL_SEEN_NONE)
5315 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5316 }
4ed46869
KH
5317}
5318
aaaf0b1e
KH
5319
5320static void
df7492f9 5321decode_eol (coding)
aaaf0b1e 5322 struct coding_system *coding;
aaaf0b1e 5323{
df7492f9 5324 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5325 {
df7492f9
KH
5326 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5327 unsigned char *pend = p + coding->produced;
5328 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5329
df7492f9 5330 for (; p < pend; p++)
aaaf0b1e 5331 {
df7492f9
KH
5332 if (*p == '\n')
5333 eol_seen |= EOL_SEEN_LF;
5334 else if (*p == '\r')
aaaf0b1e 5335 {
df7492f9 5336 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5337 {
df7492f9
KH
5338 eol_seen |= EOL_SEEN_CRLF;
5339 p++;
aaaf0b1e 5340 }
aaaf0b1e 5341 else
df7492f9 5342 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5343 }
aaaf0b1e 5344 }
df7492f9
KH
5345 if (eol_seen != EOL_SEEN_NONE)
5346 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5347 }
aaaf0b1e 5348
df7492f9
KH
5349 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5350 {
5351 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5352 unsigned char *pend = p + coding->produced;
5353
5354 for (; p < pend; p++)
5355 if (*p == '\r')
5356 *p = '\n';
5357 }
5358 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5359 {
5360 unsigned char *p, *pbeg, *pend;
5361 Lisp_Object undo_list;
5362
5363 move_gap_both (coding->dst_pos + coding->produced_char,
5364 coding->dst_pos_byte + coding->produced);
5365 undo_list = current_buffer->undo_list;
5366 current_buffer->undo_list = Qt;
c197f191 5367 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5368 current_buffer->undo_list = undo_list;
5369 pbeg = GPT_ADDR;
5370 pend = pbeg + coding->produced;
5371
5372 for (p = pend - 1; p >= pbeg; p--)
5373 if (*p == '\r')
5374 {
5375 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5376 pend--;
5377 }
5378 coding->produced_char -= coding->produced - (pend - pbeg);
5379 coding->produced = pend - pbeg;
5380 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5381 }
5382}
5383
df7492f9
KH
5384static void
5385translate_chars (coding, table)
4ed46869 5386 struct coding_system *coding;
df7492f9 5387 Lisp_Object table;
4ed46869 5388{
df7492f9
KH
5389 int *charbuf = coding->charbuf;
5390 int *charbuf_end = charbuf + coding->charbuf_used;
5391 int c;
5392
5393 if (coding->chars_at_source)
5394 return;
4ed46869 5395
df7492f9 5396 while (charbuf < charbuf_end)
8844fa83 5397 {
df7492f9
KH
5398 c = *charbuf;
5399 if (c < 0)
5400 charbuf += c;
5401 else
5402 *charbuf++ = translate_char (table, c);
8844fa83 5403 }
df7492f9 5404}
4ed46869 5405
df7492f9
KH
5406static int
5407produce_chars (coding)
5408 struct coding_system *coding;
5409{
5410 unsigned char *dst = coding->destination + coding->produced;
5411 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5412 int produced;
5413 int produced_chars = 0;
b73bfc1c 5414
df7492f9 5415 if (! coding->chars_at_source)
4ed46869 5416 {
df7492f9
KH
5417 /* Characters are in coding->charbuf. */
5418 int *buf = coding->charbuf;
5419 int *buf_end = buf + coding->charbuf_used;
5420 unsigned char *adjusted_dst_end;
4ed46869 5421
df7492f9
KH
5422 if (BUFFERP (coding->src_object)
5423 && EQ (coding->src_object, coding->dst_object))
5424 dst_end = coding->source + coding->consumed;
5425 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5426
df7492f9
KH
5427 while (buf < buf_end)
5428 {
5429 int c = *buf++;
5430
5431 if (dst >= adjusted_dst_end)
5432 {
5433 dst = alloc_destination (coding,
5434 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5435 dst);
5436 dst_end = coding->destination + coding->dst_bytes;
5437 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5438 }
5439 if (c >= 0)
5440 {
5441 if (coding->dst_multibyte
5442 || ! CHAR_BYTE8_P (c))
5443 CHAR_STRING_ADVANCE (c, dst);
5444 else
5445 *dst++ = CHAR_TO_BYTE8 (c);
5446 produced_chars++;
5447 }
5448 else
e1c23804 5449 /* This is an annotation datum. */
df7492f9
KH
5450 buf -= c + 1;
5451 }
5452 }
5453 else
5454 {
df7492f9
KH
5455 unsigned char *src = coding->source;
5456 unsigned char *src_end = src + coding->src_bytes;
5457 Lisp_Object eol_type;
b73bfc1c 5458
df7492f9 5459 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5460
df7492f9 5461 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5462 {
df7492f9
KH
5463 if (coding->src_multibyte)
5464 {
71c81426 5465 int multibytep = 1;
df7492f9 5466 int consumed_chars;
d46c5b12 5467
df7492f9
KH
5468 while (1)
5469 {
5470 unsigned char *src_base = src;
5471 int c;
b73bfc1c 5472
df7492f9
KH
5473 ONE_MORE_BYTE (c);
5474 if (c == '\r')
5475 {
5476 if (EQ (eol_type, Qdos))
5477 {
98725083
KH
5478 if (src == src_end)
5479 {
5480 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5481 goto no_more_source;
5482 }
5483 if (*src == '\n')
df7492f9
KH
5484 c = *src++;
5485 }
5486 else if (EQ (eol_type, Qmac))
5487 c = '\n';
5488 }
5489 if (dst == dst_end)
5490 {
2c78b7e1 5491 coding->consumed = src - coding->source;
b73bfc1c 5492
2c78b7e1
KH
5493 if (EQ (coding->src_object, coding->dst_object))
5494 dst_end = src;
5495 if (dst == dst_end)
5496 {
5497 dst = alloc_destination (coding, src_end - src + 1,
5498 dst);
5499 dst_end = coding->destination + coding->dst_bytes;
5500 coding_set_source (coding);
5501 src = coding->source + coding->consumed;
5502 src_end = coding->source + coding->src_bytes;
5503 }
df7492f9
KH
5504 }
5505 *dst++ = c;
5506 produced_chars++;
5507 }
5508 no_more_source:
5509 ;
5510 }
5511 else
5512 while (src < src_end)
5513 {
71c81426 5514 int multibytep = 1;
df7492f9 5515 int c = *src++;
b73bfc1c 5516
df7492f9
KH
5517 if (c == '\r')
5518 {
5519 if (EQ (eol_type, Qdos))
5520 {
5521 if (src < src_end
5522 && *src == '\n')
5523 c = *src++;
5524 }
5525 else if (EQ (eol_type, Qmac))
5526 c = '\n';
5527 }
5528 if (dst >= dst_end - 1)
5529 {
2c78b7e1 5530 coding->consumed = src - coding->source;
df7492f9 5531
2c78b7e1
KH
5532 if (EQ (coding->src_object, coding->dst_object))
5533 dst_end = src;
5534 if (dst >= dst_end - 1)
5535 {
5536 dst = alloc_destination (coding, src_end - src + 2,
5537 dst);
5538 dst_end = coding->destination + coding->dst_bytes;
5539 coding_set_source (coding);
5540 src = coding->source + coding->consumed;
5541 src_end = coding->source + coding->src_bytes;
5542 }
df7492f9
KH
5543 }
5544 EMIT_ONE_BYTE (c);
5545 }
d46c5b12 5546 }
df7492f9
KH
5547 else
5548 {
5549 if (!EQ (coding->src_object, coding->dst_object))
5550 {
5551 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5552
df7492f9
KH
5553 if (require > 0)
5554 {
5555 EMACS_INT offset = src - coding->source;
5556
5557 dst = alloc_destination (coding, require, dst);
5558 coding_set_source (coding);
5559 src = coding->source + offset;
5560 src_end = coding->source + coding->src_bytes;
5561 }
5562 }
5563 produced_chars = coding->src_chars;
5564 while (src < src_end)
5565 {
5566 int c = *src++;
5567
5568 if (c == '\r')
5569 {
5570 if (EQ (eol_type, Qdos))
5571 {
5572 if (src < src_end
5573 && *src == '\n')
5574 c = *src++;
5575 produced_chars--;
5576 }
5577 else if (EQ (eol_type, Qmac))
5578 c = '\n';
5579 }
5580 *dst++ = c;
5581 }
5582 }
2c78b7e1
KH
5583 coding->consumed = coding->src_bytes;
5584 coding->consumed_char = coding->src_chars;
b73bfc1c 5585 }
4ed46869 5586
df7492f9
KH
5587 produced = dst - (coding->destination + coding->produced);
5588 if (BUFFERP (coding->dst_object))
5589 insert_from_gap (produced_chars, produced);
5590 coding->produced += produced;
5591 coding->produced_char += produced_chars;
5592 return produced_chars;
b73bfc1c 5593}
52d41803 5594
ff0dacd7
KH
5595/* Compose text in CODING->object according to the annotation data at
5596 CHARBUF. CHARBUF is an array:
5597 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5598 */
4ed46869 5599
df7492f9
KH
5600static INLINE void
5601produce_composition (coding, charbuf)
4ed46869 5602 struct coding_system *coding;
df7492f9 5603 int *charbuf;
4ed46869 5604{
df7492f9 5605 int len;
ff0dacd7 5606 EMACS_INT from, to;
df7492f9 5607 enum composition_method method;
df7492f9
KH
5608 Lisp_Object components;
5609
df7492f9 5610 len = -charbuf[0];
ff0dacd7
KH
5611 from = coding->dst_pos + charbuf[2];
5612 to = coding->dst_pos + charbuf[3];
5613 method = (enum composition_method) (charbuf[4]);
df7492f9
KH
5614
5615 if (method == COMPOSITION_RELATIVE)
5616 components = Qnil;
5617 else
d46c5b12 5618 {
df7492f9
KH
5619 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5620 int i;
4ed46869 5621
df7492f9
KH
5622 len -= 5;
5623 charbuf += 5;
5624 for (i = 0; i < len; i++)
5625 args[i] = make_number (charbuf[i]);
5626 components = (method == COMPOSITION_WITH_ALTCHARS
5627 ? Fstring (len, args) : Fvector (len, args));
5628 }
ff0dacd7 5629 compose_text (from, to, components, Qnil, coding->dst_object);
df7492f9 5630}
b73bfc1c 5631
d46c5b12 5632
ff0dacd7
KH
5633/* Put `charset' property on text in CODING->object according to
5634 the annotation data at CHARBUF. CHARBUF is an array:
5635 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5636 */
b73bfc1c 5637
ff0dacd7
KH
5638static INLINE void
5639produce_charset (coding, charbuf)
5640 struct coding_system *coding;
5641 int *charbuf;
5642{
5643 EMACS_INT from = coding->dst_pos + charbuf[2];
5644 EMACS_INT to = coding->dst_pos + charbuf[3];
5645 struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
b73bfc1c 5646
ff0dacd7
KH
5647 Fput_text_property (make_number (from), make_number (to),
5648 Qcharset, CHARSET_NAME (charset),
5649 coding->dst_object);
4ed46869
KH
5650}
5651
ff0dacd7 5652
df7492f9
KH
/* Number of `int' elements requested first for the conversion work
   area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca and record its
   element count in CODING->charbuf_size.  The request starts at
   CHARBUF_SIZE elements and is halved after each attempt that yields
   a null pointer, for as long as the size stays above 1024.

   This has to remain a macro: memory obtained from alloca belongs to
   the stack frame of the function in which the macro is expanded.

   If no buffer was obtained, CODING->result is set to
   CODING_RESULT_INSUFFICIENT_MEM and the *enclosing function* returns
   that value, so the macro may only be used in functions returning
   int.

   CODING is parenthesized at every use so that any pointer-valued
   expression (e.g. `&obj') is accepted; it is evaluated more than
   once, so it must not have side effects.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        (coding)->result = CODING_RESULT_INSUFFICIENT_MEM;              \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
4ed46869 5674
d46c5b12
KH
5675
5676static void
df7492f9 5677produce_annotation (coding)
d46c5b12 5678 struct coding_system *coding;
d46c5b12 5679{
df7492f9
KH
5680 int *charbuf = coding->charbuf;
5681 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5682
ff0dacd7
KH
5683 if (NILP (coding->dst_object))
5684 return;
5685
df7492f9 5686 while (charbuf < charbuf_end)
d46c5b12 5687 {
df7492f9
KH
5688 if (*charbuf >= 0)
5689 charbuf++;
d46c5b12 5690 else
d46c5b12 5691 {
df7492f9 5692 int len = -*charbuf;
ff0dacd7 5693 switch (charbuf[1])
df7492f9
KH
5694 {
5695 case CODING_ANNOTATE_COMPOSITION_MASK:
5696 produce_composition (coding, charbuf);
5697 break;
ff0dacd7
KH
5698 case CODING_ANNOTATE_CHARSET_MASK:
5699 produce_charset (coding, charbuf);
5700 break;
df7492f9
KH
5701 default:
5702 abort ();
5703 }
5704 charbuf += len;
d46c5b12 5705 }
df7492f9
KH
5706 }
5707}
d46c5b12 5708
df7492f9
KH
5709/* Decode the data at CODING->src_object into CODING->dst_object.
5710 CODING->src_object is a buffer, a string, or nil.
5711 CODING->dst_object is a buffer.
de79a6a5 5712
df7492f9
KH
5713 If CODING->src_object is a buffer, it must be the current buffer.
5714 In this case, if CODING->src_pos is positive, it is a position of
5715 the source text in the buffer, otherwise, the source text is in the
5716 gap area of the buffer, and CODING->src_pos specifies the offset of
5717 the text from GPT (which must be the same as PT). If this is the
5718 same buffer as CODING->dst_object, CODING->src_pos must be
5719 negative.
b73bfc1c 5720
df7492f9
KH
5721 If CODING->src_object is a string, CODING->src_pos in an index to
5722 that string.
d46c5b12 5723
df7492f9
KH
5724 If CODING->src_object is nil, CODING->source must already point to
5725 the non-relocatable memory area. In this case, CODING->src_pos is
5726 an offset from CODING->source.
d46c5b12 5727
df7492f9
KH
5728 The decoded data is inserted at the current point of the buffer
5729 CODING->dst_object.
5730*/
5731
5732static int
5733decode_coding (coding)
d46c5b12 5734 struct coding_system *coding;
d46c5b12 5735{
df7492f9 5736 Lisp_Object attrs;
d46c5b12 5737
df7492f9
KH
5738 if (BUFFERP (coding->src_object)
5739 && coding->src_pos > 0
5740 && coding->src_pos < GPT
5741 && coding->src_pos + coding->src_chars > GPT)
5742 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5743
df7492f9 5744 if (BUFFERP (coding->dst_object))
88993dfd 5745 {
df7492f9
KH
5746 if (current_buffer != XBUFFER (coding->dst_object))
5747 set_buffer_internal (XBUFFER (coding->dst_object));
5748 if (GPT != PT)
5749 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5750 }
5751
df7492f9
KH
5752 coding->consumed = coding->consumed_char = 0;
5753 coding->produced = coding->produced_char = 0;
5754 coding->chars_at_source = 0;
5755 coding->result = CODING_RESULT_SUCCESS;
5756 coding->errors = 0;
5757
5758 ALLOC_CONVERSION_WORK_AREA (coding);
5759
5760 attrs = CODING_ID_ATTRS (coding->id);
5761
5762 do
d46c5b12 5763 {
df7492f9
KH
5764 coding_set_source (coding);
5765 coding->annotated = 0;
5766 (*(coding->decoder)) (coding);
5767 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
da4109a9
KH
5768 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5769 else if (!NILP (Vstandard_translation_table_for_decode))
5770 translate_chars (coding, Vstandard_translation_table_for_decode);
df7492f9
KH
5771 coding_set_destination (coding);
5772 produce_chars (coding);
5773 if (coding->annotated)
5774 produce_annotation (coding);
d46c5b12 5775 }
df7492f9
KH
5776 while (coding->consumed < coding->src_bytes
5777 && ! coding->result);
d46c5b12 5778
df7492f9
KH
5779 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5780 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5781 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5782 decode_eol (coding);
d46c5b12 5783
df7492f9
KH
5784 coding->carryover_bytes = 0;
5785 if (coding->consumed < coding->src_bytes)
d46c5b12 5786 {
df7492f9
KH
5787 int nbytes = coding->src_bytes - coding->consumed;
5788 unsigned char *src;
5789
5790 coding_set_source (coding);
5791 coding_set_destination (coding);
5792 src = coding->source + coding->consumed;
5793
5794 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5795 {
df7492f9
KH
5796 /* Flush out unprocessed data as binary chars. We are sure
5797 that the number of data is less than the size of
5798 coding->charbuf. */
df7492f9 5799 while (nbytes-- > 0)
d46c5b12 5800 {
df7492f9 5801 int c = *src++;
98725083
KH
5802
5803 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
d46c5b12 5804 }
df7492f9 5805 produce_chars (coding);
d46c5b12 5806 }
d46c5b12 5807 else
df7492f9
KH
5808 {
5809 /* Record unprocessed bytes in coding->carryover. We are
5810 sure that the number of data is less than the size of
5811 coding->carryover. */
5812 unsigned char *p = coding->carryover;
5813
5814 coding->carryover_bytes = nbytes;
5815 while (nbytes-- > 0)
5816 *p++ = *src++;
5817 }
5818 coding->consumed = coding->src_bytes;
5819 }
b73bfc1c 5820
df7492f9 5821 return coding->result;
d46c5b12
KH
5822}
5823
ff0dacd7 5824
e1c23804 5825/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
5826 ending before LIMIT of CODING->src_object (buffer or string), store
5827 the data in BUF, set *STOP to a starting position of the next
5828 composition (if any) or to LIMIT, and return the address of the
5829 next element of BUF.
5830
5831 If such an annotation is not found, set *STOP to a starting
5832 position of a composition after POS (if any) or to LIMIT, and
5833 return BUF. */
5834
5835static INLINE int *
5836handle_composition_annotation (pos, limit, coding, buf, stop)
5837 EMACS_INT pos, limit;
5838 struct coding_system *coding;
5839 int *buf;
5840 EMACS_INT *stop;
5841{
5842 EMACS_INT start, end;
5843 Lisp_Object prop;
5844
5845 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5846 || end > limit)
5847 *stop = limit;
5848 else if (start > pos)
5849 *stop = start;
5850 else
5851 {
5852 if (start == pos)
5853 {
5854 /* We found a composition. Store the corresponding
5855 annotation data in BUF. */
5856 int *head = buf;
5857 enum composition_method method = COMPOSITION_METHOD (prop);
5858 int nchars = COMPOSITION_LENGTH (prop);
5859
5860 ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5861 if (method != COMPOSITION_RELATIVE)
5862 {
5863 Lisp_Object components;
5864 int len, i, i_byte;
5865
5866 components = COMPOSITION_COMPONENTS (prop);
5867 if (VECTORP (components))
5868 {
5869 len = XVECTOR (components)->size;
5870 for (i = 0; i < len; i++)
5871 *buf++ = XINT (AREF (components, i));
5872 }
5873 else if (STRINGP (components))
5874 {
5875 len = XSTRING (components)->size;
5876 i = i_byte = 0;
5877 while (i < len)
5878 {
5879 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5880 buf++;
5881 }
5882 }
5883 else if (INTEGERP (components))
5884 {
5885 len = 1;
5886 *buf++ = XINT (components);
5887 }
5888 else if (CONSP (components))
5889 {
5890 for (len = 0; CONSP (components);
5891 len++, components = XCDR (components))
5892 *buf++ = XINT (XCAR (components));
5893 }
5894 else
5895 abort ();
5896 *head -= len;
5897 }
5898 }
5899
5900 if (find_composition (end, limit, &start, &end, &prop,
5901 coding->src_object)
5902 && end <= limit)
5903 *stop = start;
5904 else
5905 *stop = limit;
5906 }
5907 return buf;
5908}
5909
5910
e1c23804 5911/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
5912 CODING->src_object (buffer of string), store the data in BUF, set
5913 *STOP to the position where the value of `charset' property changes
5914 (limiting by LIMIT), and return the address of the next element of
5915 BUF.
5916
5917 If the property value is nil, set *STOP to the position where the
5918 property value is non-nil (limiting by LIMIT), and return BUF. */
5919
5920static INLINE int *
5921handle_charset_annotation (pos, limit, coding, buf, stop)
5922 EMACS_INT pos, limit;
5923 struct coding_system *coding;
5924 int *buf;
5925 EMACS_INT *stop;
5926{
5927 Lisp_Object val, next;
5928 int id;
5929
5930 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5931 if (! NILP (val) && CHARSETP (val))
5932 id = XINT (CHARSET_SYMBOL_ID (val));
5933 else
5934 id = -1;
5935 ADD_CHARSET_DATA (buf, 0, 0, id);
5936 next = Fnext_single_property_change (make_number (pos), Qcharset,
5937 coding->src_object,
5938 make_number (limit));
5939 *stop = XINT (next);
5940 return buf;
5941}
5942
5943
df7492f9
KH
5944static void
5945consume_chars (coding)
5946 struct coding_system *coding;
5947{
5948 int *buf = coding->charbuf;
ff0dacd7 5949 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 5950 const unsigned char *src = coding->source + coding->consumed;
ff0dacd7
KH
5951 EMACS_INT pos = coding->src_pos + coding->consumed_char;
5952 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
5953 int multibytep = coding->src_multibyte;
5954 Lisp_Object eol_type;
5955 int c;
ff0dacd7
KH
5956 EMACS_INT stop, stop_composition, stop_charset;
5957 int id;
88993dfd 5958
df7492f9
KH
5959 eol_type = CODING_ID_EOL_TYPE (coding->id);
5960 if (VECTORP (eol_type))
5961 eol_type = Qunix;
88993dfd 5962
df7492f9
KH
5963 /* Note: composition handling is not yet implemented. */
5964 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5965
ff0dacd7
KH
5966 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
5967 stop = stop_composition = pos;
5968 else
5969 stop = stop_composition = end_pos;
5970 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
5971 stop = stop_charset = pos;
df7492f9 5972 else
ff0dacd7 5973 stop_charset = end_pos;
ec6d2bb8 5974
ff0dacd7
KH
5975 /* Compensate for CRLF and annotation. */
5976 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 5977 while (buf < buf_end)
ec6d2bb8 5978 {
df7492f9 5979 if (pos == stop)
ec6d2bb8 5980 {
df7492f9 5981 int *p;
ec6d2bb8 5982
df7492f9
KH
5983 if (pos == end_pos)
5984 break;
ff0dacd7
KH
5985 if (pos == stop_composition)
5986 buf = handle_composition_annotation (pos, end_pos, coding,
5987 buf, &stop_composition);
5988 if (pos == stop_charset)
5989 buf = handle_charset_annotation (pos, end_pos, coding,
5990 buf, &stop_charset);
5991 stop = (stop_composition < stop_charset
5992 ? stop_composition : stop_charset);
df7492f9
KH
5993 }
5994
5995 if (! multibytep)
5996 c = *src++;
5997 else
5998 c = STRING_CHAR_ADVANCE (src);
5999 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6000 c = '\n';
6001 if (! EQ (eol_type, Qunix))
6002 {
6003 if (c == '\n')
6004 {
6005 if (EQ (eol_type, Qdos))
6006 *buf++ = '\r';
6007 else
6008 c = '\r';
ec6d2bb8 6009 }
ec6d2bb8 6010 }
df7492f9
KH
6011 *buf++ = c;
6012 pos++;
ec6d2bb8 6013 }
ec6d2bb8 6014
df7492f9
KH
6015 coding->consumed = src - coding->source;
6016 coding->consumed_char = pos - coding->src_pos;
6017 coding->charbuf_used = buf - coding->charbuf;
6018 coding->chars_at_source = 0;
ec6d2bb8
KH
6019}
6020
ec6d2bb8 6021
df7492f9
KH
6022/* Encode the text at CODING->src_object into CODING->dst_object.
6023 CODING->src_object is a buffer or a string.
6024 CODING->dst_object is a buffer or nil.
6025
6026 If CODING->src_object is a buffer, it must be the current buffer.
6027 In this case, if CODING->src_pos is positive, it is a position of
6028 the source text in the buffer, otherwise. the source text is in the
6029 gap area of the buffer, and coding->src_pos specifies the offset of
6030 the text from GPT (which must be the same as PT). If this is the
6031 same buffer as CODING->dst_object, CODING->src_pos must be
6032 negative and CODING should not have `pre-write-conversion'.
6033
6034 If CODING->src_object is a string, CODING should not have
6035 `pre-write-conversion'.
6036
6037 If CODING->dst_object is a buffer, the encoded data is inserted at
6038 the current point of that buffer.
6039
6040 If CODING->dst_object is nil, the encoded data is placed at the
6041 memory area specified by CODING->destination. */
6042
6043static int
6044encode_coding (coding)
ec6d2bb8 6045 struct coding_system *coding;
ec6d2bb8 6046{
df7492f9 6047 Lisp_Object attrs;
ec6d2bb8 6048
df7492f9 6049 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 6050
df7492f9 6051 if (BUFFERP (coding->dst_object))
ec6d2bb8 6052 {
df7492f9
KH
6053 set_buffer_internal (XBUFFER (coding->dst_object));
6054 coding->dst_multibyte
6055 = ! NILP (current_buffer->enable_multibyte_characters);
6056 }
ec6d2bb8 6057
df7492f9
KH
6058 coding->consumed = coding->consumed_char = 0;
6059 coding->produced = coding->produced_char = 0;
6060 coding->result = CODING_RESULT_SUCCESS;
6061 coding->errors = 0;
ec6d2bb8 6062
df7492f9 6063 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 6064
df7492f9
KH
6065 do {
6066 coding_set_source (coding);
6067 consume_chars (coding);
6068
6069 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
da4109a9
KH
6070 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6071 else if (!NILP (Vstandard_translation_table_for_encode))
6072 translate_chars (coding, Vstandard_translation_table_for_encode);
df7492f9
KH
6073
6074 coding_set_destination (coding);
6075 (*(coding->encoder)) (coding);
6076 } while (coding->consumed_char < coding->src_chars);
6077
6078 if (BUFFERP (coding->dst_object))
6079 insert_from_gap (coding->produced_char, coding->produced);
6080
6081 return (coding->result);
ec6d2bb8
KH
6082}
6083
df7492f9 6084/* Work buffer */
fb88bf2d 6085
df7492f9
KH
6086/* List of working buffers currently in use. */
6087Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 6088
df7492f9
KH
6089/* A working buffer used by the top level conversion. */
6090Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 6091
4ed46869 6092
df7492f9
KH
6093/* Return a working buffer that can be freely used by the following
6094 code conversion. MULTIBYTEP specifies the multibyteness of the
6095 buffer. */
b73bfc1c 6096
df7492f9
KH
6097Lisp_Object
6098make_conversion_work_buffer (multibytep)
6099 int multibytep;
6100{
6101 struct buffer *current = current_buffer;
6102 Lisp_Object buf;
d46c5b12 6103
df7492f9 6104 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 6105 {
df7492f9
KH
6106 if (NILP (Vcode_conversion_reused_work_buf))
6107 Vcode_conversion_reused_work_buf
6108 = Fget_buffer_create (build_string (" *code-conversion-work*"));
6109 Vcode_conversion_work_buf_list
6110 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 6111 }
df7492f9 6112 else
d46c5b12 6113 {
c197f191 6114 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 6115 char str[128];
e077cc80 6116
df7492f9
KH
6117 sprintf (str, " *code-conversion-work*<%d>", depth);
6118 Vcode_conversion_work_buf_list
6119 = Fcons (Fget_buffer_create (build_string (str)),
6120 Vcode_conversion_work_buf_list);
d46c5b12 6121 }
d46c5b12 6122
df7492f9
KH
6123 buf = XCAR (Vcode_conversion_work_buf_list);
6124 set_buffer_internal (XBUFFER (buf));
6125 current_buffer->undo_list = Qt;
6126 Ferase_buffer ();
9d123124 6127 Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
df7492f9
KH
6128 set_buffer_internal (current);
6129 return buf;
6130}
d46c5b12 6131
df7492f9 6132static struct coding_system *saved_coding;
d46c5b12 6133
df7492f9
KH
6134Lisp_Object
6135code_conversion_restore (info)
6136 Lisp_Object info;
6137{
c197f191 6138 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 6139 Lisp_Object buf;
d46c5b12 6140
df7492f9 6141 if (depth > 0)
d46c5b12 6142 {
df7492f9
KH
6143 buf = XCAR (Vcode_conversion_work_buf_list);
6144 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6145 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
6146 Fkill_buffer (buf);
6147 }
d46c5b12 6148
c197f191 6149 if (EQ (saved_coding->dst_object, Qt)
df7492f9
KH
6150 && saved_coding->destination)
6151 xfree (saved_coding->destination);
b843d1ae 6152
df7492f9
KH
6153 return save_excursion_restore (info);
6154}
d46c5b12 6155
12410ef1 6156
df7492f9
KH
6157int
6158decode_coding_gap (coding, chars, bytes)
6159 struct coding_system *coding;
6160 EMACS_INT chars, bytes;
6161{
6162 int count = specpdl_ptr - specpdl;
fb88bf2d 6163
df7492f9
KH
6164 saved_coding = coding;
6165 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 6166
df7492f9
KH
6167 coding->src_object = Fcurrent_buffer ();
6168 coding->src_chars = chars;
6169 coding->src_bytes = bytes;
6170 coding->src_pos = -chars;
6171 coding->src_pos_byte = -bytes;
6172 coding->src_multibyte = chars < bytes;
6173 coding->dst_object = coding->src_object;
6174 coding->dst_pos = PT;
6175 coding->dst_pos_byte = PT_BYTE;
71c81426 6176 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6177 coding->mode |= CODING_MODE_LAST_BLOCK;
4956c225 6178
df7492f9
KH
6179 if (CODING_REQUIRE_DETECTION (coding))
6180 detect_coding (coding);
6181
6182 decode_coding (coding);
d46c5b12 6183
df7492f9
KH
6184 unbind_to (count, Qnil);
6185 return coding->result;
6186}
d46c5b12 6187
df7492f9
KH
6188int
6189encode_coding_gap (coding, chars, bytes)
6190 struct coding_system *coding;
6191 EMACS_INT chars, bytes;
6192{
6193 int count = specpdl_ptr - specpdl;
6194 Lisp_Object buffer;
d46c5b12 6195
df7492f9
KH
6196 saved_coding = coding;
6197 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 6198
df7492f9
KH
6199 buffer = Fcurrent_buffer ();
6200 coding->src_object = buffer;
6201 coding->src_chars = chars;
6202 coding->src_bytes = bytes;
6203 coding->src_pos = -chars;
6204 coding->src_pos_byte = -bytes;
6205 coding->src_multibyte = chars < bytes;
6206 coding->dst_object = coding->src_object;
6207 coding->dst_pos = PT;
6208 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 6209
df7492f9 6210 encode_coding (coding);
f2558efd 6211
df7492f9
KH
6212 unbind_to (count, Qnil);
6213 return coding->result;
6214}
b73bfc1c 6215
d46c5b12 6216
df7492f9
KH
6217/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6218 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 6219
df7492f9 6220 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 6221
df7492f9
KH
6222 If it is a buffer, the text is at point of the buffer. FROM and TO
6223 are positions in the buffer.
ec6d2bb8 6224
df7492f9
KH
6225 If it is a string, the text is at the beginning of the string.
6226 FROM and TO are indices to the string.
ec6d2bb8 6227
df7492f9
KH
6228 If it is nil, the text is at coding->source. FROM and TO are
6229 indices to coding->source.
ec6d2bb8 6230
df7492f9 6231 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 6232
df7492f9
KH
6233 If it is a buffer, the decoded text is inserted at point of the
6234 buffer. If the buffer is the same as SRC_OBJECT, the source text
6235 is deleted.
d46c5b12 6236
df7492f9
KH
6237 If it is Qt, a string is made from the decoded text, and
6238 set in CODING->dst_object.
d46c5b12 6239
df7492f9 6240 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6241 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6242 CODING->destination by xmalloc. If the decoded text is longer than
6243 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6244 */
d46c5b12 6245
df7492f9
KH
6246void
6247decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6248 dst_object)
6249 struct coding_system *coding;
6250 Lisp_Object src_object;
6251 EMACS_INT from, from_byte, to, to_byte;
6252 Lisp_Object dst_object;
6253{
6254 int count = specpdl_ptr - specpdl;
6255 unsigned char *destination;
6256 EMACS_INT dst_bytes;
6257 EMACS_INT chars = to - from;
6258 EMACS_INT bytes = to_byte - from_byte;
6259 Lisp_Object attrs;
d46c5b12 6260
df7492f9
KH
6261 saved_coding = coding;
6262 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 6263
df7492f9
KH
6264 if (NILP (dst_object))
6265 {
6266 destination = coding->destination;
6267 dst_bytes = coding->dst_bytes;
6268 }
93dec019 6269
df7492f9
KH
6270 coding->src_object = src_object;
6271 coding->src_chars = chars;
6272 coding->src_bytes = bytes;
6273 coding->src_multibyte = chars < bytes;
70ad9fc4 6274
df7492f9
KH
6275 if (STRINGP (src_object))
6276 {
6277 coding->src_pos = from;
6278 coding->src_pos_byte = from_byte;
6279 }
6280 else if (BUFFERP (src_object))
6281 {
6282 set_buffer_internal (XBUFFER (src_object));
6283 if (from != GPT)
6284 move_gap_both (from, from_byte);
6285 if (EQ (src_object, dst_object))
fb88bf2d 6286 {
df7492f9
KH
6287 TEMP_SET_PT_BOTH (from, from_byte);
6288 del_range_both (from, from_byte, to, to_byte, 1);
6289 coding->src_pos = -chars;
6290 coding->src_pos_byte = -bytes;
fb88bf2d 6291 }
df7492f9 6292 else
fb88bf2d 6293 {
df7492f9
KH
6294 coding->src_pos = from;
6295 coding->src_pos_byte = from_byte;
fb88bf2d 6296 }
d46c5b12 6297 }
fb88bf2d 6298
df7492f9
KH
6299 if (CODING_REQUIRE_DETECTION (coding))
6300 detect_coding (coding);
6301 attrs = CODING_ID_ATTRS (coding->id);
6302
2cb26057
KH
6303 if (EQ (dst_object, Qt)
6304 || (! NILP (CODING_ATTR_POST_READ (attrs))
6305 && NILP (dst_object)))
b73bfc1c 6306 {
df7492f9
KH
6307 coding->dst_object = make_conversion_work_buffer (1);
6308 coding->dst_pos = BEG;
6309 coding->dst_pos_byte = BEG_BYTE;
6310 coding->dst_multibyte = 1;
b73bfc1c 6311 }
df7492f9 6312 else if (BUFFERP (dst_object))
12410ef1 6313 {
df7492f9
KH
6314 coding->dst_object = dst_object;
6315 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6316 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6317 coding->dst_multibyte
6318 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 6319 }
72d1a715 6320 else
df7492f9
KH
6321 {
6322 coding->dst_object = Qnil;
6323 coding->dst_multibyte = 1;
6324 }
6325
6326 decode_coding (coding);
4ed46869 6327
df7492f9
KH
6328 if (BUFFERP (coding->dst_object))
6329 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6330
df7492f9 6331 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6332 {
df7492f9
KH
6333 struct gcpro gcpro1, gcpro2;
6334 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6335 Lisp_Object val;
4ed46869 6336
c0cc7f7f 6337 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6338 GCPRO2 (coding->src_object, coding->dst_object);
6339 val = call1 (CODING_ATTR_POST_READ (attrs),
6340 make_number (coding->produced_char));
6341 UNGCPRO;
6342 CHECK_NATNUM (val);
6343 coding->produced_char += Z - prev_Z;
6344 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6345 }
4ed46869 6346
df7492f9 6347 if (EQ (dst_object, Qt))
ec6d2bb8 6348 {
df7492f9
KH
6349 coding->dst_object = Fbuffer_string ();
6350 }
6351 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6352 {
6353 set_buffer_internal (XBUFFER (coding->dst_object));
6354 if (dst_bytes < coding->produced)
6355 {
6356 destination
6357 = (unsigned char *) xrealloc (destination, coding->produced);
6358 if (! destination)
6359 {
6360 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6361 unbind_to (count, Qnil);
6362 return;
6363 }
6364 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6365 move_gap_both (BEGV, BEGV_BYTE);
6366 bcopy (BEGV_ADDR, destination, coding->produced);
6367 coding->destination = destination;
6368 }
ec6d2bb8 6369 }
2b4f9037 6370
df7492f9 6371 unbind_to (count, Qnil);
d46c5b12
KH
6372}
6373
df7492f9
KH
6374
6375void
6376encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6377 dst_object)
b73bfc1c 6378 struct coding_system *coding;
df7492f9
KH
6379 Lisp_Object src_object;
6380 EMACS_INT from, from_byte, to, to_byte;
6381 Lisp_Object dst_object;
b73bfc1c
KH
6382{
6383 int count = specpdl_ptr - specpdl;
df7492f9
KH
6384 EMACS_INT chars = to - from;
6385 EMACS_INT bytes = to_byte - from_byte;
6386 Lisp_Object attrs;
6387
6388 saved_coding = coding;
6389 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6390
6391 coding->src_object = src_object;
6392 coding->src_chars = chars;
6393 coding->src_bytes = bytes;
6394 coding->src_multibyte = chars < bytes;
6395
6396 attrs = CODING_ID_ATTRS (coding->id);
6397
6398 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6399 {
df7492f9
KH
6400 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6401 set_buffer_internal (XBUFFER (coding->src_object));
6402 if (STRINGP (src_object))
6403 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6404 else if (BUFFERP (src_object))
6405 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6406 else
6407 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6408
6409 if (EQ (src_object, dst_object))
6410 {
6411 set_buffer_internal (XBUFFER (src_object));
6412 del_range_both (from, from_byte, to, to_byte, 1);
6413 set_buffer_internal (XBUFFER (coding->src_object));
6414 }
6415
ac87bbef
KH
6416 call2 (CODING_ATTR_PRE_WRITE (attrs),
6417 make_number (BEG), make_number (Z));
6418 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6419 if (BEG != GPT)
6420 move_gap_both (BEG, BEG_BYTE);
6421 coding->src_chars = Z - BEG;
6422 coding->src_bytes = Z_BYTE - BEG_BYTE;
6423 coding->src_pos = BEG;
6424 coding->src_pos_byte = BEG_BYTE;
6425 coding->src_multibyte = Z < Z_BYTE;
6426 }
6427 else if (STRINGP (src_object))
6428 {
6429 coding->src_pos = from;
6430 coding->src_pos_byte = from_byte;
6431 }
6432 else if (BUFFERP (src_object))
d46c5b12 6433 {
df7492f9 6434 set_buffer_internal (XBUFFER (src_object));
df7492f9 6435 if (EQ (src_object, dst_object))
d46c5b12 6436 {
ff0dacd7
KH
6437 coding->src_object = del_range_1 (from, to, 1, 1);
6438 coding->src_pos = 0;
6439 coding->src_pos_byte = 0;
d46c5b12 6440 }
df7492f9 6441 else
d46c5b12 6442 {
ff0dacd7
KH
6443 if (from < GPT && to >= GPT)
6444 move_gap_both (from, from_byte);
df7492f9
KH
6445 coding->src_pos = from;
6446 coding->src_pos_byte = from_byte;
d46c5b12
KH
6447 }
6448 }
4ed46869 6449
df7492f9 6450 if (BUFFERP (dst_object))
d46c5b12 6451 {
df7492f9 6452 coding->dst_object = dst_object;
28f67a95
KH
6453 if (EQ (src_object, dst_object))
6454 {
6455 coding->dst_pos = from;
6456 coding->dst_pos_byte = from_byte;
6457 }
6458 else
6459 {
6460 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6461 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6462 }
df7492f9
KH
6463 coding->dst_multibyte
6464 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6465 }
df7492f9 6466 else if (EQ (dst_object, Qt))
4956c225 6467 {
df7492f9 6468 coding->dst_object = Qnil;
df7492f9 6469 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6470 if (coding->dst_bytes == 0)
6471 coding->dst_bytes = 1;
6472 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6473 coding->dst_multibyte = 0;
4956c225 6474 }
df7492f9 6475 else
78108bcd 6476 {
df7492f9
KH
6477 coding->dst_object = Qnil;
6478 coding->dst_multibyte = 0;
78108bcd
KH
6479 }
6480
df7492f9 6481 encode_coding (coding);
4ed46869 6482
df7492f9 6483 if (EQ (dst_object, Qt))
4ed46869 6484 {
df7492f9
KH
6485 if (BUFFERP (coding->dst_object))
6486 coding->dst_object = Fbuffer_string ();
6487 else
73be902c 6488 {
df7492f9
KH
6489 coding->dst_object
6490 = make_unibyte_string ((char *) coding->destination,
6491 coding->produced);
6492 xfree (coding->destination);
73be902c 6493 }
4ed46869 6494 }
d46c5b12 6495
df7492f9 6496 unbind_to (count, Qnil);
b73bfc1c
KH
6497}
6498
df7492f9 6499
b73bfc1c 6500Lisp_Object
df7492f9 6501preferred_coding_system ()
b73bfc1c 6502{
df7492f9 6503 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6504
df7492f9 6505 return CODING_ID_NAME (id);
4ed46869
KH
6506}
6507
6508\f
6509#ifdef emacs
1397dc18 6510/*** 11. Emacs Lisp library functions ***/
4ed46869 6511
4ed46869 6512DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6513 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6514See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6515about coding-system objects. */)
6516 (obj)
4ed46869
KH
6517 Lisp_Object obj;
6518{
df7492f9 6519 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6520}
6521
9d991de8
RS
6522DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6523 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6524 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6525 (prompt)
4ed46869
KH
6526 Lisp_Object prompt;
6527{
e0e989f6 6528 Lisp_Object val;
9d991de8
RS
6529 do
6530 {
4608c386
KH
6531 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6532 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6533 }
6534 while (XSTRING (val)->size == 0);
e0e989f6 6535 return (Fintern (val, Qnil));
4ed46869
KH
6536}
6537
9b787f3e 6538DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6539 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6540If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6541 (prompt, default_coding_system)
9b787f3e 6542 Lisp_Object prompt, default_coding_system;
4ed46869 6543{
f44d27ce 6544 Lisp_Object val;
9b787f3e
RS
6545 if (SYMBOLP (default_coding_system))
6546 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6547 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6548 Qt, Qnil, Qcoding_system_history,
6549 default_coding_system, Qnil);
e0e989f6 6550 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6551}
6552
6553DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6554 1, 1, 0,
48b0f3ae 6555 doc: /* Check validity of CODING-SYSTEM.
b054002f 6556If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
df7492f9 6557 (coding_system)
4ed46869
KH
6558 Lisp_Object coding_system;
6559{
b7826503 6560 CHECK_SYMBOL (coding_system);
4ed46869
KH
6561 if (!NILP (Fcoding_system_p (coding_system)))
6562 return coding_system;
6563 while (1)
02ba4723 6564 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6565}
df7492f9 6566
3a73fa5d 6567\f
89528eb3
KH
6568/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6569 HIGHEST is nonzero, return the coding system of the highest
6570 priority among the detected coding systems. Otherwise return a
6571 list of detected coding systems sorted by their priorities. If
6572 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6573 multibyte form but contains only ASCII and eight-bit chars.
6574 Otherwise, the bytes are raw bytes.
6575
6576 CODING-SYSTEM controls the detection as below:
6577
6578 If it is nil, detect both text-format and eol-format. If the
6579 text-format part of CODING-SYSTEM is already specified
6580 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6581 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6582 detect only text-format. */
6583
d46c5b12 6584Lisp_Object
df7492f9 6585detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6586 unsigned char *src;
6587 int src_bytes, highest;
0a28aafb 6588 int multibytep;
df7492f9 6589 Lisp_Object coding_system;
4ed46869 6590{
df7492f9
KH
6591 unsigned char *src_end = src + src_bytes;
6592 int mask = CATEGORY_MASK_ANY;
df7492f9
KH
6593 Lisp_Object attrs, eol_type;
6594 Lisp_Object val;
6595 struct coding_system coding;
89528eb3 6596 int id;
ff0dacd7 6597 struct coding_detection_info detect_info;
df7492f9
KH
6598
6599 if (NILP (coding_system))
6600 coding_system = Qundecided;
6601 setup_coding_system (coding_system, &coding);
6602 attrs = CODING_ID_ATTRS (coding.id);
6603 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 6604 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 6605
df7492f9
KH
6606 coding.source = src;
6607 coding.src_bytes = src_bytes;
6608 coding.src_multibyte = multibytep;
6609 coding.consumed = 0;
89528eb3 6610 coding.mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6611
ff0dacd7
KH
6612 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6613
89528eb3
KH
6614 /* At first, detect text-format if necessary. */
6615 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
4ed46869 6616 {
ff0dacd7
KH
6617 enum coding_category category;
6618 struct coding_system *this;
6619 int c, i;
6620
df7492f9 6621 for (; src < src_end; src++)
4ed46869 6622 {
df7492f9 6623 c = *src;
89528eb3
KH
6624 if (c & 0x80
6625 || (c < 0x20 && (c == ISO_CODE_ESC
6626 || c == ISO_CODE_SI
584948ac 6627 || c == ISO_CODE_SO)))
d46c5b12 6628 break;
4ed46869 6629 }
df7492f9
KH
6630 coding.head_ascii = src - coding.source;
6631
6632 if (src < src_end)
6633 for (i = 0; i < coding_category_raw_text; i++)
6634 {
ff0dacd7
KH
6635 category = coding_priorities[i];
6636 this = coding_categories + category;
df7492f9 6637
df7492f9
KH
6638 if (this->id < 0)
6639 {
6640 /* No coding system of this category is defined. */
ff0dacd7 6641 detect_info.rejected |= (1 << category);
df7492f9 6642 }
ff0dacd7 6643 else if (category >= coding_category_raw_text)
89528eb3 6644 continue;
ff0dacd7
KH
6645 else if (detect_info.checked & (1 << category))
6646 {
6647 if (highest
6648 && (detect_info.found & (1 << category)))
6649 break;
6650 }
df7492f9
KH
6651 else
6652 {
ff0dacd7 6653 if ((*(this->detector)) (&coding, &detect_info)
89528eb3 6654 && highest
ff0dacd7
KH
6655 && (detect_info.found & (1 << category)))
6656 break;
df7492f9
KH
6657 }
6658 }
4ed46869 6659
ff0dacd7
KH
6660
6661 if (detect_info.rejected == CATEGORY_MASK_ANY)
89528eb3 6662 {
ff0dacd7 6663 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
6664 id = coding_categories[coding_category_raw_text].id;
6665 val = Fcons (make_number (id), Qnil);
6666 }
ff0dacd7 6667 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 6668 {
ff0dacd7 6669 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
6670 id = coding_categories[coding_category_undecided].id;
6671 val = Fcons (make_number (id), Qnil);
6672 }
6673 else if (highest)
6674 {
ff0dacd7
KH
6675 if (detect_info.found)
6676 {
6677 detect_info.found = 1 << category;
6678 val = Fcons (make_number (this->id), Qnil);
6679 }
6680 else
6681 for (i = 0; i < coding_category_raw_text; i++)
6682 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6683 {
6684 detect_info.found = 1 << coding_priorities[i];
6685 id = coding_categories[coding_priorities[i]].id;
6686 val = Fcons (make_number (id), Qnil);
6687 break;
6688 }
6689 }
89528eb3
KH
6690 else
6691 {
ff0dacd7
KH
6692 int mask = detect_info.rejected | detect_info.found;
6693 int found = 0;
89528eb3 6694 val = Qnil;
ff0dacd7 6695
89528eb3 6696 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
6697 {
6698 category = coding_priorities[i];
6699 if (! (mask & (1 << category)))
6700 {
6701 found |= 1 << category;
6702 id = coding_categories[category].id;
6703 val = Fcons (make_number (id), val);
6704 }
6705 }
6706 for (i = coding_category_raw_text - 1; i >= 0; i--)
6707 {
6708 category = coding_priorities[i];
6709 if (detect_info.found & (1 << category))
6710 {
6711 id = coding_categories[category].id;
6712 val = Fcons (make_number (id), val);
6713 }
6714 }
6715 detect_info.found |= found;
89528eb3
KH
6716 }
6717 }
df7492f9
KH
6718 else
6719 {
ff0dacd7 6720 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 6721 val = Fcons (make_number (coding.id), Qnil);
4ed46869 6722 }
df7492f9 6723
89528eb3 6724 /* Then, detect eol-format if necessary. */
df7492f9 6725 {
89528eb3 6726 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
6727 Lisp_Object tail;
6728
89528eb3
KH
6729 if (VECTORP (eol_type))
6730 {
ff0dacd7 6731 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
6732 normal_eol = detect_eol (coding.source, src_bytes,
6733 coding_category_raw_text);
ff0dacd7
KH
6734 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6735 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
6736 utf_16_be_eol = detect_eol (coding.source, src_bytes,
6737 coding_category_utf_16_be);
ff0dacd7
KH
6738 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6739 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
6740 utf_16_le_eol = detect_eol (coding.source, src_bytes,
6741 coding_category_utf_16_le);
6742 }
6743 else
6744 {
6745 if (EQ (eol_type, Qunix))
6746 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6747 else if (EQ (eol_type, Qdos))
6748 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6749 else
6750 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6751 }
6752
df7492f9
KH
6753 for (tail = val; CONSP (tail); tail = XCDR (tail))
6754 {
89528eb3 6755 enum coding_category category;
df7492f9 6756 int this_eol;
89528eb3
KH
6757
6758 id = XINT (XCAR (tail));
6759 attrs = CODING_ID_ATTRS (id);
6760 category = XINT (CODING_ATTR_CATEGORY (attrs));
6761 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
6762 if (VECTORP (eol_type))
6763 {
89528eb3
KH
6764 if (category == coding_category_utf_16_be
6765 || category == coding_category_utf_16_be_nosig)
6766 this_eol = utf_16_be_eol;
6767 else if (category == coding_category_utf_16_le
6768 || category == coding_category_utf_16_le_nosig)
6769 this_eol = utf_16_le_eol;
df7492f9 6770 else
89528eb3
KH
6771 this_eol = normal_eol;
6772
df7492f9
KH
6773 if (this_eol == EOL_SEEN_LF)
6774 XSETCAR (tail, AREF (eol_type, 0));
6775 else if (this_eol == EOL_SEEN_CRLF)
6776 XSETCAR (tail, AREF (eol_type, 1));
6777 else if (this_eol == EOL_SEEN_CR)
6778 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
6779 else
6780 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 6781 }
89528eb3
KH
6782 else
6783 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
6784 }
6785 }
6786
03699b14 6787 return (highest ? XCAR (val) : val);
93dec019 6788}
4ed46869 6789
df7492f9 6790
d46c5b12
KH
6791DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6792 2, 3, 0,
48b0f3ae
PJ
6793 doc: /* Detect coding system of the text in the region between START and END.
6794Return a list of possible coding systems ordered by priority.
6795
6796If only ASCII characters are found, it returns a list of single element
6797`undecided' or its subsidiary coding system according to a detected
6798end-of-line format.
6799
6800If optional argument HIGHEST is non-nil, return the coding system of
6801highest priority. */)
6802 (start, end, highest)
d46c5b12
KH
6803 Lisp_Object start, end, highest;
6804{
6805 int from, to;
6806 int from_byte, to_byte;
6289dd10 6807
b7826503
PJ
6808 CHECK_NUMBER_COERCE_MARKER (start);
6809 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6810
d46c5b12
KH
6811 validate_region (&start, &end);
6812 from = XINT (start), to = XINT (end);
6813 from_byte = CHAR_TO_BYTE (from);
6814 to_byte = CHAR_TO_BYTE (to);
6289dd10 6815
d46c5b12
KH
6816 if (from < GPT && to >= GPT)
6817 move_gap_both (to, to_byte);
c210f766 6818
d46c5b12 6819 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6820 to_byte - from_byte,
0a28aafb
KH
6821 !NILP (highest),
6822 !NILP (current_buffer
df7492f9
KH
6823 ->enable_multibyte_characters),
6824 Qnil);
d46c5b12 6825}
6289dd10 6826
d46c5b12
KH
6827DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6828 1, 2, 0,
48b0f3ae
PJ
6829 doc: /* Detect coding system of the text in STRING.
6830Return a list of possible coding systems ordered by priority.
6831
6832If only ASCII characters are found, it returns a list of single element
6833`undecided' or its subsidiary coding system according to a detected
6834end-of-line format.
6835
6836If optional argument HIGHEST is non-nil, return the coding system of
6837highest priority. */)
6838 (string, highest)
d46c5b12
KH
6839 Lisp_Object string, highest;
6840{
b7826503 6841 CHECK_STRING (string);
4ed46869 6842
d46c5b12 6843 return detect_coding_system (XSTRING (string)->data,
df7492f9 6844 STRING_BYTES (XSTRING (string)),
0a28aafb 6845 !NILP (highest),
df7492f9
KH
6846 STRING_MULTIBYTE (string),
6847 Qnil);
4ed46869
KH
6848}
6849
05e6f5dc 6850
df7492f9
KH
6851static INLINE int
6852char_encodable_p (c, attrs)
6853 int c;
6854 Lisp_Object attrs;
05e6f5dc 6855{
df7492f9 6856 Lisp_Object tail;
df7492f9 6857 struct charset *charset;
05e6f5dc 6858
df7492f9
KH
6859 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6860 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6861 {
df7492f9
KH
6862 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6863 if (CHAR_CHARSET_P (c, charset))
6864 break;
05e6f5dc 6865 }
df7492f9 6866 return (! NILP (tail));
05e6f5dc
KH
6867}
6868
6869
df7492f9
KH
6870/* Return a list of coding systems that safely encode the text between
6871 START and END. If EXCLUDE is non-nil, it is a list of coding
6872 systems not to check. The returned list doesn't contain any such
48468dac 6873 coding systems. In any case, if the text contains only ASCII or is
df7492f9
KH
6874 unibyte, return t. */
6875
6876DEFUN ("find-coding-systems-region-internal",
6877 Ffind_coding_systems_region_internal,
6878 Sfind_coding_systems_region_internal, 2, 3, 0,
6879 doc: /* Internal use only. */)
6880 (start, end, exclude)
6881 Lisp_Object start, end, exclude;
6882{
6883 Lisp_Object coding_attrs_list, safe_codings;
6884 EMACS_INT start_byte, end_byte;
7c78e542 6885 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
6886 int c;
6887 Lisp_Object tail, elt;
05e6f5dc 6888
df7492f9
KH
6889 if (STRINGP (start))
6890 {
6891 if (!STRING_MULTIBYTE (start)
48468dac 6892 || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
df7492f9
KH
6893 return Qt;
6894 start_byte = 0;
6895 end_byte = STRING_BYTES (XSTRING (start));
6896 }
6897 else
6898 {
6899 CHECK_NUMBER_COERCE_MARKER (start);
6900 CHECK_NUMBER_COERCE_MARKER (end);
6901 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6902 args_out_of_range (start, end);
6903 if (NILP (current_buffer->enable_multibyte_characters))
6904 return Qt;
6905 start_byte = CHAR_TO_BYTE (XINT (start));
6906 end_byte = CHAR_TO_BYTE (XINT (end));
6907 if (XINT (end) - XINT (start) == end_byte - start_byte)
6908 return Qt;
05e6f5dc 6909
e1c23804 6910 if (XINT (start) < GPT && XINT (end) > GPT)
df7492f9 6911 {
e1c23804
DL
6912 if ((GPT - XINT (start)) < (XINT (end) - GPT))
6913 move_gap_both (XINT (start), start_byte);
df7492f9 6914 else
e1c23804 6915 move_gap_both (XINT (end), end_byte);
df7492f9
KH
6916 }
6917 }
05e6f5dc 6918
df7492f9
KH
6919 coding_attrs_list = Qnil;
6920 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6921 if (NILP (exclude)
6922 || NILP (Fmemq (XCAR (tail), exclude)))
6923 {
6924 Lisp_Object attrs;
05e6f5dc 6925
df7492f9
KH
6926 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6927 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6928 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6929 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6930 }
6931
6932 if (STRINGP (start))
6933 p = pbeg = XSTRING (start)->data;
6934 else
6935 p = pbeg = BYTE_POS_ADDR (start_byte);
6936 pend = p + (end_byte - start_byte);
6937
6938 while (p < pend && ASCII_BYTE_P (*p)) p++;
6939 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6940
6941 while (p < pend)
6942 {
df7492f9
KH
6943 if (ASCII_BYTE_P (*p))
6944 p++;
6945 else
6946 {
6947 c = STRING_CHAR_ADVANCE (p);
6948
6949 charset_map_loaded = 0;
6950 for (tail = coding_attrs_list; CONSP (tail);)
6951 {
6952 elt = XCAR (tail);
6953 if (NILP (elt))
6954 tail = XCDR (tail);
6955 else if (char_encodable_p (c, elt))
6956 tail = XCDR (tail);
6957 else if (CONSP (XCDR (tail)))
6958 {
6959 XSETCAR (tail, XCAR (XCDR (tail)));
6960 XSETCDR (tail, XCDR (XCDR (tail)));
6961 }
6962 else
6963 {
6964 XSETCAR (tail, Qnil);
6965 tail = XCDR (tail);
6966 }
6967 }
6968 if (charset_map_loaded)
6969 {
6970 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6971
df7492f9
KH
6972 if (STRINGP (start))
6973 pbeg = XSTRING (start)->data;
6974 else
6975 pbeg = BYTE_POS_ADDR (start_byte);
6976 p = pbeg + p_offset;
6977 pend = pbeg + pend_offset;
6978 }
6979 }
05e6f5dc 6980 }
df7492f9
KH
6981
6982 safe_codings = Qnil;
6983 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6984 if (! NILP (XCAR (tail)))
6985 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6986
05e6f5dc
KH
6987 return safe_codings;
6988}
6989
6990
df7492f9
KH
6991DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6992 Scheck_coding_systems_region, 3, 3, 0,
6993 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6994
df7492f9
KH
6995START and END are buffer positions specifying the region.
6996CODING-SYSTEM-LIST is a list of coding systems to check.
6997
6998The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6999CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7000whole region, POS0, POS1, ... are buffer positions where non-encodable
7001characters are found.
7002
7003If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7004value is nil.
7005
7006START may be a string. In that case, check if the string is
7007encodable, and the value contains indices to the string instead of
7008buffer positions. END is ignored. */)
7009 (start, end, coding_system_list)
7010 Lisp_Object start, end, coding_system_list;
05e6f5dc 7011{
df7492f9
KH
7012 Lisp_Object list;
7013 EMACS_INT start_byte, end_byte;
7014 int pos;
7c78e542 7015 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7016 int c;
7017 Lisp_Object tail, elt;
05e6f5dc
KH
7018
7019 if (STRINGP (start))
7020 {
df7492f9
KH
7021 if (!STRING_MULTIBYTE (start)
7022 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7023 return Qnil;
7024 start_byte = 0;
7025 end_byte = STRING_BYTES (XSTRING (start));
7026 pos = 0;
05e6f5dc
KH
7027 }
7028 else
7029 {
b7826503
PJ
7030 CHECK_NUMBER_COERCE_MARKER (start);
7031 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7032 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7033 args_out_of_range (start, end);
7034 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7035 return Qnil;
7036 start_byte = CHAR_TO_BYTE (XINT (start));
7037 end_byte = CHAR_TO_BYTE (XINT (end));
7038 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7039 return Qt;
df7492f9 7040
e1c23804 7041 if (XINT (start) < GPT && XINT (end) > GPT)
df7492f9 7042 {
e1c23804
DL
7043 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7044 move_gap_both (XINT (start), start_byte);
df7492f9 7045 else
e1c23804 7046 move_gap_both (XINT (end), end_byte);
df7492f9 7047 }
e1c23804 7048 pos = XINT (start);
df7492f9
KH
7049 }
7050
7051 list = Qnil;
7052 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7053 {
7054 elt = XCAR (tail);
7055 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7056 Qnil)),
7057 list);
05e6f5dc
KH
7058 }
7059
df7492f9
KH
7060 if (STRINGP (start))
7061 p = pbeg = XSTRING (start)->data;
7062 else
7063 p = pbeg = BYTE_POS_ADDR (start_byte);
7064 pend = p + (end_byte - start_byte);
7065
7066 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7067 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7068
7069 while (p < pend)
05e6f5dc 7070 {
df7492f9
KH
7071 if (ASCII_BYTE_P (*p))
7072 p++;
7073 else
05e6f5dc 7074 {
df7492f9
KH
7075 c = STRING_CHAR_ADVANCE (p);
7076
7077 charset_map_loaded = 0;
7078 for (tail = list; CONSP (tail); tail = XCDR (tail))
7079 {
7080 elt = XCDR (XCAR (tail));
7081 if (! char_encodable_p (c, XCAR (elt)))
7082 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7083 }
7084 if (charset_map_loaded)
7085 {
7086 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7087
7088 if (STRINGP (start))
7089 pbeg = XSTRING (start)->data;
7090 else
7091 pbeg = BYTE_POS_ADDR (start_byte);
7092 p = pbeg + p_offset;
7093 pend = pbeg + pend_offset;
7094 }
05e6f5dc 7095 }
df7492f9 7096 pos++;
05e6f5dc
KH
7097 }
7098
df7492f9
KH
7099 tail = list;
7100 list = Qnil;
7101 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 7102 {
df7492f9
KH
7103 elt = XCAR (tail);
7104 if (CONSP (XCDR (XCDR (elt))))
7105 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7106 list);
05e6f5dc 7107 }
df7492f9
KH
7108
7109 return list;
05e6f5dc
KH
7110}
7111
7112
df7492f9 7113
4031e2bf 7114Lisp_Object
df7492f9
KH
7115code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7116 Lisp_Object start, end, coding_system, dst_object;
7117 int encodep, norecord;
3a73fa5d
RS
7118{
7119 struct coding_system coding;
df7492f9
KH
7120 EMACS_INT from, from_byte, to, to_byte;
7121 Lisp_Object src_object;
3a73fa5d 7122
b7826503
PJ
7123 CHECK_NUMBER_COERCE_MARKER (start);
7124 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7125 if (NILP (coding_system))
7126 coding_system = Qno_conversion;
7127 else
7128 CHECK_CODING_SYSTEM (coding_system);
7129 src_object = Fcurrent_buffer ();
7130 if (NILP (dst_object))
7131 dst_object = src_object;
7132 else if (! EQ (dst_object, Qt))
7133 CHECK_BUFFER (dst_object);
3a73fa5d 7134
d46c5b12
KH
7135 validate_region (&start, &end);
7136 from = XFASTINT (start);
df7492f9 7137 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7138 to = XFASTINT (end);
df7492f9 7139 to_byte = CHAR_TO_BYTE (to);
d46c5b12 7140
df7492f9
KH
7141 setup_coding_system (coding_system, &coding);
7142 coding.mode |= CODING_MODE_LAST_BLOCK;
7143
7144 if (encodep)
7145 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7146 dst_object);
7147 else
7148 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7149 dst_object);
7150 if (! norecord)
7151 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 7152
df7492f9
KH
7153 if (coding.result != CODING_RESULT_SUCCESS)
7154 error ("Code conversion error: %d", coding.result);
3a73fa5d 7155
df7492f9
KH
7156 return (BUFFERP (dst_object)
7157 ? make_number (coding.produced_char)
7158 : coding.dst_object);
4031e2bf
KH
7159}
7160
df7492f9 7161
4031e2bf 7162DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7163 3, 4, "r\nzCoding system: ",
48b0f3ae 7164 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7165When called from a program, takes four arguments:
7166 START, END, CODING-SYSTEM, and DESTINATION.
7167START and END are buffer positions.
7168
7169Optional 4th arguments DESTINATION specifies where the decoded text goes.
7170If nil, the region between START and END is replace by the decoded text.
7171If buffer, the decoded text is inserted in the buffer.
7172If t, the decoded text is returned.
7173
48b0f3ae
PJ
7174This function sets `last-coding-system-used' to the precise coding system
7175used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7176not fully specified.)
7177It returns the length of the decoded text. */)
df7492f9
KH
7178 (start, end, coding_system, destination)
7179 Lisp_Object start, end, coding_system, destination;
4031e2bf 7180{
df7492f9 7181 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
7182}
7183
7184DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7185 3, 4, "r\nzCoding system: ",
7186 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7187When called from a program, takes three arguments:
7188START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
7189
7190Optional 4th arguments DESTINATION specifies where the encoded text goes.
7191If nil, the region between START and END is replace by the encoded text.
7192If buffer, the encoded text is inserted in the buffer.
7193If t, the encoded text is returned.
7194
48b0f3ae
PJ
7195This function sets `last-coding-system-used' to the precise coding system
7196used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7197not fully specified.)
7198It returns the length of the encoded text. */)
df7492f9
KH
7199 (start, end, coding_system, destination)
7200 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7201{
df7492f9 7202 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 7203}
3a73fa5d 7204
4031e2bf 7205Lisp_Object
df7492f9
KH
7206code_convert_string (string, coding_system, dst_object,
7207 encodep, nocopy, norecord)
7208 Lisp_Object string, coding_system, dst_object;
7209 int encodep, nocopy, norecord;
4031e2bf
KH
7210{
7211 struct coding_system coding;
df7492f9 7212 EMACS_INT chars, bytes;
3a73fa5d 7213
b7826503 7214 CHECK_STRING (string);
d46c5b12 7215 if (NILP (coding_system))
df7492f9
KH
7216 {
7217 if (! norecord)
7218 Vlast_coding_system_used = Qno_conversion;
7219 if (NILP (dst_object))
7220 return (nocopy ? Fcopy_sequence (string) : string);
7221 }
4ed46869 7222
df7492f9
KH
7223 if (NILP (coding_system))
7224 coding_system = Qno_conversion;
7225 else
7226 CHECK_CODING_SYSTEM (coding_system);
7227 if (NILP (dst_object))
7228 dst_object = Qt;
7229 else if (! EQ (dst_object, Qt))
7230 CHECK_BUFFER (dst_object);
5f1cd180 7231
df7492f9 7232 setup_coding_system (coding_system, &coding);
d46c5b12 7233 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
7234 chars = XSTRING (string)->size;
7235 bytes = STRING_BYTES (XSTRING (string));
7236 if (encodep)
7237 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7238 else
7239 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7240 if (! norecord)
7241 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7242
df7492f9
KH
7243 if (coding.result != CODING_RESULT_SUCCESS)
7244 error ("Code conversion error: %d", coding.result);
4ed46869 7245
df7492f9
KH
7246 return (BUFFERP (dst_object)
7247 ? make_number (coding.produced_char)
7248 : coding.dst_object);
4ed46869
KH
7249}
7250
4031e2bf 7251
ecec61c1 7252/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
7253 Do not set Vlast_coding_system_used.
7254
7255 This function is called only from macros DECODE_FILE and
7256 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
7257
7258Lisp_Object
7259code_convert_string_norecord (string, coding_system, encodep)
7260 Lisp_Object string, coding_system;
7261 int encodep;
7262{
0be8721c 7263 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 7264}
ecec61c1 7265
ecec61c1 7266
df7492f9
KH
7267DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7268 2, 4, 0,
7269 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7270
7271Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7272if the decoding operation is trivial.
ecec61c1 7273
df7492f9 7274Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 7275inserted in BUFFER instead of returned as a string. In this case,
df7492f9 7276the return value is BUFFER.
ecec61c1 7277
df7492f9
KH
7278This function sets `last-coding-system-used' to the precise coding system
7279used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7280not fully specified. */)
7281 (string, coding_system, nocopy, buffer)
7282 Lisp_Object string, coding_system, nocopy, buffer;
7283{
7284 return code_convert_string (string, coding_system, buffer,
7285 0, ! NILP (nocopy), 0);
7286}
7287
7288DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7289 2, 4, 0,
7290 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7291
7292Optional third arg NOCOPY non-nil means it is OK to return STRING
7293itself if the encoding operation is trivial.
7294
7295Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 7296inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
7297the return value is BUFFER.
7298
7299This function sets `last-coding-system-used' to the precise coding system
7300used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7301not fully specified.) */)
7302 (string, coding_system, nocopy, buffer)
7303 Lisp_Object string, coding_system, nocopy, buffer;
7304{
7305 return code_convert_string (string, coding_system, buffer,
c197f191 7306 1, ! NILP (nocopy), 1);
ecec61c1 7307}
df7492f9 7308
3a73fa5d 7309\f
4ed46869 7310DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7311 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7312Return the corresponding character. */)
7313 (code)
4ed46869
KH
7314 Lisp_Object code;
7315{
df7492f9
KH
7316 Lisp_Object spec, attrs, val;
7317 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7318 int c;
7319
7320 CHECK_NATNUM (code);
7321 c = XFASTINT (code);
7322 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7323 attrs = AREF (spec, 0);
4ed46869 7324
df7492f9
KH
7325 if (ASCII_BYTE_P (c)
7326 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7327 return code;
7328
7329 val = CODING_ATTR_CHARSET_LIST (attrs);
7330 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7331 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7332 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
7333
7334 if (c <= 0x7F)
7335 charset = charset_roman;
7336 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7337 {
df7492f9
KH
7338 charset = charset_kana;
7339 c -= 0x80;
55ab7be3
KH
7340 }
7341 else
7342 {
004068e4 7343 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7344
7345 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7346 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7347 error ("Invalid code: %d", code);
7348 SJIS_TO_JIS (c);
7349 charset = charset_kanji;
55ab7be3 7350 }
df7492f9
KH
7351 c = DECODE_CHAR (charset, c);
7352 if (c < 0)
7353 error ("Invalid code: %d", code);
7354 return make_number (c);
4ed46869
KH
7355}
7356
df7492f9 7357
4ed46869 7358DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7359 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7360Return the corresponding code in SJIS. */)
7361 (ch)
df7492f9 7362 Lisp_Object ch;
4ed46869 7363{
df7492f9
KH
7364 Lisp_Object spec, attrs, charset_list;
7365 int c;
7366 struct charset *charset;
7367 unsigned code;
4ed46869 7368
df7492f9
KH
7369 CHECK_CHARACTER (ch);
7370 c = XFASTINT (ch);
7371 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7372 attrs = AREF (spec, 0);
7373
7374 if (ASCII_CHAR_P (c)
7375 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7376 return ch;
7377
7378 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7379 charset = char_charset (c, charset_list, &code);
7380 if (code == CHARSET_INVALID_CODE (charset))
7381 error ("Can't encode by shift_jis encoding: %d", c);
7382 JIS_TO_SJIS (code);
7383
7384 return make_number (code);
4ed46869
KH
7385}
7386
7387DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7388 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7389Return the corresponding character. */)
7390 (code)
4ed46869
KH
7391 Lisp_Object code;
7392{
df7492f9
KH
7393 Lisp_Object spec, attrs, val;
7394 struct charset *charset_roman, *charset_big5, *charset;
7395 int c;
4ed46869 7396
df7492f9
KH
7397 CHECK_NATNUM (code);
7398 c = XFASTINT (code);
7399 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7400 attrs = AREF (spec, 0);
7401
7402 if (ASCII_BYTE_P (c)
7403 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7404 return code;
7405
7406 val = CODING_ATTR_CHARSET_LIST (attrs);
7407 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7408 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7409
7410 if (c <= 0x7F)
7411 charset = charset_roman;
c28a9453
KH
7412 else
7413 {
df7492f9
KH
7414 int b1 = c >> 8, b2 = c & 0x7F;
7415 if (b1 < 0xA1 || b1 > 0xFE
7416 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7417 error ("Invalid code: %d", code);
7418 charset = charset_big5;
c28a9453 7419 }
df7492f9
KH
7420 c = DECODE_CHAR (charset, (unsigned )c);
7421 if (c < 0)
7422 error ("Invalid code: %d", code);
7423 return make_number (c);
4ed46869
KH
7424}
7425
7426DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7427 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7428Return the corresponding character code in Big5. */)
7429 (ch)
4ed46869
KH
7430 Lisp_Object ch;
7431{
df7492f9
KH
7432 Lisp_Object spec, attrs, charset_list;
7433 struct charset *charset;
7434 int c;
7435 unsigned code;
7436
7437 CHECK_CHARACTER (ch);
7438 c = XFASTINT (ch);
7439 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7440 attrs = AREF (spec, 0);
7441 if (ASCII_CHAR_P (c)
7442 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7443 return ch;
7444
7445 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7446 charset = char_charset (c, charset_list, &code);
7447 if (code == CHARSET_INVALID_CODE (charset))
7448 error ("Can't encode by Big5 encoding: %d", c);
7449
7450 return make_number (code);
4ed46869 7451}
df7492f9 7452
3a73fa5d 7453\f
1ba9e4ab
KH
7454DEFUN ("set-terminal-coding-system-internal",
7455 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7456 Sset_terminal_coding_system_internal, 1, 1, 0,
7457 doc: /* Internal use only. */)
7458 (coding_system)
b74e4686 7459 Lisp_Object coding_system;
4ed46869 7460{
b7826503 7461 CHECK_SYMBOL (coding_system);
df7492f9
KH
7462 setup_coding_system (Fcheck_coding_system (coding_system),
7463 &terminal_coding);
7464
70c22245 7465 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7466 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7467 /* Characer composition should be disabled. */
7468 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7469 terminal_coding.src_multibyte = 1;
7470 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7471 return Qnil;
7472}
7473
c4825358
KH
7474DEFUN ("set-safe-terminal-coding-system-internal",
7475 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7476 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7477 doc: /* Internal use only. */)
48b0f3ae 7478 (coding_system)
b74e4686 7479 Lisp_Object coding_system;
c4825358 7480{
b7826503 7481 CHECK_SYMBOL (coding_system);
c4825358
KH
7482 setup_coding_system (Fcheck_coding_system (coding_system),
7483 &safe_terminal_coding);
df7492f9
KH
7484 /* Characer composition should be disabled. */
7485 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7486 safe_terminal_coding.src_multibyte = 1;
7487 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7488 return Qnil;
7489}
7490
4ed46869
KH
7491DEFUN ("terminal-coding-system",
7492 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7493 doc: /* Return coding system specified for terminal output. */)
7494 ()
4ed46869 7495{
df7492f9 7496 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7497}
7498
1ba9e4ab
KH
7499DEFUN ("set-keyboard-coding-system-internal",
7500 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7501 Sset_keyboard_coding_system_internal, 1, 1, 0,
7502 doc: /* Internal use only. */)
7503 (coding_system)
4ed46869
KH
7504 Lisp_Object coding_system;
7505{
b7826503 7506 CHECK_SYMBOL (coding_system);
df7492f9
KH
7507 setup_coding_system (Fcheck_coding_system (coding_system),
7508 &keyboard_coding);
7509 /* Characer composition should be disabled. */
7510 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7511 return Qnil;
7512}
7513
7514DEFUN ("keyboard-coding-system",
7515 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7516 doc: /* Return coding system specified for decoding keyboard input. */)
7517 ()
4ed46869 7518{
df7492f9 7519 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7520}
7521
7522\f
a5d301df
KH
7523DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7524 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7525 doc: /* Choose a coding system for an operation based on the target name.
7526The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7527DECODING-SYSTEM is the coding system to use for decoding
7528\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7529for encoding (in case OPERATION does encoding).
7530
7531The first argument OPERATION specifies an I/O primitive:
7532 For file I/O, `insert-file-contents' or `write-region'.
7533 For process I/O, `call-process', `call-process-region', or `start-process'.
7534 For network I/O, `open-network-stream'.
7535
7536The remaining arguments should be the same arguments that were passed
7537to the primitive. Depending on which primitive, one of those arguments
7538is selected as the TARGET. For example, if OPERATION does file I/O,
7539whichever argument specifies the file name is TARGET.
7540
7541TARGET has a meaning which depends on OPERATION:
7542 For file I/O, TARGET is a file name.
7543 For process I/O, TARGET is a process name.
7544 For network I/O, TARGET is a service name or a port number
7545
7546This function looks up what specified for TARGET in,
7547`file-coding-system-alist', `process-coding-system-alist',
7548or `network-coding-system-alist' depending on OPERATION.
7549They may specify a coding system, a cons of coding systems,
7550or a function symbol to call.
7551In the last case, we call the function with one argument,
7552which is a list of all the arguments given to this function.
7553
7554usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7555 (nargs, args)
4ed46869
KH
7556 int nargs;
7557 Lisp_Object *args;
7558{
7559 Lisp_Object operation, target_idx, target, val;
7560 register Lisp_Object chain;
7561
7562 if (nargs < 2)
7563 error ("Too few arguments");
7564 operation = args[0];
7565 if (!SYMBOLP (operation)
7566 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7567 error ("Invalid first arguement");
4ed46869
KH
7568 if (nargs < 1 + XINT (target_idx))
7569 error ("Too few arguments for operation: %s",
7570 XSYMBOL (operation)->name->data);
7571 target = args[XINT (target_idx) + 1];
7572 if (!(STRINGP (target)
7573 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7574 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7575
2e34157c
RS
7576 chain = ((EQ (operation, Qinsert_file_contents)
7577 || EQ (operation, Qwrite_region))
02ba4723 7578 ? Vfile_coding_system_alist
2e34157c 7579 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7580 ? Vnetwork_coding_system_alist
7581 : Vprocess_coding_system_alist));
4ed46869
KH
7582 if (NILP (chain))
7583 return Qnil;
7584
03699b14 7585 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7586 {
f44d27ce 7587 Lisp_Object elt;
4ed46869 7588
df7492f9 7589 elt = XCAR (chain);
4ed46869
KH
7590 if (CONSP (elt)
7591 && ((STRINGP (target)
03699b14
KR
7592 && STRINGP (XCAR (elt))
7593 && fast_string_match (XCAR (elt), target) >= 0)
7594 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7595 {
03699b14 7596 val = XCDR (elt);
b19fd4c5
KH
7597 /* Here, if VAL is both a valid coding system and a valid
7598 function symbol, we return VAL as a coding system. */
02ba4723
KH
7599 if (CONSP (val))
7600 return val;
7601 if (! SYMBOLP (val))
7602 return Qnil;
7603 if (! NILP (Fcoding_system_p (val)))
7604 return Fcons (val, val);
b19fd4c5
KH
7605 if (! NILP (Ffboundp (val)))
7606 {
7607 val = call1 (val, Flist (nargs, args));
7608 if (CONSP (val))
7609 return val;
7610 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7611 return Fcons (val, val);
7612 }
02ba4723
KH
7613 return Qnil;
7614 }
4ed46869
KH
7615 }
7616 return Qnil;
7617}
7618
df7492f9 7619DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7620 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7621 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7622usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7623 (nargs, args)
7624 int nargs;
7625 Lisp_Object *args;
7626{
7627 int i, j;
7628 int changed[coding_category_max];
7629 enum coding_category priorities[coding_category_max];
7630
7631 bzero (changed, sizeof changed);
7632
7633 for (i = j = 0; i < nargs; i++)
7634 {
7635 enum coding_category category;
7636 Lisp_Object spec, attrs;
7637
7638 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7639 attrs = AREF (spec, 0);
7640 category = XINT (CODING_ATTR_CATEGORY (attrs));
7641 if (changed[category])
7642 /* Ignore this coding system because a coding system of the
7643 same category already had a higher priority. */
7644 continue;
7645 changed[category] = 1;
7646 priorities[j++] = category;
7647 if (coding_categories[category].id >= 0
7648 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7649 setup_coding_system (args[i], &coding_categories[category]);
7650 }
7651
7652 /* Now we have decided top J priorities. Reflect the order of the
7653 original priorities to the remaining priorities. */
7654
7655 for (i = j, j = 0; i < coding_category_max; i++, j++)
7656 {
7657 while (j < coding_category_max
7658 && changed[coding_priorities[j]])
7659 j++;
7660 if (j == coding_category_max)
7661 abort ();
7662 priorities[i] = coding_priorities[j];
7663 }
7664
7665 bcopy (priorities, coding_priorities, sizeof priorities);
7666 return Qnil;
7667}
7668
7669DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7670 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7671 doc: /* Return a list of coding systems ordered by their priorities.
7672HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7673 (highestp)
7674 Lisp_Object highestp;
d46c5b12
KH
7675{
7676 int i;
df7492f9 7677 Lisp_Object val;
d46c5b12 7678
df7492f9 7679 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7680 {
df7492f9
KH
7681 enum coding_category category = coding_priorities[i];
7682 int id = coding_categories[category].id;
7683 Lisp_Object attrs;
7684
7685 if (id < 0)
7686 continue;
7687 attrs = CODING_ID_ATTRS (id);
7688 if (! NILP (highestp))
7689 return CODING_ATTR_BASE_NAME (attrs);
7690 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7691 }
7692 return Fnreverse (val);
7693}
7694
f0064e1f
DL
7695static char *suffixes[] = { "-unix", "-dos", "-mac" };
7696
df7492f9
KH
7697static Lisp_Object
7698make_subsidiaries (base)
7699 Lisp_Object base;
7700{
7701 Lisp_Object subsidiaries;
df7492f9
KH
7702 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7703 char *buf = (char *) alloca (base_name_len + 6);
7704 int i;
7705
7706 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7707 subsidiaries = Fmake_vector (make_number (3), Qnil);
7708 for (i = 0; i < 3; i++)
7709 {
7710 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7711 ASET (subsidiaries, i, intern (buf));
7712 }
7713 return subsidiaries;
7714}
7715
7716
7717DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7718 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7719 doc: /* For internal use only.
7720usage: (define-coding-system-internal ...) */)
df7492f9
KH
7721 (nargs, args)
7722 int nargs;
7723 Lisp_Object *args;
7724{
7725 Lisp_Object name;
7726 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7727 Lisp_Object attrs; /* Vector of attributes. */
7728 Lisp_Object eol_type;
7729 Lisp_Object aliases;
7730 Lisp_Object coding_type, charset_list, safe_charsets;
7731 enum coding_category category;
7732 Lisp_Object tail, val;
7733 int max_charset_id = 0;
7734 int i;
7735
7736 if (nargs < coding_arg_max)
7737 goto short_args;
7738
7739 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7740
7741 name = args[coding_arg_name];
7742 CHECK_SYMBOL (name);
7743 CODING_ATTR_BASE_NAME (attrs) = name;
7744
7745 val = args[coding_arg_mnemonic];
7746 if (! STRINGP (val))
7747 CHECK_CHARACTER (val);
7748 CODING_ATTR_MNEMONIC (attrs) = val;
7749
7750 coding_type = args[coding_arg_coding_type];
7751 CHECK_SYMBOL (coding_type);
7752 CODING_ATTR_TYPE (attrs) = coding_type;
7753
7754 charset_list = args[coding_arg_charset_list];
7755 if (SYMBOLP (charset_list))
7756 {
7757 if (EQ (charset_list, Qiso_2022))
7758 {
7759 if (! EQ (coding_type, Qiso_2022))
7760 error ("Invalid charset-list");
7761 charset_list = Viso_2022_charset_list;
7762 }
7763 else if (EQ (charset_list, Qemacs_mule))
7764 {
7765 if (! EQ (coding_type, Qemacs_mule))
7766 error ("Invalid charset-list");
7767 charset_list = Vemacs_mule_charset_list;
7768 }
7769 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7770 if (max_charset_id < XFASTINT (XCAR (tail)))
7771 max_charset_id = XFASTINT (XCAR (tail));
7772 }
7773 else
7774 {
7775 charset_list = Fcopy_sequence (charset_list);
7776 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7777 {
7778 struct charset *charset;
7779
7780 val = Fcar (tail);
7781 CHECK_CHARSET_GET_CHARSET (val, charset);
7782 if (EQ (coding_type, Qiso_2022)
7783 ? CHARSET_ISO_FINAL (charset) < 0
7784 : EQ (coding_type, Qemacs_mule)
7785 ? CHARSET_EMACS_MULE_ID (charset) < 0
7786 : 0)
7787 error ("Can't handle charset `%s'",
7788 XSYMBOL (CHARSET_NAME (charset))->name->data);
7789
7790 XCAR (tail) = make_number (charset->id);
7791 if (max_charset_id < charset->id)
7792 max_charset_id = charset->id;
7793 }
7794 }
7795 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7796
7797 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7798 make_number (255));
7799 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7800 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7801 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7802
584948ac
KH
7803 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7804
df7492f9
KH
7805 val = args[coding_arg_decode_translation_table];
7806 if (! NILP (val))
7807 CHECK_CHAR_TABLE (val);
7808 CODING_ATTR_DECODE_TBL (attrs) = val;
7809
7810 val = args[coding_arg_encode_translation_table];
7811 if (! NILP (val))
7812 CHECK_CHAR_TABLE (val);
7813 CODING_ATTR_ENCODE_TBL (attrs) = val;
7814
7815 val = args[coding_arg_post_read_conversion];
7816 CHECK_SYMBOL (val);
7817 CODING_ATTR_POST_READ (attrs) = val;
7818
7819 val = args[coding_arg_pre_write_conversion];
7820 CHECK_SYMBOL (val);
7821 CODING_ATTR_PRE_WRITE (attrs) = val;
7822
7823 val = args[coding_arg_default_char];
7824 if (NILP (val))
7825 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7826 else
7827 {
7828 CHECK_CHARACTER (val);
7829 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7830 }
7831
7832 val = args[coding_arg_plist];
7833 CHECK_LIST (val);
7834 CODING_ATTR_PLIST (attrs) = val;
7835
7836 if (EQ (coding_type, Qcharset))
7837 {
c7c66a95
KH
7838 /* Generate a lisp vector of 256 elements. Each element is nil,
7839 integer, or a list of charset IDs.
7840
7841 If Nth element is nil, the byte code N is invalid in this
7842 coding system.
7843
7844 If Nth element is a number NUM, N is the first byte of a
7845 charset whose ID is NUM.
7846
7847 If Nth element is a list of charset IDs, N is the first byte
7848 of one of them. The list is sorted by dimensions of the
7849 charsets. A charset of smaller dimension comes firtst.
7850 */
df7492f9
KH
7851 val = Fmake_vector (make_number (256), Qnil);
7852
7853 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7854 {
c7c66a95
KH
7855 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7856 int dim = CHARSET_DIMENSION (charset);
7857 int idx = (dim - 1) * 4;
7858
584948ac
KH
7859 if (CHARSET_ASCII_COMPATIBLE_P (charset))
7860 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7861
15d143f7
KH
7862 for (i = charset->code_space[idx];
7863 i <= charset->code_space[idx + 1]; i++)
7864 {
c7c66a95
KH
7865 Lisp_Object tmp, tmp2;
7866 int dim2;
7867
7868 tmp = AREF (val, i);
7869 if (NILP (tmp))
7870 tmp = XCAR (tail);
7871 else if (NUMBERP (tmp))
7872 {
7873 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7874 if (dim < dim2)
c7c66a95 7875 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7876 else
7877 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7878 }
15d143f7 7879 else
c7c66a95
KH
7880 {
7881 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7882 {
7883 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7884 if (dim < dim2)
7885 break;
7886 }
7887 if (NILP (tmp2))
7888 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7889 else
7890 {
7891 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7892 XSETCAR (tmp2, XCAR (tail));
7893 }
7894 }
7895 ASET (val, i, tmp);
15d143f7 7896 }
df7492f9
KH
7897 }
7898 ASET (attrs, coding_attr_charset_valids, val);
7899 category = coding_category_charset;
7900 }
7901 else if (EQ (coding_type, Qccl))
7902 {
7903 Lisp_Object valids;
7904
7905 if (nargs < coding_arg_ccl_max)
7906 goto short_args;
7907
7908 val = args[coding_arg_ccl_decoder];
7909 CHECK_CCL_PROGRAM (val);
7910 if (VECTORP (val))
7911 val = Fcopy_sequence (val);
7912 ASET (attrs, coding_attr_ccl_decoder, val);
7913
7914 val = args[coding_arg_ccl_encoder];
7915 CHECK_CCL_PROGRAM (val);
7916 if (VECTORP (val))
7917 val = Fcopy_sequence (val);
7918 ASET (attrs, coding_attr_ccl_encoder, val);
7919
7920 val = args[coding_arg_ccl_valids];
7921 valids = Fmake_string (make_number (256), make_number (0));
7922 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7923 {
8dcbea82
KH
7924 int from, to;
7925
df7492f9
KH
7926 val = Fcar (tail);
7927 if (INTEGERP (val))
8dcbea82
KH
7928 {
7929 from = to = XINT (val);
7930 if (from < 0 || from > 255)
7931 args_out_of_range_3 (val, make_number (0), make_number (255));
7932 }
df7492f9
KH
7933 else
7934 {
df7492f9
KH
7935 CHECK_CONS (val);
7936 CHECK_NUMBER (XCAR (val));
7937 CHECK_NUMBER (XCDR (val));
7938 from = XINT (XCAR (val));
8dcbea82
KH
7939 if (from < 0 || from > 255)
7940 args_out_of_range_3 (XCAR (val),
7941 make_number (0), make_number (255));
df7492f9 7942 to = XINT (XCDR (val));
8dcbea82
KH
7943 if (to < from || to > 255)
7944 args_out_of_range_3 (XCDR (val),
7945 XCAR (val), make_number (255));
df7492f9 7946 }
8dcbea82
KH
7947 for (i = from; i <= to; i++)
7948 XSTRING (valids)->data[i] = 1;
df7492f9
KH
7949 }
7950 ASET (attrs, coding_attr_ccl_valids, valids);
7951
7952 category = coding_category_ccl;
7953 }
7954 else if (EQ (coding_type, Qutf_16))
7955 {
7956 Lisp_Object bom, endian;
7957
584948ac
KH
7958 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
7959
df7492f9
KH
7960 if (nargs < coding_arg_utf16_max)
7961 goto short_args;
7962
7963 bom = args[coding_arg_utf16_bom];
7964 if (! NILP (bom) && ! EQ (bom, Qt))
7965 {
7966 CHECK_CONS (bom);
7967 CHECK_CODING_SYSTEM (XCAR (bom));
7968 CHECK_CODING_SYSTEM (XCDR (bom));
7969 }
7970 ASET (attrs, coding_attr_utf_16_bom, bom);
7971
7972 endian = args[coding_arg_utf16_endian];
b49a1807
KH
7973 CHECK_SYMBOL (endian);
7974 if (NILP (endian))
7975 endian = Qbig;
7976 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
7977 error ("Invalid endian: %s", XSYMBOL (endian)->name->data);
df7492f9
KH
7978 ASET (attrs, coding_attr_utf_16_endian, endian);
7979
7980 category = (CONSP (bom)
7981 ? coding_category_utf_16_auto
7982 : NILP (bom)
b49a1807 7983 ? (EQ (endian, Qbig)
df7492f9
KH
7984 ? coding_category_utf_16_be_nosig
7985 : coding_category_utf_16_le_nosig)
b49a1807 7986 : (EQ (endian, Qbig)
df7492f9
KH
7987 ? coding_category_utf_16_be
7988 : coding_category_utf_16_le));
7989 }
7990 else if (EQ (coding_type, Qiso_2022))
7991 {
7992 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7993 int i, id;
1397dc18 7994
df7492f9
KH
7995 if (nargs < coding_arg_iso2022_max)
7996 goto short_args;
7997
7998 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7999 CHECK_VECTOR (initial);
8000 for (i = 0; i < 4; i++)
8001 {
8002 val = Faref (initial, make_number (i));
8003 if (! NILP (val))
8004 {
584948ac
KH
8005 struct charset *charset;
8006
8007 CHECK_CHARSET_GET_CHARSET (val, charset);
8008 ASET (initial, i, make_number (CHARSET_ID (charset)));
8009 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8010 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8011 }
8012 else
8013 ASET (initial, i, make_number (-1));
8014 }
8015
8016 reg_usage = args[coding_arg_iso2022_reg_usage];
8017 CHECK_CONS (reg_usage);
8018 CHECK_NATNUM (XCAR (reg_usage));
8019 CHECK_NATNUM (XCDR (reg_usage));
8020
8021 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8022 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8023 {
df7492f9
KH
8024 int id;
8025
8026 val = Fcar (tail);
8027 CHECK_CONS (val);
8028 CHECK_CHARSET_GET_ID (XCAR (val), id);
8029 CHECK_NATNUM (XCDR (val));
8030 if (XINT (XCDR (val)) >= 4)
8031 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8032 XCAR (val) = make_number (id);
1397dc18 8033 }
df7492f9
KH
8034
8035 flags = args[coding_arg_iso2022_flags];
8036 CHECK_NATNUM (flags);
8037 i = XINT (flags);
8038 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8039 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8040
8041 ASET (attrs, coding_attr_iso_initial, initial);
8042 ASET (attrs, coding_attr_iso_usage, reg_usage);
8043 ASET (attrs, coding_attr_iso_request, request);
8044 ASET (attrs, coding_attr_iso_flags, flags);
8045 setup_iso_safe_charsets (attrs);
8046
8047 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8048 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8049 | CODING_ISO_FLAG_SINGLE_SHIFT))
8050 ? coding_category_iso_7_else
8051 : EQ (args[coding_arg_charset_list], Qiso_2022)
8052 ? coding_category_iso_7
8053 : coding_category_iso_7_tight);
8054 else
8055 {
8056 int id = XINT (AREF (initial, 1));
8057
c6fb6e98 8058 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8059 || EQ (args[coding_arg_charset_list], Qiso_2022)
8060 || id < 0)
8061 ? coding_category_iso_8_else
8062 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8063 ? coding_category_iso_8_1
8064 : coding_category_iso_8_2);
8065 }
0ce7886f
KH
8066 if (category != coding_category_iso_8_1
8067 && category != coding_category_iso_8_2)
8068 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8069 }
8070 else if (EQ (coding_type, Qemacs_mule))
8071 {
8072 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8073 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8074 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8075 category = coding_category_emacs_mule;
8076 }
8077 else if (EQ (coding_type, Qshift_jis))
8078 {
8079
8080 struct charset *charset;
8081
8082 if (XINT (Flength (charset_list)) != 3)
8083 error ("There should be just three charsets");
8084
8085 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8086 if (CHARSET_DIMENSION (charset) != 1)
8087 error ("Dimension of charset %s is not one",
8088 XSYMBOL (CHARSET_NAME (charset))->name->data);
584948ac
KH
8089 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8090 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8091
8092 charset_list = XCDR (charset_list);
8093 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8094 if (CHARSET_DIMENSION (charset) != 1)
8095 error ("Dimension of charset %s is not one",
8096 XSYMBOL (CHARSET_NAME (charset))->name->data);
8097
8098 charset_list = XCDR (charset_list);
8099 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8100 if (CHARSET_DIMENSION (charset) != 2)
8101 error ("Dimension of charset %s is not two",
8102 XSYMBOL (CHARSET_NAME (charset))->name->data);
8103
8104 category = coding_category_sjis;
8105 Vsjis_coding_system = name;
8106 }
8107 else if (EQ (coding_type, Qbig5))
8108 {
8109 struct charset *charset;
8110
8111 if (XINT (Flength (charset_list)) != 2)
8112 error ("There should be just two charsets");
8113
8114 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8115 if (CHARSET_DIMENSION (charset) != 1)
8116 error ("Dimension of charset %s is not one",
8117 XSYMBOL (CHARSET_NAME (charset))->name->data);
584948ac
KH
8118 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8119 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8120
8121 charset_list = XCDR (charset_list);
8122 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8123 if (CHARSET_DIMENSION (charset) != 2)
8124 error ("Dimension of charset %s is not two",
8125 XSYMBOL (CHARSET_NAME (charset))->name->data);
8126
8127 category = coding_category_big5;
8128 Vbig5_coding_system = name;
8129 }
8130 else if (EQ (coding_type, Qraw_text))
584948ac
KH
8131 {
8132 category = coding_category_raw_text;
8133 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8134 }
df7492f9 8135 else if (EQ (coding_type, Qutf_8))
584948ac
KH
8136 {
8137 category = coding_category_utf_8;
8138 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8139 }
df7492f9
KH
8140 else if (EQ (coding_type, Qundecided))
8141 category = coding_category_undecided;
8142 else
8143 error ("Invalid coding system type: %s",
8144 XSYMBOL (coding_type)->name->data);
8145
8146 CODING_ATTR_CATEGORY (attrs) = make_number (category);
8147
8148 eol_type = args[coding_arg_eol_type];
8149 if (! NILP (eol_type)
8150 && ! EQ (eol_type, Qunix)
8151 && ! EQ (eol_type, Qdos)
8152 && ! EQ (eol_type, Qmac))
8153 error ("Invalid eol-type");
8154
8155 aliases = Fcons (name, Qnil);
8156
8157 if (NILP (eol_type))
8158 {
8159 eol_type = make_subsidiaries (name);
8160 for (i = 0; i < 3; i++)
1397dc18 8161 {
df7492f9
KH
8162 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8163
8164 this_name = AREF (eol_type, i);
8165 this_aliases = Fcons (this_name, Qnil);
8166 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8167 this_spec = Fmake_vector (make_number (3), attrs);
8168 ASET (this_spec, 1, this_aliases);
8169 ASET (this_spec, 2, this_eol_type);
8170 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8171 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8172 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8173 Vcoding_system_alist);
1397dc18 8174 }
d46c5b12 8175 }
1397dc18 8176
df7492f9
KH
8177 spec_vec = Fmake_vector (make_number (3), attrs);
8178 ASET (spec_vec, 1, aliases);
8179 ASET (spec_vec, 2, eol_type);
8180
8181 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8182 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8183 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8184 Vcoding_system_alist);
8185
8186 {
8187 int id = coding_categories[category].id;
8188
8189 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8190 setup_coding_system (name, &coding_categories[category]);
8191 }
8192
d46c5b12 8193 return Qnil;
df7492f9
KH
8194
8195 short_args:
8196 return Fsignal (Qwrong_number_of_arguments,
8197 Fcons (intern ("define-coding-system-internal"),
8198 make_number (nargs)));
d46c5b12
KH
8199}
8200
da7db224 8201/* Fixme: should this record the alias relationships for
e1c23804 8202 diagnostics? Should it update coding-system-list? */
df7492f9
KH
8203DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8204 Sdefine_coding_system_alias, 2, 2, 0,
8205 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8206 (alias, coding_system)
8207 Lisp_Object alias, coding_system;
66cfb530 8208{
df7492f9 8209 Lisp_Object spec, aliases, eol_type;
84d60297 8210
df7492f9
KH
8211 CHECK_SYMBOL (alias);
8212 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8213 aliases = AREF (spec, 1);
8214 while (!NILP (XCDR (aliases)))
8215 aliases = XCDR (aliases);
8216 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 8217
df7492f9
KH
8218 eol_type = AREF (spec, 2);
8219 if (VECTORP (eol_type))
66cfb530 8220 {
df7492f9
KH
8221 Lisp_Object subsidiaries;
8222 int i;
8223
8224 subsidiaries = make_subsidiaries (alias);
8225 for (i = 0; i < 3; i++)
8226 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8227 AREF (eol_type, i));
8228
8229 ASET (spec, 2, subsidiaries);
66cfb530 8230 }
df7492f9
KH
8231
8232 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
8233 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8234 Vcoding_system_alist);
66cfb530
KH
8235
8236 return Qnil;
8237}
8238
df7492f9
KH
8239DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8240 1, 1, 0,
8241 doc: /* Return the base of CODING-SYSTEM.
da7db224 8242Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
8243 (coding_system)
8244 Lisp_Object coding_system;
8245{
8246 Lisp_Object spec, attrs;
8247
8248 if (NILP (coding_system))
8249 return (Qno_conversion);
8250 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8251 attrs = AREF (spec, 0);
8252 return CODING_ATTR_BASE_NAME (attrs);
8253}
8254
8255DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8256 1, 1, 0,
8257 doc: "Return the property list of CODING-SYSTEM.")
8258 (coding_system)
8259 Lisp_Object coding_system;
8260{
8261 Lisp_Object spec, attrs;
8262
8263 if (NILP (coding_system))
8264 coding_system = Qno_conversion;
8265 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8266 attrs = AREF (spec, 0);
8267 return CODING_ATTR_PLIST (attrs);
8268}
8269
8270
8271DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8272 1, 1, 0,
da7db224 8273 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
8274 (coding_system)
8275 Lisp_Object coding_system;
8276{
8277 Lisp_Object spec;
8278
8279 if (NILP (coding_system))
8280 coding_system = Qno_conversion;
8281 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 8282 return AREF (spec, 1);
df7492f9
KH
8283}
8284
8285DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8286 Scoding_system_eol_type, 1, 1, 0,
8287 doc: /* Return eol-type of CODING-SYSTEM.
8288An eol-type is integer 0, 1, 2, or a vector of coding systems.
8289
8290Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8291and CR respectively.
8292
8293A vector value indicates that a format of end-of-line should be
8294detected automatically. Nth element of the vector is the subsidiary
8295coding system whose eol-type is N. */)
8296 (coding_system)
8297 Lisp_Object coding_system;
8298{
8299 Lisp_Object spec, eol_type;
8300 int n;
8301
8302 if (NILP (coding_system))
8303 coding_system = Qno_conversion;
8304 if (! CODING_SYSTEM_P (coding_system))
8305 return Qnil;
8306 spec = CODING_SYSTEM_SPEC (coding_system);
8307 eol_type = AREF (spec, 2);
8308 if (VECTORP (eol_type))
8309 return Fcopy_sequence (eol_type);
8310 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8311 return make_number (n);
8312}
8313
4ed46869
KH
8314#endif /* emacs */
8315
8316\f
1397dc18 8317/*** 12. Postamble ***/
4ed46869 8318
dfcf069d 8319void
4ed46869
KH
8320init_coding_once ()
8321{
8322 int i;
8323
df7492f9
KH
8324 for (i = 0; i < coding_category_max; i++)
8325 {
8326 coding_categories[i].id = -1;
8327 coding_priorities[i] = i;
8328 }
4ed46869
KH
8329
8330 /* ISO2022 specific initialize routine. */
8331 for (i = 0; i < 0x20; i++)
b73bfc1c 8332 iso_code_class[i] = ISO_control_0;
4ed46869
KH
8333 for (i = 0x21; i < 0x7F; i++)
8334 iso_code_class[i] = ISO_graphic_plane_0;
8335 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 8336 iso_code_class[i] = ISO_control_1;
4ed46869
KH
8337 for (i = 0xA1; i < 0xFF; i++)
8338 iso_code_class[i] = ISO_graphic_plane_1;
8339 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8340 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8341 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8342 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8343 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8344 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8345 iso_code_class[ISO_CODE_ESC] = ISO_escape;
8346 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8347 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8348 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8349
b843d1ae 8350 inhibit_pre_post_conversion = 0;
df7492f9
KH
8351
8352 for (i = 0; i < 256; i++)
8353 {
8354 emacs_mule_bytes[i] = 1;
8355 }
7c78e542
KH
8356 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8357 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8358 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8359 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
8360}
8361
8362#ifdef emacs
8363
dfcf069d 8364void
e0e989f6
KH
8365syms_of_coding ()
8366{
df7492f9
KH
8367 staticpro (&Vcoding_system_hash_table);
8368 Vcoding_system_hash_table = Fmakehash (Qeq);
8369
8370 staticpro (&Vsjis_coding_system);
8371 Vsjis_coding_system = Qnil;
8372
8373 staticpro (&Vbig5_coding_system);
8374 Vbig5_coding_system = Qnil;
8375
8376 staticpro (&Vcode_conversion_work_buf_list);
8377 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 8378
df7492f9
KH
8379 staticpro (&Vcode_conversion_reused_work_buf);
8380 Vcode_conversion_reused_work_buf = Qnil;
8381
8382 DEFSYM (Qcharset, "charset");
8383 DEFSYM (Qtarget_idx, "target-idx");
8384 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
8385 Fset (Qcoding_system_history, Qnil);
8386
9ce27fde 8387 /* Target FILENAME is the first argument. */
e0e989f6 8388 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 8389 /* Target FILENAME is the third argument. */
e0e989f6
KH
8390 Fput (Qwrite_region, Qtarget_idx, make_number (2));
8391
df7492f9 8392 DEFSYM (Qcall_process, "call-process");
9ce27fde 8393 /* Target PROGRAM is the first argument. */
e0e989f6
KH
8394 Fput (Qcall_process, Qtarget_idx, make_number (0));
8395
df7492f9 8396 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 8397 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8398 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8399
df7492f9 8400 DEFSYM (Qstart_process, "start-process");
9ce27fde 8401 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8402 Fput (Qstart_process, Qtarget_idx, make_number (2));
8403
df7492f9 8404 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 8405 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
8406 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8407
df7492f9
KH
8408 DEFSYM (Qcoding_system, "coding-system");
8409 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 8410
df7492f9
KH
8411 DEFSYM (Qeol_type, "eol-type");
8412 DEFSYM (Qunix, "unix");
8413 DEFSYM (Qdos, "dos");
4ed46869 8414
df7492f9
KH
8415 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8416 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8417 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8418 DEFSYM (Qdefault_char, "default-char");
8419 DEFSYM (Qundecided, "undecided");
8420 DEFSYM (Qno_conversion, "no-conversion");
8421 DEFSYM (Qraw_text, "raw-text");
4ed46869 8422
df7492f9 8423 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 8424
df7492f9 8425 DEFSYM (Qutf_8, "utf-8");
27901516 8426
df7492f9 8427 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
8428 DEFSYM (Qbig, "big");
8429 DEFSYM (Qlittle, "little");
27901516 8430
df7492f9
KH
8431 DEFSYM (Qshift_jis, "shift-jis");
8432 DEFSYM (Qbig5, "big5");
4ed46869 8433
df7492f9 8434 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 8435
df7492f9 8436 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
8437 Fput (Qcoding_system_error, Qerror_conditions,
8438 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8439 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 8440 build_string ("Invalid coding system"));
4ed46869 8441
df7492f9
KH
8442 /* Intern this now in case it isn't already done.
8443 Setting this variable twice is harmless.
8444 But don't staticpro it here--that is done in alloc.c. */
8445 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 8446
df7492f9 8447 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8448 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8449 DEFSYM (Qtranslation_table_id, "translation-table-id");
8450 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8451 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 8452
df7492f9 8453 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 8454
df7492f9 8455 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 8456
df7492f9
KH
8457 Vcoding_category_table
8458 = Fmake_vector (make_number (coding_category_max), Qnil);
8459 staticpro (&Vcoding_category_table);
8460 /* The following are targets of code detection. */
8461 ASET (Vcoding_category_table, coding_category_iso_7,
8462 intern ("coding-category-iso-7"));
8463 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8464 intern ("coding-category-iso-7-tight"));
8465 ASET (Vcoding_category_table, coding_category_iso_8_1,
8466 intern ("coding-category-iso-8-1"));
8467 ASET (Vcoding_category_table, coding_category_iso_8_2,
8468 intern ("coding-category-iso-8-2"));
8469 ASET (Vcoding_category_table, coding_category_iso_7_else,
8470 intern ("coding-category-iso-7-else"));
8471 ASET (Vcoding_category_table, coding_category_iso_8_else,
8472 intern ("coding-category-iso-8-else"));
8473 ASET (Vcoding_category_table, coding_category_utf_8,
8474 intern ("coding-category-utf-8"));
8475 ASET (Vcoding_category_table, coding_category_utf_16_be,
8476 intern ("coding-category-utf-16-be"));
8477 ASET (Vcoding_category_table, coding_category_utf_16_le,
8478 intern ("coding-category-utf-16-le"));
8479 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8480 intern ("coding-category-utf-16-be-nosig"));
8481 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8482 intern ("coding-category-utf-16-le-nosig"));
8483 ASET (Vcoding_category_table, coding_category_charset,
8484 intern ("coding-category-charset"));
8485 ASET (Vcoding_category_table, coding_category_sjis,
8486 intern ("coding-category-sjis"));
8487 ASET (Vcoding_category_table, coding_category_big5,
8488 intern ("coding-category-big5"));
8489 ASET (Vcoding_category_table, coding_category_ccl,
8490 intern ("coding-category-ccl"));
8491 ASET (Vcoding_category_table, coding_category_emacs_mule,
8492 intern ("coding-category-emacs-mule"));
8493 /* The following are NOT targets of code detection. */
8494 ASET (Vcoding_category_table, coding_category_raw_text,
8495 intern ("coding-category-raw-text"));
8496 ASET (Vcoding_category_table, coding_category_undecided,
8497 intern ("coding-category-undecided"));
70c22245 8498
4ed46869
KH
8499 defsubr (&Scoding_system_p);
8500 defsubr (&Sread_coding_system);
8501 defsubr (&Sread_non_nil_coding_system);
8502 defsubr (&Scheck_coding_system);
8503 defsubr (&Sdetect_coding_region);
d46c5b12 8504 defsubr (&Sdetect_coding_string);
05e6f5dc 8505 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8506 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8507 defsubr (&Sdecode_coding_region);
8508 defsubr (&Sencode_coding_region);
8509 defsubr (&Sdecode_coding_string);
8510 defsubr (&Sencode_coding_string);
8511 defsubr (&Sdecode_sjis_char);
8512 defsubr (&Sencode_sjis_char);
8513 defsubr (&Sdecode_big5_char);
8514 defsubr (&Sencode_big5_char);
1ba9e4ab 8515 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8516 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8517 defsubr (&Sterminal_coding_system);
1ba9e4ab 8518 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8519 defsubr (&Skeyboard_coding_system);
a5d301df 8520 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8521 defsubr (&Sset_coding_system_priority);
8522 defsubr (&Sdefine_coding_system_internal);
8523 defsubr (&Sdefine_coding_system_alias);
8524 defsubr (&Scoding_system_base);
8525 defsubr (&Scoding_system_plist);
8526 defsubr (&Scoding_system_aliases);
8527 defsubr (&Scoding_system_eol_type);
8528 defsubr (&Scoding_system_priority_list);
4ed46869 8529
4608c386 8530 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8531 doc: /* List of coding systems.
8532
8533Do not alter the value of this variable manually. This variable should be
df7492f9 8534updated by the functions `define-coding-system' and
48b0f3ae 8535`define-coding-system-alias'. */);
4608c386
KH
8536 Vcoding_system_list = Qnil;
8537
8538 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8539 doc: /* Alist of coding system names.
8540Each element is one element list of coding system name.
8541This variable is given to `completing-read' as TABLE argument.
8542
8543Do not alter the value of this variable manually. This variable should be
8544updated by the functions `define-coding-system' and
8545`define-coding-system-alias'. */);
4608c386
KH
8546 Vcoding_system_alist = Qnil;
8547
4ed46869 8548 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8549 doc: /* List of coding-categories (symbols) ordered by priority.
8550
8551On detecting a coding system, Emacs tries code detection algorithms
8552associated with each coding-category one by one in this order. When
8553one algorithm agrees with a byte sequence of source text, the coding
8554system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8555 {
8556 int i;
8557
8558 Vcoding_category_list = Qnil;
df7492f9 8559 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8560 Vcoding_category_list
d46c5b12
KH
8561 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8562 Vcoding_category_list);
4ed46869
KH
8563 }
8564
8565 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8566 doc: /* Specify the coding system for read operations.
8567It is useful to bind this variable with `let', but do not set it globally.
8568If the value is a coding system, it is used for decoding on read operation.
8569If not, an appropriate element is used from one of the coding system alists:
8570There are three such tables, `file-coding-system-alist',
8571`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8572 Vcoding_system_for_read = Qnil;
8573
8574 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8575 doc: /* Specify the coding system for write operations.
8576Programs bind this variable with `let', but you should not set it globally.
8577If the value is a coding system, it is used for encoding of output,
8578when writing it to a file and when sending it to a file or subprocess.
8579
8580If this does not specify a coding system, an appropriate element
8581is used from one of the coding system alists:
8582There are three such tables, `file-coding-system-alist',
8583`process-coding-system-alist', and `network-coding-system-alist'.
8584For output to files, if the above procedure does not specify a coding system,
8585the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8586 Vcoding_system_for_write = Qnil;
8587
8588 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8589 doc: /*
8590Coding system used in the latest file or process I/O. */);
4ed46869
KH
8591 Vlast_coding_system_used = Qnil;
8592
9ce27fde 8593 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8594 doc: /*
8595*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8596See info node `Coding Systems' and info node `Text and Binary' concerning
8597such conversion. */);
9ce27fde
KH
8598 inhibit_eol_conversion = 0;
8599
ed29121d 8600 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8601 doc: /*
8602Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8603Bind it to t if the process output is to be treated as if it were a file
8604read from some filesystem. */);
ed29121d
EZ
8605 inherit_process_coding_system = 0;
8606
02ba4723 8607 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8608 doc: /*
8609Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8610The format is ((PATTERN . VAL) ...),
8611where PATTERN is a regular expression matching a file name,
8612VAL is a coding system, a cons of coding systems, or a function symbol.
8613If VAL is a coding system, it is used for both decoding and encoding
8614the file contents.
8615If VAL is a cons of coding systems, the car part is used for decoding,
8616and the cdr part is used for encoding.
8617If VAL is a function symbol, the function must return a coding system
0192762c
DL
8618or a cons of coding systems which are used as above. The function gets
8619the arguments with which `find-operation-coding-system' was called.
48b0f3ae
PJ
8620
8621See also the function `find-operation-coding-system'
8622and the variable `auto-coding-alist'. */);
02ba4723
KH
8623 Vfile_coding_system_alist = Qnil;
8624
8625 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8626 doc: /*
8627Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8628The format is ((PATTERN . VAL) ...),
8629where PATTERN is a regular expression matching a program name,
8630VAL is a coding system, a cons of coding systems, or a function symbol.
8631If VAL is a coding system, it is used for both decoding what received
8632from the program and encoding what sent to the program.
8633If VAL is a cons of coding systems, the car part is used for decoding,
8634and the cdr part is used for encoding.
8635If VAL is a function symbol, the function must return a coding system
8636or a cons of coding systems which are used as above.
8637
8638See also the function `find-operation-coding-system'. */);
02ba4723
KH
8639 Vprocess_coding_system_alist = Qnil;
8640
8641 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8642 doc: /*
8643Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8644The format is ((PATTERN . VAL) ...),
8645where PATTERN is a regular expression matching a network service name
8646or is a port number to connect to,
8647VAL is a coding system, a cons of coding systems, or a function symbol.
8648If VAL is a coding system, it is used for both decoding what received
8649from the network stream and encoding what sent to the network stream.
8650If VAL is a cons of coding systems, the car part is used for decoding,
8651and the cdr part is used for encoding.
8652If VAL is a function symbol, the function must return a coding system
8653or a cons of coding systems which are used as above.
8654
8655See also the function `find-operation-coding-system'. */);
02ba4723 8656 Vnetwork_coding_system_alist = Qnil;
4ed46869 8657
68c45bf0 8658 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8659 doc: /* Coding system to use with system messages.
8660Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8661 Vlocale_coding_system = Qnil;
8662
005f0d35 8663 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8664 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8665 doc: /*
8666*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8667 eol_mnemonic_unix = build_string (":");
4ed46869 8668
7722baf9 8669 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8670 doc: /*
8671*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8672 eol_mnemonic_dos = build_string ("\\");
4ed46869 8673
7722baf9 8674 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8675 doc: /*
8676*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8677 eol_mnemonic_mac = build_string ("/");
4ed46869 8678
7722baf9 8679 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8680 doc: /*
8681*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8682 eol_mnemonic_undecided = build_string (":");
4ed46869 8683
84fbb8a0 8684 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8685 doc: /*
8686*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8687 Venable_character_translation = Qt;
bdd9fb48 8688
f967223b 8689 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8690 &Vstandard_translation_table_for_decode,
8691 doc: /* Table for translating characters while decoding. */);
f967223b 8692 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8693
f967223b 8694 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8695 &Vstandard_translation_table_for_encode,
8696 doc: /* Table for translating characters while encoding. */);
f967223b 8697 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8698
df7492f9 8699 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8700 doc: /* Alist of charsets vs revision numbers.
8701While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8702designate it with the escape sequence identifying revision (cdr part
8703of the element). */);
8704 Vcharset_revision_table = Qnil;
02ba4723
KH
8705
8706 DEFVAR_LISP ("default-process-coding-system",
8707 &Vdefault_process_coding_system,
48b0f3ae
PJ
8708 doc: /* Cons of coding systems used for process I/O by default.
8709The car part is used for decoding a process output,
8710the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8711 Vdefault_process_coding_system = Qnil;
c4825358 8712
3f003981 8713 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8714 doc: /*
8715Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8716This is a vector of length 256.
8717If Nth element is non-nil, the existence of code N in a file
8718\(or output of subprocess) doesn't prevent it to be detected as
8719a coding system of ISO 2022 variant which has a flag
8720`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8721or reading output of a subprocess.
8722Only 128th through 159th elements have a meaning. */);
3f003981 8723 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8724
8725 DEFVAR_LISP ("select-safe-coding-system-function",
8726 &Vselect_safe_coding_system_function,
df7492f9
KH
8727 doc: /*
8728Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8729
8730If set, this function is called to force a user to select a proper
8731coding system which can encode the text in the case that a default
8732coding system used in each operation can't encode the text.
8733
8734The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8735 Vselect_safe_coding_system_function = Qnil;
8736
22ab2303 8737 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8738 &inhibit_iso_escape_detection,
df7492f9
KH
8739 doc: /*
8740If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8741
8742By default, on reading a file, Emacs tries to detect how the text is
8743encoded. This code detection is sensitive to escape sequences. If
8744the sequence is valid as ISO2022, the code is determined as one of
8745the ISO2022 encodings, and the file is decoded by the corresponding
8746coding system (e.g. `iso-2022-7bit').
8747
8748However, there may be a case that you want to read escape sequences in
8749a file as is. In such a case, you can set this variable to non-nil.
8750Then, as the code detection ignores any escape sequences, no file is
8751detected as encoded in some ISO2022 encoding. The result is that all
8752escape sequences become visible in a buffer.
8753
8754The default value is nil, and it is strongly recommended not to change
8755it. That is because many Emacs Lisp source files that contain
8756non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8757in Emacs's distribution, and they won't be decoded correctly on
8758reading if you suppress escape sequence detection.
8759
8760The other way to read escape sequences in a file without decoding is
8761to explicitly specify some coding system that doesn't use ISO2022's
8762escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8763 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8764
8765 {
8766 Lisp_Object args[coding_arg_max];
8767 Lisp_Object plist[14];
8768 int i;
8769
8770 for (i = 0; i < coding_arg_max; i++)
8771 args[i] = Qnil;
8772
8773 plist[0] = intern (":name");
8774 plist[1] = args[coding_arg_name] = Qno_conversion;
8775 plist[2] = intern (":mnemonic");
8776 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8777 plist[4] = intern (":coding-type");
8778 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8779 plist[6] = intern (":ascii-compatible-p");
8780 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8781 plist[8] = intern (":default-char");
8782 plist[9] = args[coding_arg_default_char] = make_number (0);
8783 plist[10] = intern (":docstring");
8784 plist[11] = build_string ("Do no conversion.\n\
8785\n\
8786When you visit a file with this coding, the file is read into a\n\
8787unibyte buffer as is, thus each byte of a file is treated as a\n\
8788character.");
8789 plist[12] = intern (":eol-type");
8790 plist[13] = args[coding_arg_eol_type] = Qunix;
8791 args[coding_arg_plist] = Flist (14, plist);
8792 Fdefine_coding_system_internal (coding_arg_max, args);
8793 }
8794
8795 setup_coding_system (Qno_conversion, &keyboard_coding);
8796 setup_coding_system (Qno_conversion, &terminal_coding);
8797 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8798}
8799
68c45bf0
PE
8800char *
8801emacs_strerror (error_number)
8802 int error_number;
8803{
8804 char *str;
8805
ca9c0567 8806 synchronize_system_messages_locale ();
68c45bf0
PE
8807 str = strerror (error_number);
8808
8809 if (! NILP (Vlocale_coding_system))
8810 {
8811 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8812 Vlocale_coding_system,
8813 0);
8814 str = (char *) XSTRING (dec)->data;
8815 }
8816
8817 return str;
8818}
8819
4ed46869 8820#endif /* emacs */