*** empty log message ***
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcharset_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
5a936b46 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
4ed46869 154#if 0
df7492f9 155static int
ff0dacd7 156detect_coding_XXX (coding, detect_info)
df7492f9 157 struct coding_system *coding;
ff0dacd7 158 struct coding_detection_info *detect_info;
4ed46869 159{
df7492f9
KH
160 unsigned char *src = coding->source;
161 unsigned char *src_end = coding->source + coding->src_bytes;
162 int multibytep = coding->src_multibyte;
ff0dacd7 163 int consumed_chars = 0;
df7492f9
KH
164 int found = 0;
165 ...;
166
167 while (1)
168 {
169 /* Get one byte from the source. If the souce is exausted, jump
170 to no_more_source:. */
171 ONE_MORE_BYTE (c);
ff0dacd7
KH
172
173 if (! __C_conforms_to_XXX___ (c))
174 break;
175 if (! __C_strongly_suggests_XXX__ (c))
176 found = CATEGORY_MASK_XXX;
df7492f9 177 }
ff0dacd7
KH
178 /* The byte sequence is invalid for XXX. */
179 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 180 return 0;
ff0dacd7 181
df7492f9 182 no_more_source:
ff0dacd7
KH
183 /* The source exausted successfully. */
184 detect_info->found |= found;
df7492f9 185 return 1;
4ed46869
KH
186}
187#endif
188
189/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
df7492f9
KH
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
d46c5b12 195
df7492f9
KH
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
d46c5b12 200
df7492f9 201 Below is the template of these functions. */
d46c5b12 202
4ed46869 203#if 0
b73bfc1c 204static void
df7492f9 205decode_coding_XXXX (coding)
4ed46869 206 struct coding_system *coding;
4ed46869 207{
df7492f9
KH
208 unsigned char *src = coding->source + coding->consumed;
209 unsigned char *src_end = coding->source + coding->src_bytes;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base;
214 /* A buffer to produce decoded characters. */
215 int *charbuf = coding->charbuf;
216 int *charbuf_end = charbuf + coding->charbuf_size;
217 int multibytep = coding->src_multibyte;
218
219 while (1)
220 {
221 src_base = src;
222 if (charbuf < charbuf_end)
223 /* No more room to produce a decoded character. */
224 break;
225 ONE_MORE_BYTE (c);
226 /* Decode it. */
227 }
228
229 no_more_source:
230 if (src_base < src_end
231 && coding->mode & CODING_MODE_LAST_BLOCK)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base < src_end && charbuf < charbuf_end)
235 *charbuf++ = *src_base++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding->consumed = coding->consumed_char = src_base - coding->source;
239 /* Remember how many characters we produced. */
240 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
241}
242#endif
243
244/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
df7492f9
KH
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
d46c5b12 250
df7492f9
KH
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 255
df7492f9
KH
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce a encoded text until it
258 reaches at the head of not-yet-encoded source text.
d46c5b12 259
df7492f9 260 Below is a template of these functions. */
4ed46869 261#if 0
b73bfc1c 262static void
df7492f9 263encode_coding_XXX (coding)
4ed46869 264 struct coding_system *coding;
4ed46869 265{
df7492f9
KH
266 int multibytep = coding->dst_multibyte;
267 int *charbuf = coding->charbuf;
268 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
269 unsigned char *dst = coding->destination + coding->produced;
270 unsigned char *dst_end = coding->destination + coding->dst_bytes;
271 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
272 int produced_chars = 0;
273
274 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
275 {
276 int c = *charbuf;
277 /* Encode C into DST, and increment DST. */
278 }
279 label_no_more_destination:
280 /* How many chars and bytes we produced. */
281 coding->produced_char += produced_chars;
282 coding->produced = dst - coding->destination;
4ed46869
KH
283}
284#endif
285
4ed46869
KH
286\f
287/*** 1. Preamble ***/
288
68c45bf0 289#include <config.h>
4ed46869
KH
290#include <stdio.h>
291
4ed46869
KH
292#include "lisp.h"
293#include "buffer.h"
df7492f9 294#include "character.h"
4ed46869
KH
295#include "charset.h"
296#include "ccl.h"
df7492f9 297#include "composite.h"
4ed46869
KH
298#include "coding.h"
299#include "window.h"
300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
304Lisp_Object Qunix, Qdos;
305extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
306Lisp_Object Qbuffer_file_coding_system;
307Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 308Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
df7492f9 310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 311Lisp_Object Qbig, Qlittle;
bb0115a2 312Lisp_Object Qcoding_system_history;
1397dc18 313Lisp_Object Qvalid_codes;
4ed46869
KH
314
315extern Lisp_Object Qinsert_file_contents, Qwrite_region;
316Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
317Lisp_Object Qstart_process, Qopen_network_stream;
318Lisp_Object Qtarget_idx;
319
d46c5b12
KH
320Lisp_Object Vselect_safe_coding_system_function;
321
7722baf9
EZ
322/* Mnemonic string for each format of end-of-line. */
323Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
324/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 325 decided. */
7722baf9 326Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
327
328#ifdef emacs
329
4608c386
KH
330Lisp_Object Vcoding_system_list, Vcoding_system_alist;
331
332Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 333
d46c5b12
KH
334/* Coding system emacs-mule and raw-text are for converting only
335 end-of-line format. */
336Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 337
4ed46869
KH
338/* Coding-systems are handed between Emacs Lisp programs and C internal
339 routines by the following three variables. */
340/* Coding-system for reading files and receiving data from process. */
341Lisp_Object Vcoding_system_for_read;
342/* Coding-system for writing files and sending data to process. */
343Lisp_Object Vcoding_system_for_write;
344/* Coding-system actually used in the latest I/O. */
345Lisp_Object Vlast_coding_system_used;
346
c4825358 347/* A vector of length 256 which contains information about special
94487c4e 348 Latin codes (especially for dealing with Microsoft codes). */
3f003981 349Lisp_Object Vlatin_extra_code_table;
c4825358 350
9ce27fde
KH
351/* Flag to inhibit code conversion of end-of-line format. */
352int inhibit_eol_conversion;
353
74383408
KH
354/* Flag to inhibit ISO2022 escape sequence detection. */
355int inhibit_iso_escape_detection;
356
ed29121d
EZ
357/* Flag to make buffer-file-coding-system inherit from process-coding. */
358int inherit_process_coding_system;
359
c4825358 360/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
361struct coding_system terminal_coding;
362
c4825358
KH
363/* Coding system to be used to encode text for terminal display when
364 terminal coding system is nil. */
365struct coding_system safe_terminal_coding;
366
367/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
368struct coding_system keyboard_coding;
369
02ba4723
KH
370Lisp_Object Vfile_coding_system_alist;
371Lisp_Object Vprocess_coding_system_alist;
372Lisp_Object Vnetwork_coding_system_alist;
4ed46869 373
68c45bf0
PE
374Lisp_Object Vlocale_coding_system;
375
4ed46869
KH
376#endif /* emacs */
377
f967223b
KH
378/* Flag to tell if we look up translation table on character code
379 conversion. */
84fbb8a0 380Lisp_Object Venable_character_translation;
f967223b
KH
381/* Standard translation table to look up on decoding (reading). */
382Lisp_Object Vstandard_translation_table_for_decode;
383/* Standard translation table to look up on encoding (writing). */
384Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 385
f967223b
KH
386Lisp_Object Qtranslation_table;
387Lisp_Object Qtranslation_table_id;
388Lisp_Object Qtranslation_table_for_decode;
389Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
390
391/* Alist of charsets vs revision number. */
df7492f9 392static Lisp_Object Vcharset_revision_table;
4ed46869 393
02ba4723
KH
394/* Default coding systems used for process I/O. */
395Lisp_Object Vdefault_process_coding_system;
396
b843d1ae
KH
397/* Global flag to tell that we can't call post-read-conversion and
398 pre-write-conversion functions. Usually the value is zero, but it
399 is set to 1 temporarily while such functions are running. This is
400 to avoid infinite recursive call. */
401static int inhibit_pre_post_conversion;
402
df7492f9
KH
403/* Two special coding systems. */
404Lisp_Object Vsjis_coding_system;
405Lisp_Object Vbig5_coding_system;
406
407
ff0dacd7
KH
408static int detect_coding_utf_8 P_ ((struct coding_system *,
409 struct coding_detection_info *info));
df7492f9
KH
410static void decode_coding_utf_8 P_ ((struct coding_system *));
411static int encode_coding_utf_8 P_ ((struct coding_system *));
412
ff0dacd7
KH
413static int detect_coding_utf_16 P_ ((struct coding_system *,
414 struct coding_detection_info *info));
df7492f9
KH
415static void decode_coding_utf_16 P_ ((struct coding_system *));
416static int encode_coding_utf_16 P_ ((struct coding_system *));
417
ff0dacd7
KH
418static int detect_coding_iso_2022 P_ ((struct coding_system *,
419 struct coding_detection_info *info));
df7492f9
KH
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
ff0dacd7
KH
423static int detect_coding_emacs_mule P_ ((struct coding_system *,
424 struct coding_detection_info *info));
df7492f9
KH
425static void decode_coding_emacs_mule P_ ((struct coding_system *));
426static int encode_coding_emacs_mule P_ ((struct coding_system *));
427
ff0dacd7
KH
428static int detect_coding_sjis P_ ((struct coding_system *,
429 struct coding_detection_info *info));
df7492f9
KH
430static void decode_coding_sjis P_ ((struct coding_system *));
431static int encode_coding_sjis P_ ((struct coding_system *));
432
ff0dacd7
KH
433static int detect_coding_big5 P_ ((struct coding_system *,
434 struct coding_detection_info *info));
df7492f9
KH
435static void decode_coding_big5 P_ ((struct coding_system *));
436static int encode_coding_big5 P_ ((struct coding_system *));
437
ff0dacd7
KH
438static int detect_coding_ccl P_ ((struct coding_system *,
439 struct coding_detection_info *info));
df7492f9
KH
440static void decode_coding_ccl P_ ((struct coding_system *));
441static int encode_coding_ccl P_ ((struct coding_system *));
442
443static void decode_coding_raw_text P_ ((struct coding_system *));
444static int encode_coding_raw_text P_ ((struct coding_system *));
445
446
447/* ISO2022 section */
448
449#define CODING_ISO_INITIAL(coding, reg) \
450 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
451 coding_attr_iso_initial), \
452 reg)))
453
454
455#define CODING_ISO_REQUEST(coding, charset_id) \
456 ((charset_id <= (coding)->max_charset_id \
457 ? (coding)->safe_charsets[charset_id] \
458 : -1))
459
460
461#define CODING_ISO_FLAGS(coding) \
462 ((coding)->spec.iso_2022.flags)
463#define CODING_ISO_DESIGNATION(coding, reg) \
464 ((coding)->spec.iso_2022.current_designation[reg])
465#define CODING_ISO_INVOCATION(coding, plane) \
466 ((coding)->spec.iso_2022.current_invocation[plane])
467#define CODING_ISO_SINGLE_SHIFTING(coding) \
468 ((coding)->spec.iso_2022.single_shifting)
469#define CODING_ISO_BOL(coding) \
470 ((coding)->spec.iso_2022.bol)
471#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
472 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
473
474/* Control characters of ISO2022. */
475 /* code */ /* function */
476#define ISO_CODE_LF 0x0A /* line-feed */
477#define ISO_CODE_CR 0x0D /* carriage-return */
478#define ISO_CODE_SO 0x0E /* shift-out */
479#define ISO_CODE_SI 0x0F /* shift-in */
480#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
481#define ISO_CODE_ESC 0x1B /* escape */
482#define ISO_CODE_SS2 0x8E /* single-shift-2 */
483#define ISO_CODE_SS3 0x8F /* single-shift-3 */
484#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
485
486/* All code (1-byte) of ISO2022 is classified into one of the
487 followings. */
488enum iso_code_class_type
489 {
490 ISO_control_0, /* Control codes in the range
491 0x00..0x1F and 0x7F, except for the
492 following 5 codes. */
493 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
494 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
495 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
496 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
497 ISO_escape, /* ISO_CODE_SO (0x1B) */
498 ISO_control_1, /* Control codes in the range
499 0x80..0x9F, except for the
500 following 3 codes. */
501 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
502 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
503 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
504 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
505 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
506 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
507 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
508 };
05e6f5dc 509
df7492f9
KH
510/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
511 `iso-flags' attribute of an iso2022 coding system. */
93dec019 512
df7492f9
KH
513/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
514 instead of the correct short-form sequence (e.g. ESC $ A). */
515#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 516
df7492f9
KH
517/* If set, reset graphic planes and registers at end-of-line to the
518 initial state. */
519#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 520
df7492f9
KH
521/* If set, reset graphic planes and registers before any control
522 characters to the initial state. */
523#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 524
df7492f9
KH
525/* If set, encode by 7-bit environment. */
526#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 527
df7492f9
KH
528/* If set, use locking-shift function. */
529#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 530
df7492f9
KH
531/* If set, use single-shift function. Overwrite
532 CODING_ISO_FLAG_LOCKING_SHIFT. */
533#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 534
df7492f9
KH
535/* If set, use designation escape sequence. */
536#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 537
df7492f9
KH
538/* If set, produce revision number sequence. */
539#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 540
df7492f9
KH
541/* If set, produce ISO6429's direction specifying sequence. */
542#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 543
df7492f9
KH
544/* If set, assume designation states are reset at beginning of line on
545 output. */
546#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 547
df7492f9
KH
548/* If set, designation sequence should be placed at beginning of line
549 on output. */
550#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 551
df7492f9
KH
552/* If set, do not encode unsafe charactes on output. */
553#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 554
df7492f9
KH
555/* If set, extra latin codes (128..159) are accepted as a valid code
556 on input. */
557#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 558
df7492f9 559#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 560
df7492f9 561#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 562
bf16eb23
KH
563#define CODING_ISO_FLAG_USE_ROMAN 0x8000
564
565#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
566
567#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 568
df7492f9
KH
569/* A character to be produced on output if encoding of the original
570 character is prohibited by CODING_ISO_FLAG_SAFE. */
571#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 572
aa72b389 573
df7492f9
KH
574/* UTF-16 section */
575#define CODING_UTF_16_BOM(coding) \
576 ((coding)->spec.utf_16.bom)
4ed46869 577
df7492f9
KH
578#define CODING_UTF_16_ENDIAN(coding) \
579 ((coding)->spec.utf_16.endian)
4ed46869 580
df7492f9
KH
581#define CODING_UTF_16_SURROGATE(coding) \
582 ((coding)->spec.utf_16.surrogate)
4ed46869 583
4ed46869 584
df7492f9
KH
585/* CCL section */
586#define CODING_CCL_DECODER(coding) \
587 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
588#define CODING_CCL_ENCODER(coding) \
589 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
590#define CODING_CCL_VALIDS(coding) \
591 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
592 ->data)
4ed46869 593
5a936b46 594/* Index for each coding category in `coding_categories' */
4ed46869 595
df7492f9
KH
596enum coding_category
597 {
598 coding_category_iso_7,
599 coding_category_iso_7_tight,
600 coding_category_iso_8_1,
601 coding_category_iso_8_2,
602 coding_category_iso_7_else,
603 coding_category_iso_8_else,
604 coding_category_utf_8,
605 coding_category_utf_16_auto,
606 coding_category_utf_16_be,
607 coding_category_utf_16_le,
608 coding_category_utf_16_be_nosig,
609 coding_category_utf_16_le_nosig,
610 coding_category_charset,
611 coding_category_sjis,
612 coding_category_big5,
613 coding_category_ccl,
614 coding_category_emacs_mule,
615 /* All above are targets of code detection. */
616 coding_category_raw_text,
617 coding_category_undecided,
618 coding_category_max
619 };
620
621/* Definitions of flag bits used in detect_coding_XXXX. */
622#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
623#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
624#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
625#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
626#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
627#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
628#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 629#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
630#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
631#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
632#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
633#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
634#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
635#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
636#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
637#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
638#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 639#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
640
641/* This value is returned if detect_coding_mask () find nothing other
642 than ASCII characters. */
643#define CATEGORY_MASK_ANY \
644 (CATEGORY_MASK_ISO_7 \
645 | CATEGORY_MASK_ISO_7_TIGHT \
646 | CATEGORY_MASK_ISO_8_1 \
647 | CATEGORY_MASK_ISO_8_2 \
648 | CATEGORY_MASK_ISO_7_ELSE \
649 | CATEGORY_MASK_ISO_8_ELSE \
650 | CATEGORY_MASK_UTF_8 \
651 | CATEGORY_MASK_UTF_16_BE \
652 | CATEGORY_MASK_UTF_16_LE \
653 | CATEGORY_MASK_UTF_16_BE_NOSIG \
654 | CATEGORY_MASK_UTF_16_LE_NOSIG \
655 | CATEGORY_MASK_CHARSET \
656 | CATEGORY_MASK_SJIS \
657 | CATEGORY_MASK_BIG5 \
658 | CATEGORY_MASK_CCL \
659 | CATEGORY_MASK_EMACS_MULE)
660
661
662#define CATEGORY_MASK_ISO_7BIT \
663 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
664
665#define CATEGORY_MASK_ISO_8BIT \
666 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
667
668#define CATEGORY_MASK_ISO_ELSE \
669 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
670
671#define CATEGORY_MASK_ISO_ESCAPE \
672 (CATEGORY_MASK_ISO_7 \
673 | CATEGORY_MASK_ISO_7_TIGHT \
674 | CATEGORY_MASK_ISO_7_ELSE \
675 | CATEGORY_MASK_ISO_8_ELSE)
676
677#define CATEGORY_MASK_ISO \
678 ( CATEGORY_MASK_ISO_7BIT \
679 | CATEGORY_MASK_ISO_8BIT \
680 | CATEGORY_MASK_ISO_ELSE)
681
682#define CATEGORY_MASK_UTF_16 \
683 (CATEGORY_MASK_UTF_16_BE \
684 | CATEGORY_MASK_UTF_16_LE \
685 | CATEGORY_MASK_UTF_16_BE_NOSIG \
686 | CATEGORY_MASK_UTF_16_LE_NOSIG)
687
688
689/* List of symbols `coding-category-xxx' ordered by priority. This
690 variable is exposed to Emacs Lisp. */
691static Lisp_Object Vcoding_category_list;
692
693/* Table of coding categories (Lisp symbols). This variable is for
694 internal use oly. */
695static Lisp_Object Vcoding_category_table;
696
697/* Table of coding-categories ordered by priority. */
698static enum coding_category coding_priorities[coding_category_max];
699
700/* Nth element is a coding context for the coding system bound to the
701 Nth coding category. */
702static struct coding_system coding_categories[coding_category_max];
703
df7492f9
KH
704/*** Commonly used macros and functions ***/
705
706#ifndef min
707#define min(a, b) ((a) < (b) ? (a) : (b))
708#endif
709#ifndef max
710#define max(a, b) ((a) > (b) ? (a) : (b))
711#endif
4ed46869 712
df7492f9
KH
713#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
714 do { \
715 attrs = CODING_ID_ATTRS (coding->id); \
716 eol_type = CODING_ID_EOL_TYPE (coding->id); \
717 if (VECTORP (eol_type)) \
718 eol_type = Qunix; \
719 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
720 } while (0)
4ed46869 721
4ed46869 722
df7492f9
KH
723/* Safely get one byte from the source text pointed by SRC which ends
724 at SRC_END, and set C to that byte. If there are not enough bytes
725 in the source, it jumps to `no_more_source'. The caller
726 should declare and set these variables appropriately in advance:
727 src, src_end, multibytep
728*/
aa72b389 729
df7492f9 730#define ONE_MORE_BYTE(c) \
aa72b389 731 do { \
df7492f9
KH
732 if (src == src_end) \
733 { \
734 if (src_base < src) \
735 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
736 goto no_more_source; \
737 } \
738 c = *src++; \
739 if (multibytep && (c & 0x80)) \
740 { \
741 if ((c & 0xFE) != 0xC0) \
742 error ("Undecodable char found"); \
743 c = ((c & 1) << 6) | *src++; \
744 } \
745 consumed_chars++; \
aa72b389
KH
746 } while (0)
747
aa72b389 748
df7492f9
KH
749#define ONE_MORE_BYTE_NO_CHECK(c) \
750 do { \
751 c = *src++; \
752 if (multibytep && (c & 0x80)) \
753 { \
754 if ((c & 0xFE) != 0xC0) \
755 error ("Undecodable char found"); \
756 c = ((c & 1) << 6) | *src++; \
757 } \
781d7a48 758 consumed_chars++; \
aa72b389
KH
759 } while (0)
760
aa72b389 761
df7492f9
KH
762/* Store a byte C in the place pointed by DST and increment DST to the
763 next free point, and increment PRODUCED_CHARS. The caller should
764 assure that C is 0..127, and declare and set the variable `dst'
765 appropriately in advance.
766*/
aa72b389
KH
767
768
df7492f9
KH
769#define EMIT_ONE_ASCII_BYTE(c) \
770 do { \
771 produced_chars++; \
772 *dst++ = (c); \
773 } while (0)
aa72b389 774
aa72b389 775
df7492f9 776/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
aa72b389 777
df7492f9
KH
778#define EMIT_TWO_ASCII_BYTES(c1, c2) \
779 do { \
780 produced_chars += 2; \
781 *dst++ = (c1), *dst++ = (c2); \
782 } while (0)
aa72b389 783
df7492f9
KH
784
785/* Store a byte C in the place pointed by DST and increment DST to the
786 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
787 nonzero, store in an appropriate multibyte from. The caller should
788 declare and set the variables `dst' and `multibytep' appropriately
789 in advance. */
790
791#define EMIT_ONE_BYTE(c) \
792 do { \
793 produced_chars++; \
794 if (multibytep) \
795 { \
796 int ch = (c); \
797 if (ch >= 0x80) \
798 ch = BYTE8_TO_CHAR (ch); \
799 CHAR_STRING_ADVANCE (ch, dst); \
800 } \
801 else \
802 *dst++ = (c); \
aa72b389
KH
803 } while (0)
804
805
df7492f9 806/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 807
e19c3639
KH
808#define EMIT_TWO_BYTES(c1, c2) \
809 do { \
810 produced_chars += 2; \
811 if (multibytep) \
812 { \
813 int ch; \
814 \
815 ch = (c1); \
816 if (ch >= 0x80) \
817 ch = BYTE8_TO_CHAR (ch); \
818 CHAR_STRING_ADVANCE (ch, dst); \
819 ch = (c2); \
820 if (ch >= 0x80) \
821 ch = BYTE8_TO_CHAR (ch); \
822 CHAR_STRING_ADVANCE (ch, dst); \
823 } \
824 else \
825 { \
826 *dst++ = (c1); \
827 *dst++ = (c2); \
828 } \
aa72b389
KH
829 } while (0)
830
831
df7492f9
KH
832#define EMIT_THREE_BYTES(c1, c2, c3) \
833 do { \
834 EMIT_ONE_BYTE (c1); \
835 EMIT_TWO_BYTES (c2, c3); \
836 } while (0)
aa72b389 837
aa72b389 838
df7492f9
KH
839#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
840 do { \
841 EMIT_TWO_BYTES (c1, c2); \
842 EMIT_TWO_BYTES (c3, c4); \
843 } while (0)
aa72b389 844
aa72b389 845
df7492f9
KH
846#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
847 do { \
848 charset_map_loaded = 0; \
849 c = DECODE_CHAR (charset, code); \
850 if (charset_map_loaded) \
851 { \
852 unsigned char *orig = coding->source; \
853 EMACS_INT offset; \
854 \
855 coding_set_source (coding); \
856 offset = coding->source - orig; \
857 src += offset; \
858 src_base += offset; \
859 src_end += offset; \
860 } \
861 } while (0)
aa72b389 862
aa72b389 863
df7492f9
KH
864#define ASSURE_DESTINATION(bytes) \
865 do { \
866 if (dst + (bytes) >= dst_end) \
867 { \
868 int more_bytes = charbuf_end - charbuf + (bytes); \
869 \
870 dst = alloc_destination (coding, more_bytes, dst); \
871 dst_end = coding->destination + coding->dst_bytes; \
872 } \
873 } while (0)
b1887814 874
df7492f9
KH
875
876
877static void
878coding_set_source (coding)
879 struct coding_system *coding;
880{
881 if (BUFFERP (coding->src_object))
882 {
2cb26057
KH
883 struct buffer *buf = XBUFFER (coding->src_object);
884
df7492f9 885 if (coding->src_pos < 0)
2cb26057 886 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 887 else
2cb26057 888 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 889 }
df7492f9 890 else if (STRINGP (coding->src_object))
aa72b389 891 {
df7492f9
KH
892 coding->source = (XSTRING (coding->src_object)->data
893 + coding->src_pos_byte);
894 }
895 else
896 /* Otherwise, the source is C string and is never relocated
897 automatically. Thus we don't have to update anything. */
898 ;
899}
900
901static void
902coding_set_destination (coding)
903 struct coding_system *coding;
904{
905 if (BUFFERP (coding->dst_object))
906 {
df7492f9 907 if (coding->src_pos < 0)
28f67a95
KH
908 {
909 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
910 coding->dst_bytes = (GAP_END_ADDR
911 - (coding->src_bytes - coding->consumed)
912 - coding->destination);
913 }
df7492f9 914 else
28f67a95
KH
915 {
916 /* We are sure that coding->dst_pos_byte is before the gap
917 of the buffer. */
918 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
919 + coding->dst_pos_byte - 1);
920 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
921 - coding->destination);
922 }
df7492f9
KH
923 }
924 else
925 /* Otherwise, the destination is C string and is never relocated
926 automatically. Thus we don't have to update anything. */
927 ;
928}
929
930
931static void
932coding_alloc_by_realloc (coding, bytes)
933 struct coding_system *coding;
934 EMACS_INT bytes;
935{
936 coding->destination = (unsigned char *) xrealloc (coding->destination,
937 coding->dst_bytes + bytes);
938 coding->dst_bytes += bytes;
939}
940
941static void
942coding_alloc_by_making_gap (coding, bytes)
943 struct coding_system *coding;
944 EMACS_INT bytes;
945{
2c78b7e1
KH
946 if (BUFFERP (coding->dst_object)
947 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
948 {
949 EMACS_INT add = coding->src_bytes - coding->consumed;
950
951 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
952 make_gap (bytes);
953 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
954 }
955 else
956 {
2c78b7e1
KH
957 Lisp_Object this_buffer;
958
959 this_buffer = Fcurrent_buffer ();
df7492f9
KH
960 set_buffer_internal (XBUFFER (coding->dst_object));
961 make_gap (bytes);
962 set_buffer_internal (XBUFFER (this_buffer));
963 }
964}
965
966
967static unsigned char *
968alloc_destination (coding, nbytes, dst)
969 struct coding_system *coding;
970 int nbytes;
971 unsigned char *dst;
972{
973 EMACS_INT offset = dst - coding->destination;
974
975 if (BUFFERP (coding->dst_object))
976 coding_alloc_by_making_gap (coding, nbytes);
977 else
978 coding_alloc_by_realloc (coding, nbytes);
979 coding->result = CODING_RESULT_SUCCESS;
980 coding_set_destination (coding);
981 dst = coding->destination + offset;
982 return dst;
983}
aa72b389 984
ff0dacd7
KH
985/** Macros for annotations. */
986
987/* Maximum length of annotation data (sum of annotations for
988 composition and charset). */
989#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
990
991/* An annotation data is stored in the array coding->charbuf in this
992 format:
993 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
994 LENGTH is the number of elements in the annotation.
995 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
996 FROM and TO specify the range of text annotated. They are relative
997 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
998
999 The format of the following elements depend on ANNOTATION_MASK.
1000
1001 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1002 follows:
1003 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1004 METHOD is one of enum composition_method.
1005 Optionnal COMPOSITION-COMPONENTS are characters and composition
1006 rules.
1007
1008 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1009 follows. */
1010
1011#define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
1012 do { \
1013 *(buf)++ = -(len); \
1014 *(buf)++ = (mask); \
1015 *(buf)++ = (from); \
1016 *(buf)++ = (to); \
1017 coding->annotated = 1; \
1018 } while (0);
1019
1020#define ADD_COMPOSITION_DATA(buf, from, to, method) \
1021 do { \
1022 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1023 *buf++ = method; \
1024 } while (0)
1025
1026
1027#define ADD_CHARSET_DATA(buf, from, to, id) \
1028 do { \
1029 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1030 *buf++ = id; \
1031 } while (0)
1032
df7492f9
KH
1033\f
1034/*** 2. Emacs' internal format (emacs-utf-8) ***/
1035
1036
1037
1038\f
1039/*** 3. UTF-8 ***/
1040
1041/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1042 Check if a text is encoded in UTF-8. If it is, return 1, else
1043 return 0. */
df7492f9
KH
1044
1045#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1046#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1047#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1048#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1049#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1050#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1051
1052static int
ff0dacd7 1053detect_coding_utf_8 (coding, detect_info)
df7492f9 1054 struct coding_system *coding;
ff0dacd7 1055 struct coding_detection_info *detect_info;
df7492f9
KH
1056{
1057 unsigned char *src = coding->source, *src_base = src;
1058 unsigned char *src_end = coding->source + coding->src_bytes;
1059 int multibytep = coding->src_multibyte;
1060 int consumed_chars = 0;
1061 int found = 0;
89528eb3 1062 int incomplete;
df7492f9 1063
ff0dacd7 1064 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1065 /* A coding system of this category is always ASCII compatible. */
1066 src += coding->head_ascii;
1067
1068 while (1)
1069 {
1070 int c, c1, c2, c3, c4;
1071
89528eb3 1072 incomplete = 0;
df7492f9
KH
1073 ONE_MORE_BYTE (c);
1074 if (UTF_8_1_OCTET_P (c))
1075 continue;
89528eb3 1076 incomplete = 1;
df7492f9
KH
1077 ONE_MORE_BYTE (c1);
1078 if (! UTF_8_EXTRA_OCTET_P (c1))
1079 break;
1080 if (UTF_8_2_OCTET_LEADING_P (c))
1081 {
ff0dacd7 1082 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1083 continue;
1084 }
1085 ONE_MORE_BYTE (c2);
1086 if (! UTF_8_EXTRA_OCTET_P (c2))
1087 break;
1088 if (UTF_8_3_OCTET_LEADING_P (c))
1089 {
ff0dacd7 1090 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1091 continue;
1092 }
1093 ONE_MORE_BYTE (c3);
1094 if (! UTF_8_EXTRA_OCTET_P (c3))
1095 break;
1096 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1097 {
ff0dacd7 1098 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1099 continue;
1100 }
1101 ONE_MORE_BYTE (c4);
1102 if (! UTF_8_EXTRA_OCTET_P (c4))
1103 break;
1104 if (UTF_8_5_OCTET_LEADING_P (c))
1105 {
ff0dacd7 1106 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1107 continue;
1108 }
1109 break;
1110 }
ff0dacd7 1111 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1112 return 0;
1113
1114 no_more_source:
89528eb3
KH
1115 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1116 {
ff0dacd7 1117 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3
KH
1118 return 0;
1119 }
ff0dacd7
KH
1120 detect_info->found |= found;
1121 return 1;
df7492f9
KH
1122}
1123
1124
1125static void
1126decode_coding_utf_8 (coding)
1127 struct coding_system *coding;
1128{
1129 unsigned char *src = coding->source + coding->consumed;
1130 unsigned char *src_end = coding->source + coding->src_bytes;
1131 unsigned char *src_base;
1132 int *charbuf = coding->charbuf;
1133 int *charbuf_end = charbuf + coding->charbuf_size;
1134 int consumed_chars = 0, consumed_chars_base;
1135 int multibytep = coding->src_multibyte;
1136 Lisp_Object attr, eol_type, charset_list;
1137
1138 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1139
1140 while (1)
1141 {
1142 int c, c1, c2, c3, c4, c5;
1143
1144 src_base = src;
1145 consumed_chars_base = consumed_chars;
1146
1147 if (charbuf >= charbuf_end)
1148 break;
1149
1150 ONE_MORE_BYTE (c1);
1151 if (UTF_8_1_OCTET_P(c1))
1152 {
1153 c = c1;
1154 if (c == '\r')
aa72b389 1155 {
df7492f9
KH
1156 if (EQ (eol_type, Qdos))
1157 {
1158 if (src == src_end)
98725083
KH
1159 {
1160 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1161 goto no_more_source;
1162 }
df7492f9
KH
1163 if (*src == '\n')
1164 ONE_MORE_BYTE (c);
1165 }
1166 else if (EQ (eol_type, Qmac))
1167 c = '\n';
aa72b389 1168 }
aa72b389 1169 }
df7492f9 1170 else
aa72b389 1171 {
df7492f9
KH
1172 ONE_MORE_BYTE (c2);
1173 if (! UTF_8_EXTRA_OCTET_P (c2))
1174 goto invalid_code;
1175 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1176 {
1177 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1178 /* Reject overlong sequences here and below. Encoders
1179 producing them are incorrect, they can be misleading,
1180 and they mess up read/write invariance. */
1181 if (c < 128)
1182 goto invalid_code;
1183 }
df7492f9 1184 else
aa72b389 1185 {
df7492f9
KH
1186 ONE_MORE_BYTE (c3);
1187 if (! UTF_8_EXTRA_OCTET_P (c3))
1188 goto invalid_code;
1189 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1190 {
1191 c = (((c1 & 0xF) << 12)
1192 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1193 if (c < 0x800
1194 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1195 goto invalid_code;
1196 }
df7492f9
KH
1197 else
1198 {
1199 ONE_MORE_BYTE (c4);
1200 if (! UTF_8_EXTRA_OCTET_P (c4))
1201 goto invalid_code;
1202 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1203 {
df7492f9
KH
1204 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1205 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1206 if (c < 0x10000)
1207 goto invalid_code;
1208 }
df7492f9
KH
1209 else
1210 {
1211 ONE_MORE_BYTE (c5);
1212 if (! UTF_8_EXTRA_OCTET_P (c5))
1213 goto invalid_code;
1214 if (UTF_8_5_OCTET_LEADING_P (c1))
1215 {
1216 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1217 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1218 | (c5 & 0x3F));
b0edb2c5 1219 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1220 goto invalid_code;
1221 }
1222 else
1223 goto invalid_code;
1224 }
1225 }
aa72b389 1226 }
aa72b389 1227 }
df7492f9
KH
1228
1229 *charbuf++ = c;
1230 continue;
1231
1232 invalid_code:
1233 src = src_base;
1234 consumed_chars = consumed_chars_base;
1235 ONE_MORE_BYTE (c);
1236 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1237 coding->errors++;
aa72b389
KH
1238 }
1239
df7492f9
KH
1240 no_more_source:
1241 coding->consumed_char += consumed_chars_base;
1242 coding->consumed = src_base - coding->source;
1243 coding->charbuf_used = charbuf - coding->charbuf;
1244}
1245
1246
1247static int
1248encode_coding_utf_8 (coding)
1249 struct coding_system *coding;
1250{
1251 int multibytep = coding->dst_multibyte;
1252 int *charbuf = coding->charbuf;
1253 int *charbuf_end = charbuf + coding->charbuf_used;
1254 unsigned char *dst = coding->destination + coding->produced;
1255 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1256 int produced_chars = 0;
df7492f9
KH
1257 int c;
1258
1259 if (multibytep)
aa72b389 1260 {
df7492f9
KH
1261 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1262
1263 while (charbuf < charbuf_end)
aa72b389 1264 {
df7492f9
KH
1265 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1266
1267 ASSURE_DESTINATION (safe_room);
1268 c = *charbuf++;
28f67a95
KH
1269 if (CHAR_BYTE8_P (c))
1270 {
1271 c = CHAR_TO_BYTE8 (c);
1272 EMIT_ONE_BYTE (c);
1273 }
1274 else
1275 {
1276 CHAR_STRING_ADVANCE (c, pend);
1277 for (p = str; p < pend; p++)
1278 EMIT_ONE_BYTE (*p);
1279 }
aa72b389 1280 }
aa72b389 1281 }
df7492f9
KH
1282 else
1283 {
1284 int safe_room = MAX_MULTIBYTE_LENGTH;
1285
1286 while (charbuf < charbuf_end)
1287 {
1288 ASSURE_DESTINATION (safe_room);
1289 c = *charbuf++;
1290 dst += CHAR_STRING (c, dst);
1291 produced_chars++;
1292 }
1293 }
1294 coding->result = CODING_RESULT_SUCCESS;
1295 coding->produced_char += produced_chars;
1296 coding->produced = dst - coding->destination;
1297 return 0;
aa72b389
KH
1298}
1299
4ed46869 1300
df7492f9 1301/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1302 Check if a text is encoded in one of UTF-16 based coding systems.
1303 If it is, return 1, else return 0. */
df7492f9
KH
1304
1305#define UTF_16_HIGH_SURROGATE_P(val) \
1306 (((val) & 0xFC00) == 0xD800)
1307
1308#define UTF_16_LOW_SURROGATE_P(val) \
1309 (((val) & 0xFC00) == 0xDC00)
1310
1311#define UTF_16_INVALID_P(val) \
1312 (((val) == 0xFFFE) \
1313 || ((val) == 0xFFFF) \
1314 || UTF_16_LOW_SURROGATE_P (val))
1315
1316
1317static int
ff0dacd7 1318detect_coding_utf_16 (coding, detect_info)
b73bfc1c 1319 struct coding_system *coding;
ff0dacd7 1320 struct coding_detection_info *detect_info;
b73bfc1c 1321{
df7492f9
KH
1322 unsigned char *src = coding->source, *src_base = src;
1323 unsigned char *src_end = coding->source + coding->src_bytes;
1324 int multibytep = coding->src_multibyte;
1325 int consumed_chars = 0;
1326 int c1, c2;
1327
ff0dacd7 1328 detect_info->checked |= CATEGORY_MASK_UTF_16;
89528eb3 1329
ff0dacd7
KH
1330 if (coding->mode & CODING_MODE_LAST_BLOCK
1331 && (coding->src_bytes & 1))
1332 {
1333 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1334 return 0;
1335 }
df7492f9
KH
1336 ONE_MORE_BYTE (c1);
1337 ONE_MORE_BYTE (c2);
4ed46869 1338
df7492f9 1339 if ((c1 == 0xFF) && (c2 == 0xFE))
ff0dacd7 1340 {
b49a1807
KH
1341 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1342 | CATEGORY_MASK_UTF_16_AUTO);
ff0dacd7
KH
1343 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1344 }
df7492f9 1345 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1346 {
b49a1807
KH
1347 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1348 | CATEGORY_MASK_UTF_16_AUTO);
ff0dacd7
KH
1349 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1350 }
df7492f9 1351 no_more_source:
ff0dacd7 1352 return 1;
df7492f9 1353}
ec6d2bb8 1354
df7492f9
KH
1355static void
1356decode_coding_utf_16 (coding)
1357 struct coding_system *coding;
1358{
1359 unsigned char *src = coding->source + coding->consumed;
1360 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1361 unsigned char *src_base;
df7492f9
KH
1362 int *charbuf = coding->charbuf;
1363 int *charbuf_end = charbuf + coding->charbuf_size;
1364 int consumed_chars = 0, consumed_chars_base;
1365 int multibytep = coding->src_multibyte;
1366 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1367 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1368 int surrogate = CODING_UTF_16_SURROGATE (coding);
1369 Lisp_Object attr, eol_type, charset_list;
1370
1371 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1372
b49a1807 1373 if (bom == utf_16_with_bom)
df7492f9
KH
1374 {
1375 int c, c1, c2;
4af310db 1376
df7492f9
KH
1377 src_base = src;
1378 ONE_MORE_BYTE (c1);
1379 ONE_MORE_BYTE (c2);
e19c3639 1380 c = (c1 << 8) | c2;
b49a1807
KH
1381
1382 if (endian == utf_16_big_endian
1383 ? c != 0xFEFF : c != 0xFFFE)
4af310db 1384 {
b49a1807
KH
1385 /* The first two bytes are not BOM. Treat them as bytes
1386 for a normal character. */
1387 src = src_base;
1388 coding->errors++;
4af310db 1389 }
b49a1807
KH
1390 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1391 }
1392 else if (bom == utf_16_detect_bom)
1393 {
1394 /* We have already tried to detect BOM and failed in
1395 detect_coding. */
1396 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9
KH
1397 }
1398
1399 while (1)
1400 {
1401 int c, c1, c2;
1402
1403 src_base = src;
1404 consumed_chars_base = consumed_chars;
1405
1406 if (charbuf + 2 >= charbuf_end)
1407 break;
1408
1409 ONE_MORE_BYTE (c1);
1410 ONE_MORE_BYTE (c2);
1411 c = (endian == utf_16_big_endian
e19c3639 1412 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1413 if (surrogate)
aa72b389 1414 {
df7492f9 1415 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1416 {
df7492f9
KH
1417 if (endian == utf_16_big_endian)
1418 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1419 else
1420 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1421 *charbuf++ = c1;
1422 *charbuf++ = c2;
1423 coding->errors++;
1424 if (UTF_16_HIGH_SURROGATE_P (c))
1425 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1426 else
1427 *charbuf++ = c;
aa72b389 1428 }
df7492f9
KH
1429 else
1430 {
1431 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1432 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1433 *charbuf++ = c;
1434 }
1435 }
1436 else
1437 {
1438 if (UTF_16_HIGH_SURROGATE_P (c))
1439 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1440 else
1441 *charbuf++ = c;
1442 }
1443 }
1444
1445 no_more_source:
1446 coding->consumed_char += consumed_chars_base;
1447 coding->consumed = src_base - coding->source;
1448 coding->charbuf_used = charbuf - coding->charbuf;
1449}
1450
1451static int
1452encode_coding_utf_16 (coding)
1453 struct coding_system *coding;
1454{
1455 int multibytep = coding->dst_multibyte;
1456 int *charbuf = coding->charbuf;
1457 int *charbuf_end = charbuf + coding->charbuf_used;
1458 unsigned char *dst = coding->destination + coding->produced;
1459 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1460 int safe_room = 8;
1461 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1462 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1463 int produced_chars = 0;
1464 Lisp_Object attrs, eol_type, charset_list;
1465 int c;
1466
1467 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1468
b49a1807 1469 if (bom != utf_16_without_bom)
df7492f9
KH
1470 {
1471 ASSURE_DESTINATION (safe_room);
1472 if (big_endian)
df7492f9 1473 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1474 else
1475 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1476 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1477 }
1478
1479 while (charbuf < charbuf_end)
1480 {
1481 ASSURE_DESTINATION (safe_room);
1482 c = *charbuf++;
e19c3639
KH
1483 if (c >= MAX_UNICODE_CHAR)
1484 c = coding->default_char;
df7492f9
KH
1485
1486 if (c < 0x10000)
1487 {
1488 if (big_endian)
1489 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1490 else
1491 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1492 }
1493 else
1494 {
1495 int c1, c2;
1496
1497 c -= 0x10000;
1498 c1 = (c >> 10) + 0xD800;
1499 c2 = (c & 0x3FF) + 0xDC00;
1500 if (big_endian)
1501 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1502 else
1503 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1504 }
1505 }
1506 coding->result = CODING_RESULT_SUCCESS;
1507 coding->produced = dst - coding->destination;
1508 coding->produced_char += produced_chars;
1509 return 0;
1510}
1511
1512\f
1513/*** 6. Old Emacs' internal format (emacs-mule) ***/
1514
1515/* Emacs' internal format for representation of multiple character
1516 sets is a kind of multi-byte encoding, i.e. characters are
1517 represented by variable-length sequences of one-byte codes.
1518
1519 ASCII characters and control characters (e.g. `tab', `newline') are
1520 represented by one-byte sequences which are their ASCII codes, in
1521 the range 0x00 through 0x7F.
1522
1523 8-bit characters of the range 0x80..0x9F are represented by
1524 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1525 code + 0x20).
1526
1527 8-bit characters of the range 0xA0..0xFF are represented by
1528 one-byte sequences which are their 8-bit code.
1529
1530 The other characters are represented by a sequence of `base
1531 leading-code', optional `extended leading-code', and one or two
1532 `position-code's. The length of the sequence is determined by the
1533 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1534 whereas extended leading-code and position-code take the range 0xA0
1535 through 0xFF. See `charset.h' for more details about leading-code
1536 and position-code.
1537
1538 --- CODE RANGE of Emacs' internal format ---
1539 character set range
1540 ------------- -----
1541 ascii 0x00..0x7F
1542 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1543 eight-bit-graphic 0xA0..0xBF
1544 ELSE 0x81..0x9D + [0xA0..0xFF]+
1545 ---------------------------------------------
1546
1547 As this is the internal character representation, the format is
1548 usually not used externally (i.e. in a file or in a data sent to a
1549 process). But, it is possible to have a text externally in this
1550 format (i.e. by encoding by the coding system `emacs-mule').
1551
1552 In that case, a sequence of one-byte codes has a slightly different
1553 form.
1554
1555 At first, all characters in eight-bit-control are represented by
1556 one-byte sequences which are their 8-bit code.
1557
1558 Next, character composition data are represented by the byte
1559 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1560 where,
1561 METHOD is 0xF0 plus one of composition method (enum
1562 composition_method),
1563
1564 BYTES is 0xA0 plus a byte length of this composition data,
1565
1566 CHARS is 0x20 plus a number of characters composed by this
1567 data,
1568
1569 COMPONENTs are characters of multibye form or composition
1570 rules encoded by two-byte of ASCII codes.
1571
1572 In addition, for backward compatibility, the following formats are
1573 also recognized as composition data on decoding.
1574
1575 0x80 MSEQ ...
1576 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1577
1578 Here,
1579 MSEQ is a multibyte form but in these special format:
1580 ASCII: 0xA0 ASCII_CODE+0x80,
1581 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1582 RULE is a one byte code of the range 0xA0..0xF0 that
1583 represents a composition rule.
1584 */
1585
1586char emacs_mule_bytes[256];
1587
df7492f9 1588int
ff0dacd7 1589emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1590 struct coding_system *coding;
781d7a48 1591 unsigned char *src;
ff0dacd7 1592 int *nbytes, *nchars, *id;
df7492f9 1593{
df7492f9
KH
1594 unsigned char *src_end = coding->source + coding->src_bytes;
1595 int multibytep = coding->src_multibyte;
1596 unsigned char *src_base = src;
1597 struct charset *charset;
1598 unsigned code;
1599 int c;
1600 int consumed_chars = 0;
1601
1602 ONE_MORE_BYTE (c);
df7492f9
KH
1603 switch (emacs_mule_bytes[c])
1604 {
1605 case 2:
1606 if (! (charset = emacs_mule_charset[c]))
1607 goto invalid_code;
1608 ONE_MORE_BYTE (c);
1609 code = c & 0x7F;
1610 break;
1611
1612 case 3:
7c78e542
KH
1613 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1614 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
b73bfc1c 1615 {
df7492f9
KH
1616 ONE_MORE_BYTE (c);
1617 if (! (charset = emacs_mule_charset[c]))
1618 goto invalid_code;
1619 ONE_MORE_BYTE (c);
1620 code = c & 0x7F;
b73bfc1c
KH
1621 }
1622 else
1623 {
df7492f9
KH
1624 if (! (charset = emacs_mule_charset[c]))
1625 goto invalid_code;
1626 ONE_MORE_BYTE (c);
781d7a48 1627 code = (c & 0x7F) << 8;
df7492f9
KH
1628 ONE_MORE_BYTE (c);
1629 code |= c & 0x7F;
1630 }
1631 break;
1632
1633 case 4:
781d7a48 1634 ONE_MORE_BYTE (c);
df7492f9
KH
1635 if (! (charset = emacs_mule_charset[c]))
1636 goto invalid_code;
1637 ONE_MORE_BYTE (c);
781d7a48 1638 code = (c & 0x7F) << 8;
df7492f9
KH
1639 ONE_MORE_BYTE (c);
1640 code |= c & 0x7F;
1641 break;
1642
1643 case 1:
1644 code = c;
9d123124
KH
1645 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1646 ? charset_ascii : charset_eight_bit);
df7492f9
KH
1647 break;
1648
1649 default:
1650 abort ();
1651 }
1652 c = DECODE_CHAR (charset, code);
1653 if (c < 0)
1654 goto invalid_code;
1655 *nbytes = src - src_base;
1656 *nchars = consumed_chars;
ff0dacd7
KH
1657 if (id)
1658 *id = charset->id;
df7492f9
KH
1659 return c;
1660
1661 no_more_source:
1662 return -2;
1663
1664 invalid_code:
1665 return -1;
1666}
1667
1668
1669/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1670 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1671 else return 0. */
df7492f9
KH
1672
1673static int
ff0dacd7 1674detect_coding_emacs_mule (coding, detect_info)
df7492f9 1675 struct coding_system *coding;
ff0dacd7 1676 struct coding_detection_info *detect_info;
df7492f9
KH
1677{
1678 unsigned char *src = coding->source, *src_base = src;
1679 unsigned char *src_end = coding->source + coding->src_bytes;
1680 int multibytep = coding->src_multibyte;
1681 int consumed_chars = 0;
1682 int c;
1683 int found = 0;
89528eb3 1684 int incomplete;
df7492f9 1685
ff0dacd7 1686 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1687 /* A coding system of this category is always ASCII compatible. */
1688 src += coding->head_ascii;
1689
1690 while (1)
1691 {
89528eb3 1692 incomplete = 0;
df7492f9 1693 ONE_MORE_BYTE (c);
89528eb3 1694 incomplete = 1;
df7492f9
KH
1695
1696 if (c == 0x80)
1697 {
1698 /* Perhaps the start of composite character. We simple skip
1699 it because analyzing it is too heavy for detecting. But,
1700 at least, we check that the composite character
1701 constitues of more than 4 bytes. */
1702 unsigned char *src_base;
1703
1704 repeat:
1705 src_base = src;
1706 do
1707 {
1708 ONE_MORE_BYTE (c);
1709 }
1710 while (c >= 0xA0);
1711
1712 if (src - src_base <= 4)
1713 break;
ff0dacd7 1714 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1715 if (c == 0x80)
1716 goto repeat;
b73bfc1c 1717 }
df7492f9
KH
1718
1719 if (c < 0x80)
b73bfc1c 1720 {
df7492f9
KH
1721 if (c < 0x20
1722 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1723 break;
1724 }
1725 else
1726 {
1727 unsigned char *src_base = src - 1;
1728
1729 do
1730 {
1731 ONE_MORE_BYTE (c);
1732 }
1733 while (c >= 0xA0);
1734 if (src - src_base != emacs_mule_bytes[*src_base])
1735 break;
ff0dacd7 1736 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1737 }
1738 }
ff0dacd7 1739 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1740 return 0;
1741
1742 no_more_source:
89528eb3
KH
1743 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1744 {
ff0dacd7 1745 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1746 return 0;
1747 }
ff0dacd7
KH
1748 detect_info->found |= found;
1749 return 1;
4ed46869
KH
1750}
1751
b73bfc1c 1752
df7492f9
KH
1753/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1754
1755/* Decode a character represented as a component of composition
1756 sequence of Emacs 20/21 style at SRC. Set C to that character and
1757 update SRC to the head of next character (or an encoded composition
1758 rule). If SRC doesn't points a composition component, set C to -1.
1759 If SRC points an invalid byte sequence, global exit by a return
1760 value 0. */
1761
1762#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1763 if (1) \
1764 { \
1765 int c; \
1766 int nbytes, nchars; \
1767 \
1768 if (src == src_end) \
1769 break; \
ff0dacd7 1770 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
df7492f9
KH
1771 if (c < 0) \
1772 { \
1773 if (c == -2) \
1774 break; \
1775 goto invalid_code; \
1776 } \
1777 *buf++ = c; \
1778 src += nbytes; \
1779 consumed_chars += nchars; \
1780 } \
1781 else
1782
1783
1784/* Decode a composition rule represented as a component of composition
781d7a48
KH
1785 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1786 and increment BUF. If SRC points an invalid byte sequence, set C
1787 to -1. */
df7492f9 1788
781d7a48 1789#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
1790 do { \
1791 int c, gref, nref; \
1792 \
781d7a48 1793 if (src >= src_end) \
df7492f9
KH
1794 goto invalid_code; \
1795 ONE_MORE_BYTE_NO_CHECK (c); \
781d7a48 1796 c -= 0x20; \
df7492f9
KH
1797 if (c < 0 || c >= 81) \
1798 goto invalid_code; \
1799 \
1800 gref = c / 9, nref = c % 9; \
1801 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1802 } while (0)
1803
1804
781d7a48
KH
1805/* Decode a composition rule represented as a component of composition
1806 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1807 and increment BUF. If SRC points an invalid byte sequence, set C
1808 to -1. */
1809
1810#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1811 do { \
1812 int gref, nref; \
1813 \
1814 if (src + 1>= src_end) \
1815 goto invalid_code; \
1816 ONE_MORE_BYTE_NO_CHECK (gref); \
1817 gref -= 0x20; \
1818 ONE_MORE_BYTE_NO_CHECK (nref); \
1819 nref -= 0x20; \
1820 if (gref < 0 || gref >= 81 \
1821 || nref < 0 || nref >= 81) \
1822 goto invalid_code; \
1823 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1824 } while (0)
1825
1826
df7492f9 1827#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 1828 do { \
df7492f9 1829 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 1830 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
1831 the byte length of this composition information, CHARS is the \
1832 number of characters composed by this composition. */ \
781d7a48
KH
1833 enum composition_method method = c - 0xF2; \
1834 int *charbuf_base = charbuf; \
ff0dacd7 1835 int from, to; \
df7492f9
KH
1836 int consumed_chars_limit; \
1837 int nbytes, nchars; \
1838 \
1839 ONE_MORE_BYTE (c); \
1840 nbytes = c - 0xA0; \
1841 if (nbytes < 3) \
1842 goto invalid_code; \
1843 ONE_MORE_BYTE (c); \
1844 nchars = c - 0xA0; \
ff0dacd7
KH
1845 from = coding->produced + char_offset; \
1846 to = from + nchars; \
1847 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
df7492f9
KH
1848 consumed_chars_limit = consumed_chars_base + nbytes; \
1849 if (method != COMPOSITION_RELATIVE) \
aa72b389 1850 { \
df7492f9
KH
1851 int i = 0; \
1852 while (consumed_chars < consumed_chars_limit) \
aa72b389 1853 { \
df7492f9 1854 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 1855 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
1856 else \
1857 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 1858 i++; \
aa72b389 1859 } \
df7492f9
KH
1860 if (consumed_chars < consumed_chars_limit) \
1861 goto invalid_code; \
781d7a48 1862 charbuf_base[0] -= i; \
aa72b389
KH
1863 } \
1864 } while (0)
93dec019 1865
aa72b389 1866
df7492f9
KH
1867#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1868 do { \
1869 /* Emacs 20 style format for relative composition. */ \
1870 /* Store multibyte form of characters to be composed. */ \
ff0dacd7 1871 enum composition_method method = COMPOSITION_RELATIVE; \
df7492f9
KH
1872 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1873 int *buf = components; \
1874 int i, j; \
ff0dacd7 1875 int from, to; \
df7492f9
KH
1876 \
1877 src = src_base; \
1878 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1879 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1880 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1881 if (i < 2) \
1882 goto invalid_code; \
ff0dacd7
KH
1883 from = coding->produced_char + char_offset; \
1884 to = from + i; \
1885 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
df7492f9
KH
1886 for (j = 0; j < i; j++) \
1887 *charbuf++ = components[j]; \
1888 } while (0)
1889
1890
1891#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1892 do { \
1893 /* Emacs 20 style format for rule-base composition. */ \
1894 /* Store multibyte form of characters to be composed. */ \
ff0dacd7 1895 enum composition_method method = COMPOSITION_WITH_RULE; \
df7492f9
KH
1896 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1897 int *buf = components; \
1898 int i, j; \
ff0dacd7 1899 int from, to; \
df7492f9
KH
1900 \
1901 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1902 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1903 { \
781d7a48 1904 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
df7492f9
KH
1905 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1906 } \
1907 if (i < 1 || (buf - components) % 2 == 0) \
1908 goto invalid_code; \
1909 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1910 goto no_more_source; \
ff0dacd7
KH
1911 from = coding->produced_char + char_offset; \
1912 to = from + i; \
1913 ADD_COMPOSITION_DATA (buf, from, to, method); \
df7492f9
KH
1914 for (j = 0; j < i; j++) \
1915 *charbuf++ = components[j]; \
1916 for (j = 0; j < i; j += 2) \
1917 *charbuf++ = components[j]; \
1918 } while (0)
1919
aa72b389
KH
1920
1921static void
df7492f9 1922decode_coding_emacs_mule (coding)
aa72b389 1923 struct coding_system *coding;
aa72b389 1924{
df7492f9
KH
1925 unsigned char *src = coding->source + coding->consumed;
1926 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1927 unsigned char *src_base;
df7492f9 1928 int *charbuf = coding->charbuf;
ff0dacd7 1929 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 1930 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
1931 int multibytep = coding->src_multibyte;
1932 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
1933 int char_offset = coding->produced_char;
1934 int last_offset = char_offset;
1935 int last_id = charset_ascii;
aa72b389 1936
df7492f9 1937 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1938
aa72b389
KH
1939 while (1)
1940 {
df7492f9
KH
1941 int c;
1942
aa72b389 1943 src_base = src;
df7492f9
KH
1944 consumed_chars_base = consumed_chars;
1945
1946 if (charbuf >= charbuf_end)
1947 break;
aa72b389 1948
df7492f9
KH
1949 ONE_MORE_BYTE (c);
1950
1951 if (c < 0x80)
aa72b389 1952 {
df7492f9
KH
1953 if (c == '\r')
1954 {
1955 if (EQ (eol_type, Qdos))
1956 {
1957 if (src == src_end)
98725083
KH
1958 {
1959 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1960 goto no_more_source;
1961 }
df7492f9
KH
1962 if (*src == '\n')
1963 ONE_MORE_BYTE (c);
1964 }
1965 else if (EQ (eol_type, Qmac))
1966 c = '\n';
1967 }
1968 *charbuf++ = c;
1969 char_offset++;
aa72b389 1970 }
df7492f9
KH
1971 else if (c == 0x80)
1972 {
df7492f9 1973 ONE_MORE_BYTE (c);
781d7a48
KH
1974 if (c - 0xF2 >= COMPOSITION_RELATIVE
1975 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1976 DECODE_EMACS_MULE_21_COMPOSITION (c);
1977 else if (c < 0xC0)
1978 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1979 else if (c == 0xFF)
1980 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1981 else
1982 goto invalid_code;
1983 }
1984 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1985 {
1986 int nbytes, nchars;
ff0dacd7
KH
1987 int id;
1988
781d7a48
KH
1989 src = src_base;
1990 consumed_chars = consumed_chars_base;
ff0dacd7 1991 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
1992 if (c < 0)
1993 {
1994 if (c == -2)
1995 break;
1996 goto invalid_code;
1997 }
ff0dacd7
KH
1998 if (last_id != id)
1999 {
2000 if (last_id != charset_ascii)
2001 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2002 last_id = id;
2003 last_offset = char_offset;
2004 }
df7492f9 2005 *charbuf++ = c;
781d7a48
KH
2006 src += nbytes;
2007 consumed_chars += nchars;
df7492f9
KH
2008 char_offset++;
2009 }
2010 continue;
2011
2012 invalid_code:
2013 src = src_base;
2014 consumed_chars = consumed_chars_base;
2015 ONE_MORE_BYTE (c);
2016 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2017 char_offset++;
df7492f9
KH
2018 coding->errors++;
2019 }
2020
2021 no_more_source:
ff0dacd7
KH
2022 if (last_id != charset_ascii)
2023 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
2024 coding->consumed_char += consumed_chars_base;
2025 coding->consumed = src_base - coding->source;
2026 coding->charbuf_used = charbuf - coding->charbuf;
2027}
2028
2029
2030#define EMACS_MULE_LEADING_CODES(id, codes) \
2031 do { \
2032 if (id < 0xA0) \
2033 codes[0] = id, codes[1] = 0; \
2034 else if (id < 0xE0) \
2035 codes[0] = 0x9A, codes[1] = id; \
2036 else if (id < 0xF0) \
2037 codes[0] = 0x9B, codes[1] = id; \
2038 else if (id < 0xF5) \
2039 codes[0] = 0x9C, codes[1] = id; \
2040 else \
2041 codes[0] = 0x9D, codes[1] = id; \
2042 } while (0);
2043
aa72b389 2044
df7492f9
KH
2045static int
2046encode_coding_emacs_mule (coding)
2047 struct coding_system *coding;
2048{
2049 int multibytep = coding->dst_multibyte;
2050 int *charbuf = coding->charbuf;
2051 int *charbuf_end = charbuf + coding->charbuf_used;
2052 unsigned char *dst = coding->destination + coding->produced;
2053 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2054 int safe_room = 8;
df7492f9
KH
2055 int produced_chars = 0;
2056 Lisp_Object attrs, eol_type, charset_list;
2057 int c;
ff0dacd7 2058 int preferred_charset_id = -1;
df7492f9
KH
2059
2060 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2061
2062 while (charbuf < charbuf_end)
2063 {
2064 ASSURE_DESTINATION (safe_room);
2065 c = *charbuf++;
ff0dacd7
KH
2066
2067 if (c < 0)
2068 {
2069 /* Handle an annotation. */
2070 switch (*charbuf)
2071 {
2072 case CODING_ANNOTATE_COMPOSITION_MASK:
2073 /* Not yet implemented. */
2074 break;
2075 case CODING_ANNOTATE_CHARSET_MASK:
2076 preferred_charset_id = charbuf[3];
2077 if (preferred_charset_id >= 0
2078 && NILP (Fmemq (make_number (preferred_charset_id),
2079 charset_list)))
2080 preferred_charset_id = -1;
2081 break;
2082 default:
2083 abort ();
2084 }
2085 charbuf += -c - 1;
2086 continue;
2087 }
2088
df7492f9
KH
2089 if (ASCII_CHAR_P (c))
2090 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2091 else if (CHAR_BYTE8_P (c))
2092 {
2093 c = CHAR_TO_BYTE8 (c);
2094 EMIT_ONE_BYTE (c);
2095 }
df7492f9 2096 else
aa72b389 2097 {
df7492f9
KH
2098 struct charset *charset;
2099 unsigned code;
2100 int dimension;
2101 int emacs_mule_id;
2102 unsigned char leading_codes[2];
2103
ff0dacd7
KH
2104 if (preferred_charset_id >= 0)
2105 {
2106 charset = CHARSET_FROM_ID (preferred_charset_id);
2107 if (! CHAR_CHARSET_P (c, charset))
2108 charset = char_charset (c, charset_list, NULL);
2109 }
2110 else
2111 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2112 if (! charset)
2113 {
2114 c = coding->default_char;
2115 if (ASCII_CHAR_P (c))
2116 {
2117 EMIT_ONE_ASCII_BYTE (c);
2118 continue;
2119 }
2120 charset = char_charset (c, charset_list, &code);
2121 }
2122 dimension = CHARSET_DIMENSION (charset);
2123 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2124 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2125 EMIT_ONE_BYTE (leading_codes[0]);
2126 if (leading_codes[1])
2127 EMIT_ONE_BYTE (leading_codes[1]);
2128 if (dimension == 1)
2129 EMIT_ONE_BYTE (code);
aa72b389 2130 else
df7492f9
KH
2131 {
2132 EMIT_ONE_BYTE (code >> 8);
2133 EMIT_ONE_BYTE (code & 0xFF);
2134 }
aa72b389 2135 }
aa72b389 2136 }
df7492f9
KH
2137 coding->result = CODING_RESULT_SUCCESS;
2138 coding->produced_char += produced_chars;
2139 coding->produced = dst - coding->destination;
2140 return 0;
aa72b389 2141}
b73bfc1c 2142
4ed46869 2143\f
df7492f9 2144/*** 7. ISO2022 handlers ***/
4ed46869
KH
2145
2146/* The following note describes the coding system ISO2022 briefly.
39787efd 2147 Since the intention of this note is to help understand the
5a936b46 2148 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2149 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46
DL
2150 original document of ISO2022. This is equivalent to the standard
2151 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2152
2153 ISO2022 provides many mechanisms to encode several character sets
5a936b46 2154 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2155 is encoded using bytes less than 128. This may make the encoded
2156 text a little bit longer, but the text passes more easily through
5a936b46
DL
2157 several types of gateway, some of which strip off the MSB (Most
2158 Significant Bit).
b73bfc1c 2159
5a936b46
DL
2160 There are two kinds of character sets: control character sets and
2161 graphic character sets. The former contain control characters such
4ed46869 2162 as `newline' and `escape' to provide control functions (control
39787efd 2163 functions are also provided by escape sequences). The latter
5a936b46 2164 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2165 two control character sets and many graphic character sets.
2166
2167 Graphic character sets are classified into one of the following
39787efd
KH
2168 four classes, according to the number of bytes (DIMENSION) and
2169 number of characters in one dimension (CHARS) of the set:
2170 - DIMENSION1_CHARS94
2171 - DIMENSION1_CHARS96
2172 - DIMENSION2_CHARS94
2173 - DIMENSION2_CHARS96
2174
2175 In addition, each character set is assigned an identification tag,
5a936b46 2176 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2177 hereafter). The <F> of each character set is decided by ECMA(*)
2178 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2179 (0x30..0x3F are for private use only).
4ed46869
KH
2180
2181 Note (*): ECMA = European Computer Manufacturers Association
2182
5a936b46 2183 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2184 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2185 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2186 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2187 o DIMENSION2_CHARS96 -- none for the moment
2188
39787efd 2189 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2190 C0 [0x00..0x1F] -- control character plane 0
2191 GL [0x20..0x7F] -- graphic character plane 0
2192 C1 [0x80..0x9F] -- control character plane 1
2193 GR [0xA0..0xFF] -- graphic character plane 1
2194
2195 A control character set is directly designated and invoked to C0 or
39787efd
KH
2196 C1 by an escape sequence. The most common case is that:
2197 - ISO646's control character set is designated/invoked to C0, and
2198 - ISO6429's control character set is designated/invoked to C1,
2199 and usually these designations/invocations are omitted in encoded
2200 text. In a 7-bit environment, only C0 can be used, and a control
2201 character for C1 is encoded by an appropriate escape sequence to
2202 fit into the environment. All control characters for C1 are
2203 defined to have corresponding escape sequences.
4ed46869
KH
2204
2205 A graphic character set is at first designated to one of four
2206 graphic registers (G0 through G3), then these graphic registers are
2207 invoked to GL or GR. These designations and invocations can be
2208 done independently. The most common case is that G0 is invoked to
39787efd
KH
2209 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2210 these invocations and designations are omitted in encoded text.
2211 In a 7-bit environment, only GL can be used.
4ed46869 2212
39787efd
KH
2213 When a graphic character set of CHARS94 is invoked to GL, codes
2214 0x20 and 0x7F of the GL area work as control characters SPACE and
2215 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2216 be used.
4ed46869
KH
2217
2218 There are two ways of invocation: locking-shift and single-shift.
2219 With locking-shift, the invocation lasts until the next different
39787efd
KH
2220 invocation, whereas with single-shift, the invocation affects the
2221 following character only and doesn't affect the locking-shift
2222 state. Invocations are done by the following control characters or
2223 escape sequences:
4ed46869
KH
2224
2225 ----------------------------------------------------------------------
39787efd 2226 abbrev function cntrl escape seq description
4ed46869 2227 ----------------------------------------------------------------------
39787efd
KH
2228 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2229 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2230 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2231 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2232 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2233 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2234 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2235 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2236 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2237 ----------------------------------------------------------------------
39787efd
KH
2238 (*) These are not used by any known coding system.
2239
2240 Control characters for these functions are defined by macros
2241 ISO_CODE_XXX in `coding.h'.
4ed46869 2242
39787efd 2243 Designations are done by the following escape sequences:
4ed46869
KH
2244 ----------------------------------------------------------------------
2245 escape sequence description
2246 ----------------------------------------------------------------------
2247 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2248 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2249 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2250 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2251 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2252 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2253 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2254 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2255 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2256 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2257 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2258 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2259 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2260 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2261 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2262 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2263 ----------------------------------------------------------------------
2264
2265 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2266 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2267
2268 Note (*): Although these designations are not allowed in ISO2022,
2269 Emacs accepts them on decoding, and produces them on encoding
39787efd 2270 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2271 7-bit environment, non-locking-shift, and non-single-shift.
2272
2273 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2274 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2275
5a936b46 2276 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2277 same multilingual text in ISO2022. Actually, there exist many
2278 coding systems such as Compound Text (used in X11's inter client
5a936b46
DL
2279 communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2280 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2281 localized platforms), and all of these are variants of ISO2022.
2282
2283 In addition to the above, Emacs handles two more kinds of escape
2284 sequences: ISO6429's direction specification and Emacs' private
2285 sequence for specifying character composition.
2286
39787efd 2287 ISO6429's direction specification takes the following form:
4ed46869
KH
2288 o CSI ']' -- end of the current direction
2289 o CSI '0' ']' -- end of the current direction
2290 o CSI '1' ']' -- start of left-to-right text
2291 o CSI '2' ']' -- start of right-to-left text
2292 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2293 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2294
2295 Character composition specification takes the following form:
ec6d2bb8
KH
2296 o ESC '0' -- start relative composition
2297 o ESC '1' -- end composition
2298 o ESC '2' -- start rule-base composition (*)
2299 o ESC '3' -- start relative composition with alternate chars (**)
2300 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2301 Since these are not standard escape sequences of any ISO standard,
5a936b46 2302 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2303
5a936b46
DL
2304 (*) This form is used only in Emacs 20.7 and older versions,
2305 but newer versions can safely decode it.
2306 (**) This form is used only in Emacs 21.1 and newer versions,
2307 and older versions can't decode it.
ec6d2bb8 2308
5a936b46 2309 Here's a list of example usages of these composition escape
b73bfc1c 2310 sequences (categorized by `enum composition_method').
ec6d2bb8 2311
b73bfc1c 2312 COMPOSITION_RELATIVE:
ec6d2bb8 2313 ESC 0 CHAR [ CHAR ] ESC 1
5a936b46 2314 COMPOSITION_WITH_RULE:
ec6d2bb8 2315 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2316 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2317 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2318 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2319 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2320
2321enum iso_code_class_type iso_code_class[256];
2322
df7492f9
KH
2323#define SAFE_CHARSET_P(coding, id) \
2324 ((id) <= (coding)->max_charset_id \
2325 && (coding)->safe_charsets[id] >= 0)
2326
2327
2328#define SHIFT_OUT_OK(category) \
2329 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2330
2331static void
f0064e1f
DL
2332setup_iso_safe_charsets (attrs)
2333 Lisp_Object attrs;
df7492f9
KH
2334{
2335 Lisp_Object charset_list, safe_charsets;
2336 Lisp_Object request;
2337 Lisp_Object reg_usage;
2338 Lisp_Object tail;
2339 int reg94, reg96;
2340 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2341 int max_charset_id;
2342
2343 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2344 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2345 && ! EQ (charset_list, Viso_2022_charset_list))
2346 {
2347 CODING_ATTR_CHARSET_LIST (attrs)
2348 = charset_list = Viso_2022_charset_list;
2349 ASET (attrs, coding_attr_safe_charsets, Qnil);
2350 }
2351
2352 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2353 return;
2354
2355 max_charset_id = 0;
2356 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2357 {
2358 int id = XINT (XCAR (tail));
2359 if (max_charset_id < id)
2360 max_charset_id = id;
2361 }
2362
2363 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2364 make_number (255));
2365 request = AREF (attrs, coding_attr_iso_request);
2366 reg_usage = AREF (attrs, coding_attr_iso_usage);
2367 reg94 = XINT (XCAR (reg_usage));
2368 reg96 = XINT (XCDR (reg_usage));
2369
2370 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2371 {
2372 Lisp_Object id;
2373 Lisp_Object reg;
2374 struct charset *charset;
2375
2376 id = XCAR (tail);
2377 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2378 reg = Fcdr (Fassq (id, request));
df7492f9
KH
2379 if (! NILP (reg))
2380 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2381 else if (charset->iso_chars_96)
2382 {
2383 if (reg96 < 4)
2384 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2385 }
2386 else
2387 {
2388 if (reg94 < 4)
2389 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2390 }
2391 }
2392 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2393}
d46c5b12 2394
d46c5b12 2395
4ed46869 2396/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2397 Check if a text is encoded in one of ISO-2022 based codig systems.
2398 If it is, return 1, else return 0. */
4ed46869 2399
0a28aafb 2400static int
ff0dacd7 2401detect_coding_iso_2022 (coding, detect_info)
df7492f9 2402 struct coding_system *coding;
ff0dacd7 2403 struct coding_detection_info *detect_info;
4ed46869 2404{
df7492f9
KH
2405 unsigned char *src = coding->source, *src_base = src;
2406 unsigned char *src_end = coding->source + coding->src_bytes;
2407 int multibytep = coding->src_multibyte;
ff0dacd7 2408 int single_shifting = 0;
df7492f9
KH
2409 int id;
2410 int c, c1;
2411 int consumed_chars = 0;
2412 int i;
ff0dacd7
KH
2413 int rejected = 0;
2414 int found = 0;
2415
2416 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2417
2418 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2419 {
2420 struct coding_system *this = &(coding_categories[i]);
2421 Lisp_Object attrs, val;
2422
2423 attrs = CODING_ID_ATTRS (this->id);
2424 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2425 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2426 setup_iso_safe_charsets (attrs);
2427 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2428 this->max_charset_id = XSTRING (val)->size - 1;
2429 this->safe_charsets = (char *) XSTRING (val)->data;
2430 }
2431
2432 /* A coding system of this category is always ASCII compatible. */
2433 src += coding->head_ascii;
3f003981 2434
ff0dacd7 2435 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2436 {
df7492f9 2437 ONE_MORE_BYTE (c);
4ed46869
KH
2438 switch (c)
2439 {
2440 case ISO_CODE_ESC:
74383408
KH
2441 if (inhibit_iso_escape_detection)
2442 break;
f46869e4 2443 single_shifting = 0;
df7492f9 2444 ONE_MORE_BYTE (c);
d46c5b12 2445 if (c >= '(' && c <= '/')
4ed46869 2446 {
bf9cdd4e 2447 /* Designation sequence for a charset of dimension 1. */
df7492f9 2448 ONE_MORE_BYTE (c1);
d46c5b12 2449 if (c1 < ' ' || c1 >= 0x80
df7492f9 2450 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2451 /* Invalid designation sequence. Just ignore. */
2452 break;
bf9cdd4e
KH
2453 }
2454 else if (c == '$')
2455 {
2456 /* Designation sequence for a charset of dimension 2. */
df7492f9 2457 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2458 if (c >= '@' && c <= 'B')
2459 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2460 id = iso_charset_table[1][0][c];
bf9cdd4e 2461 else if (c >= '(' && c <= '/')
bcf26d6a 2462 {
df7492f9 2463 ONE_MORE_BYTE (c1);
d46c5b12 2464 if (c1 < ' ' || c1 >= 0x80
df7492f9 2465 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2466 /* Invalid designation sequence. Just ignore. */
2467 break;
bcf26d6a 2468 }
bf9cdd4e 2469 else
ff0dacd7 2470 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2471 break;
2472 }
ae9ff118 2473 else if (c == 'N' || c == 'O')
d46c5b12 2474 {
ae9ff118 2475 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2476 single_shifting = 1;
2477 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2478 break;
4ed46869 2479 }
ec6d2bb8
KH
2480 else if (c >= '0' && c <= '4')
2481 {
2482 /* ESC <Fp> for start/end composition. */
ff0dacd7 2483 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2484 break;
2485 }
bf9cdd4e 2486 else
df7492f9 2487 {
ff0dacd7 2488 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2489 break;
2490 }
d46c5b12
KH
2491
2492 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2493 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2494 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2495 id))
ff0dacd7 2496 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2497 else
ff0dacd7 2498 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2499 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2500 id))
ff0dacd7 2501 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2502 else
ff0dacd7 2503 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2504 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2505 id))
ff0dacd7 2506 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2507 else
ff0dacd7 2508 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2509 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2510 id))
ff0dacd7 2511 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2512 else
ff0dacd7 2513 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2514 break;
2515
4ed46869 2516 case ISO_CODE_SO:
d46c5b12 2517 case ISO_CODE_SI:
ff0dacd7 2518 /* Locking shift out/in. */
74383408
KH
2519 if (inhibit_iso_escape_detection)
2520 break;
f46869e4 2521 single_shifting = 0;
ff0dacd7
KH
2522 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2523 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2524 break;
ff0dacd7 2525
4ed46869 2526 case ISO_CODE_CSI:
ff0dacd7 2527 /* Control sequence introducer. */
f46869e4 2528 single_shifting = 0;
ff0dacd7
KH
2529 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2530 found |= CATEGORY_MASK_ISO_8_ELSE;
2531 goto check_extra_latin;
2532
2533
4ed46869
KH
2534 case ISO_CODE_SS2:
2535 case ISO_CODE_SS3:
ff0dacd7
KH
2536 /* Single shift. */
2537 if (inhibit_iso_escape_detection)
2538 break;
2539 single_shifting = 1;
2540 rejected |= CATEGORY_MASK_ISO_7BIT;
2541 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2542 & CODING_ISO_FLAG_SINGLE_SHIFT)
2543 found |= CATEGORY_MASK_ISO_8_1;
2544 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2545 & CODING_ISO_FLAG_SINGLE_SHIFT)
2546 found |= CATEGORY_MASK_ISO_8_2;
2547 goto check_extra_latin;
4ed46869
KH
2548
2549 default:
2550 if (c < 0x80)
f46869e4
KH
2551 {
2552 single_shifting = 0;
2553 break;
2554 }
ff0dacd7 2555 if (c >= 0xA0)
c4825358 2556 {
ff0dacd7
KH
2557 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2558 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2559 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2560 0xA0..0FF. If the byte length is even, we include
2561 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2562 only when we are not single shifting. */
2563 if (! single_shifting
2564 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2565 {
e17de821 2566 int i = 1;
b73bfc1c
KH
2567 while (src < src_end)
2568 {
df7492f9 2569 ONE_MORE_BYTE (c);
b73bfc1c
KH
2570 if (c < 0xA0)
2571 break;
2572 i++;
2573 }
2574
2575 if (i & 1 && src < src_end)
ff0dacd7 2576 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2577 else
ff0dacd7 2578 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2579 }
ff0dacd7 2580 break;
4ed46869 2581 }
ff0dacd7
KH
2582 check_extra_latin:
2583 single_shifting = 0;
2584 if (! VECTORP (Vlatin_extra_code_table)
2585 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2586 {
2587 rejected = CATEGORY_MASK_ISO;
2588 break;
2589 }
2590 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2591 & CODING_ISO_FLAG_LATIN_EXTRA)
2592 found |= CATEGORY_MASK_ISO_8_1;
2593 else
2594 rejected |= CATEGORY_MASK_ISO_8_1;
2595 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2596 & CODING_ISO_FLAG_LATIN_EXTRA)
2597 found |= CATEGORY_MASK_ISO_8_2;
2598 else
2599 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2600 }
2601 }
ff0dacd7
KH
2602 detect_info->rejected |= CATEGORY_MASK_ISO;
2603 return 0;
2604
df7492f9 2605 no_more_source:
ff0dacd7
KH
2606 detect_info->rejected |= rejected;
2607 detect_info->found |= (found & ~rejected);
df7492f9 2608 return 1;
4ed46869
KH
2609}
2610
4ed46869
KH
2611
2612/* Set designation state into CODING. */
df7492f9
KH
2613#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2614 do { \
2615 int id, prev; \
2616 \
2617 if (final < '0' || final >= 128 \
2618 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2619 || !SAFE_CHARSET_P (coding, id)) \
2620 { \
2621 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2622 goto invalid_code; \
2623 } \
2624 prev = CODING_ISO_DESIGNATION (coding, reg); \
bf16eb23
KH
2625 if (id == charset_jisx0201_roman) \
2626 { \
2627 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2628 id = charset_ascii; \
2629 } \
2630 else if (id == charset_jisx0208_1978) \
2631 { \
2632 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2633 id = charset_jisx0208; \
2634 } \
df7492f9
KH
2635 CODING_ISO_DESIGNATION (coding, reg) = id; \
2636 /* If there was an invalid designation to REG previously, and this \
2637 designation is ASCII to REG, we should keep this designation \
2638 sequence. */ \
2639 if (prev == -2 && id == charset_ascii) \
2640 goto invalid_code; \
4ed46869
KH
2641 } while (0)
2642
d46c5b12 2643
df7492f9
KH
2644#define MAYBE_FINISH_COMPOSITION() \
2645 do { \
2646 int i; \
2647 if (composition_state == COMPOSING_NO) \
2648 break; \
2649 /* It is assured that we have enough room for producing \
2650 characters stored in the table `components'. */ \
2651 if (charbuf + component_idx > charbuf_end) \
2652 goto no_more_source; \
2653 composition_state = COMPOSING_NO; \
2654 if (method == COMPOSITION_RELATIVE \
2655 || method == COMPOSITION_WITH_ALTCHARS) \
2656 { \
2657 for (i = 0; i < component_idx; i++) \
2658 *charbuf++ = components[i]; \
2659 char_offset += component_idx; \
2660 } \
2661 else \
2662 { \
2663 for (i = 0; i < component_idx; i += 2) \
2664 *charbuf++ = components[i]; \
2665 char_offset += (component_idx / 2) + 1; \
2666 } \
2667 } while (0)
2668
d46c5b12 2669
aa72b389
KH
2670/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2671 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2672 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2673 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2674 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2675 */
ec6d2bb8 2676
df7492f9
KH
2677#define DECODE_COMPOSITION_START(c1) \
2678 do { \
2679 if (c1 == '0' \
781d7a48 2680 && composition_state == COMPOSING_COMPONENT_RULE) \
df7492f9
KH
2681 { \
2682 component_len = component_idx; \
2683 composition_state = COMPOSING_CHAR; \
2684 } \
2685 else \
2686 { \
2687 unsigned char *p; \
2688 \
2689 MAYBE_FINISH_COMPOSITION (); \
2690 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2691 goto no_more_source; \
2692 for (p = src; p < src_end - 1; p++) \
2693 if (*p == ISO_CODE_ESC && p[1] == '1') \
2694 break; \
2695 if (p == src_end - 1) \
2696 { \
2697 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2698 goto invalid_code; \
2699 goto no_more_source; \
2700 } \
2701 \
2702 /* This is surely the start of a composition. */ \
2703 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2704 : c1 == '2' ? COMPOSITION_WITH_RULE \
2705 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2706 : COMPOSITION_WITH_RULE_ALTCHARS); \
2707 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2708 : COMPOSING_COMPONENT_CHAR); \
2709 component_idx = component_len = 0; \
2710 } \
ec6d2bb8
KH
2711 } while (0)
2712
ec6d2bb8 2713
df7492f9
KH
2714/* Handle compositoin end sequence ESC 1. */
2715
2716#define DECODE_COMPOSITION_END() \
ec6d2bb8 2717 do { \
df7492f9
KH
2718 int nchars = (component_len > 0 ? component_idx - component_len \
2719 : method == COMPOSITION_RELATIVE ? component_idx \
2720 : (component_idx + 1) / 2); \
2721 int i; \
2722 int *saved_charbuf = charbuf; \
ff0dacd7
KH
2723 int from = coding->produced_char + char_offset; \
2724 int to = from + nchars; \
df7492f9 2725 \
ff0dacd7 2726 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
df7492f9 2727 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 2728 { \
df7492f9
KH
2729 if (component_len == 0) \
2730 for (i = 0; i < component_idx; i++) \
2731 *charbuf++ = components[i]; \
2732 else \
2733 for (i = 0; i < component_len; i++) \
2734 *charbuf++ = components[i]; \
2735 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 2736 } \
df7492f9
KH
2737 if (method == COMPOSITION_WITH_RULE) \
2738 for (i = 0; i < component_idx; i += 2, char_offset++) \
2739 *charbuf++ = components[i]; \
ec6d2bb8 2740 else \
df7492f9
KH
2741 for (i = component_len; i < component_idx; i++, char_offset++) \
2742 *charbuf++ = components[i]; \
2743 coding->annotated = 1; \
2744 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
2745 } while (0)
2746
df7492f9 2747
ec6d2bb8
KH
2748/* Decode a composition rule from the byte C1 (and maybe one more byte
2749 from SRC) and store one encoded composition rule in
2750 coding->cmp_data. */
2751
2752#define DECODE_COMPOSITION_RULE(c1) \
2753 do { \
ec6d2bb8
KH
2754 (c1) -= 32; \
2755 if (c1 < 81) /* old format (before ver.21) */ \
2756 { \
2757 int gref = (c1) / 9; \
2758 int nref = (c1) % 9; \
2759 if (gref == 4) gref = 10; \
2760 if (nref == 4) nref = 10; \
df7492f9 2761 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
ec6d2bb8 2762 } \
b73bfc1c 2763 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
2764 { \
2765 ONE_MORE_BYTE (c2); \
df7492f9 2766 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
ec6d2bb8 2767 } \
df7492f9
KH
2768 else \
2769 c1 = 0; \
ec6d2bb8 2770 } while (0)
88993dfd 2771
d46c5b12 2772
4ed46869
KH
2773/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2774
b73bfc1c 2775static void
df7492f9 2776decode_coding_iso_2022 (coding)
4ed46869 2777 struct coding_system *coding;
4ed46869 2778{
df7492f9
KH
2779 unsigned char *src = coding->source + coding->consumed;
2780 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2781 unsigned char *src_base;
df7492f9 2782 int *charbuf = coding->charbuf;
ff0dacd7
KH
2783 int *charbuf_end
2784 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2785 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
2786 int multibytep = coding->src_multibyte;
2787 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2788 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2789 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2790 struct charset *charset;
2791 int c;
2792 /* For handling composition sequence. */
2793#define COMPOSING_NO 0
2794#define COMPOSING_CHAR 1
2795#define COMPOSING_RULE 2
2796#define COMPOSING_COMPONENT_CHAR 3
2797#define COMPOSING_COMPONENT_RULE 4
2798
2799 int composition_state = COMPOSING_NO;
2800 enum composition_method method;
2801 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2802 int component_idx;
2803 int component_len;
2804 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
2805 int char_offset = coding->produced_char;
2806 int last_offset = char_offset;
2807 int last_id = charset_ascii;
df7492f9
KH
2808
2809 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2810 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2811
2812 while (1)
4ed46869 2813 {
b73bfc1c
KH
2814 int c1, c2;
2815
2816 src_base = src;
df7492f9
KH
2817 consumed_chars_base = consumed_chars;
2818
2819 if (charbuf >= charbuf_end)
2820 break;
2821
b73bfc1c 2822 ONE_MORE_BYTE (c1);
4ed46869 2823
98725083 2824 /* We produce at most one character. */
4ed46869
KH
2825 switch (iso_code_class [c1])
2826 {
2827 case ISO_0x20_or_0x7F:
df7492f9 2828 if (composition_state != COMPOSING_NO)
ec6d2bb8 2829 {
df7492f9
KH
2830 if (composition_state == COMPOSING_RULE
2831 || composition_state == COMPOSING_COMPONENT_RULE)
2832 {
2833 DECODE_COMPOSITION_RULE (c1);
2834 components[component_idx++] = c1;
2835 composition_state--;
2836 continue;
2837 }
ec6d2bb8 2838 }
df7492f9
KH
2839 if (charset_id_0 < 0
2840 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2841 /* This is SPACE or DEL. */
2842 charset = CHARSET_FROM_ID (charset_ascii);
2843 else
2844 charset = CHARSET_FROM_ID (charset_id_0);
2845 break;
4ed46869
KH
2846
2847 case ISO_graphic_plane_0:
781d7a48 2848 if (composition_state != COMPOSING_NO)
b73bfc1c 2849 {
781d7a48
KH
2850 if (composition_state == COMPOSING_RULE
2851 || composition_state == COMPOSING_COMPONENT_RULE)
2852 {
2853 DECODE_COMPOSITION_RULE (c1);
2854 components[component_idx++] = c1;
2855 composition_state--;
2856 continue;
2857 }
b73bfc1c 2858 }
df7492f9 2859 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2860 break;
2861
2862 case ISO_0xA0_or_0xFF:
df7492f9
KH
2863 if (charset_id_1 < 0
2864 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2865 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2866 goto invalid_code;
4ed46869
KH
2867 /* This is a graphic character, we fall down ... */
2868
2869 case ISO_graphic_plane_1:
df7492f9
KH
2870 if (charset_id_1 < 0)
2871 goto invalid_code;
2872 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2873 break;
2874
2875 case ISO_carriage_return:
df7492f9 2876 if (c1 == '\r')
4ed46869 2877 {
df7492f9 2878 if (EQ (eol_type, Qdos))
4ed46869 2879 {
df7492f9 2880 if (src == src_end)
98725083
KH
2881 {
2882 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2883 goto no_more_source;
2884 }
df7492f9
KH
2885 if (*src == '\n')
2886 ONE_MORE_BYTE (c1);
4ed46869 2887 }
df7492f9
KH
2888 else if (EQ (eol_type, Qmac))
2889 c1 = '\n';
4ed46869 2890 }
df7492f9
KH
2891 /* fall through */
2892
2893 case ISO_control_0:
2894 MAYBE_FINISH_COMPOSITION ();
2895 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2896 break;
2897
df7492f9
KH
2898 case ISO_control_1:
2899 MAYBE_FINISH_COMPOSITION ();
2900 goto invalid_code;
2901
4ed46869 2902 case ISO_shift_out:
df7492f9
KH
2903 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2904 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2905 goto invalid_code;
2906 CODING_ISO_INVOCATION (coding, 0) = 1;
2907 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2908 continue;
4ed46869
KH
2909
2910 case ISO_shift_in:
df7492f9
KH
2911 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2912 goto invalid_code;
2913 CODING_ISO_INVOCATION (coding, 0) = 0;
2914 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2915 continue;
4ed46869
KH
2916
2917 case ISO_single_shift_2_7:
2918 case ISO_single_shift_2:
df7492f9
KH
2919 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2920 goto invalid_code;
4ed46869
KH
2921 /* SS2 is handled as an escape sequence of ESC 'N' */
2922 c1 = 'N';
2923 goto label_escape_sequence;
2924
2925 case ISO_single_shift_3:
df7492f9
KH
2926 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2927 goto invalid_code;
4ed46869
KH
2928 /* SS2 is handled as an escape sequence of ESC 'O' */
2929 c1 = 'O';
2930 goto label_escape_sequence;
2931
2932 case ISO_control_sequence_introducer:
2933 /* CSI is handled as an escape sequence of ESC '[' ... */
2934 c1 = '[';
2935 goto label_escape_sequence;
2936
2937 case ISO_escape:
2938 ONE_MORE_BYTE (c1);
2939 label_escape_sequence:
df7492f9 2940 /* Escape sequences handled here are invocation,
4ed46869
KH
2941 designation, direction specification, and character
2942 composition specification. */
2943 switch (c1)
2944 {
2945 case '&': /* revision of following character set */
2946 ONE_MORE_BYTE (c1);
2947 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2948 goto invalid_code;
4ed46869
KH
2949 ONE_MORE_BYTE (c1);
2950 if (c1 != ISO_CODE_ESC)
df7492f9 2951 goto invalid_code;
4ed46869
KH
2952 ONE_MORE_BYTE (c1);
2953 goto label_escape_sequence;
2954
2955 case '$': /* designation of 2-byte character set */
df7492f9
KH
2956 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2957 goto invalid_code;
4ed46869
KH
2958 ONE_MORE_BYTE (c1);
2959 if (c1 >= '@' && c1 <= 'B')
2960 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2961 or JISX0208.1980 */
df7492f9 2962 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2963 }
2964 else if (c1 >= 0x28 && c1 <= 0x2B)
2965 { /* designation of DIMENSION2_CHARS94 character set */
2966 ONE_MORE_BYTE (c2);
df7492f9 2967 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2968 }
2969 else if (c1 >= 0x2C && c1 <= 0x2F)
2970 { /* designation of DIMENSION2_CHARS96 character set */
2971 ONE_MORE_BYTE (c2);
df7492f9 2972 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2973 }
2974 else
df7492f9 2975 goto invalid_code;
b73bfc1c 2976 /* We must update these variables now. */
df7492f9
KH
2977 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2978 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2979 continue;
4ed46869
KH
2980
2981 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2982 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2983 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2984 goto invalid_code;
2985 CODING_ISO_INVOCATION (coding, 0) = 2;
2986 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2987 continue;
4ed46869
KH
2988
2989 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2990 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2991 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2992 goto invalid_code;
2993 CODING_ISO_INVOCATION (coding, 0) = 3;
2994 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2995 continue;
4ed46869
KH
2996
2997 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2998 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2999 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3000 goto invalid_code;
3001 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 3002 ONE_MORE_BYTE (c1);
e7046a18 3003 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3004 goto invalid_code;
4ed46869
KH
3005 break;
3006
3007 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3008 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3009 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3010 goto invalid_code;
3011 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 3012 ONE_MORE_BYTE (c1);
e7046a18 3013 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3014 goto invalid_code;
4ed46869
KH
3015 break;
3016
ec6d2bb8 3017 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3018 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3019 goto invalid_code;
ec6d2bb8 3020 DECODE_COMPOSITION_START (c1);
b73bfc1c 3021 continue;
4ed46869 3022
ec6d2bb8 3023 case '1': /* end composition */
df7492f9
KH
3024 if (composition_state == COMPOSING_NO)
3025 goto invalid_code;
3026 DECODE_COMPOSITION_END ();
b73bfc1c 3027 continue;
4ed46869
KH
3028
3029 case '[': /* specification of direction */
df7492f9
KH
3030 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3031 goto invalid_code;
4ed46869 3032 /* For the moment, nested direction is not supported.
d46c5b12 3033 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3034 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3035 ONE_MORE_BYTE (c1);
3036 switch (c1)
3037 {
3038 case ']': /* end of the current direction */
d46c5b12 3039 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3040
3041 case '0': /* end of the current direction */
3042 case '1': /* start of left-to-right direction */
3043 ONE_MORE_BYTE (c1);
3044 if (c1 == ']')
d46c5b12 3045 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3046 else
df7492f9 3047 goto invalid_code;
4ed46869
KH
3048 break;
3049
3050 case '2': /* start of right-to-left direction */
3051 ONE_MORE_BYTE (c1);
3052 if (c1 == ']')
d46c5b12 3053 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3054 else
df7492f9 3055 goto invalid_code;
4ed46869
KH
3056 break;
3057
3058 default:
df7492f9 3059 goto invalid_code;
4ed46869 3060 }
b73bfc1c 3061 continue;
4ed46869 3062
4776e638
KH
3063 case '%':
3064 ONE_MORE_BYTE (c1);
3065 if (c1 == '/')
3066 {
3067 /* CTEXT extended segment:
3068 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3069 We keep these bytes as is for the moment.
3070 They may be decoded by post-read-conversion. */
3071 int dim, M, L;
3072 int size;
3073
3074 ONE_MORE_BYTE (dim);
3075 ONE_MORE_BYTE (M);
3076 ONE_MORE_BYTE (L);
3077 size = ((M - 128) * 128) + (L - 128);
3078 if (charbuf + 8 + size > charbuf_end)
3079 goto break_loop;
3080 *charbuf++ = ISO_CODE_ESC;
3081 *charbuf++ = '%';
3082 *charbuf++ = '/';
3083 *charbuf++ = dim;
3084 *charbuf++ = BYTE8_TO_CHAR (M);
3085 *charbuf++ = BYTE8_TO_CHAR (L);
3086 while (size-- > 0)
3087 {
3088 ONE_MORE_BYTE (c1);
3089 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3090 }
3091 }
3092 else if (c1 == 'G')
3093 {
3094 /* XFree86 extension for embedding UTF-8 in CTEXT:
3095 ESC % G --UTF-8-BYTES-- ESC % @
3096 We keep these bytes as is for the moment.
3097 They may be decoded by post-read-conversion. */
3098 int *p = charbuf;
3099
3100 if (p + 6 > charbuf_end)
3101 goto break_loop;
3102 *p++ = ISO_CODE_ESC;
3103 *p++ = '%';
3104 *p++ = 'G';
3105 while (p < charbuf_end)
3106 {
3107 ONE_MORE_BYTE (c1);
3108 if (c1 == ISO_CODE_ESC
3109 && src + 1 < src_end
3110 && src[0] == '%'
3111 && src[1] == '@')
3112 break;
3113 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3114 }
3115 if (p + 3 > charbuf_end)
3116 goto break_loop;
3117 *p++ = ISO_CODE_ESC;
3118 *p++ = '%';
3119 *p++ = '@';
3120 charbuf = p;
3121 }
3122 else
3123 goto invalid_code;
3124 continue;
3125 break;
3126
4ed46869 3127 default:
df7492f9
KH
3128 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3129 goto invalid_code;
4ed46869
KH
3130 if (c1 >= 0x28 && c1 <= 0x2B)
3131 { /* designation of DIMENSION1_CHARS94 character set */
3132 ONE_MORE_BYTE (c2);
df7492f9 3133 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3134 }
3135 else if (c1 >= 0x2C && c1 <= 0x2F)
3136 { /* designation of DIMENSION1_CHARS96 character set */
3137 ONE_MORE_BYTE (c2);
df7492f9 3138 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3139 }
3140 else
df7492f9 3141 goto invalid_code;
b73bfc1c 3142 /* We must update these variables now. */
df7492f9
KH
3143 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3144 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3145 continue;
4ed46869 3146 }
b73bfc1c 3147 }
4ed46869 3148
ff0dacd7
KH
3149 if (charset->id != charset_ascii
3150 && last_id != charset->id)
3151 {
3152 if (last_id != charset_ascii)
3153 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3154 last_id = charset->id;
3155 last_offset = char_offset;
3156 }
3157
b73bfc1c 3158 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3159 Produce a decoded character while getting 2nd position code
3160 C2 if necessary. */
3161 c1 &= 0x7F;
3162 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3163 {
3164 ONE_MORE_BYTE (c2);
df7492f9 3165 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3166 /* C2 is not in a valid range. */
df7492f9
KH
3167 goto invalid_code;
3168 c1 = (c1 << 8) | (c2 & 0x7F);
3169 if (CHARSET_DIMENSION (charset) > 2)
3170 {
3171 ONE_MORE_BYTE (c2);
3172 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3173 /* C2 is not in a valid range. */
3174 goto invalid_code;
3175 c1 = (c1 << 8) | (c2 & 0x7F);
3176 }
3177 }
3178
3179 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3180 if (c < 0)
3181 {
3182 MAYBE_FINISH_COMPOSITION ();
3183 for (; src_base < src; src_base++, char_offset++)
3184 {
3185 if (ASCII_BYTE_P (*src_base))
3186 *charbuf++ = *src_base;
3187 else
3188 *charbuf++ = BYTE8_TO_CHAR (*src_base);
ff0dacd7 3189 char_offset++;
df7492f9
KH
3190 }
3191 }
3192 else if (composition_state == COMPOSING_NO)
3193 {
3194 *charbuf++ = c;
3195 char_offset++;
4ed46869 3196 }
df7492f9 3197 else
781d7a48
KH
3198 {
3199 components[component_idx++] = c;
3200 if (method == COMPOSITION_WITH_RULE
3201 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3202 && composition_state == COMPOSING_COMPONENT_CHAR))
3203 composition_state++;
3204 }
4ed46869
KH
3205 continue;
3206
df7492f9
KH
3207 invalid_code:
3208 MAYBE_FINISH_COMPOSITION ();
4ed46869 3209 src = src_base;
df7492f9
KH
3210 consumed_chars = consumed_chars_base;
3211 ONE_MORE_BYTE (c);
3212 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3213 char_offset++;
df7492f9 3214 coding->errors++;
4776e638
KH
3215 continue;
3216
3217 break_loop:
3218 break;
4ed46869 3219 }
fb88bf2d 3220
df7492f9 3221 no_more_source:
ff0dacd7
KH
3222 if (last_id != charset_ascii)
3223 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3224 coding->consumed_char += consumed_chars_base;
3225 coding->consumed = src_base - coding->source;
3226 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3227}
3228
b73bfc1c 3229
f4dee582 3230/* ISO2022 encoding stuff. */
4ed46869
KH
3231
3232/*
f4dee582 3233 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3234 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3235 variant has the following specifications:
df7492f9 3236 1. Initial designation to G0 thru G3.
4ed46869
KH
3237 2. Allows short-form designation?
3238 3. ASCII should be designated to G0 before control characters?
3239 4. ASCII should be designated to G0 at end of line?
3240 5. 7-bit environment or 8-bit environment?
3241 6. Use locking-shift?
3242 7. Use Single-shift?
3243 And the following two are only for Japanese:
3244 8. Use ASCII in place of JIS0201-1976-Roman?
3245 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3246 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3247 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3248 details.
4ed46869
KH
3249*/
3250
3251/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
3252 register REG at DST, and increment DST. If <final-char> of CHARSET is
3253 '@', 'A', or 'B' and the coding system CODING allows, produce
3254 designation sequence of short-form. */
4ed46869
KH
3255
3256#define ENCODE_DESIGNATION(charset, reg, coding) \
3257 do { \
df7492f9 3258 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
4ed46869
KH
3259 char *intermediate_char_94 = "()*+"; \
3260 char *intermediate_char_96 = ",-./"; \
df7492f9
KH
3261 int revision = -1; \
3262 int c; \
3263 \
3264 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
c197f191 3265 revision = CHARSET_ISO_REVISION (charset); \
df7492f9
KH
3266 \
3267 if (revision >= 0) \
70c22245 3268 { \
df7492f9
KH
3269 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3270 EMIT_ONE_BYTE ('@' + revision); \
4ed46869 3271 } \
df7492f9 3272 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4ed46869
KH
3273 if (CHARSET_DIMENSION (charset) == 1) \
3274 { \
df7492f9
KH
3275 if (! CHARSET_ISO_CHARS_96 (charset)) \
3276 c = intermediate_char_94[reg]; \
4ed46869 3277 else \
df7492f9
KH
3278 c = intermediate_char_96[reg]; \
3279 EMIT_ONE_ASCII_BYTE (c); \
4ed46869
KH
3280 } \
3281 else \
3282 { \
df7492f9
KH
3283 EMIT_ONE_ASCII_BYTE ('$'); \
3284 if (! CHARSET_ISO_CHARS_96 (charset)) \
4ed46869 3285 { \
df7492f9 3286 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
b73bfc1c
KH
3287 || reg != 0 \
3288 || final_char < '@' || final_char > 'B') \
df7492f9 3289 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
4ed46869
KH
3290 } \
3291 else \
df7492f9 3292 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
4ed46869 3293 } \
df7492f9
KH
3294 EMIT_ONE_ASCII_BYTE (final_char); \
3295 \
3296 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
4ed46869
KH
3297 } while (0)
3298
df7492f9 3299
4ed46869
KH
3300/* The following two macros produce codes (control character or escape
3301 sequence) for ISO2022 single-shift functions (single-shift-2 and
3302 single-shift-3). */
3303
df7492f9
KH
3304#define ENCODE_SINGLE_SHIFT_2 \
3305 do { \
3306 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3307 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3308 else \
3309 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3310 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3311 } while (0)
3312
df7492f9
KH
3313
3314#define ENCODE_SINGLE_SHIFT_3 \
3315 do { \
3316 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3317 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3318 else \
3319 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3320 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3321 } while (0)
3322
df7492f9 3323
4ed46869
KH
3324/* The following four macros produce codes (control character or
3325 escape sequence) for ISO2022 locking-shift functions (shift-in,
3326 shift-out, locking-shift-2, and locking-shift-3). */
3327
df7492f9
KH
3328#define ENCODE_SHIFT_IN \
3329 do { \
3330 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3331 CODING_ISO_INVOCATION (coding, 0) = 0; \
4ed46869
KH
3332 } while (0)
3333
df7492f9
KH
3334
3335#define ENCODE_SHIFT_OUT \
3336 do { \
3337 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3338 CODING_ISO_INVOCATION (coding, 0) = 1; \
4ed46869
KH
3339 } while (0)
3340
df7492f9
KH
3341
3342#define ENCODE_LOCKING_SHIFT_2 \
3343 do { \
3344 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3345 CODING_ISO_INVOCATION (coding, 0) = 2; \
4ed46869
KH
3346 } while (0)
3347
df7492f9
KH
3348
3349#define ENCODE_LOCKING_SHIFT_3 \
3350 do { \
3351 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3352 CODING_ISO_INVOCATION (coding, 0) = 3; \
4ed46869
KH
3353 } while (0)
3354
df7492f9 3355
f4dee582
RS
3356/* Produce codes for a DIMENSION1 character whose character set is
3357 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
3358 sequences are also produced in advance if necessary. */
3359
6e85d753
KH
3360#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3361 do { \
df7492f9 3362 int id = CHARSET_ID (charset); \
bf16eb23
KH
3363 \
3364 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3365 && id == charset_ascii) \
3366 { \
3367 id = charset_jisx0201_roman; \
3368 charset = CHARSET_FROM_ID (id); \
3369 } \
3370 \
df7492f9 3371 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3372 { \
df7492f9
KH
3373 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3374 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753 3375 else \
df7492f9
KH
3376 EMIT_ONE_BYTE (c1 | 0x80); \
3377 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3378 break; \
3379 } \
df7492f9 3380 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3381 { \
df7492f9 3382 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753
KH
3383 break; \
3384 } \
df7492f9 3385 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3386 { \
df7492f9 3387 EMIT_ONE_BYTE (c1 | 0x80); \
6e85d753
KH
3388 break; \
3389 } \
6e85d753
KH
3390 else \
3391 /* Since CHARSET is not yet invoked to any graphic planes, we \
3392 must invoke it, or, at first, designate it to some graphic \
3393 register. Then repeat the loop to actually produce the \
3394 character. */ \
df7492f9
KH
3395 dst = encode_invocation_designation (charset, coding, dst, \
3396 &produced_chars); \
4ed46869
KH
3397 } while (1)
3398
df7492f9 3399
f4dee582
RS
3400/* Produce codes for a DIMENSION2 character whose character set is
3401 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3402 invocation codes are also produced in advance if necessary. */
3403
6e85d753
KH
3404#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3405 do { \
df7492f9 3406 int id = CHARSET_ID (charset); \
bf16eb23
KH
3407 \
3408 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3409 && id == charset_jisx0208) \
3410 { \
3411 id = charset_jisx0208_1978; \
3412 charset = CHARSET_FROM_ID (id); \
3413 } \
3414 \
df7492f9 3415 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3416 { \
df7492f9
KH
3417 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3418 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753 3419 else \
df7492f9
KH
3420 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3421 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3422 break; \
3423 } \
df7492f9 3424 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3425 { \
df7492f9 3426 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753
KH
3427 break; \
3428 } \
df7492f9 3429 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3430 { \
df7492f9 3431 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
6e85d753
KH
3432 break; \
3433 } \
6e85d753
KH
3434 else \
3435 /* Since CHARSET is not yet invoked to any graphic planes, we \
3436 must invoke it, or, at first, designate it to some graphic \
3437 register. Then repeat the loop to actually produce the \
3438 character. */ \
df7492f9
KH
3439 dst = encode_invocation_designation (charset, coding, dst, \
3440 &produced_chars); \
4ed46869
KH
3441 } while (1)
3442
05e6f5dc 3443
df7492f9
KH
3444#define ENCODE_ISO_CHARACTER(charset, c) \
3445 do { \
3446 int code = ENCODE_CHAR ((charset),(c)); \
3447 \
3448 if (CHARSET_DIMENSION (charset) == 1) \
3449 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3450 else \
3451 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
84fbb8a0 3452 } while (0)
bdd9fb48 3453
05e6f5dc 3454
4ed46869 3455/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3456 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3457 Return new DST. */
3458
3459unsigned char *
df7492f9
KH
3460encode_invocation_designation (charset, coding, dst, p_nchars)
3461 struct charset *charset;
4ed46869
KH
3462 struct coding_system *coding;
3463 unsigned char *dst;
df7492f9 3464 int *p_nchars;
4ed46869 3465{
df7492f9
KH
3466 int multibytep = coding->dst_multibyte;
3467 int produced_chars = *p_nchars;
4ed46869 3468 int reg; /* graphic register number */
df7492f9 3469 int id = CHARSET_ID (charset);
4ed46869
KH
3470
3471 /* At first, check designations. */
3472 for (reg = 0; reg < 4; reg++)
df7492f9 3473 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3474 break;
3475
3476 if (reg >= 4)
3477 {
3478 /* CHARSET is not yet designated to any graphic registers. */
3479 /* At first check the requested designation. */
df7492f9
KH
3480 reg = CODING_ISO_REQUEST (coding, id);
3481 if (reg < 0)
1ba9e4ab
KH
3482 /* Since CHARSET requests no special designation, designate it
3483 to graphic register 0. */
4ed46869
KH
3484 reg = 0;
3485
3486 ENCODE_DESIGNATION (charset, reg, coding);
3487 }
3488
df7492f9
KH
3489 if (CODING_ISO_INVOCATION (coding, 0) != reg
3490 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3491 {
3492 /* Since the graphic register REG is not invoked to any graphic
3493 planes, invoke it to graphic plane 0. */
3494 switch (reg)
3495 {
3496 case 0: /* graphic register 0 */
3497 ENCODE_SHIFT_IN;
3498 break;
3499
3500 case 1: /* graphic register 1 */
3501 ENCODE_SHIFT_OUT;
3502 break;
3503
3504 case 2: /* graphic register 2 */
df7492f9 3505 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3506 ENCODE_SINGLE_SHIFT_2;
3507 else
3508 ENCODE_LOCKING_SHIFT_2;
3509 break;
3510
3511 case 3: /* graphic register 3 */
df7492f9 3512 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3513 ENCODE_SINGLE_SHIFT_3;
3514 else
3515 ENCODE_LOCKING_SHIFT_3;
3516 break;
3517 }
3518 }
b73bfc1c 3519
df7492f9 3520 *p_nchars = produced_chars;
4ed46869
KH
3521 return dst;
3522}
3523
df7492f9
KH
3524/* The following three macros produce codes for indicating direction
3525 of text. */
3526#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
ec6d2bb8 3527 do { \
df7492f9
KH
3528 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3529 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
ec6d2bb8 3530 else \
df7492f9 3531 EMIT_ONE_BYTE (ISO_CODE_CSI); \
ec6d2bb8
KH
3532 } while (0)
3533
ec6d2bb8 3534
df7492f9
KH
3535#define ENCODE_DIRECTION_R2L() \
3536 do { \
3537 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3538 EMIT_TWO_ASCII_BYTES ('2', ']'); \
ec6d2bb8
KH
3539 } while (0)
3540
ec6d2bb8 3541
df7492f9 3542#define ENCODE_DIRECTION_L2R() \
ec6d2bb8 3543 do { \
df7492f9
KH
3544 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3545 EMIT_TWO_ASCII_BYTES ('0', ']'); \
4ed46869
KH
3546 } while (0)
3547
4ed46869
KH
3548
3549/* Produce codes for designation and invocation to reset the graphic
3550 planes and registers to initial state. */
df7492f9
KH
3551#define ENCODE_RESET_PLANE_AND_REGISTER() \
3552 do { \
3553 int reg; \
3554 struct charset *charset; \
3555 \
3556 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3557 ENCODE_SHIFT_IN; \
3558 for (reg = 0; reg < 4; reg++) \
3559 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3560 && (CODING_ISO_DESIGNATION (coding, reg) \
3561 != CODING_ISO_INITIAL (coding, reg))) \
3562 { \
3563 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3564 ENCODE_DESIGNATION (charset, reg, coding); \
3565 } \
4ed46869
KH
3566 } while (0)
3567
df7492f9 3568
bdd9fb48 3569/* Produce designation sequences of charsets in the line started from
b73bfc1c 3570 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3571
3572 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3573 find all the necessary designations. */
3574
b73bfc1c 3575static unsigned char *
df7492f9 3576encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3577 struct coding_system *coding;
df7492f9
KH
3578 int *charbuf, *charbuf_end;
3579 unsigned char *dst;
e0e989f6 3580{
df7492f9 3581 struct charset *charset;
bdd9fb48
KH
3582 /* Table of charsets to be designated to each graphic register. */
3583 int r[4];
df7492f9
KH
3584 int c, found = 0, reg;
3585 int produced_chars = 0;
3586 int multibytep = coding->dst_multibyte;
3587 Lisp_Object attrs;
3588 Lisp_Object charset_list;
3589
3590 attrs = CODING_ID_ATTRS (coding->id);
3591 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3592 if (EQ (charset_list, Qiso_2022))
3593 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3594
3595 for (reg = 0; reg < 4; reg++)
3596 r[reg] = -1;
3597
b73bfc1c 3598 while (found < 4)
e0e989f6 3599 {
df7492f9
KH
3600 int id;
3601
3602 c = *charbuf++;
b73bfc1c
KH
3603 if (c == '\n')
3604 break;
df7492f9
KH
3605 charset = char_charset (c, charset_list, NULL);
3606 id = CHARSET_ID (charset);
3607 reg = CODING_ISO_REQUEST (coding, id);
3608 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3609 {
3610 found++;
df7492f9 3611 r[reg] = id;
bdd9fb48 3612 }
bdd9fb48
KH
3613 }
3614
3615 if (found)
3616 {
3617 for (reg = 0; reg < 4; reg++)
3618 if (r[reg] >= 0
df7492f9
KH
3619 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3620 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3621 }
b73bfc1c
KH
3622
3623 return dst;
e0e989f6
KH
3624}
3625
4ed46869
KH
3626/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3627
df7492f9
KH
3628static int
3629encode_coding_iso_2022 (coding)
4ed46869 3630 struct coding_system *coding;
4ed46869 3631{
df7492f9
KH
3632 int multibytep = coding->dst_multibyte;
3633 int *charbuf = coding->charbuf;
3634 int *charbuf_end = charbuf + coding->charbuf_used;
3635 unsigned char *dst = coding->destination + coding->produced;
3636 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3637 int safe_room = 16;
3638 int bol_designation
3639 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3640 && CODING_ISO_BOL (coding));
3641 int produced_chars = 0;
3642 Lisp_Object attrs, eol_type, charset_list;
3643 int ascii_compatible;
b73bfc1c 3644 int c;
ff0dacd7 3645 int preferred_charset_id = -1;
05e6f5dc 3646
df7492f9 3647 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4 3648 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3649 /* Charset list may have been changed. */
3650 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
004068e4
KH
3651 coding->safe_charsets
3652 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3653
df7492f9 3654 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3655
df7492f9 3656 while (charbuf < charbuf_end)
4ed46869 3657 {
df7492f9 3658 ASSURE_DESTINATION (safe_room);
b73bfc1c 3659
df7492f9 3660 if (bol_designation)
b73bfc1c 3661 {
df7492f9 3662 unsigned char *dst_prev = dst;
4ed46869 3663
bdd9fb48 3664 /* We have to produce designation sequences if any now. */
df7492f9
KH
3665 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3666 bol_designation = 0;
3667 /* We are sure that designation sequences are all ASCII bytes. */
3668 produced_chars += dst - dst_prev;
4ed46869 3669 }
ec6d2bb8 3670
df7492f9 3671 c = *charbuf++;
4ed46869 3672
ff0dacd7
KH
3673 if (c < 0)
3674 {
3675 /* Handle an annotation. */
3676 switch (*charbuf)
3677 {
3678 case CODING_ANNOTATE_COMPOSITION_MASK:
3679 /* Not yet implemented. */
3680 break;
3681 case CODING_ANNOTATE_CHARSET_MASK:
3682 preferred_charset_id = charbuf[3];
3683 if (preferred_charset_id >= 0
3684 && NILP (Fmemq (make_number (preferred_charset_id),
3685 charset_list)))
3686 preferred_charset_id = -1;
3687 break;
3688 default:
3689 abort ();
3690 }
3691 charbuf += -c - 1;
3692 continue;
3693 }
3694
b73bfc1c
KH
3695 /* Now encode the character C. */
3696 if (c < 0x20 || c == 0x7F)
3697 {
df7492f9
KH
3698 if (c == '\n'
3699 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3700 {
df7492f9
KH
3701 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3702 ENCODE_RESET_PLANE_AND_REGISTER ();
3703 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3704 {
df7492f9
KH
3705 int i;
3706
3707 for (i = 0; i < 4; i++)
3708 CODING_ISO_DESIGNATION (coding, i)
3709 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3710 }
df7492f9
KH
3711 bol_designation
3712 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3713 }
df7492f9
KH
3714 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3715 ENCODE_RESET_PLANE_AND_REGISTER ();
3716 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3717 }
df7492f9 3718 else if (ASCII_CHAR_P (c))
88993dfd 3719 {
df7492f9
KH
3720 if (ascii_compatible)
3721 EMIT_ONE_ASCII_BYTE (c);
3722 else
bf16eb23
KH
3723 {
3724 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3725 ENCODE_ISO_CHARACTER (charset, c);
3726 }
88993dfd 3727 }
16eafb5d
KH
3728 else if (CHAR_BYTE8_P (c))
3729 {
3730 c = CHAR_TO_BYTE8 (c);
3731 EMIT_ONE_BYTE (c);
3732 }
b73bfc1c 3733 else
df7492f9 3734 {
ff0dacd7 3735 struct charset *charset;
b73bfc1c 3736
ff0dacd7
KH
3737 if (preferred_charset_id >= 0)
3738 {
3739 charset = CHARSET_FROM_ID (preferred_charset_id);
3740 if (! CHAR_CHARSET_P (c, charset))
3741 charset = char_charset (c, charset_list, NULL);
3742 }
3743 else
3744 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3745 if (!charset)
3746 {
41cbe562
KH
3747 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3748 {
3749 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3750 charset = CHARSET_FROM_ID (charset_ascii);
3751 }
3752 else
3753 {
3754 c = coding->default_char;
3755 charset = char_charset (c, charset_list, NULL);
3756 }
df7492f9
KH
3757 }
3758 ENCODE_ISO_CHARACTER (charset, c);
3759 }
84fbb8a0 3760 }
b73bfc1c 3761
df7492f9
KH
3762 if (coding->mode & CODING_MODE_LAST_BLOCK
3763 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3764 {
3765 ASSURE_DESTINATION (safe_room);
3766 ENCODE_RESET_PLANE_AND_REGISTER ();
3767 }
3768 coding->result = CODING_RESULT_SUCCESS;
3769 CODING_ISO_BOL (coding) = bol_designation;
3770 coding->produced_char += produced_chars;
3771 coding->produced = dst - coding->destination;
3772 return 0;
4ed46869
KH
3773}
3774
3775\f
df7492f9 3776/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3777
df7492f9 3778/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3779 quite widely. So, for the moment, Emacs supports them in the bare
3780 C code. But, in the future, they may be supported only by CCL. */
3781
3782/* SJIS is a coding system encoding three character sets: ASCII, right
3783 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3784 as is. A character of charset katakana-jisx0201 is encoded by
3785 "position-code + 0x80". A character of charset japanese-jisx0208
3786 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3787 so that it fit in the range below.
4ed46869
KH
3788
3789 --- CODE RANGE of SJIS ---
3790 (character set) (range)
3791 ASCII 0x00 .. 0x7F
df7492f9 3792 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3793 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3794 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3795 -------------------------------
3796
3797*/
3798
3799/* BIG5 is a coding system encoding two character sets: ASCII and
3800 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3801 character set and is encoded in two-byte.
4ed46869
KH
3802
3803 --- CODE RANGE of BIG5 ---
3804 (character set) (range)
3805 ASCII 0x00 .. 0x7F
3806 Big5 (1st byte) 0xA1 .. 0xFE
3807 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3808 --------------------------
3809
df7492f9 3810 */
4ed46869
KH
3811
3812/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3813 Check if a text is encoded in SJIS. If it is, return
df7492f9 3814 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3815
0a28aafb 3816static int
ff0dacd7 3817detect_coding_sjis (coding, detect_info)
df7492f9 3818 struct coding_system *coding;
ff0dacd7 3819 struct coding_detection_info *detect_info;
4ed46869 3820{
df7492f9
KH
3821 unsigned char *src = coding->source, *src_base = src;
3822 unsigned char *src_end = coding->source + coding->src_bytes;
3823 int multibytep = coding->src_multibyte;
3824 int consumed_chars = 0;
3825 int found = 0;
b73bfc1c 3826 int c;
89528eb3 3827 int incomplete;
df7492f9 3828
ff0dacd7 3829 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3830 /* A coding system of this category is always ASCII compatible. */
3831 src += coding->head_ascii;
4ed46869 3832
b73bfc1c 3833 while (1)
4ed46869 3834 {
89528eb3 3835 incomplete = 0;
df7492f9 3836 ONE_MORE_BYTE (c);
89528eb3 3837 incomplete = 1;
682169fe
KH
3838 if (c < 0x80)
3839 continue;
df7492f9 3840 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3841 {
df7492f9 3842 ONE_MORE_BYTE (c);
682169fe 3843 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3844 break;
ff0dacd7 3845 found = CATEGORY_MASK_SJIS;
4ed46869 3846 }
df7492f9 3847 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3848 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3849 else
3850 break;
4ed46869 3851 }
ff0dacd7 3852 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3853 return 0;
3854
3855 no_more_source:
89528eb3
KH
3856 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3857 {
ff0dacd7 3858 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3
KH
3859 return 0;
3860 }
ff0dacd7
KH
3861 detect_info->found |= found;
3862 return 1;
4ed46869
KH
3863}
3864
3865/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3866 Check if a text is encoded in BIG5. If it is, return
df7492f9 3867 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3868
0a28aafb 3869static int
ff0dacd7 3870detect_coding_big5 (coding, detect_info)
df7492f9 3871 struct coding_system *coding;
ff0dacd7 3872 struct coding_detection_info *detect_info;
4ed46869 3873{
df7492f9
KH
3874 unsigned char *src = coding->source, *src_base = src;
3875 unsigned char *src_end = coding->source + coding->src_bytes;
3876 int multibytep = coding->src_multibyte;
3877 int consumed_chars = 0;
3878 int found = 0;
b73bfc1c 3879 int c;
89528eb3 3880 int incomplete;
fa42c37f 3881
ff0dacd7 3882 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
3883 /* A coding system of this category is always ASCII compatible. */
3884 src += coding->head_ascii;
fa42c37f 3885
b73bfc1c 3886 while (1)
fa42c37f 3887 {
89528eb3 3888 incomplete = 0;
df7492f9 3889 ONE_MORE_BYTE (c);
89528eb3 3890 incomplete = 1;
df7492f9 3891 if (c < 0x80)
fa42c37f 3892 continue;
df7492f9 3893 if (c >= 0xA1)
fa42c37f 3894 {
df7492f9
KH
3895 ONE_MORE_BYTE (c);
3896 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3897 return 0;
ff0dacd7 3898 found = CATEGORY_MASK_BIG5;
fa42c37f 3899 }
df7492f9
KH
3900 else
3901 break;
fa42c37f 3902 }
ff0dacd7 3903 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 3904 return 0;
df7492f9
KH
3905
3906 no_more_source:
89528eb3
KH
3907 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3908 {
ff0dacd7 3909 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
3910 return 0;
3911 }
ff0dacd7
KH
3912 detect_info->found |= found;
3913 return 1;
fa42c37f
KH
3914}
3915
4ed46869
KH
3916/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3917 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3918
b73bfc1c 3919static void
df7492f9 3920decode_coding_sjis (coding)
4ed46869 3921 struct coding_system *coding;
4ed46869 3922{
df7492f9
KH
3923 unsigned char *src = coding->source + coding->consumed;
3924 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3925 unsigned char *src_base;
df7492f9 3926 int *charbuf = coding->charbuf;
ff0dacd7 3927 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
3928 int consumed_chars = 0, consumed_chars_base;
3929 int multibytep = coding->src_multibyte;
3930 struct charset *charset_roman, *charset_kanji, *charset_kana;
3931 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
3932 int char_offset = coding->produced_char;
3933 int last_offset = char_offset;
3934 int last_id = charset_ascii;
a5d301df 3935
df7492f9
KH
3936 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3937
3938 val = charset_list;
3939 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3
KH
3940 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3941 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3942
b73bfc1c 3943 while (1)
4ed46869 3944 {
df7492f9 3945 int c, c1;
b73bfc1c
KH
3946
3947 src_base = src;
df7492f9
KH
3948 consumed_chars_base = consumed_chars;
3949
3950 if (charbuf >= charbuf_end)
3951 break;
3952
3953 ONE_MORE_BYTE (c);
b73bfc1c 3954
df7492f9 3955 if (c == '\r')
4ed46869 3956 {
df7492f9 3957 if (EQ (eol_type, Qdos))
4ed46869 3958 {
df7492f9 3959 if (src == src_end)
98725083
KH
3960 {
3961 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3962 goto no_more_source;
3963 }
df7492f9
KH
3964 if (*src == '\n')
3965 ONE_MORE_BYTE (c);
4ed46869 3966 }
df7492f9
KH
3967 else if (EQ (eol_type, Qmac))
3968 c = '\n';
4ed46869 3969 }
54f78171 3970 else
df7492f9
KH
3971 {
3972 struct charset *charset;
3973
3974 if (c < 0x80)
3975 charset = charset_roman;
3976 else
4ed46869 3977 {
df7492f9
KH
3978 if (c >= 0xF0)
3979 goto invalid_code;
3980 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3981 {
54f78171 3982 /* SJIS -> JISX0208 */
df7492f9
KH
3983 ONE_MORE_BYTE (c1);
3984 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3985 goto invalid_code;
3986 c = (c << 8) | c1;
3987 SJIS_TO_JIS (c);
3988 charset = charset_kanji;
5e34de15 3989 }
fb88bf2d 3990 else
89528eb3
KH
3991 {
3992 /* SJIS -> JISX0201-Kana */
3993 c &= 0x7F;
3994 charset = charset_kana;
3995 }
df7492f9 3996 }
ff0dacd7
KH
3997 if (charset->id != charset_ascii
3998 && last_id != charset->id)
3999 {
4000 if (last_id != charset_ascii)
4001 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4002 last_id = charset->id;
4003 last_offset = char_offset;
4004 }
df7492f9
KH
4005 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4006 }
4007 *charbuf++ = c;
ff0dacd7 4008 char_offset++;
df7492f9
KH
4009 continue;
4010
4011 invalid_code:
4012 src = src_base;
4013 consumed_chars = consumed_chars_base;
4014 ONE_MORE_BYTE (c);
4015 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4016 char_offset++;
df7492f9
KH
4017 coding->errors++;
4018 }
4019
4020 no_more_source:
ff0dacd7
KH
4021 if (last_id != charset_ascii)
4022 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4023 coding->consumed_char += consumed_chars_base;
4024 coding->consumed = src_base - coding->source;
4025 coding->charbuf_used = charbuf - coding->charbuf;
4026}
4027
4028static void
4029decode_coding_big5 (coding)
4030 struct coding_system *coding;
4031{
4032 unsigned char *src = coding->source + coding->consumed;
4033 unsigned char *src_end = coding->source + coding->src_bytes;
4034 unsigned char *src_base;
4035 int *charbuf = coding->charbuf;
ff0dacd7 4036 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4037 int consumed_chars = 0, consumed_chars_base;
4038 int multibytep = coding->src_multibyte;
4039 struct charset *charset_roman, *charset_big5;
4040 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
4041 int char_offset = coding->produced_char;
4042 int last_offset = char_offset;
4043 int last_id = charset_ascii;
df7492f9
KH
4044
4045 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4046 val = charset_list;
4047 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4048 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4049
4050 while (1)
4051 {
4052 int c, c1;
4053
4054 src_base = src;
4055 consumed_chars_base = consumed_chars;
4056
4057 if (charbuf >= charbuf_end)
4058 break;
4059
4060 ONE_MORE_BYTE (c);
4061
4062 if (c == '\r')
4063 {
4064 if (EQ (eol_type, Qdos))
4065 {
4066 if (src == src_end)
98725083
KH
4067 {
4068 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4069 goto no_more_source;
4070 }
df7492f9
KH
4071 if (*src == '\n')
4072 ONE_MORE_BYTE (c);
4ed46869 4073 }
df7492f9
KH
4074 else if (EQ (eol_type, Qmac))
4075 c = '\n';
4076 }
4077 else
4078 {
4079 struct charset *charset;
4080 if (c < 0x80)
4081 charset = charset_roman;
fb88bf2d 4082 else
fb88bf2d 4083 {
54f78171 4084 /* BIG5 -> Big5 */
df7492f9
KH
4085 if (c < 0xA1 || c > 0xFE)
4086 goto invalid_code;
4087 ONE_MORE_BYTE (c1);
4088 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4089 goto invalid_code;
4090 c = c << 8 | c1;
4091 charset = charset_big5;
4ed46869 4092 }
ff0dacd7
KH
4093 if (charset->id != charset_ascii
4094 && last_id != charset->id)
4095 {
4096 if (last_id != charset_ascii)
4097 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4098 last_id = charset->id;
4099 last_offset = char_offset;
4100 }
df7492f9 4101 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 4102 }
4ed46869 4103
df7492f9 4104 *charbuf++ = c;
ff0dacd7 4105 char_offset++;
fb88bf2d
KH
4106 continue;
4107
df7492f9 4108 invalid_code:
4ed46869 4109 src = src_base;
df7492f9
KH
4110 consumed_chars = consumed_chars_base;
4111 ONE_MORE_BYTE (c);
4112 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4113 char_offset++;
df7492f9 4114 coding->errors++;
fb88bf2d 4115 }
d46c5b12 4116
df7492f9 4117 no_more_source:
ff0dacd7
KH
4118 if (last_id != charset_ascii)
4119 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4120 coding->consumed_char += consumed_chars_base;
4121 coding->consumed = src_base - coding->source;
4122 coding->charbuf_used = charbuf - coding->charbuf;
4123}
4124
4125/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4126 This function can encode charsets `ascii', `katakana-jisx0201',
4127 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4128 are sure that all these charsets are registered as official charset
4129 (i.e. do not have extended leading-codes). Characters of other
4130 charsets are produced without any encoding. If SJIS_P is 1, encode
4131 SJIS text, else encode BIG5 text. */
4132
4133static int
4134encode_coding_sjis (coding)
4135 struct coding_system *coding;
4136{
4137 int multibytep = coding->dst_multibyte;
4138 int *charbuf = coding->charbuf;
4139 int *charbuf_end = charbuf + coding->charbuf_used;
4140 unsigned char *dst = coding->destination + coding->produced;
4141 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4142 int safe_room = 4;
4143 int produced_chars = 0;
4144 Lisp_Object attrs, eol_type, charset_list, val;
4145 int ascii_compatible;
4146 struct charset *charset_roman, *charset_kanji, *charset_kana;
4147 int c;
4148
4149 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4150 val = charset_list;
4151 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4152 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4153 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4154
4155 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4156
4157 while (charbuf < charbuf_end)
4158 {
4159 ASSURE_DESTINATION (safe_room);
4160 c = *charbuf++;
4161 /* Now encode the character C. */
4162 if (ASCII_CHAR_P (c) && ascii_compatible)
4163 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4164 else if (CHAR_BYTE8_P (c))
4165 {
4166 c = CHAR_TO_BYTE8 (c);
4167 EMIT_ONE_BYTE (c);
4168 }
df7492f9
KH
4169 else
4170 {
4171 unsigned code;
4172 struct charset *charset = char_charset (c, charset_list, &code);
4173
4174 if (!charset)
4175 {
41cbe562
KH
4176 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4177 {
4178 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4179 charset = CHARSET_FROM_ID (charset_ascii);
4180 }
4181 else
4182 {
4183 c = coding->default_char;
4184 charset = char_charset (c, charset_list, &code);
4185 }
df7492f9
KH
4186 }
4187 if (code == CHARSET_INVALID_CODE (charset))
4188 abort ();
4189 if (charset == charset_kanji)
4190 {
4191 int c1, c2;
4192 JIS_TO_SJIS (code);
4193 c1 = code >> 8, c2 = code & 0xFF;
4194 EMIT_TWO_BYTES (c1, c2);
4195 }
4196 else if (charset == charset_kana)
4197 EMIT_ONE_BYTE (code | 0x80);
4198 else
4199 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4200 }
4201 }
4202 coding->result = CODING_RESULT_SUCCESS;
4203 coding->produced_char += produced_chars;
4204 coding->produced = dst - coding->destination;
4205 return 0;
4206}
4207
4208static int
4209encode_coding_big5 (coding)
4210 struct coding_system *coding;
4211{
4212 int multibytep = coding->dst_multibyte;
4213 int *charbuf = coding->charbuf;
4214 int *charbuf_end = charbuf + coding->charbuf_used;
4215 unsigned char *dst = coding->destination + coding->produced;
4216 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4217 int safe_room = 4;
4218 int produced_chars = 0;
4219 Lisp_Object attrs, eol_type, charset_list, val;
4220 int ascii_compatible;
4221 struct charset *charset_roman, *charset_big5;
4222 int c;
4223
4224 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4225 val = charset_list;
4226 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4227 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4228 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4229
4230 while (charbuf < charbuf_end)
4231 {
4232 ASSURE_DESTINATION (safe_room);
4233 c = *charbuf++;
4234 /* Now encode the character C. */
4235 if (ASCII_CHAR_P (c) && ascii_compatible)
4236 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4237 else if (CHAR_BYTE8_P (c))
4238 {
4239 c = CHAR_TO_BYTE8 (c);
4240 EMIT_ONE_BYTE (c);
4241 }
df7492f9
KH
4242 else
4243 {
4244 unsigned code;
4245 struct charset *charset = char_charset (c, charset_list, &code);
4246
4247 if (! charset)
4248 {
41cbe562
KH
4249 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4250 {
4251 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4252 charset = CHARSET_FROM_ID (charset_ascii);
4253 }
4254 else
4255 {
4256 c = coding->default_char;
4257 charset = char_charset (c, charset_list, &code);
4258 }
df7492f9
KH
4259 }
4260 if (code == CHARSET_INVALID_CODE (charset))
4261 abort ();
4262 if (charset == charset_big5)
4263 {
4264 int c1, c2;
4265
4266 c1 = code >> 8, c2 = code & 0xFF;
4267 EMIT_TWO_BYTES (c1, c2);
4268 }
4269 else
4270 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4271 }
4272 }
4273 coding->result = CODING_RESULT_SUCCESS;
4274 coding->produced_char += produced_chars;
4275 coding->produced = dst - coding->destination;
4276 return 0;
4277}
4278
4279\f
4280/*** 10. CCL handlers ***/
4281
4282/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4283 Check if a text is encoded in a coding system of which
4284 encoder/decoder are written in CCL program. If it is, return
4285 CATEGORY_MASK_CCL, else return 0. */
4286
4287static int
ff0dacd7 4288detect_coding_ccl (coding, detect_info)
df7492f9 4289 struct coding_system *coding;
ff0dacd7 4290 struct coding_detection_info *detect_info;
df7492f9
KH
4291{
4292 unsigned char *src = coding->source, *src_base = src;
4293 unsigned char *src_end = coding->source + coding->src_bytes;
4294 int multibytep = coding->src_multibyte;
4295 int consumed_chars = 0;
4296 int found = 0;
4297 unsigned char *valids = CODING_CCL_VALIDS (coding);
4298 int head_ascii = coding->head_ascii;
4299 Lisp_Object attrs;
4300
ff0dacd7
KH
4301 detect_info->checked |= CATEGORY_MASK_CCL;
4302
df7492f9
KH
4303 coding = &coding_categories[coding_category_ccl];
4304 attrs = CODING_ID_ATTRS (coding->id);
4305 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4306 src += head_ascii;
4307
4308 while (1)
4309 {
4310 int c;
4311 ONE_MORE_BYTE (c);
4312 if (! valids[c])
4313 break;
ff0dacd7
KH
4314 if ((valids[c] > 1))
4315 found = CATEGORY_MASK_CCL;
df7492f9 4316 }
ff0dacd7 4317 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4318 return 0;
4319
4320 no_more_source:
ff0dacd7
KH
4321 detect_info->found |= found;
4322 return 1;
df7492f9
KH
4323}
4324
4325static void
4326decode_coding_ccl (coding)
4327 struct coding_system *coding;
4328{
7c78e542 4329 const unsigned char *src = coding->source + coding->consumed;
df7492f9
KH
4330 unsigned char *src_end = coding->source + coding->src_bytes;
4331 int *charbuf = coding->charbuf;
4332 int *charbuf_end = charbuf + coding->charbuf_size;
4333 int consumed_chars = 0;
4334 int multibytep = coding->src_multibyte;
4335 struct ccl_program ccl;
4336 int source_charbuf[1024];
4337 int source_byteidx[1024];
4776e638 4338 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4339
8dcbea82 4340 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4341 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4342
4343 while (src < src_end)
4344 {
7c78e542 4345 const unsigned char *p = src;
df7492f9
KH
4346 int *source, *source_end;
4347 int i = 0;
4348
4349 if (multibytep)
4350 while (i < 1024 && p < src_end)
4351 {
4352 source_byteidx[i] = p - src;
4353 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4354 }
4355 else
4356 while (i < 1024 && p < src_end)
4357 source_charbuf[i++] = *p++;
4358
4359 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4360 ccl.last_block = 1;
4361
4362 source = source_charbuf;
4363 source_end = source + i;
4364 while (source < source_end)
4365 {
4366 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4367 source_end - source, charbuf_end - charbuf,
4368 charset_list);
df7492f9
KH
4369 source += ccl.consumed;
4370 charbuf += ccl.produced;
4371 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4372 break;
4373 }
4374 if (source < source_end)
4375 src += source_byteidx[source - source_charbuf];
4376 else
4377 src = p;
4378 consumed_chars += source - source_charbuf;
4379
4380 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4381 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4382 break;
4383 }
4384
4385 switch (ccl.status)
4386 {
4387 case CCL_STAT_SUSPEND_BY_SRC:
4388 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4389 break;
4390 case CCL_STAT_SUSPEND_BY_DST:
4391 break;
4392 case CCL_STAT_QUIT:
4393 case CCL_STAT_INVALID_CMD:
4394 coding->result = CODING_RESULT_INTERRUPT;
4395 break;
4396 default:
4397 coding->result = CODING_RESULT_SUCCESS;
4398 break;
4399 }
4400 coding->consumed_char += consumed_chars;
4401 coding->consumed = src - coding->source;
4402 coding->charbuf_used = charbuf - coding->charbuf;
4403}
4404
4405static int
4406encode_coding_ccl (coding)
4407 struct coding_system *coding;
4408{
4409 struct ccl_program ccl;
4410 int multibytep = coding->dst_multibyte;
4411 int *charbuf = coding->charbuf;
4412 int *charbuf_end = charbuf + coding->charbuf_used;
4413 unsigned char *dst = coding->destination + coding->produced;
4414 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4415 unsigned char *adjusted_dst_end = dst_end - 1;
4416 int destination_charbuf[1024];
4417 int i, produced_chars = 0;
8dcbea82 4418 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4419
8dcbea82 4420 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4421 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4422
4423 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4424 ccl.dst_multibyte = coding->dst_multibyte;
4425
4426 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4427 {
4428 int dst_bytes = dst_end - dst;
4429 if (dst_bytes > 1024)
4430 dst_bytes = 1024;
4431
4432 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4433 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4434 charbuf += ccl.consumed;
4435 if (multibytep)
4436 for (i = 0; i < ccl.produced; i++)
4437 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4438 else
4439 {
4440 for (i = 0; i < ccl.produced; i++)
4441 *dst++ = destination_charbuf[i] & 0xFF;
4442 produced_chars += ccl.produced;
4443 }
4444 }
4445
4446 switch (ccl.status)
4447 {
4448 case CCL_STAT_SUSPEND_BY_SRC:
4449 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4450 break;
4451 case CCL_STAT_SUSPEND_BY_DST:
4452 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4453 break;
4454 case CCL_STAT_QUIT:
4455 case CCL_STAT_INVALID_CMD:
4456 coding->result = CODING_RESULT_INTERRUPT;
4457 break;
4458 default:
4459 coding->result = CODING_RESULT_SUCCESS;
4460 break;
4461 }
4462
4463 coding->produced_char += produced_chars;
4464 coding->produced = dst - coding->destination;
4465 return 0;
4ed46869
KH
4466}
4467
df7492f9
KH
4468
4469\f
4470/*** 10, 11. no-conversion handlers ***/
4471
4472/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4473
b73bfc1c 4474static void
df7492f9 4475decode_coding_raw_text (coding)
4ed46869 4476 struct coding_system *coding;
4ed46869 4477{
df7492f9 4478 coding->chars_at_source = 1;
2c78b7e1
KH
4479 coding->consumed_char = 0;
4480 coding->consumed = 0;
df7492f9
KH
4481 coding->result = CODING_RESULT_SUCCESS;
4482}
4ed46869 4483
df7492f9
KH
4484static int
4485encode_coding_raw_text (coding)
4486 struct coding_system *coding;
4487{
4488 int multibytep = coding->dst_multibyte;
4489 int *charbuf = coding->charbuf;
4490 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4491 unsigned char *dst = coding->destination + coding->produced;
4492 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4493 int produced_chars = 0;
4494 int c;
a5d301df 4495
df7492f9 4496 if (multibytep)
b73bfc1c 4497 {
df7492f9 4498 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4499
df7492f9
KH
4500 if (coding->src_multibyte)
4501 while (charbuf < charbuf_end)
4502 {
4503 ASSURE_DESTINATION (safe_room);
4504 c = *charbuf++;
4505 if (ASCII_CHAR_P (c))
4506 EMIT_ONE_ASCII_BYTE (c);
4507 else if (CHAR_BYTE8_P (c))
4508 {
4509 c = CHAR_TO_BYTE8 (c);
4510 EMIT_ONE_BYTE (c);
4511 }
4512 else
4513 {
4514 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4515
df7492f9
KH
4516 CHAR_STRING_ADVANCE (c, p1);
4517 while (p0 < p1)
9d123124
KH
4518 {
4519 EMIT_ONE_BYTE (*p0);
4520 p0++;
4521 }
df7492f9
KH
4522 }
4523 }
b73bfc1c 4524 else
df7492f9
KH
4525 while (charbuf < charbuf_end)
4526 {
4527 ASSURE_DESTINATION (safe_room);
4528 c = *charbuf++;
4529 EMIT_ONE_BYTE (c);
4530 }
4531 }
4532 else
4533 {
4534 if (coding->src_multibyte)
b73bfc1c 4535 {
df7492f9
KH
4536 int safe_room = MAX_MULTIBYTE_LENGTH;
4537
4538 while (charbuf < charbuf_end)
b73bfc1c 4539 {
df7492f9
KH
4540 ASSURE_DESTINATION (safe_room);
4541 c = *charbuf++;
4542 if (ASCII_CHAR_P (c))
4543 *dst++ = c;
4544 else if (CHAR_BYTE8_P (c))
4545 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4546 else
df7492f9
KH
4547 CHAR_STRING_ADVANCE (c, dst);
4548 produced_chars++;
b73bfc1c 4549 }
4ed46869 4550 }
df7492f9
KH
4551 else
4552 {
4553 ASSURE_DESTINATION (charbuf_end - charbuf);
4554 while (charbuf < charbuf_end && dst < dst_end)
4555 *dst++ = *charbuf++;
4556 produced_chars = dst - (coding->destination + coding->dst_bytes);
4557 }
4ed46869 4558 }
df7492f9
KH
4559 coding->result = CODING_RESULT_SUCCESS;
4560 coding->produced_char += produced_chars;
4561 coding->produced = dst - coding->destination;
4562 return 0;
4ed46869
KH
4563}
4564
ff0dacd7
KH
4565/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4566 Check if a text is encoded in a charset-based coding system. If it
4567 is, return 1, else return 0. */
4568
0a28aafb 4569static int
ff0dacd7 4570detect_coding_charset (coding, detect_info)
df7492f9 4571 struct coding_system *coding;
ff0dacd7 4572 struct coding_detection_info *detect_info;
1397dc18 4573{
df7492f9
KH
4574 unsigned char *src = coding->source, *src_base = src;
4575 unsigned char *src_end = coding->source + coding->src_bytes;
4576 int multibytep = coding->src_multibyte;
4577 int consumed_chars = 0;
4578 Lisp_Object attrs, valids;
584948ac 4579 int found = 0;
1397dc18 4580
ff0dacd7
KH
4581 detect_info->checked |= CATEGORY_MASK_CHARSET;
4582
df7492f9
KH
4583 coding = &coding_categories[coding_category_charset];
4584 attrs = CODING_ID_ATTRS (coding->id);
4585 valids = AREF (attrs, coding_attr_charset_valids);
4586
4587 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4588 src += coding->head_ascii;
1397dc18 4589
b73bfc1c 4590 while (1)
1397dc18 4591 {
df7492f9 4592 int c;
1397dc18 4593
df7492f9
KH
4594 ONE_MORE_BYTE (c);
4595 if (NILP (AREF (valids, c)))
4596 break;
584948ac 4597 if (c >= 0x80)
ff0dacd7 4598 found = CATEGORY_MASK_CHARSET;
df7492f9 4599 }
ff0dacd7 4600 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4601 return 0;
4ed46869 4602
df7492f9 4603 no_more_source:
ff0dacd7
KH
4604 detect_info->found |= found;
4605 return 1;
df7492f9 4606}
4ed46869 4607
b73bfc1c 4608static void
df7492f9 4609decode_coding_charset (coding)
4ed46869 4610 struct coding_system *coding;
4ed46869 4611{
df7492f9
KH
4612 unsigned char *src = coding->source + coding->consumed;
4613 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4614 unsigned char *src_base;
df7492f9 4615 int *charbuf = coding->charbuf;
ff0dacd7 4616 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4617 int consumed_chars = 0, consumed_chars_base;
4618 int multibytep = coding->src_multibyte;
4eb6d3f1 4619 Lisp_Object attrs, eol_type, charset_list, valids;
ff0dacd7
KH
4620 int char_offset = coding->produced_char;
4621 int last_offset = char_offset;
4622 int last_id = charset_ascii;
df7492f9
KH
4623
4624 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4625 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4626
df7492f9 4627 while (1)
4ed46869 4628 {
4eb6d3f1 4629 int c;
df7492f9
KH
4630
4631 src_base = src;
4632 consumed_chars_base = consumed_chars;
b73bfc1c 4633
df7492f9
KH
4634 if (charbuf >= charbuf_end)
4635 break;
4636
4eb6d3f1 4637 ONE_MORE_BYTE (c);
df7492f9 4638 if (c == '\r')
d46c5b12 4639 {
c7c66a95
KH
4640 /* Here we assume that no charset maps '\r' to something
4641 else. */
df7492f9 4642 if (EQ (eol_type, Qdos))
b73bfc1c 4643 {
98725083
KH
4644 if (src == src_end)
4645 {
4646 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4647 goto no_more_source;
4648 }
4649 if (*src == '\n')
df7492f9 4650 ONE_MORE_BYTE (c);
b73bfc1c 4651 }
df7492f9 4652 else if (EQ (eol_type, Qmac))
b73bfc1c 4653 c = '\n';
d46c5b12 4654 }
df7492f9 4655 else
d46c5b12 4656 {
4eb6d3f1
KH
4657 Lisp_Object val;
4658 struct charset *charset;
c7c66a95 4659 int dim;
acb2a965
KH
4660 int len = 1;
4661 unsigned code = c;
4eb6d3f1
KH
4662
4663 val = AREF (valids, c);
4664 if (NILP (val))
4665 goto invalid_code;
c7c66a95 4666 if (INTEGERP (val))
4eb6d3f1 4667 {
c7c66a95
KH
4668 charset = CHARSET_FROM_ID (XFASTINT (val));
4669 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4670 while (len < dim)
4eb6d3f1 4671 {
acb2a965
KH
4672 ONE_MORE_BYTE (c);
4673 code = (code << 8) | c;
f9d71dcd 4674 len++;
4eb6d3f1 4675 }
c7c66a95
KH
4676 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4677 charset, code, c);
4678 }
4679 else
4680 {
4681 /* VAL is a list of charset IDs. It is assured that the
4682 list is sorted by charset dimensions (smaller one
4683 comes first). */
c7c66a95
KH
4684 while (CONSP (val))
4685 {
4686 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4687 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4688 while (len < dim)
c7c66a95 4689 {
acb2a965
KH
4690 ONE_MORE_BYTE (c);
4691 code = (code << 8) | c;
f9d71dcd 4692 len++;
c7c66a95 4693 }
c7c66a95
KH
4694 CODING_DECODE_CHAR (coding, src, src_base,
4695 src_end, charset, code, c);
4696 if (c >= 0)
4697 break;
4698 val = XCDR (val);
4699 }
4eb6d3f1 4700 }
df7492f9
KH
4701 if (c < 0)
4702 goto invalid_code;
ff0dacd7
KH
4703 if (charset->id != charset_ascii
4704 && last_id != charset->id)
4705 {
4706 if (last_id != charset_ascii)
4707 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4708 last_id = charset->id;
4709 last_offset = char_offset;
4710 }
d46c5b12 4711 }
df7492f9 4712 *charbuf++ = c;
ff0dacd7 4713 char_offset++;
df7492f9
KH
4714 continue;
4715
4716 invalid_code:
4717 src = src_base;
4718 consumed_chars = consumed_chars_base;
4719 ONE_MORE_BYTE (c);
4720 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4721 char_offset++;
df7492f9 4722 coding->errors++;
4ed46869
KH
4723 }
4724
df7492f9 4725 no_more_source:
ff0dacd7
KH
4726 if (last_id != charset_ascii)
4727 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4728 coding->consumed_char += consumed_chars_base;
4729 coding->consumed = src_base - coding->source;
4730 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4731}
4732
df7492f9
KH
4733static int
4734encode_coding_charset (coding)
4ed46869 4735 struct coding_system *coding;
4ed46869 4736{
df7492f9
KH
4737 int multibytep = coding->dst_multibyte;
4738 int *charbuf = coding->charbuf;
4739 int *charbuf_end = charbuf + coding->charbuf_used;
4740 unsigned char *dst = coding->destination + coding->produced;
4741 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4742 int safe_room = MAX_MULTIBYTE_LENGTH;
4743 int produced_chars = 0;
df7492f9
KH
4744 Lisp_Object attrs, eol_type, charset_list;
4745 int ascii_compatible;
b73bfc1c 4746 int c;
b73bfc1c 4747
df7492f9 4748 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4749 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4750
df7492f9 4751 while (charbuf < charbuf_end)
4ed46869 4752 {
4eb6d3f1 4753 struct charset *charset;
df7492f9
KH
4754 unsigned code;
4755
4756 ASSURE_DESTINATION (safe_room);
4757 c = *charbuf++;
4758 if (ascii_compatible && ASCII_CHAR_P (c))
4759 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4760 else if (CHAR_BYTE8_P (c))
4761 {
4762 c = CHAR_TO_BYTE8 (c);
4763 EMIT_ONE_BYTE (c);
4764 }
d46c5b12 4765 else
4eb6d3f1
KH
4766 {
4767 charset = char_charset (c, charset_list, &code);
4768 if (charset)
4769 {
4770 if (CHARSET_DIMENSION (charset) == 1)
4771 EMIT_ONE_BYTE (code);
4772 else if (CHARSET_DIMENSION (charset) == 2)
4773 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4774 else if (CHARSET_DIMENSION (charset) == 3)
4775 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4776 else
4777 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4778 (code >> 8) & 0xFF, code & 0xFF);
4779 }
4780 else
41cbe562
KH
4781 {
4782 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4783 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4784 else
4785 c = coding->default_char;
4786 EMIT_ONE_BYTE (c);
4787 }
4eb6d3f1 4788 }
4ed46869
KH
4789 }
4790
df7492f9
KH
4791 coding->result = CODING_RESULT_SUCCESS;
4792 coding->produced_char += produced_chars;
4793 coding->produced = dst - coding->destination;
4794 return 0;
4ed46869
KH
4795}
4796
4797\f
1397dc18 4798/*** 7. C library functions ***/
4ed46869 4799
df7492f9
KH
4800/* Setup coding context CODING from information about CODING_SYSTEM.
4801 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4802 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4803
4804void
df7492f9
KH
4805setup_coding_system (coding_system, coding)
4806 Lisp_Object coding_system;
ec6d2bb8
KH
4807 struct coding_system *coding;
4808{
df7492f9
KH
4809 Lisp_Object attrs;
4810 Lisp_Object eol_type;
4811 Lisp_Object coding_type;
4812 Lisp_Object val;
ec6d2bb8 4813
df7492f9
KH
4814 if (NILP (coding_system))
4815 coding_system = Qno_conversion;
4816
4817 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4818
4819 attrs = CODING_ID_ATTRS (coding->id);
4820 eol_type = CODING_ID_EOL_TYPE (coding->id);
4821
4822 coding->mode = 0;
4823 coding->head_ascii = -1;
4824 coding->common_flags
4825 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4826
4827 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4828 coding->max_charset_id = XSTRING (val)->size - 1;
4829 coding->safe_charsets = (char *) XSTRING (val)->data;
4830 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4831
4832 coding_type = CODING_ATTR_TYPE (attrs);
4833 if (EQ (coding_type, Qundecided))
4834 {
4835 coding->detector = NULL;
4836 coding->decoder = decode_coding_raw_text;
4837 coding->encoder = encode_coding_raw_text;
4838 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4839 }
4840 else if (EQ (coding_type, Qiso_2022))
4841 {
4842 int i;
4843 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4844
4845 /* Invoke graphic register 0 to plane 0. */
4846 CODING_ISO_INVOCATION (coding, 0) = 0;
4847 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4848 CODING_ISO_INVOCATION (coding, 1)
4849 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4850 /* Setup the initial status of designation. */
4851 for (i = 0; i < 4; i++)
4852 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4853 /* Not single shifting initially. */
4854 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4855 /* Beginning of buffer should also be regarded as bol. */
4856 CODING_ISO_BOL (coding) = 1;
4857 coding->detector = detect_coding_iso_2022;
4858 coding->decoder = decode_coding_iso_2022;
4859 coding->encoder = encode_coding_iso_2022;
4860 if (flags & CODING_ISO_FLAG_SAFE)
4861 coding->mode |= CODING_MODE_SAFE_ENCODING;
4862 coding->common_flags
4863 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4864 | CODING_REQUIRE_FLUSHING_MASK);
4865 if (flags & CODING_ISO_FLAG_COMPOSITION)
4866 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4867 if (flags & CODING_ISO_FLAG_DESIGNATION)
4868 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4869 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4870 {
4871 setup_iso_safe_charsets (attrs);
4872 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4873 coding->max_charset_id = XSTRING (val)->size - 1;
4874 coding->safe_charsets = (char *) XSTRING (val)->data;
4875 }
4876 CODING_ISO_FLAGS (coding) = flags;
4877 }
4878 else if (EQ (coding_type, Qcharset))
4879 {
4880 coding->detector = detect_coding_charset;
4881 coding->decoder = decode_coding_charset;
4882 coding->encoder = encode_coding_charset;
4883 coding->common_flags
4884 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4885 }
4886 else if (EQ (coding_type, Qutf_8))
4887 {
4888 coding->detector = detect_coding_utf_8;
4889 coding->decoder = decode_coding_utf_8;
4890 coding->encoder = encode_coding_utf_8;
4891 coding->common_flags
4892 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4893 }
4894 else if (EQ (coding_type, Qutf_16))
4895 {
4896 val = AREF (attrs, coding_attr_utf_16_bom);
4897 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4898 : EQ (val, Qt) ? utf_16_with_bom
4899 : utf_16_without_bom);
4900 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 4901 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 4902 : utf_16_little_endian);
e19c3639 4903 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4904 coding->detector = detect_coding_utf_16;
4905 coding->decoder = decode_coding_utf_16;
4906 coding->encoder = encode_coding_utf_16;
4907 coding->common_flags
4908 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
4909 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4910 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
df7492f9
KH
4911 }
4912 else if (EQ (coding_type, Qccl))
4913 {
4914 coding->detector = detect_coding_ccl;
4915 coding->decoder = decode_coding_ccl;
4916 coding->encoder = encode_coding_ccl;
4917 coding->common_flags
4918 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4919 | CODING_REQUIRE_FLUSHING_MASK);
4920 }
4921 else if (EQ (coding_type, Qemacs_mule))
4922 {
4923 coding->detector = detect_coding_emacs_mule;
4924 coding->decoder = decode_coding_emacs_mule;
4925 coding->encoder = encode_coding_emacs_mule;
4926 coding->common_flags
4927 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4928 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4929 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4930 {
4931 Lisp_Object tail, safe_charsets;
4932 int max_charset_id = 0;
4933
4934 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4935 tail = XCDR (tail))
4936 if (max_charset_id < XFASTINT (XCAR (tail)))
4937 max_charset_id = XFASTINT (XCAR (tail));
4938 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4939 make_number (255));
4940 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4941 tail = XCDR (tail))
4942 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4943 coding->max_charset_id = max_charset_id;
4944 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4945 }
4946 }
4947 else if (EQ (coding_type, Qshift_jis))
4948 {
4949 coding->detector = detect_coding_sjis;
4950 coding->decoder = decode_coding_sjis;
4951 coding->encoder = encode_coding_sjis;
4952 coding->common_flags
4953 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4954 }
4955 else if (EQ (coding_type, Qbig5))
4956 {
4957 coding->detector = detect_coding_big5;
4958 coding->decoder = decode_coding_big5;
4959 coding->encoder = encode_coding_big5;
4960 coding->common_flags
4961 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4962 }
4963 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4964 {
df7492f9
KH
4965 coding->detector = NULL;
4966 coding->decoder = decode_coding_raw_text;
4967 coding->encoder = encode_coding_raw_text;
4968 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4969 }
df7492f9
KH
4970
4971 return;
ec6d2bb8
KH
4972}
4973
df7492f9
KH
4974/* Return raw-text or one of its subsidiaries that has the same
4975 eol_type as CODING-SYSTEM. */
ec6d2bb8 4976
df7492f9
KH
4977Lisp_Object
4978raw_text_coding_system (coding_system)
4979 Lisp_Object coding_system;
ec6d2bb8 4980{
0be8721c 4981 Lisp_Object spec, attrs;
df7492f9
KH
4982 Lisp_Object eol_type, raw_text_eol_type;
4983
4984 spec = CODING_SYSTEM_SPEC (coding_system);
4985 attrs = AREF (spec, 0);
4986
4987 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4988 return coding_system;
ec6d2bb8 4989
df7492f9
KH
4990 eol_type = AREF (spec, 2);
4991 if (VECTORP (eol_type))
4992 return Qraw_text;
4993 spec = CODING_SYSTEM_SPEC (Qraw_text);
4994 raw_text_eol_type = AREF (spec, 2);
4995 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4996 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4997 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4998}
4999
54f78171 5000
df7492f9
KH
5001/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5002 does, return one of the subsidiary that has the same eol-spec as
5003 PARENT. Otherwise, return CODING_SYSTEM. */
5004
5005Lisp_Object
5006coding_inherit_eol_type (coding_system, parent)
b74e4686 5007 Lisp_Object coding_system, parent;
54f78171 5008{
df7492f9 5009 Lisp_Object spec, attrs, eol_type;
54f78171 5010
df7492f9
KH
5011 spec = CODING_SYSTEM_SPEC (coding_system);
5012 attrs = AREF (spec, 0);
5013 eol_type = AREF (spec, 2);
5014 if (VECTORP (eol_type))
5015 {
5016 Lisp_Object parent_spec;
df7492f9
KH
5017 Lisp_Object parent_eol_type;
5018
5019 parent_spec
5020 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5021 parent_eol_type = AREF (parent_spec, 2);
5022 if (EQ (parent_eol_type, Qunix))
5023 coding_system = AREF (eol_type, 0);
5024 else if (EQ (parent_eol_type, Qdos))
5025 coding_system = AREF (eol_type, 1);
5026 else if (EQ (parent_eol_type, Qmac))
5027 coding_system = AREF (eol_type, 2);
54f78171 5028 }
df7492f9 5029 return coding_system;
54f78171
KH
5030}
5031
4ed46869
KH
5032/* Emacs has a mechanism to automatically detect a coding system if it
5033 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5034 it's impossible to distinguish some coding systems accurately
5035 because they use the same range of codes. So, at first, coding
5036 systems are categorized into 7, those are:
5037
0ef69138 5038 o coding-category-emacs-mule
4ed46869
KH
5039
5040 The category for a coding system which has the same code range
5041 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5042 symbol) `emacs-mule' by default.
4ed46869
KH
5043
5044 o coding-category-sjis
5045
5046 The category for a coding system which has the same code range
5047 as SJIS. Assigned the coding-system (Lisp
7717c392 5048 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5049
5050 o coding-category-iso-7
5051
5052 The category for a coding system which has the same code range
7717c392 5053 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5054 shift and single shift functions. This can encode/decode all
5055 charsets. Assigned the coding-system (Lisp symbol)
5056 `iso-2022-7bit' by default.
5057
5058 o coding-category-iso-7-tight
5059
5060 Same as coding-category-iso-7 except that this can
5061 encode/decode only the specified charsets.
4ed46869
KH
5062
5063 o coding-category-iso-8-1
5064
5065 The category for a coding system which has the same code range
5066 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5067 for DIMENSION1 charset. This doesn't use any locking shift
5068 and single shift functions. Assigned the coding-system (Lisp
5069 symbol) `iso-latin-1' by default.
4ed46869
KH
5070
5071 o coding-category-iso-8-2
5072
5073 The category for a coding system which has the same code range
5074 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5075 for DIMENSION2 charset. This doesn't use any locking shift
5076 and single shift functions. Assigned the coding-system (Lisp
5077 symbol) `japanese-iso-8bit' by default.
4ed46869 5078
7717c392 5079 o coding-category-iso-7-else
4ed46869
KH
5080
5081 The category for a coding system which has the same code range
df7492f9 5082 as ISO2022 of 7-bit environemnt but uses locking shift or
7717c392
KH
5083 single shift functions. Assigned the coding-system (Lisp
5084 symbol) `iso-2022-7bit-lock' by default.
5085
5086 o coding-category-iso-8-else
5087
5088 The category for a coding system which has the same code range
df7492f9 5089 as ISO2022 of 8-bit environemnt but uses locking shift or
7717c392
KH
5090 single shift functions. Assigned the coding-system (Lisp
5091 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5092
5093 o coding-category-big5
5094
5095 The category for a coding system which has the same code range
5096 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5097 `cn-big5' by default.
4ed46869 5098
fa42c37f
KH
5099 o coding-category-utf-8
5100
5101 The category for a coding system which has the same code range
5102 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5103 symbol) `utf-8' by default.
5104
5105 o coding-category-utf-16-be
5106
5107 The category for a coding system in which a text has an
5108 Unicode signature (cf. Unicode Standard) in the order of BIG
5109 endian at the head. Assigned the coding-system (Lisp symbol)
5110 `utf-16-be' by default.
5111
5112 o coding-category-utf-16-le
5113
5114 The category for a coding system in which a text has an
5115 Unicode signature (cf. Unicode Standard) in the order of
5116 LITTLE endian at the head. Assigned the coding-system (Lisp
5117 symbol) `utf-16-le' by default.
5118
1397dc18
KH
5119 o coding-category-ccl
5120
5121 The category for a coding system of which encoder/decoder is
5122 written in CCL programs. The default value is nil, i.e., no
5123 coding system is assigned.
5124
4ed46869
KH
5125 o coding-category-binary
5126
5127 The category for a coding system not categorized in any of the
5128 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5129 `no-conversion' by default.
4ed46869
KH
5130
5131 Each of them is a Lisp symbol and the value is an actual
df7492f9 5132 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5133 What Emacs does actually is to detect a category of coding system.
5134 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5135 decide only one possible category, it selects a category of the
4ed46869
KH
5136 highest priority. Priorities of categories are also specified by a
5137 user in a Lisp variable `coding-category-list'.
5138
5139*/
5140
df7492f9
KH
5141#define EOL_SEEN_NONE 0
5142#define EOL_SEEN_LF 1
5143#define EOL_SEEN_CR 2
5144#define EOL_SEEN_CRLF 4
4ed46869 5145
ff0dacd7
KH
5146/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5147 SOURCE is encoded. If CATEGORY is one of
5148 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5149 two-byte, else they are encoded by one-byte.
5150
5151 Return one of EOL_SEEN_XXX. */
4ed46869 5152
bc4bc72a
RS
5153#define MAX_EOL_CHECK_COUNT 3
5154
d46c5b12 5155static int
89528eb3 5156detect_eol (source, src_bytes, category)
d46c5b12 5157 unsigned char *source;
df7492f9 5158 EMACS_INT src_bytes;
89528eb3 5159 enum coding_category category;
4ed46869 5160{
d46c5b12 5161 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5162 unsigned char c;
df7492f9
KH
5163 int total = 0;
5164 int eol_seen = EOL_SEEN_NONE;
4ed46869 5165
89528eb3 5166 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5167 {
df7492f9 5168 int msb, lsb;
fa42c37f 5169
89528eb3
KH
5170 msb = category == (coding_category_utf_16_le
5171 | coding_category_utf_16_le_nosig);
df7492f9 5172 lsb = 1 - msb;
fa42c37f 5173
df7492f9 5174 while (src + 1 < src_end)
fa42c37f 5175 {
df7492f9
KH
5176 c = src[lsb];
5177 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5178 {
df7492f9
KH
5179 int this_eol;
5180
5181 if (c == '\n')
5182 this_eol = EOL_SEEN_LF;
5183 else if (src + 3 >= src_end
5184 || src[msb + 2] != 0
5185 || src[lsb + 2] != '\n')
5186 this_eol = EOL_SEEN_CR;
fa42c37f 5187 else
df7492f9
KH
5188 this_eol = EOL_SEEN_CRLF;
5189
5190 if (eol_seen == EOL_SEEN_NONE)
5191 /* This is the first end-of-line. */
5192 eol_seen = this_eol;
5193 else if (eol_seen != this_eol)
fa42c37f 5194 {
df7492f9
KH
5195 /* The found type is different from what found before. */
5196 eol_seen = EOL_SEEN_LF;
5197 break;
fa42c37f 5198 }
df7492f9
KH
5199 if (++total == MAX_EOL_CHECK_COUNT)
5200 break;
fa42c37f 5201 }
df7492f9 5202 src += 2;
fa42c37f 5203 }
df7492f9 5204 }
d46c5b12 5205 else
27901516 5206 {
df7492f9 5207 while (src < src_end)
27901516 5208 {
df7492f9
KH
5209 c = *src++;
5210 if (c == '\n' || c == '\r')
5211 {
5212 int this_eol;
d46c5b12 5213
df7492f9
KH
5214 if (c == '\n')
5215 this_eol = EOL_SEEN_LF;
5216 else if (src >= src_end || *src != '\n')
5217 this_eol = EOL_SEEN_CR;
5218 else
5219 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5220
df7492f9
KH
5221 if (eol_seen == EOL_SEEN_NONE)
5222 /* This is the first end-of-line. */
5223 eol_seen = this_eol;
5224 else if (eol_seen != this_eol)
5225 {
5226 /* The found type is different from what found before. */
5227 eol_seen = EOL_SEEN_LF;
5228 break;
5229 }
5230 if (++total == MAX_EOL_CHECK_COUNT)
5231 break;
5232 }
5233 }
73be902c 5234 }
df7492f9 5235 return eol_seen;
73be902c
KH
5236}
5237
df7492f9 5238
73be902c 5239static void
df7492f9
KH
5240adjust_coding_eol_type (coding, eol_seen)
5241 struct coding_system *coding;
5242 int eol_seen;
73be902c 5243{
0be8721c 5244 Lisp_Object eol_type;
df7492f9
KH
5245
5246 eol_type = CODING_ID_EOL_TYPE (coding->id);
5247 if (eol_seen & EOL_SEEN_LF)
5248 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6f197c07 5249 else if (eol_seen & EOL_SEEN_CRLF)
df7492f9 5250 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6f197c07 5251 else if (eol_seen & EOL_SEEN_CR)
df7492f9 5252 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
5253}
5254
df7492f9
KH
5255/* Detect how a text specified in CODING is encoded. If a coding
5256 system is detected, update fields of CODING by the detected coding
5257 system. */
5258
5259void
5260detect_coding (coding)
d46c5b12 5261 struct coding_system *coding;
d46c5b12 5262{
df7492f9
KH
5263 unsigned char *src, *src_end;
5264 Lisp_Object attrs, coding_type;
d46c5b12 5265
df7492f9
KH
5266 coding->consumed = coding->consumed_char = 0;
5267 coding->produced = coding->produced_char = 0;
5268 coding_set_source (coding);
1c3478b0 5269
df7492f9 5270 src_end = coding->source + coding->src_bytes;
1c3478b0 5271
df7492f9
KH
5272 /* If we have not yet decided the text encoding type, detect it
5273 now. */
5274 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5275 {
df7492f9
KH
5276 int c, i;
5277
5278 for (src = coding->source; src < src_end; src++)
5279 {
5280 c = *src;
5281 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5282 || c == ISO_CODE_SI
5283 || c == ISO_CODE_SO)))
5284 break;
5285 }
5286 coding->head_ascii = src - (coding->source + coding->consumed);
5287
5288 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5289 {
ff0dacd7
KH
5290 struct coding_detection_info detect_info;
5291 enum coding_category category;
5292 struct coding_system *this;
df7492f9 5293
ff0dacd7 5294 detect_info.checked = detect_info.found = detect_info.rejected = 0;
df7492f9 5295 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5296 {
ff0dacd7
KH
5297 category = coding_priorities[i];
5298 this = coding_categories + category;
df7492f9 5299 if (this->id < 0)
1c3478b0 5300 {
df7492f9 5301 /* No coding system of this category is defined. */
ff0dacd7 5302 detect_info.rejected |= (1 << category);
df7492f9 5303 }
ff0dacd7 5304 else if (category >= coding_category_raw_text)
89528eb3 5305 continue;
ff0dacd7 5306 else if (detect_info.checked & (1 << category))
df7492f9 5307 {
ff0dacd7
KH
5308 if (detect_info.found & (1 << category))
5309 break;
1c3478b0 5310 }
ff0dacd7
KH
5311 else if ((*(this->detector)) (coding, &detect_info)
5312 && detect_info.found & (1 << category))
5313 break;
1c3478b0 5314 }
ff0dacd7
KH
5315 if (i < coding_category_raw_text)
5316 setup_coding_system (CODING_ID_NAME (this->id), coding);
5317 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5318 setup_coding_system (Qraw_text, coding);
ff0dacd7 5319 else if (detect_info.rejected)
df7492f9 5320 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5321 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5322 {
5323 this = coding_categories + coding_priorities[i];
5324 setup_coding_system (CODING_ID_NAME (this->id), coding);
5325 break;
5326 }
1c3478b0 5327 }
b73bfc1c 5328 }
b49a1807
KH
5329 else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
5330 {
5331 Lisp_Object coding_systems;
5332 struct coding_detection_info detect_info;
5333
5334 coding_systems
5335 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5336 detect_info.found = detect_info.rejected = 0;
5337 if (CONSP (coding_systems)
5338 && detect_coding_utf_16 (coding, &detect_info)
5339 && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5340 | CATEGORY_MASK_UTF_16_BE)))
5341 {
5342 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5343 setup_coding_system (XCAR (coding_systems), coding);
5344 else
5345 setup_coding_system (XCDR (coding_systems), coding);
5346 }
5347 }
69f76525 5348
df7492f9
KH
5349 attrs = CODING_ID_ATTRS (coding->id);
5350 coding_type = CODING_ATTR_TYPE (attrs);
5351
5352 /* If we have not yet decided the EOL type, detect it now. But, the
5353 detection is impossible for a CCL based coding system, in which
5354 case, we detct the EOL type after decoding. */
5355 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5356 && ! EQ (coding_type, Qccl))
d46c5b12 5357 {
89528eb3
KH
5358 int eol_seen = detect_eol (coding->source, coding->src_bytes,
5359 XINT (CODING_ATTR_CATEGORY (attrs)));
df7492f9
KH
5360
5361 if (eol_seen != EOL_SEEN_NONE)
5362 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5363 }
4ed46869
KH
5364}
5365
aaaf0b1e
KH
5366
5367static void
df7492f9 5368decode_eol (coding)
aaaf0b1e 5369 struct coding_system *coding;
aaaf0b1e 5370{
df7492f9 5371 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5372 {
df7492f9
KH
5373 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5374 unsigned char *pend = p + coding->produced;
5375 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5376
df7492f9 5377 for (; p < pend; p++)
aaaf0b1e 5378 {
df7492f9
KH
5379 if (*p == '\n')
5380 eol_seen |= EOL_SEEN_LF;
5381 else if (*p == '\r')
aaaf0b1e 5382 {
df7492f9 5383 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5384 {
df7492f9
KH
5385 eol_seen |= EOL_SEEN_CRLF;
5386 p++;
aaaf0b1e 5387 }
aaaf0b1e 5388 else
df7492f9 5389 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5390 }
aaaf0b1e 5391 }
df7492f9
KH
5392 if (eol_seen != EOL_SEEN_NONE)
5393 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5394 }
aaaf0b1e 5395
df7492f9
KH
5396 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5397 {
5398 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5399 unsigned char *pend = p + coding->produced;
5400
5401 for (; p < pend; p++)
5402 if (*p == '\r')
5403 *p = '\n';
5404 }
5405 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5406 {
5407 unsigned char *p, *pbeg, *pend;
5408 Lisp_Object undo_list;
5409
5410 move_gap_both (coding->dst_pos + coding->produced_char,
5411 coding->dst_pos_byte + coding->produced);
5412 undo_list = current_buffer->undo_list;
5413 current_buffer->undo_list = Qt;
c197f191 5414 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5415 current_buffer->undo_list = undo_list;
5416 pbeg = GPT_ADDR;
5417 pend = pbeg + coding->produced;
5418
5419 for (p = pend - 1; p >= pbeg; p--)
5420 if (*p == '\r')
5421 {
5422 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5423 pend--;
5424 }
5425 coding->produced_char -= coding->produced - (pend - pbeg);
5426 coding->produced = pend - pbeg;
5427 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5428 }
5429}
5430
df7492f9
KH
5431static void
5432translate_chars (coding, table)
4ed46869 5433 struct coding_system *coding;
df7492f9 5434 Lisp_Object table;
4ed46869 5435{
df7492f9
KH
5436 int *charbuf = coding->charbuf;
5437 int *charbuf_end = charbuf + coding->charbuf_used;
5438 int c;
5439
5440 if (coding->chars_at_source)
5441 return;
4ed46869 5442
df7492f9 5443 while (charbuf < charbuf_end)
8844fa83 5444 {
df7492f9
KH
5445 c = *charbuf;
5446 if (c < 0)
5447 charbuf += c;
5448 else
5449 *charbuf++ = translate_char (table, c);
8844fa83 5450 }
df7492f9 5451}
4ed46869 5452
df7492f9
KH
5453static int
5454produce_chars (coding)
5455 struct coding_system *coding;
5456{
5457 unsigned char *dst = coding->destination + coding->produced;
5458 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5459 int produced;
5460 int produced_chars = 0;
b73bfc1c 5461
df7492f9 5462 if (! coding->chars_at_source)
4ed46869 5463 {
df7492f9
KH
5464 /* Characters are in coding->charbuf. */
5465 int *buf = coding->charbuf;
5466 int *buf_end = buf + coding->charbuf_used;
5467 unsigned char *adjusted_dst_end;
4ed46869 5468
df7492f9
KH
5469 if (BUFFERP (coding->src_object)
5470 && EQ (coding->src_object, coding->dst_object))
5471 dst_end = coding->source + coding->consumed;
5472 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5473
df7492f9
KH
5474 while (buf < buf_end)
5475 {
5476 int c = *buf++;
5477
5478 if (dst >= adjusted_dst_end)
5479 {
5480 dst = alloc_destination (coding,
5481 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5482 dst);
5483 dst_end = coding->destination + coding->dst_bytes;
5484 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5485 }
5486 if (c >= 0)
5487 {
5488 if (coding->dst_multibyte
5489 || ! CHAR_BYTE8_P (c))
5490 CHAR_STRING_ADVANCE (c, dst);
5491 else
5492 *dst++ = CHAR_TO_BYTE8 (c);
5493 produced_chars++;
5494 }
5495 else
e1c23804 5496 /* This is an annotation datum. */
df7492f9
KH
5497 buf -= c + 1;
5498 }
5499 }
5500 else
5501 {
df7492f9
KH
5502 unsigned char *src = coding->source;
5503 unsigned char *src_end = src + coding->src_bytes;
5504 Lisp_Object eol_type;
b73bfc1c 5505
df7492f9 5506 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5507
df7492f9 5508 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5509 {
df7492f9
KH
5510 if (coding->src_multibyte)
5511 {
71c81426 5512 int multibytep = 1;
df7492f9 5513 int consumed_chars;
d46c5b12 5514
df7492f9
KH
5515 while (1)
5516 {
5517 unsigned char *src_base = src;
5518 int c;
b73bfc1c 5519
df7492f9
KH
5520 ONE_MORE_BYTE (c);
5521 if (c == '\r')
5522 {
5523 if (EQ (eol_type, Qdos))
5524 {
98725083
KH
5525 if (src == src_end)
5526 {
5527 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5528 goto no_more_source;
5529 }
5530 if (*src == '\n')
df7492f9
KH
5531 c = *src++;
5532 }
5533 else if (EQ (eol_type, Qmac))
5534 c = '\n';
5535 }
5536 if (dst == dst_end)
5537 {
2c78b7e1 5538 coding->consumed = src - coding->source;
b73bfc1c 5539
2c78b7e1
KH
5540 if (EQ (coding->src_object, coding->dst_object))
5541 dst_end = src;
5542 if (dst == dst_end)
5543 {
5544 dst = alloc_destination (coding, src_end - src + 1,
5545 dst);
5546 dst_end = coding->destination + coding->dst_bytes;
5547 coding_set_source (coding);
5548 src = coding->source + coding->consumed;
5549 src_end = coding->source + coding->src_bytes;
5550 }
df7492f9
KH
5551 }
5552 *dst++ = c;
5553 produced_chars++;
5554 }
5555 no_more_source:
5556 ;
5557 }
5558 else
5559 while (src < src_end)
5560 {
71c81426 5561 int multibytep = 1;
df7492f9 5562 int c = *src++;
b73bfc1c 5563
df7492f9
KH
5564 if (c == '\r')
5565 {
5566 if (EQ (eol_type, Qdos))
5567 {
5568 if (src < src_end
5569 && *src == '\n')
5570 c = *src++;
5571 }
5572 else if (EQ (eol_type, Qmac))
5573 c = '\n';
5574 }
5575 if (dst >= dst_end - 1)
5576 {
2c78b7e1 5577 coding->consumed = src - coding->source;
df7492f9 5578
2c78b7e1
KH
5579 if (EQ (coding->src_object, coding->dst_object))
5580 dst_end = src;
5581 if (dst >= dst_end - 1)
5582 {
5583 dst = alloc_destination (coding, src_end - src + 2,
5584 dst);
5585 dst_end = coding->destination + coding->dst_bytes;
5586 coding_set_source (coding);
5587 src = coding->source + coding->consumed;
5588 src_end = coding->source + coding->src_bytes;
5589 }
df7492f9
KH
5590 }
5591 EMIT_ONE_BYTE (c);
5592 }
d46c5b12 5593 }
df7492f9
KH
5594 else
5595 {
5596 if (!EQ (coding->src_object, coding->dst_object))
5597 {
5598 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5599
df7492f9
KH
5600 if (require > 0)
5601 {
5602 EMACS_INT offset = src - coding->source;
5603
5604 dst = alloc_destination (coding, require, dst);
5605 coding_set_source (coding);
5606 src = coding->source + offset;
5607 src_end = coding->source + coding->src_bytes;
5608 }
5609 }
5610 produced_chars = coding->src_chars;
5611 while (src < src_end)
5612 {
5613 int c = *src++;
5614
5615 if (c == '\r')
5616 {
5617 if (EQ (eol_type, Qdos))
5618 {
5619 if (src < src_end
5620 && *src == '\n')
5621 c = *src++;
5622 produced_chars--;
5623 }
5624 else if (EQ (eol_type, Qmac))
5625 c = '\n';
5626 }
5627 *dst++ = c;
5628 }
5629 }
2c78b7e1
KH
5630 coding->consumed = coding->src_bytes;
5631 coding->consumed_char = coding->src_chars;
b73bfc1c 5632 }
4ed46869 5633
df7492f9
KH
5634 produced = dst - (coding->destination + coding->produced);
5635 if (BUFFERP (coding->dst_object))
5636 insert_from_gap (produced_chars, produced);
5637 coding->produced += produced;
5638 coding->produced_char += produced_chars;
5639 return produced_chars;
b73bfc1c 5640}
52d41803 5641
ff0dacd7
KH
5642/* Compose text in CODING->object according to the annotation data at
5643 CHARBUF. CHARBUF is an array:
5644 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5645 */
4ed46869 5646
df7492f9
KH
5647static INLINE void
5648produce_composition (coding, charbuf)
4ed46869 5649 struct coding_system *coding;
df7492f9 5650 int *charbuf;
4ed46869 5651{
df7492f9 5652 int len;
ff0dacd7 5653 EMACS_INT from, to;
df7492f9 5654 enum composition_method method;
df7492f9
KH
5655 Lisp_Object components;
5656
df7492f9 5657 len = -charbuf[0];
ff0dacd7
KH
5658 from = coding->dst_pos + charbuf[2];
5659 to = coding->dst_pos + charbuf[3];
5660 method = (enum composition_method) (charbuf[4]);
df7492f9
KH
5661
5662 if (method == COMPOSITION_RELATIVE)
5663 components = Qnil;
5664 else
d46c5b12 5665 {
df7492f9
KH
5666 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5667 int i;
4ed46869 5668
df7492f9
KH
5669 len -= 5;
5670 charbuf += 5;
5671 for (i = 0; i < len; i++)
5672 args[i] = make_number (charbuf[i]);
5673 components = (method == COMPOSITION_WITH_ALTCHARS
5674 ? Fstring (len, args) : Fvector (len, args));
5675 }
ff0dacd7 5676 compose_text (from, to, components, Qnil, coding->dst_object);
df7492f9 5677}
b73bfc1c 5678
d46c5b12 5679
ff0dacd7
KH
5680/* Put `charset' property on text in CODING->object according to
5681 the annotation data at CHARBUF. CHARBUF is an array:
5682 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5683 */
b73bfc1c 5684
ff0dacd7
KH
5685static INLINE void
5686produce_charset (coding, charbuf)
5687 struct coding_system *coding;
5688 int *charbuf;
5689{
5690 EMACS_INT from = coding->dst_pos + charbuf[2];
5691 EMACS_INT to = coding->dst_pos + charbuf[3];
5692 struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
b73bfc1c 5693
ff0dacd7
KH
5694 Fput_text_property (make_number (from), make_number (to),
5695 Qcharset, CHARSET_NAME (charset),
5696 coding->dst_object);
4ed46869
KH
5697}
5698
ff0dacd7 5699
df7492f9
KH
5700#define CHARBUF_SIZE 0x4000
5701
5702#define ALLOC_CONVERSION_WORK_AREA(coding) \
5703 do { \
5704 int size = CHARBUF_SIZE;; \
5705 \
5706 coding->charbuf = NULL; \
5707 while (size > 1024) \
5708 { \
5709 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5710 if (coding->charbuf) \
5711 break; \
5712 size >>= 1; \
5713 } \
5714 if (! coding->charbuf) \
5715 { \
5716 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5717 return coding->result; \
5718 } \
5719 coding->charbuf_size = size; \
5720 } while (0)
4ed46869 5721
d46c5b12
KH
5722
5723static void
df7492f9 5724produce_annotation (coding)
d46c5b12 5725 struct coding_system *coding;
d46c5b12 5726{
df7492f9
KH
5727 int *charbuf = coding->charbuf;
5728 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5729
ff0dacd7
KH
5730 if (NILP (coding->dst_object))
5731 return;
5732
df7492f9 5733 while (charbuf < charbuf_end)
d46c5b12 5734 {
df7492f9
KH
5735 if (*charbuf >= 0)
5736 charbuf++;
d46c5b12 5737 else
d46c5b12 5738 {
df7492f9 5739 int len = -*charbuf;
ff0dacd7 5740 switch (charbuf[1])
df7492f9
KH
5741 {
5742 case CODING_ANNOTATE_COMPOSITION_MASK:
5743 produce_composition (coding, charbuf);
5744 break;
ff0dacd7
KH
5745 case CODING_ANNOTATE_CHARSET_MASK:
5746 produce_charset (coding, charbuf);
5747 break;
df7492f9
KH
5748 default:
5749 abort ();
5750 }
5751 charbuf += len;
d46c5b12 5752 }
df7492f9
KH
5753 }
5754}
d46c5b12 5755
df7492f9
KH
5756/* Decode the data at CODING->src_object into CODING->dst_object.
5757 CODING->src_object is a buffer, a string, or nil.
5758 CODING->dst_object is a buffer.
de79a6a5 5759
df7492f9
KH
5760 If CODING->src_object is a buffer, it must be the current buffer.
5761 In this case, if CODING->src_pos is positive, it is a position of
5762 the source text in the buffer, otherwise, the source text is in the
5763 gap area of the buffer, and CODING->src_pos specifies the offset of
5764 the text from GPT (which must be the same as PT). If this is the
5765 same buffer as CODING->dst_object, CODING->src_pos must be
5766 negative.
b73bfc1c 5767
df7492f9
KH
5768 If CODING->src_object is a string, CODING->src_pos in an index to
5769 that string.
d46c5b12 5770
df7492f9
KH
5771 If CODING->src_object is nil, CODING->source must already point to
5772 the non-relocatable memory area. In this case, CODING->src_pos is
5773 an offset from CODING->source.
d46c5b12 5774
df7492f9
KH
5775 The decoded data is inserted at the current point of the buffer
5776 CODING->dst_object.
5777*/
5778
5779static int
5780decode_coding (coding)
d46c5b12 5781 struct coding_system *coding;
d46c5b12 5782{
df7492f9 5783 Lisp_Object attrs;
d46c5b12 5784
df7492f9
KH
5785 if (BUFFERP (coding->src_object)
5786 && coding->src_pos > 0
5787 && coding->src_pos < GPT
5788 && coding->src_pos + coding->src_chars > GPT)
5789 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5790
df7492f9 5791 if (BUFFERP (coding->dst_object))
88993dfd 5792 {
df7492f9
KH
5793 if (current_buffer != XBUFFER (coding->dst_object))
5794 set_buffer_internal (XBUFFER (coding->dst_object));
5795 if (GPT != PT)
5796 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5797 }
5798
df7492f9
KH
5799 coding->consumed = coding->consumed_char = 0;
5800 coding->produced = coding->produced_char = 0;
5801 coding->chars_at_source = 0;
5802 coding->result = CODING_RESULT_SUCCESS;
5803 coding->errors = 0;
5804
5805 ALLOC_CONVERSION_WORK_AREA (coding);
5806
5807 attrs = CODING_ID_ATTRS (coding->id);
5808
5809 do
d46c5b12 5810 {
df7492f9
KH
5811 coding_set_source (coding);
5812 coding->annotated = 0;
5813 (*(coding->decoder)) (coding);
5814 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
da4109a9
KH
5815 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5816 else if (!NILP (Vstandard_translation_table_for_decode))
5817 translate_chars (coding, Vstandard_translation_table_for_decode);
df7492f9
KH
5818 coding_set_destination (coding);
5819 produce_chars (coding);
5820 if (coding->annotated)
5821 produce_annotation (coding);
d46c5b12 5822 }
df7492f9
KH
5823 while (coding->consumed < coding->src_bytes
5824 && ! coding->result);
d46c5b12 5825
df7492f9
KH
5826 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5827 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5828 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5829 decode_eol (coding);
d46c5b12 5830
df7492f9
KH
5831 coding->carryover_bytes = 0;
5832 if (coding->consumed < coding->src_bytes)
d46c5b12 5833 {
df7492f9
KH
5834 int nbytes = coding->src_bytes - coding->consumed;
5835 unsigned char *src;
5836
5837 coding_set_source (coding);
5838 coding_set_destination (coding);
5839 src = coding->source + coding->consumed;
5840
5841 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5842 {
df7492f9
KH
5843 /* Flush out unprocessed data as binary chars. We are sure
5844 that the number of data is less than the size of
5845 coding->charbuf. */
df7492f9 5846 while (nbytes-- > 0)
d46c5b12 5847 {
df7492f9 5848 int c = *src++;
98725083
KH
5849
5850 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
d46c5b12 5851 }
df7492f9 5852 produce_chars (coding);
d46c5b12 5853 }
d46c5b12 5854 else
df7492f9
KH
5855 {
5856 /* Record unprocessed bytes in coding->carryover. We are
5857 sure that the number of data is less than the size of
5858 coding->carryover. */
5859 unsigned char *p = coding->carryover;
5860
5861 coding->carryover_bytes = nbytes;
5862 while (nbytes-- > 0)
5863 *p++ = *src++;
5864 }
5865 coding->consumed = coding->src_bytes;
5866 }
b73bfc1c 5867
df7492f9 5868 return coding->result;
d46c5b12
KH
5869}
5870
ff0dacd7 5871
e1c23804 5872/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
5873 ending before LIMIT of CODING->src_object (buffer or string), store
5874 the data in BUF, set *STOP to a starting position of the next
5875 composition (if any) or to LIMIT, and return the address of the
5876 next element of BUF.
5877
5878 If such an annotation is not found, set *STOP to a starting
5879 position of a composition after POS (if any) or to LIMIT, and
5880 return BUF. */
5881
5882static INLINE int *
5883handle_composition_annotation (pos, limit, coding, buf, stop)
5884 EMACS_INT pos, limit;
5885 struct coding_system *coding;
5886 int *buf;
5887 EMACS_INT *stop;
5888{
5889 EMACS_INT start, end;
5890 Lisp_Object prop;
5891
5892 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5893 || end > limit)
5894 *stop = limit;
5895 else if (start > pos)
5896 *stop = start;
5897 else
5898 {
5899 if (start == pos)
5900 {
5901 /* We found a composition. Store the corresponding
5902 annotation data in BUF. */
5903 int *head = buf;
5904 enum composition_method method = COMPOSITION_METHOD (prop);
5905 int nchars = COMPOSITION_LENGTH (prop);
5906
5907 ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5908 if (method != COMPOSITION_RELATIVE)
5909 {
5910 Lisp_Object components;
5911 int len, i, i_byte;
5912
5913 components = COMPOSITION_COMPONENTS (prop);
5914 if (VECTORP (components))
5915 {
5916 len = XVECTOR (components)->size;
5917 for (i = 0; i < len; i++)
5918 *buf++ = XINT (AREF (components, i));
5919 }
5920 else if (STRINGP (components))
5921 {
5922 len = XSTRING (components)->size;
5923 i = i_byte = 0;
5924 while (i < len)
5925 {
5926 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5927 buf++;
5928 }
5929 }
5930 else if (INTEGERP (components))
5931 {
5932 len = 1;
5933 *buf++ = XINT (components);
5934 }
5935 else if (CONSP (components))
5936 {
5937 for (len = 0; CONSP (components);
5938 len++, components = XCDR (components))
5939 *buf++ = XINT (XCAR (components));
5940 }
5941 else
5942 abort ();
5943 *head -= len;
5944 }
5945 }
5946
5947 if (find_composition (end, limit, &start, &end, &prop,
5948 coding->src_object)
5949 && end <= limit)
5950 *stop = start;
5951 else
5952 *stop = limit;
5953 }
5954 return buf;
5955}
5956
5957
e1c23804 5958/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
5959 CODING->src_object (buffer of string), store the data in BUF, set
5960 *STOP to the position where the value of `charset' property changes
5961 (limiting by LIMIT), and return the address of the next element of
5962 BUF.
5963
5964 If the property value is nil, set *STOP to the position where the
5965 property value is non-nil (limiting by LIMIT), and return BUF. */
5966
5967static INLINE int *
5968handle_charset_annotation (pos, limit, coding, buf, stop)
5969 EMACS_INT pos, limit;
5970 struct coding_system *coding;
5971 int *buf;
5972 EMACS_INT *stop;
5973{
5974 Lisp_Object val, next;
5975 int id;
5976
5977 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5978 if (! NILP (val) && CHARSETP (val))
5979 id = XINT (CHARSET_SYMBOL_ID (val));
5980 else
5981 id = -1;
5982 ADD_CHARSET_DATA (buf, 0, 0, id);
5983 next = Fnext_single_property_change (make_number (pos), Qcharset,
5984 coding->src_object,
5985 make_number (limit));
5986 *stop = XINT (next);
5987 return buf;
5988}
5989
5990
df7492f9
KH
5991static void
5992consume_chars (coding)
5993 struct coding_system *coding;
5994{
5995 int *buf = coding->charbuf;
ff0dacd7 5996 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 5997 const unsigned char *src = coding->source + coding->consumed;
4776e638 5998 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
5999 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6000 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6001 int multibytep = coding->src_multibyte;
6002 Lisp_Object eol_type;
6003 int c;
ff0dacd7 6004 EMACS_INT stop, stop_composition, stop_charset;
88993dfd 6005
df7492f9
KH
6006 eol_type = CODING_ID_EOL_TYPE (coding->id);
6007 if (VECTORP (eol_type))
6008 eol_type = Qunix;
88993dfd 6009
df7492f9
KH
6010 /* Note: composition handling is not yet implemented. */
6011 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6012
ff0dacd7
KH
6013 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6014 stop = stop_composition = pos;
6015 else
6016 stop = stop_composition = end_pos;
6017 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6018 stop = stop_charset = pos;
df7492f9 6019 else
ff0dacd7 6020 stop_charset = end_pos;
ec6d2bb8 6021
ff0dacd7
KH
6022 /* Compensate for CRLF and annotation. */
6023 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6024 while (buf < buf_end)
ec6d2bb8 6025 {
df7492f9 6026 if (pos == stop)
ec6d2bb8 6027 {
df7492f9
KH
6028 if (pos == end_pos)
6029 break;
ff0dacd7
KH
6030 if (pos == stop_composition)
6031 buf = handle_composition_annotation (pos, end_pos, coding,
6032 buf, &stop_composition);
6033 if (pos == stop_charset)
6034 buf = handle_charset_annotation (pos, end_pos, coding,
6035 buf, &stop_charset);
6036 stop = (stop_composition < stop_charset
6037 ? stop_composition : stop_charset);
df7492f9
KH
6038 }
6039
6040 if (! multibytep)
4776e638
KH
6041 {
6042 EMACS_INT bytes = MULTIBYTE_LENGTH (src, src_end);
6043
6044 if (bytes > 0)
6045 c = STRING_CHAR_ADVANCE (src), pos += bytes;
6046 else
6047 c = *src++, pos++;
6048 }
df7492f9 6049 else
4776e638 6050 c = STRING_CHAR_ADVANCE (src), pos++;
df7492f9
KH
6051 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6052 c = '\n';
6053 if (! EQ (eol_type, Qunix))
6054 {
6055 if (c == '\n')
6056 {
6057 if (EQ (eol_type, Qdos))
6058 *buf++ = '\r';
6059 else
6060 c = '\r';
ec6d2bb8 6061 }
ec6d2bb8 6062 }
df7492f9 6063 *buf++ = c;
ec6d2bb8 6064 }
ec6d2bb8 6065
df7492f9
KH
6066 coding->consumed = src - coding->source;
6067 coding->consumed_char = pos - coding->src_pos;
6068 coding->charbuf_used = buf - coding->charbuf;
6069 coding->chars_at_source = 0;
ec6d2bb8
KH
6070}
6071
ec6d2bb8 6072
df7492f9
KH
6073/* Encode the text at CODING->src_object into CODING->dst_object.
6074 CODING->src_object is a buffer or a string.
6075 CODING->dst_object is a buffer or nil.
6076
6077 If CODING->src_object is a buffer, it must be the current buffer.
6078 In this case, if CODING->src_pos is positive, it is a position of
6079 the source text in the buffer, otherwise. the source text is in the
6080 gap area of the buffer, and coding->src_pos specifies the offset of
6081 the text from GPT (which must be the same as PT). If this is the
6082 same buffer as CODING->dst_object, CODING->src_pos must be
6083 negative and CODING should not have `pre-write-conversion'.
6084
6085 If CODING->src_object is a string, CODING should not have
6086 `pre-write-conversion'.
6087
6088 If CODING->dst_object is a buffer, the encoded data is inserted at
6089 the current point of that buffer.
6090
6091 If CODING->dst_object is nil, the encoded data is placed at the
6092 memory area specified by CODING->destination. */
6093
6094static int
6095encode_coding (coding)
ec6d2bb8 6096 struct coding_system *coding;
ec6d2bb8 6097{
df7492f9 6098 Lisp_Object attrs;
ec6d2bb8 6099
df7492f9 6100 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 6101
df7492f9 6102 if (BUFFERP (coding->dst_object))
ec6d2bb8 6103 {
df7492f9
KH
6104 set_buffer_internal (XBUFFER (coding->dst_object));
6105 coding->dst_multibyte
6106 = ! NILP (current_buffer->enable_multibyte_characters);
6107 }
ec6d2bb8 6108
df7492f9
KH
6109 coding->consumed = coding->consumed_char = 0;
6110 coding->produced = coding->produced_char = 0;
6111 coding->result = CODING_RESULT_SUCCESS;
6112 coding->errors = 0;
ec6d2bb8 6113
df7492f9 6114 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 6115
df7492f9
KH
6116 do {
6117 coding_set_source (coding);
6118 consume_chars (coding);
6119
6120 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
da4109a9
KH
6121 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6122 else if (!NILP (Vstandard_translation_table_for_encode))
6123 translate_chars (coding, Vstandard_translation_table_for_encode);
df7492f9
KH
6124
6125 coding_set_destination (coding);
6126 (*(coding->encoder)) (coding);
6127 } while (coding->consumed_char < coding->src_chars);
6128
6129 if (BUFFERP (coding->dst_object))
6130 insert_from_gap (coding->produced_char, coding->produced);
6131
6132 return (coding->result);
ec6d2bb8
KH
6133}
6134
fb88bf2d 6135
4776e638
KH
6136/* Stack of working buffers used in code conversion. An nil element
6137 means that the code conversion of that level is not using a working
6138 buffer. */
df7492f9 6139Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 6140
df7492f9
KH
6141/* A working buffer used by the top level conversion. */
6142Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 6143
4ed46869 6144
df7492f9
KH
6145/* Return a working buffer that can be freely used by the following
6146 code conversion. MULTIBYTEP specifies the multibyteness of the
6147 buffer. */
b73bfc1c 6148
df7492f9 6149Lisp_Object
4776e638
KH
6150make_conversion_work_buffer (multibytep, depth)
6151 int multibytep, depth;
df7492f9
KH
6152{
6153 struct buffer *current = current_buffer;
4776e638 6154 Lisp_Object buf, name;
d46c5b12 6155
4776e638 6156 if (depth == 0)
e133c8fa 6157 {
df7492f9
KH
6158 if (NILP (Vcode_conversion_reused_work_buf))
6159 Vcode_conversion_reused_work_buf
857dccb0 6160 = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
4776e638 6161 buf = Vcode_conversion_reused_work_buf;
e133c8fa 6162 }
df7492f9 6163 else
d46c5b12 6164 {
4776e638
KH
6165 if (depth < 0)
6166 {
857dccb0 6167 name = build_string (" *code-converting-work*");
4776e638
KH
6168 name = Fgenerate_new_buffer_name (name, Qnil);
6169 }
6170 else
6171 {
6172 char str[128];
e077cc80 6173
857dccb0 6174 sprintf (str, " *code-converting-work*<%d>", depth);
4776e638
KH
6175 name = build_string (str);
6176 }
6177 buf = Fget_buffer_create (name);
d46c5b12 6178 }
df7492f9
KH
6179 set_buffer_internal (XBUFFER (buf));
6180 current_buffer->undo_list = Qt;
6181 Ferase_buffer ();
9d123124 6182 Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
df7492f9
KH
6183 set_buffer_internal (current);
6184 return buf;
6185}
d46c5b12 6186
4776e638
KH
6187static Lisp_Object
6188code_conversion_restore (buffer)
6189 Lisp_Object buffer;
6190{
6191 Lisp_Object workbuf;
6192
6193 workbuf = XCAR (Vcode_conversion_work_buf_list);
6194 if (! NILP (workbuf)
6195 && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
6196 && ! NILP (Fbuffer_live_p (workbuf)))
6197 Fkill_buffer (workbuf);
6198 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6199 set_buffer_internal (XBUFFER (buffer));
6200 return Qnil;
6201}
d46c5b12 6202
4776e638
KH
6203static Lisp_Object
6204code_conversion_save (buffer, with_work_buf, multibyte)
6205 Lisp_Object buffer;
6206 int with_work_buf, multibyte;
df7492f9 6207{
4776e638 6208 Lisp_Object workbuf;
d46c5b12 6209
4776e638 6210 if (with_work_buf)
d46c5b12 6211 {
4776e638 6212 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
b843d1ae 6213
4776e638
KH
6214 workbuf = make_conversion_work_buffer (multibyte, depth);
6215 }
6216 else
6217 workbuf = Qnil;
6218 Vcode_conversion_work_buf_list
6219 = Fcons (workbuf, Vcode_conversion_work_buf_list);
6220 record_unwind_protect (code_conversion_restore, buffer);
6221 return workbuf;
df7492f9 6222}
d46c5b12 6223
df7492f9
KH
6224int
6225decode_coding_gap (coding, chars, bytes)
6226 struct coding_system *coding;
6227 EMACS_INT chars, bytes;
6228{
6229 int count = specpdl_ptr - specpdl;
4776e638 6230 Lisp_Object buffer;
fb88bf2d 6231
4776e638
KH
6232 buffer = Fcurrent_buffer ();
6233 code_conversion_save (buffer, 0, 0);
ec6d2bb8 6234
4776e638 6235 coding->src_object = buffer;
df7492f9
KH
6236 coding->src_chars = chars;
6237 coding->src_bytes = bytes;
6238 coding->src_pos = -chars;
6239 coding->src_pos_byte = -bytes;
6240 coding->src_multibyte = chars < bytes;
6241 coding->dst_object = coding->src_object;
6242 coding->dst_pos = PT;
6243 coding->dst_pos_byte = PT_BYTE;
71c81426 6244 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6245 coding->mode |= CODING_MODE_LAST_BLOCK;
4956c225 6246
df7492f9
KH
6247 if (CODING_REQUIRE_DETECTION (coding))
6248 detect_coding (coding);
6249
6250 decode_coding (coding);
d46c5b12 6251
df7492f9
KH
6252 unbind_to (count, Qnil);
6253 return coding->result;
6254}
d46c5b12 6255
df7492f9
KH
6256int
6257encode_coding_gap (coding, chars, bytes)
6258 struct coding_system *coding;
6259 EMACS_INT chars, bytes;
6260{
6261 int count = specpdl_ptr - specpdl;
6262 Lisp_Object buffer;
d46c5b12 6263
df7492f9 6264 buffer = Fcurrent_buffer ();
4776e638
KH
6265 code_conversion_save (buffer, 0, 0);
6266
df7492f9
KH
6267 coding->src_object = buffer;
6268 coding->src_chars = chars;
6269 coding->src_bytes = bytes;
6270 coding->src_pos = -chars;
6271 coding->src_pos_byte = -bytes;
6272 coding->src_multibyte = chars < bytes;
6273 coding->dst_object = coding->src_object;
6274 coding->dst_pos = PT;
6275 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 6276
df7492f9 6277 encode_coding (coding);
f2558efd 6278
df7492f9
KH
6279 unbind_to (count, Qnil);
6280 return coding->result;
6281}
b73bfc1c 6282
d46c5b12 6283
df7492f9
KH
6284/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6285 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 6286
df7492f9 6287 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 6288
df7492f9
KH
6289 If it is a buffer, the text is at point of the buffer. FROM and TO
6290 are positions in the buffer.
ec6d2bb8 6291
df7492f9
KH
6292 If it is a string, the text is at the beginning of the string.
6293 FROM and TO are indices to the string.
ec6d2bb8 6294
df7492f9
KH
6295 If it is nil, the text is at coding->source. FROM and TO are
6296 indices to coding->source.
ec6d2bb8 6297
df7492f9 6298 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 6299
df7492f9
KH
6300 If it is a buffer, the decoded text is inserted at point of the
6301 buffer. If the buffer is the same as SRC_OBJECT, the source text
6302 is deleted.
d46c5b12 6303
df7492f9
KH
6304 If it is Qt, a string is made from the decoded text, and
6305 set in CODING->dst_object.
d46c5b12 6306
df7492f9 6307 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6308 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6309 CODING->destination by xmalloc. If the decoded text is longer than
6310 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6311 */
d46c5b12 6312
df7492f9
KH
6313void
6314decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6315 dst_object)
6316 struct coding_system *coding;
6317 Lisp_Object src_object;
6318 EMACS_INT from, from_byte, to, to_byte;
6319 Lisp_Object dst_object;
6320{
6321 int count = specpdl_ptr - specpdl;
6322 unsigned char *destination;
6323 EMACS_INT dst_bytes;
6324 EMACS_INT chars = to - from;
6325 EMACS_INT bytes = to_byte - from_byte;
6326 Lisp_Object attrs;
4776e638
KH
6327 Lisp_Object buffer;
6328 int saved_pt = -1, saved_pt_byte;
d46c5b12 6329
4776e638 6330 buffer = Fcurrent_buffer ();
93dec019 6331
df7492f9
KH
6332 if (NILP (dst_object))
6333 {
6334 destination = coding->destination;
6335 dst_bytes = coding->dst_bytes;
6336 }
93dec019 6337
df7492f9
KH
6338 coding->src_object = src_object;
6339 coding->src_chars = chars;
6340 coding->src_bytes = bytes;
6341 coding->src_multibyte = chars < bytes;
70ad9fc4 6342
df7492f9
KH
6343 if (STRINGP (src_object))
6344 {
6345 coding->src_pos = from;
6346 coding->src_pos_byte = from_byte;
6347 }
6348 else if (BUFFERP (src_object))
6349 {
6350 set_buffer_internal (XBUFFER (src_object));
6351 if (from != GPT)
6352 move_gap_both (from, from_byte);
6353 if (EQ (src_object, dst_object))
fb88bf2d 6354 {
4776e638 6355 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6356 TEMP_SET_PT_BOTH (from, from_byte);
6357 del_range_both (from, from_byte, to, to_byte, 1);
6358 coding->src_pos = -chars;
6359 coding->src_pos_byte = -bytes;
fb88bf2d 6360 }
df7492f9 6361 else
fb88bf2d 6362 {
df7492f9
KH
6363 coding->src_pos = from;
6364 coding->src_pos_byte = from_byte;
fb88bf2d 6365 }
d46c5b12 6366 }
fb88bf2d 6367
df7492f9
KH
6368 if (CODING_REQUIRE_DETECTION (coding))
6369 detect_coding (coding);
6370 attrs = CODING_ID_ATTRS (coding->id);
6371
2cb26057
KH
6372 if (EQ (dst_object, Qt)
6373 || (! NILP (CODING_ATTR_POST_READ (attrs))
6374 && NILP (dst_object)))
b73bfc1c 6375 {
4776e638 6376 coding->dst_object = code_conversion_save (buffer, 1, 1);
df7492f9
KH
6377 coding->dst_pos = BEG;
6378 coding->dst_pos_byte = BEG_BYTE;
6379 coding->dst_multibyte = 1;
b73bfc1c 6380 }
df7492f9 6381 else if (BUFFERP (dst_object))
12410ef1 6382 {
4776e638 6383 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6384 coding->dst_object = dst_object;
6385 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6386 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6387 coding->dst_multibyte
6388 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 6389 }
72d1a715 6390 else
df7492f9 6391 {
4776e638 6392 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6393 coding->dst_object = Qnil;
6394 coding->dst_multibyte = 1;
6395 }
6396
6397 decode_coding (coding);
4ed46869 6398
df7492f9
KH
6399 if (BUFFERP (coding->dst_object))
6400 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6401
df7492f9 6402 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6403 {
df7492f9
KH
6404 struct gcpro gcpro1, gcpro2;
6405 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6406 Lisp_Object val;
4ed46869 6407
c0cc7f7f 6408 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6409 GCPRO2 (coding->src_object, coding->dst_object);
6410 val = call1 (CODING_ATTR_POST_READ (attrs),
6411 make_number (coding->produced_char));
6412 UNGCPRO;
6413 CHECK_NATNUM (val);
6414 coding->produced_char += Z - prev_Z;
6415 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6416 }
4ed46869 6417
df7492f9 6418 if (EQ (dst_object, Qt))
ec6d2bb8 6419 {
df7492f9
KH
6420 coding->dst_object = Fbuffer_string ();
6421 }
6422 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6423 {
6424 set_buffer_internal (XBUFFER (coding->dst_object));
6425 if (dst_bytes < coding->produced)
6426 {
6427 destination
6428 = (unsigned char *) xrealloc (destination, coding->produced);
6429 if (! destination)
6430 {
6431 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6432 unbind_to (count, Qnil);
6433 return;
6434 }
6435 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6436 move_gap_both (BEGV, BEGV_BYTE);
6437 bcopy (BEGV_ADDR, destination, coding->produced);
6438 coding->destination = destination;
6439 }
ec6d2bb8 6440 }
2b4f9037 6441
4776e638
KH
6442 if (saved_pt >= 0)
6443 {
6444 /* This is the case of:
6445 (BUFFERP (src_object) && EQ (src_object, dst_object))
6446 As we have moved PT while replacing the original buffer
6447 contents, we must recover it now. */
6448 set_buffer_internal (XBUFFER (src_object));
6449 if (saved_pt < from)
6450 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6451 else if (saved_pt < from + chars)
6452 TEMP_SET_PT_BOTH (from, from_byte);
6453 else if (! NILP (current_buffer->enable_multibyte_characters))
6454 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6455 saved_pt_byte + (coding->produced - bytes));
6456 else
6457 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6458 saved_pt_byte + (coding->produced - bytes));
6459 }
6460
df7492f9 6461 unbind_to (count, Qnil);
d46c5b12
KH
6462}
6463
df7492f9
KH
6464
6465void
6466encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6467 dst_object)
b73bfc1c 6468 struct coding_system *coding;
df7492f9
KH
6469 Lisp_Object src_object;
6470 EMACS_INT from, from_byte, to, to_byte;
6471 Lisp_Object dst_object;
b73bfc1c
KH
6472{
6473 int count = specpdl_ptr - specpdl;
df7492f9
KH
6474 EMACS_INT chars = to - from;
6475 EMACS_INT bytes = to_byte - from_byte;
6476 Lisp_Object attrs;
4776e638
KH
6477 Lisp_Object buffer;
6478 int saved_pt = -1, saved_pt_byte;
df7492f9 6479
4776e638 6480 buffer = Fcurrent_buffer ();
df7492f9
KH
6481
6482 coding->src_object = src_object;
6483 coding->src_chars = chars;
6484 coding->src_bytes = bytes;
6485 coding->src_multibyte = chars < bytes;
6486
6487 attrs = CODING_ID_ATTRS (coding->id);
6488
6489 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6490 {
4776e638
KH
6491 coding->src_object = code_conversion_save (buffer, 1,
6492 coding->src_multibyte);
df7492f9
KH
6493 set_buffer_internal (XBUFFER (coding->src_object));
6494 if (STRINGP (src_object))
6495 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6496 else if (BUFFERP (src_object))
6497 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6498 else
6499 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6500
6501 if (EQ (src_object, dst_object))
6502 {
6503 set_buffer_internal (XBUFFER (src_object));
4776e638 6504 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6505 del_range_both (from, from_byte, to, to_byte, 1);
6506 set_buffer_internal (XBUFFER (coding->src_object));
6507 }
6508
ac87bbef
KH
6509 call2 (CODING_ATTR_PRE_WRITE (attrs),
6510 make_number (BEG), make_number (Z));
6511 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6512 if (BEG != GPT)
6513 move_gap_both (BEG, BEG_BYTE);
6514 coding->src_chars = Z - BEG;
6515 coding->src_bytes = Z_BYTE - BEG_BYTE;
6516 coding->src_pos = BEG;
6517 coding->src_pos_byte = BEG_BYTE;
6518 coding->src_multibyte = Z < Z_BYTE;
6519 }
6520 else if (STRINGP (src_object))
6521 {
4776e638 6522 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6523 coding->src_pos = from;
6524 coding->src_pos_byte = from_byte;
6525 }
6526 else if (BUFFERP (src_object))
d46c5b12 6527 {
4776e638 6528 code_conversion_save (buffer, 0, 0);
df7492f9 6529 set_buffer_internal (XBUFFER (src_object));
df7492f9 6530 if (EQ (src_object, dst_object))
d46c5b12 6531 {
4776e638 6532 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
6533 coding->src_object = del_range_1 (from, to, 1, 1);
6534 coding->src_pos = 0;
6535 coding->src_pos_byte = 0;
d46c5b12 6536 }
df7492f9 6537 else
d46c5b12 6538 {
ff0dacd7
KH
6539 if (from < GPT && to >= GPT)
6540 move_gap_both (from, from_byte);
df7492f9
KH
6541 coding->src_pos = from;
6542 coding->src_pos_byte = from_byte;
d46c5b12
KH
6543 }
6544 }
4776e638
KH
6545 else
6546 code_conversion_save (buffer, 0, 0);
4ed46869 6547
df7492f9 6548 if (BUFFERP (dst_object))
d46c5b12 6549 {
df7492f9 6550 coding->dst_object = dst_object;
28f67a95
KH
6551 if (EQ (src_object, dst_object))
6552 {
6553 coding->dst_pos = from;
6554 coding->dst_pos_byte = from_byte;
6555 }
6556 else
6557 {
6558 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6559 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6560 }
df7492f9
KH
6561 coding->dst_multibyte
6562 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6563 }
df7492f9 6564 else if (EQ (dst_object, Qt))
4956c225 6565 {
df7492f9 6566 coding->dst_object = Qnil;
df7492f9 6567 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6568 if (coding->dst_bytes == 0)
6569 coding->dst_bytes = 1;
6570 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6571 coding->dst_multibyte = 0;
4956c225 6572 }
df7492f9 6573 else
78108bcd 6574 {
df7492f9
KH
6575 coding->dst_object = Qnil;
6576 coding->dst_multibyte = 0;
78108bcd
KH
6577 }
6578
df7492f9 6579 encode_coding (coding);
4ed46869 6580
df7492f9 6581 if (EQ (dst_object, Qt))
4ed46869 6582 {
df7492f9
KH
6583 if (BUFFERP (coding->dst_object))
6584 coding->dst_object = Fbuffer_string ();
6585 else
73be902c 6586 {
df7492f9
KH
6587 coding->dst_object
6588 = make_unibyte_string ((char *) coding->destination,
6589 coding->produced);
6590 xfree (coding->destination);
73be902c 6591 }
4ed46869 6592 }
d46c5b12 6593
4776e638
KH
6594 if (saved_pt >= 0)
6595 {
6596 /* This is the case of:
6597 (BUFFERP (src_object) && EQ (src_object, dst_object))
6598 As we have moved PT while replacing the original buffer
6599 contents, we must recover it now. */
6600 set_buffer_internal (XBUFFER (src_object));
6601 if (saved_pt < from)
6602 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6603 else if (saved_pt < from + chars)
6604 TEMP_SET_PT_BOTH (from, from_byte);
6605 else if (! NILP (current_buffer->enable_multibyte_characters))
6606 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6607 saved_pt_byte + (coding->produced - bytes));
6608 else
6609 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6610 saved_pt_byte + (coding->produced - bytes));
6611 }
6612
df7492f9 6613 unbind_to (count, Qnil);
b73bfc1c
KH
6614}
6615
df7492f9 6616
b73bfc1c 6617Lisp_Object
df7492f9 6618preferred_coding_system ()
b73bfc1c 6619{
df7492f9 6620 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6621
df7492f9 6622 return CODING_ID_NAME (id);
4ed46869
KH
6623}
6624
6625\f
6626#ifdef emacs
1397dc18 6627/*** 8. Emacs Lisp library functions ***/
4ed46869 6628
4ed46869 6629DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6630 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6631See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6632about coding-system objects. */)
6633 (obj)
4ed46869
KH
6634 Lisp_Object obj;
6635{
df7492f9 6636 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6637}
6638
9d991de8
RS
6639DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6640 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6641 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6642 (prompt)
4ed46869
KH
6643 Lisp_Object prompt;
6644{
e0e989f6 6645 Lisp_Object val;
9d991de8
RS
6646 do
6647 {
4608c386
KH
6648 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6649 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6650 }
6651 while (XSTRING (val)->size == 0);
e0e989f6 6652 return (Fintern (val, Qnil));
4ed46869
KH
6653}
6654
9b787f3e 6655DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6656 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6657If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6658 (prompt, default_coding_system)
9b787f3e 6659 Lisp_Object prompt, default_coding_system;
4ed46869 6660{
f44d27ce 6661 Lisp_Object val;
9b787f3e
RS
6662 if (SYMBOLP (default_coding_system))
6663 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6664 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6665 Qt, Qnil, Qcoding_system_history,
6666 default_coding_system, Qnil);
e0e989f6 6667 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6668}
6669
6670DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6671 1, 1, 0,
48b0f3ae 6672 doc: /* Check validity of CODING-SYSTEM.
b054002f 6673If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
df7492f9 6674 (coding_system)
4ed46869
KH
6675 Lisp_Object coding_system;
6676{
b7826503 6677 CHECK_SYMBOL (coding_system);
4ed46869
KH
6678 if (!NILP (Fcoding_system_p (coding_system)))
6679 return coding_system;
6680 while (1)
02ba4723 6681 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6682}
df7492f9 6683
3a73fa5d 6684\f
89528eb3
KH
6685/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6686 HIGHEST is nonzero, return the coding system of the highest
6687 priority among the detected coding systems. Otherwize return a
6688 list of detected coding systems sorted by their priorities. If
6689 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6690 multibyte form but contains only ASCII and eight-bit chars.
6691 Otherwise, the bytes are raw bytes.
6692
6693 CODING-SYSTEM controls the detection as below:
6694
6695 If it is nil, detect both text-format and eol-format. If the
6696 text-format part of CODING-SYSTEM is already specified
6697 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6698 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6699 detect only text-format. */
6700
d46c5b12 6701Lisp_Object
df7492f9 6702detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6703 unsigned char *src;
6704 int src_bytes, highest;
0a28aafb 6705 int multibytep;
df7492f9 6706 Lisp_Object coding_system;
4ed46869 6707{
df7492f9 6708 unsigned char *src_end = src + src_bytes;
df7492f9
KH
6709 Lisp_Object attrs, eol_type;
6710 Lisp_Object val;
6711 struct coding_system coding;
89528eb3 6712 int id;
ff0dacd7 6713 struct coding_detection_info detect_info;
df7492f9
KH
6714
6715 if (NILP (coding_system))
6716 coding_system = Qundecided;
6717 setup_coding_system (coding_system, &coding);
6718 attrs = CODING_ID_ATTRS (coding.id);
6719 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 6720 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 6721
df7492f9
KH
6722 coding.source = src;
6723 coding.src_bytes = src_bytes;
6724 coding.src_multibyte = multibytep;
6725 coding.consumed = 0;
89528eb3 6726 coding.mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6727
ff0dacd7
KH
6728 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6729
89528eb3
KH
6730 /* At first, detect text-format if necessary. */
6731 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
4ed46869 6732 {
ff0dacd7
KH
6733 enum coding_category category;
6734 struct coding_system *this;
6735 int c, i;
6736
df7492f9 6737 for (; src < src_end; src++)
4ed46869 6738 {
df7492f9 6739 c = *src;
89528eb3
KH
6740 if (c & 0x80
6741 || (c < 0x20 && (c == ISO_CODE_ESC
6742 || c == ISO_CODE_SI
584948ac 6743 || c == ISO_CODE_SO)))
d46c5b12 6744 break;
4ed46869 6745 }
df7492f9
KH
6746 coding.head_ascii = src - coding.source;
6747
6748 if (src < src_end)
6749 for (i = 0; i < coding_category_raw_text; i++)
6750 {
ff0dacd7
KH
6751 category = coding_priorities[i];
6752 this = coding_categories + category;
df7492f9 6753
df7492f9
KH
6754 if (this->id < 0)
6755 {
6756 /* No coding system of this category is defined. */
ff0dacd7 6757 detect_info.rejected |= (1 << category);
df7492f9 6758 }
ff0dacd7 6759 else if (category >= coding_category_raw_text)
89528eb3 6760 continue;
ff0dacd7
KH
6761 else if (detect_info.checked & (1 << category))
6762 {
6763 if (highest
6764 && (detect_info.found & (1 << category)))
6765 break;
6766 }
df7492f9
KH
6767 else
6768 {
ff0dacd7 6769 if ((*(this->detector)) (&coding, &detect_info)
89528eb3 6770 && highest
ff0dacd7
KH
6771 && (detect_info.found & (1 << category)))
6772 break;
df7492f9
KH
6773 }
6774 }
4ed46869 6775
ff0dacd7
KH
6776
6777 if (detect_info.rejected == CATEGORY_MASK_ANY)
89528eb3 6778 {
ff0dacd7 6779 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
6780 id = coding_categories[coding_category_raw_text].id;
6781 val = Fcons (make_number (id), Qnil);
6782 }
ff0dacd7 6783 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 6784 {
ff0dacd7 6785 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
6786 id = coding_categories[coding_category_undecided].id;
6787 val = Fcons (make_number (id), Qnil);
6788 }
6789 else if (highest)
6790 {
ff0dacd7
KH
6791 if (detect_info.found)
6792 {
6793 detect_info.found = 1 << category;
6794 val = Fcons (make_number (this->id), Qnil);
6795 }
6796 else
6797 for (i = 0; i < coding_category_raw_text; i++)
6798 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6799 {
6800 detect_info.found = 1 << coding_priorities[i];
6801 id = coding_categories[coding_priorities[i]].id;
6802 val = Fcons (make_number (id), Qnil);
6803 break;
6804 }
6805 }
89528eb3
KH
6806 else
6807 {
ff0dacd7
KH
6808 int mask = detect_info.rejected | detect_info.found;
6809 int found = 0;
89528eb3 6810 val = Qnil;
ff0dacd7 6811
89528eb3 6812 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
6813 {
6814 category = coding_priorities[i];
6815 if (! (mask & (1 << category)))
6816 {
6817 found |= 1 << category;
6818 id = coding_categories[category].id;
6819 val = Fcons (make_number (id), val);
6820 }
6821 }
6822 for (i = coding_category_raw_text - 1; i >= 0; i--)
6823 {
6824 category = coding_priorities[i];
6825 if (detect_info.found & (1 << category))
6826 {
6827 id = coding_categories[category].id;
6828 val = Fcons (make_number (id), val);
6829 }
6830 }
6831 detect_info.found |= found;
89528eb3
KH
6832 }
6833 }
df7492f9
KH
6834 else
6835 {
ff0dacd7 6836 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 6837 val = Fcons (make_number (coding.id), Qnil);
4ed46869 6838 }
df7492f9 6839
89528eb3 6840 /* Then, detect eol-format if necessary. */
df7492f9 6841 {
89528eb3 6842 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
6843 Lisp_Object tail;
6844
89528eb3
KH
6845 if (VECTORP (eol_type))
6846 {
ff0dacd7 6847 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
6848 normal_eol = detect_eol (coding.source, src_bytes,
6849 coding_category_raw_text);
ff0dacd7
KH
6850 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6851 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
6852 utf_16_be_eol = detect_eol (coding.source, src_bytes,
6853 coding_category_utf_16_be);
ff0dacd7
KH
6854 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6855 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
6856 utf_16_le_eol = detect_eol (coding.source, src_bytes,
6857 coding_category_utf_16_le);
6858 }
6859 else
6860 {
6861 if (EQ (eol_type, Qunix))
6862 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6863 else if (EQ (eol_type, Qdos))
6864 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6865 else
6866 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6867 }
6868
df7492f9
KH
6869 for (tail = val; CONSP (tail); tail = XCDR (tail))
6870 {
89528eb3 6871 enum coding_category category;
df7492f9 6872 int this_eol;
89528eb3
KH
6873
6874 id = XINT (XCAR (tail));
6875 attrs = CODING_ID_ATTRS (id);
6876 category = XINT (CODING_ATTR_CATEGORY (attrs));
6877 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
6878 if (VECTORP (eol_type))
6879 {
89528eb3
KH
6880 if (category == coding_category_utf_16_be
6881 || category == coding_category_utf_16_be_nosig)
6882 this_eol = utf_16_be_eol;
6883 else if (category == coding_category_utf_16_le
6884 || category == coding_category_utf_16_le_nosig)
6885 this_eol = utf_16_le_eol;
df7492f9 6886 else
89528eb3
KH
6887 this_eol = normal_eol;
6888
df7492f9
KH
6889 if (this_eol == EOL_SEEN_LF)
6890 XSETCAR (tail, AREF (eol_type, 0));
6891 else if (this_eol == EOL_SEEN_CRLF)
6892 XSETCAR (tail, AREF (eol_type, 1));
6893 else if (this_eol == EOL_SEEN_CR)
6894 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
6895 else
6896 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 6897 }
89528eb3
KH
6898 else
6899 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
6900 }
6901 }
6902
03699b14 6903 return (highest ? XCAR (val) : val);
93dec019 6904}
4ed46869 6905
df7492f9 6906
d46c5b12
KH
6907DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6908 2, 3, 0,
48b0f3ae
PJ
6909 doc: /* Detect coding system of the text in the region between START and END.
6910Return a list of possible coding systems ordered by priority.
6911
6912If only ASCII characters are found, it returns a list of single element
6913`undecided' or its subsidiary coding system according to a detected
6914end-of-line format.
6915
6916If optional argument HIGHEST is non-nil, return the coding system of
6917highest priority. */)
6918 (start, end, highest)
d46c5b12
KH
6919 Lisp_Object start, end, highest;
6920{
6921 int from, to;
6922 int from_byte, to_byte;
6289dd10 6923
b7826503
PJ
6924 CHECK_NUMBER_COERCE_MARKER (start);
6925 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6926
d46c5b12
KH
6927 validate_region (&start, &end);
6928 from = XINT (start), to = XINT (end);
6929 from_byte = CHAR_TO_BYTE (from);
6930 to_byte = CHAR_TO_BYTE (to);
6289dd10 6931
d46c5b12
KH
6932 if (from < GPT && to >= GPT)
6933 move_gap_both (to, to_byte);
c210f766 6934
d46c5b12 6935 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6936 to_byte - from_byte,
0a28aafb
KH
6937 !NILP (highest),
6938 !NILP (current_buffer
df7492f9
KH
6939 ->enable_multibyte_characters),
6940 Qnil);
d46c5b12 6941}
6289dd10 6942
d46c5b12
KH
6943DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6944 1, 2, 0,
48b0f3ae
PJ
6945 doc: /* Detect coding system of the text in STRING.
6946Return a list of possible coding systems ordered by priority.
6947
6948If only ASCII characters are found, it returns a list of single element
6949`undecided' or its subsidiary coding system according to a detected
6950end-of-line format.
6951
6952If optional argument HIGHEST is non-nil, return the coding system of
6953highest priority. */)
6954 (string, highest)
d46c5b12
KH
6955 Lisp_Object string, highest;
6956{
b7826503 6957 CHECK_STRING (string);
4ed46869 6958
d46c5b12 6959 return detect_coding_system (XSTRING (string)->data,
df7492f9 6960 STRING_BYTES (XSTRING (string)),
0a28aafb 6961 !NILP (highest),
df7492f9
KH
6962 STRING_MULTIBYTE (string),
6963 Qnil);
4ed46869
KH
6964}
6965
05e6f5dc 6966
df7492f9
KH
6967static INLINE int
6968char_encodable_p (c, attrs)
6969 int c;
6970 Lisp_Object attrs;
05e6f5dc 6971{
df7492f9 6972 Lisp_Object tail;
df7492f9 6973 struct charset *charset;
05e6f5dc 6974
df7492f9
KH
6975 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6976 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6977 {
df7492f9
KH
6978 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6979 if (CHAR_CHARSET_P (c, charset))
6980 break;
05e6f5dc 6981 }
df7492f9 6982 return (! NILP (tail));
05e6f5dc
KH
6983}
6984
6985
df7492f9
KH
6986/* Return a list of coding systems that safely encode the text between
6987 START and END. If EXCLUDE is non-nil, it is a list of coding
6988 systems not to check. The returned list doesn't contain any such
48468dac 6989 coding systems. In any case, if the text contains only ASCII or is
df7492f9
KH
6990 unibyte, return t. */
6991
6992DEFUN ("find-coding-systems-region-internal",
6993 Ffind_coding_systems_region_internal,
6994 Sfind_coding_systems_region_internal, 2, 3, 0,
6995 doc: /* Internal use only. */)
6996 (start, end, exclude)
6997 Lisp_Object start, end, exclude;
6998{
6999 Lisp_Object coding_attrs_list, safe_codings;
7000 EMACS_INT start_byte, end_byte;
7c78e542 7001 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7002 int c;
7003 Lisp_Object tail, elt;
05e6f5dc 7004
df7492f9
KH
7005 if (STRINGP (start))
7006 {
7007 if (!STRING_MULTIBYTE (start)
48468dac 7008 || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
df7492f9
KH
7009 return Qt;
7010 start_byte = 0;
7011 end_byte = STRING_BYTES (XSTRING (start));
7012 }
7013 else
7014 {
7015 CHECK_NUMBER_COERCE_MARKER (start);
7016 CHECK_NUMBER_COERCE_MARKER (end);
7017 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7018 args_out_of_range (start, end);
7019 if (NILP (current_buffer->enable_multibyte_characters))
7020 return Qt;
7021 start_byte = CHAR_TO_BYTE (XINT (start));
7022 end_byte = CHAR_TO_BYTE (XINT (end));
7023 if (XINT (end) - XINT (start) == end_byte - start_byte)
7024 return Qt;
05e6f5dc 7025
e1c23804 7026 if (XINT (start) < GPT && XINT (end) > GPT)
df7492f9 7027 {
e1c23804
DL
7028 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7029 move_gap_both (XINT (start), start_byte);
df7492f9 7030 else
e1c23804 7031 move_gap_both (XINT (end), end_byte);
df7492f9
KH
7032 }
7033 }
05e6f5dc 7034
df7492f9
KH
7035 coding_attrs_list = Qnil;
7036 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7037 if (NILP (exclude)
7038 || NILP (Fmemq (XCAR (tail), exclude)))
7039 {
7040 Lisp_Object attrs;
05e6f5dc 7041
df7492f9
KH
7042 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7043 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7044 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7045 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7046 }
7047
7048 if (STRINGP (start))
7049 p = pbeg = XSTRING (start)->data;
7050 else
7051 p = pbeg = BYTE_POS_ADDR (start_byte);
7052 pend = p + (end_byte - start_byte);
7053
7054 while (p < pend && ASCII_BYTE_P (*p)) p++;
7055 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
7056
7057 while (p < pend)
7058 {
df7492f9
KH
7059 if (ASCII_BYTE_P (*p))
7060 p++;
7061 else
7062 {
7063 c = STRING_CHAR_ADVANCE (p);
7064
7065 charset_map_loaded = 0;
7066 for (tail = coding_attrs_list; CONSP (tail);)
7067 {
7068 elt = XCAR (tail);
7069 if (NILP (elt))
7070 tail = XCDR (tail);
7071 else if (char_encodable_p (c, elt))
7072 tail = XCDR (tail);
7073 else if (CONSP (XCDR (tail)))
7074 {
7075 XSETCAR (tail, XCAR (XCDR (tail)));
7076 XSETCDR (tail, XCDR (XCDR (tail)));
7077 }
7078 else
7079 {
7080 XSETCAR (tail, Qnil);
7081 tail = XCDR (tail);
7082 }
7083 }
7084 if (charset_map_loaded)
7085 {
7086 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7087
df7492f9
KH
7088 if (STRINGP (start))
7089 pbeg = XSTRING (start)->data;
7090 else
7091 pbeg = BYTE_POS_ADDR (start_byte);
7092 p = pbeg + p_offset;
7093 pend = pbeg + pend_offset;
7094 }
7095 }
05e6f5dc 7096 }
df7492f9
KH
7097
7098 safe_codings = Qnil;
7099 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7100 if (! NILP (XCAR (tail)))
7101 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7102
05e6f5dc
KH
7103 return safe_codings;
7104}
7105
7106
df7492f9
KH
7107DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7108 Scheck_coding_systems_region, 3, 3, 0,
7109 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 7110
df7492f9
KH
7111START and END are buffer positions specifying the region.
7112CODING-SYSTEM-LIST is a list of coding systems to check.
7113
7114The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7115CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7116whole region, POS0, POS1, ... are buffer positions where non-encodable
7117characters are found.
7118
7119If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7120value is nil.
7121
7122START may be a string. In that case, check if the string is
7123encodable, and the value contains indices to the string instead of
7124buffer positions. END is ignored. */)
7125 (start, end, coding_system_list)
7126 Lisp_Object start, end, coding_system_list;
05e6f5dc 7127{
df7492f9
KH
7128 Lisp_Object list;
7129 EMACS_INT start_byte, end_byte;
7130 int pos;
7c78e542 7131 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7132 int c;
7133 Lisp_Object tail, elt;
05e6f5dc
KH
7134
7135 if (STRINGP (start))
7136 {
df7492f9
KH
7137 if (!STRING_MULTIBYTE (start)
7138 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7139 return Qnil;
7140 start_byte = 0;
7141 end_byte = STRING_BYTES (XSTRING (start));
7142 pos = 0;
05e6f5dc
KH
7143 }
7144 else
7145 {
b7826503
PJ
7146 CHECK_NUMBER_COERCE_MARKER (start);
7147 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7148 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7149 args_out_of_range (start, end);
7150 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7151 return Qnil;
7152 start_byte = CHAR_TO_BYTE (XINT (start));
7153 end_byte = CHAR_TO_BYTE (XINT (end));
7154 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7155 return Qt;
df7492f9 7156
e1c23804 7157 if (XINT (start) < GPT && XINT (end) > GPT)
df7492f9 7158 {
e1c23804
DL
7159 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7160 move_gap_both (XINT (start), start_byte);
df7492f9 7161 else
e1c23804 7162 move_gap_both (XINT (end), end_byte);
df7492f9 7163 }
e1c23804 7164 pos = XINT (start);
df7492f9
KH
7165 }
7166
7167 list = Qnil;
7168 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7169 {
7170 elt = XCAR (tail);
7171 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7172 Qnil)),
7173 list);
05e6f5dc
KH
7174 }
7175
df7492f9
KH
7176 if (STRINGP (start))
7177 p = pbeg = XSTRING (start)->data;
7178 else
7179 p = pbeg = BYTE_POS_ADDR (start_byte);
7180 pend = p + (end_byte - start_byte);
7181
7182 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7183 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7184
7185 while (p < pend)
05e6f5dc 7186 {
df7492f9
KH
7187 if (ASCII_BYTE_P (*p))
7188 p++;
7189 else
05e6f5dc 7190 {
df7492f9
KH
7191 c = STRING_CHAR_ADVANCE (p);
7192
7193 charset_map_loaded = 0;
7194 for (tail = list; CONSP (tail); tail = XCDR (tail))
7195 {
7196 elt = XCDR (XCAR (tail));
7197 if (! char_encodable_p (c, XCAR (elt)))
7198 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7199 }
7200 if (charset_map_loaded)
7201 {
7202 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7203
7204 if (STRINGP (start))
7205 pbeg = XSTRING (start)->data;
7206 else
7207 pbeg = BYTE_POS_ADDR (start_byte);
7208 p = pbeg + p_offset;
7209 pend = pbeg + pend_offset;
7210 }
05e6f5dc 7211 }
df7492f9 7212 pos++;
05e6f5dc
KH
7213 }
7214
df7492f9
KH
7215 tail = list;
7216 list = Qnil;
7217 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 7218 {
df7492f9
KH
7219 elt = XCAR (tail);
7220 if (CONSP (XCDR (XCDR (elt))))
7221 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7222 list);
05e6f5dc 7223 }
df7492f9
KH
7224
7225 return list;
05e6f5dc
KH
7226}
7227
7228
df7492f9 7229
4031e2bf 7230Lisp_Object
df7492f9
KH
7231code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7232 Lisp_Object start, end, coding_system, dst_object;
7233 int encodep, norecord;
3a73fa5d
RS
7234{
7235 struct coding_system coding;
df7492f9
KH
7236 EMACS_INT from, from_byte, to, to_byte;
7237 Lisp_Object src_object;
3a73fa5d 7238
b7826503
PJ
7239 CHECK_NUMBER_COERCE_MARKER (start);
7240 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7241 if (NILP (coding_system))
7242 coding_system = Qno_conversion;
7243 else
7244 CHECK_CODING_SYSTEM (coding_system);
7245 src_object = Fcurrent_buffer ();
7246 if (NILP (dst_object))
7247 dst_object = src_object;
7248 else if (! EQ (dst_object, Qt))
7249 CHECK_BUFFER (dst_object);
3a73fa5d 7250
d46c5b12
KH
7251 validate_region (&start, &end);
7252 from = XFASTINT (start);
df7492f9 7253 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7254 to = XFASTINT (end);
df7492f9 7255 to_byte = CHAR_TO_BYTE (to);
d46c5b12 7256
df7492f9
KH
7257 setup_coding_system (coding_system, &coding);
7258 coding.mode |= CODING_MODE_LAST_BLOCK;
7259
7260 if (encodep)
7261 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7262 dst_object);
7263 else
7264 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7265 dst_object);
7266 if (! norecord)
7267 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 7268
df7492f9
KH
7269 if (coding.result != CODING_RESULT_SUCCESS)
7270 error ("Code conversion error: %d", coding.result);
3a73fa5d 7271
df7492f9
KH
7272 return (BUFFERP (dst_object)
7273 ? make_number (coding.produced_char)
7274 : coding.dst_object);
4031e2bf
KH
7275}
7276
df7492f9 7277
4031e2bf 7278DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7279 3, 4, "r\nzCoding system: ",
48b0f3ae 7280 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7281When called from a program, takes four arguments:
7282 START, END, CODING-SYSTEM, and DESTINATION.
7283START and END are buffer positions.
7284
7285Optional 4th arguments DESTINATION specifies where the decoded text goes.
7286If nil, the region between START and END is replace by the decoded text.
7287If buffer, the decoded text is inserted in the buffer.
7288If t, the decoded text is returned.
7289
48b0f3ae
PJ
7290This function sets `last-coding-system-used' to the precise coding system
7291used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7292not fully specified.)
7293It returns the length of the decoded text. */)
df7492f9
KH
7294 (start, end, coding_system, destination)
7295 Lisp_Object start, end, coding_system, destination;
4031e2bf 7296{
df7492f9 7297 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
7298}
7299
7300DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7301 3, 4, "r\nzCoding system: ",
7302 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7303When called from a program, takes three arguments:
7304START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
7305
7306Optional 4th arguments DESTINATION specifies where the encoded text goes.
7307If nil, the region between START and END is replace by the encoded text.
7308If buffer, the encoded text is inserted in the buffer.
7309If t, the encoded text is returned.
7310
48b0f3ae
PJ
7311This function sets `last-coding-system-used' to the precise coding system
7312used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7313not fully specified.)
7314It returns the length of the encoded text. */)
df7492f9
KH
7315 (start, end, coding_system, destination)
7316 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7317{
df7492f9 7318 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 7319}
3a73fa5d 7320
4031e2bf 7321Lisp_Object
df7492f9
KH
7322code_convert_string (string, coding_system, dst_object,
7323 encodep, nocopy, norecord)
7324 Lisp_Object string, coding_system, dst_object;
7325 int encodep, nocopy, norecord;
4031e2bf
KH
7326{
7327 struct coding_system coding;
df7492f9 7328 EMACS_INT chars, bytes;
3a73fa5d 7329
b7826503 7330 CHECK_STRING (string);
d46c5b12 7331 if (NILP (coding_system))
df7492f9
KH
7332 {
7333 if (! norecord)
7334 Vlast_coding_system_used = Qno_conversion;
7335 if (NILP (dst_object))
7336 return (nocopy ? Fcopy_sequence (string) : string);
7337 }
4ed46869 7338
df7492f9
KH
7339 if (NILP (coding_system))
7340 coding_system = Qno_conversion;
7341 else
7342 CHECK_CODING_SYSTEM (coding_system);
7343 if (NILP (dst_object))
7344 dst_object = Qt;
7345 else if (! EQ (dst_object, Qt))
7346 CHECK_BUFFER (dst_object);
5f1cd180 7347
df7492f9 7348 setup_coding_system (coding_system, &coding);
d46c5b12 7349 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
7350 chars = XSTRING (string)->size;
7351 bytes = STRING_BYTES (XSTRING (string));
7352 if (encodep)
7353 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7354 else
7355 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7356 if (! norecord)
7357 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7358
df7492f9
KH
7359 if (coding.result != CODING_RESULT_SUCCESS)
7360 error ("Code conversion error: %d", coding.result);
4ed46869 7361
df7492f9
KH
7362 return (BUFFERP (dst_object)
7363 ? make_number (coding.produced_char)
7364 : coding.dst_object);
4ed46869
KH
7365}
7366
4031e2bf 7367
ecec61c1 7368/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
7369 Do not set Vlast_coding_system_used.
7370
7371 This function is called only from macros DECODE_FILE and
7372 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
7373
7374Lisp_Object
7375code_convert_string_norecord (string, coding_system, encodep)
7376 Lisp_Object string, coding_system;
7377 int encodep;
7378{
0be8721c 7379 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 7380}
ecec61c1 7381
ecec61c1 7382
df7492f9
KH
7383DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7384 2, 4, 0,
7385 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7386
7387Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7388if the decoding operation is trivial.
ecec61c1 7389
df7492f9 7390Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 7391inserted in BUFFER instead of returned as a string. In this case,
df7492f9 7392the return value is BUFFER.
ecec61c1 7393
df7492f9
KH
7394This function sets `last-coding-system-used' to the precise coding system
7395used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7396not fully specified. */)
7397 (string, coding_system, nocopy, buffer)
7398 Lisp_Object string, coding_system, nocopy, buffer;
7399{
7400 return code_convert_string (string, coding_system, buffer,
7401 0, ! NILP (nocopy), 0);
7402}
7403
7404DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7405 2, 4, 0,
7406 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7407
7408Optional third arg NOCOPY non-nil means it is OK to return STRING
7409itself if the encoding operation is trivial.
7410
7411Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 7412inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
7413the return value is BUFFER.
7414
7415This function sets `last-coding-system-used' to the precise coding system
7416used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7417not fully specified.) */)
7418 (string, coding_system, nocopy, buffer)
7419 Lisp_Object string, coding_system, nocopy, buffer;
7420{
7421 return code_convert_string (string, coding_system, buffer,
c197f191 7422 1, ! NILP (nocopy), 1);
ecec61c1 7423}
df7492f9 7424
3a73fa5d 7425\f
4ed46869 7426DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7427 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7428Return the corresponding character. */)
7429 (code)
4ed46869
KH
7430 Lisp_Object code;
7431{
df7492f9
KH
7432 Lisp_Object spec, attrs, val;
7433 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7434 int c;
7435
7436 CHECK_NATNUM (code);
7437 c = XFASTINT (code);
7438 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7439 attrs = AREF (spec, 0);
4ed46869 7440
df7492f9
KH
7441 if (ASCII_BYTE_P (c)
7442 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7443 return code;
7444
7445 val = CODING_ATTR_CHARSET_LIST (attrs);
7446 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7447 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7448 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
7449
7450 if (c <= 0x7F)
7451 charset = charset_roman;
7452 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7453 {
df7492f9
KH
7454 charset = charset_kana;
7455 c -= 0x80;
55ab7be3
KH
7456 }
7457 else
7458 {
004068e4 7459 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7460
7461 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7462 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7463 error ("Invalid code: %d", code);
7464 SJIS_TO_JIS (c);
7465 charset = charset_kanji;
55ab7be3 7466 }
df7492f9
KH
7467 c = DECODE_CHAR (charset, c);
7468 if (c < 0)
7469 error ("Invalid code: %d", code);
7470 return make_number (c);
4ed46869
KH
7471}
7472
df7492f9 7473
4ed46869 7474DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7475 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7476Return the corresponding code in SJIS. */)
7477 (ch)
df7492f9 7478 Lisp_Object ch;
4ed46869 7479{
df7492f9
KH
7480 Lisp_Object spec, attrs, charset_list;
7481 int c;
7482 struct charset *charset;
7483 unsigned code;
4ed46869 7484
df7492f9
KH
7485 CHECK_CHARACTER (ch);
7486 c = XFASTINT (ch);
7487 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7488 attrs = AREF (spec, 0);
7489
7490 if (ASCII_CHAR_P (c)
7491 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7492 return ch;
7493
7494 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7495 charset = char_charset (c, charset_list, &code);
7496 if (code == CHARSET_INVALID_CODE (charset))
7497 error ("Can't encode by shift_jis encoding: %d", c);
7498 JIS_TO_SJIS (code);
7499
7500 return make_number (code);
4ed46869
KH
7501}
7502
7503DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7504 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7505Return the corresponding character. */)
7506 (code)
4ed46869
KH
7507 Lisp_Object code;
7508{
df7492f9
KH
7509 Lisp_Object spec, attrs, val;
7510 struct charset *charset_roman, *charset_big5, *charset;
7511 int c;
4ed46869 7512
df7492f9
KH
7513 CHECK_NATNUM (code);
7514 c = XFASTINT (code);
7515 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7516 attrs = AREF (spec, 0);
7517
7518 if (ASCII_BYTE_P (c)
7519 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7520 return code;
7521
7522 val = CODING_ATTR_CHARSET_LIST (attrs);
7523 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7524 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7525
7526 if (c <= 0x7F)
7527 charset = charset_roman;
c28a9453
KH
7528 else
7529 {
df7492f9
KH
7530 int b1 = c >> 8, b2 = c & 0x7F;
7531 if (b1 < 0xA1 || b1 > 0xFE
7532 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7533 error ("Invalid code: %d", code);
7534 charset = charset_big5;
c28a9453 7535 }
df7492f9
KH
7536 c = DECODE_CHAR (charset, (unsigned )c);
7537 if (c < 0)
7538 error ("Invalid code: %d", code);
7539 return make_number (c);
4ed46869
KH
7540}
7541
7542DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7543 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7544Return the corresponding character code in Big5. */)
7545 (ch)
4ed46869
KH
7546 Lisp_Object ch;
7547{
df7492f9
KH
7548 Lisp_Object spec, attrs, charset_list;
7549 struct charset *charset;
7550 int c;
7551 unsigned code;
7552
7553 CHECK_CHARACTER (ch);
7554 c = XFASTINT (ch);
7555 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7556 attrs = AREF (spec, 0);
7557 if (ASCII_CHAR_P (c)
7558 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7559 return ch;
7560
7561 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7562 charset = char_charset (c, charset_list, &code);
7563 if (code == CHARSET_INVALID_CODE (charset))
7564 error ("Can't encode by Big5 encoding: %d", c);
7565
7566 return make_number (code);
4ed46869 7567}
df7492f9 7568
3a73fa5d 7569\f
1ba9e4ab
KH
7570DEFUN ("set-terminal-coding-system-internal",
7571 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7572 Sset_terminal_coding_system_internal, 1, 1, 0,
7573 doc: /* Internal use only. */)
7574 (coding_system)
b74e4686 7575 Lisp_Object coding_system;
4ed46869 7576{
b7826503 7577 CHECK_SYMBOL (coding_system);
df7492f9
KH
7578 setup_coding_system (Fcheck_coding_system (coding_system),
7579 &terminal_coding);
7580
70c22245 7581 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7582 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7583 /* Characer composition should be disabled. */
7584 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7585 terminal_coding.src_multibyte = 1;
7586 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7587 return Qnil;
7588}
7589
c4825358
KH
7590DEFUN ("set-safe-terminal-coding-system-internal",
7591 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7592 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7593 doc: /* Internal use only. */)
48b0f3ae 7594 (coding_system)
b74e4686 7595 Lisp_Object coding_system;
c4825358 7596{
b7826503 7597 CHECK_SYMBOL (coding_system);
c4825358
KH
7598 setup_coding_system (Fcheck_coding_system (coding_system),
7599 &safe_terminal_coding);
df7492f9
KH
7600 /* Characer composition should be disabled. */
7601 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7602 safe_terminal_coding.src_multibyte = 1;
7603 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7604 return Qnil;
7605}
7606
4ed46869
KH
7607DEFUN ("terminal-coding-system",
7608 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7609 doc: /* Return coding system specified for terminal output. */)
7610 ()
4ed46869 7611{
df7492f9 7612 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7613}
7614
1ba9e4ab
KH
7615DEFUN ("set-keyboard-coding-system-internal",
7616 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7617 Sset_keyboard_coding_system_internal, 1, 1, 0,
7618 doc: /* Internal use only. */)
7619 (coding_system)
4ed46869
KH
7620 Lisp_Object coding_system;
7621{
b7826503 7622 CHECK_SYMBOL (coding_system);
df7492f9
KH
7623 setup_coding_system (Fcheck_coding_system (coding_system),
7624 &keyboard_coding);
7625 /* Characer composition should be disabled. */
7626 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7627 return Qnil;
7628}
7629
7630DEFUN ("keyboard-coding-system",
7631 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7632 doc: /* Return coding system specified for decoding keyboard input. */)
7633 ()
4ed46869 7634{
df7492f9 7635 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7636}
7637
7638\f
a5d301df
KH
7639DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7640 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7641 doc: /* Choose a coding system for an operation based on the target name.
7642The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7643DECODING-SYSTEM is the coding system to use for decoding
7644\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7645for encoding (in case OPERATION does encoding).
7646
7647The first argument OPERATION specifies an I/O primitive:
7648 For file I/O, `insert-file-contents' or `write-region'.
7649 For process I/O, `call-process', `call-process-region', or `start-process'.
7650 For network I/O, `open-network-stream'.
7651
7652The remaining arguments should be the same arguments that were passed
7653to the primitive. Depending on which primitive, one of those arguments
7654is selected as the TARGET. For example, if OPERATION does file I/O,
7655whichever argument specifies the file name is TARGET.
7656
7657TARGET has a meaning which depends on OPERATION:
7658 For file I/O, TARGET is a file name.
7659 For process I/O, TARGET is a process name.
7660 For network I/O, TARGET is a service name or a port number
7661
7662This function looks up what specified for TARGET in,
7663`file-coding-system-alist', `process-coding-system-alist',
7664or `network-coding-system-alist' depending on OPERATION.
7665They may specify a coding system, a cons of coding systems,
7666or a function symbol to call.
7667In the last case, we call the function with one argument,
7668which is a list of all the arguments given to this function.
7669
7670usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7671 (nargs, args)
4ed46869
KH
7672 int nargs;
7673 Lisp_Object *args;
7674{
7675 Lisp_Object operation, target_idx, target, val;
7676 register Lisp_Object chain;
7677
7678 if (nargs < 2)
7679 error ("Too few arguments");
7680 operation = args[0];
7681 if (!SYMBOLP (operation)
7682 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7683 error ("Invalid first arguement");
4ed46869
KH
7684 if (nargs < 1 + XINT (target_idx))
7685 error ("Too few arguments for operation: %s",
7686 XSYMBOL (operation)->name->data);
7687 target = args[XINT (target_idx) + 1];
7688 if (!(STRINGP (target)
7689 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7690 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7691
2e34157c
RS
7692 chain = ((EQ (operation, Qinsert_file_contents)
7693 || EQ (operation, Qwrite_region))
02ba4723 7694 ? Vfile_coding_system_alist
2e34157c 7695 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7696 ? Vnetwork_coding_system_alist
7697 : Vprocess_coding_system_alist));
4ed46869
KH
7698 if (NILP (chain))
7699 return Qnil;
7700
03699b14 7701 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7702 {
f44d27ce 7703 Lisp_Object elt;
4ed46869 7704
df7492f9 7705 elt = XCAR (chain);
4ed46869
KH
7706 if (CONSP (elt)
7707 && ((STRINGP (target)
03699b14
KR
7708 && STRINGP (XCAR (elt))
7709 && fast_string_match (XCAR (elt), target) >= 0)
7710 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7711 {
03699b14 7712 val = XCDR (elt);
b19fd4c5
KH
7713 /* Here, if VAL is both a valid coding system and a valid
7714 function symbol, we return VAL as a coding system. */
02ba4723
KH
7715 if (CONSP (val))
7716 return val;
7717 if (! SYMBOLP (val))
7718 return Qnil;
7719 if (! NILP (Fcoding_system_p (val)))
7720 return Fcons (val, val);
b19fd4c5
KH
7721 if (! NILP (Ffboundp (val)))
7722 {
7723 val = call1 (val, Flist (nargs, args));
7724 if (CONSP (val))
7725 return val;
7726 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7727 return Fcons (val, val);
7728 }
02ba4723
KH
7729 return Qnil;
7730 }
4ed46869
KH
7731 }
7732 return Qnil;
7733}
7734
df7492f9 7735DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7736 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7737 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7738usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7739 (nargs, args)
7740 int nargs;
7741 Lisp_Object *args;
7742{
7743 int i, j;
7744 int changed[coding_category_max];
7745 enum coding_category priorities[coding_category_max];
7746
7747 bzero (changed, sizeof changed);
7748
7749 for (i = j = 0; i < nargs; i++)
7750 {
7751 enum coding_category category;
7752 Lisp_Object spec, attrs;
7753
7754 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7755 attrs = AREF (spec, 0);
7756 category = XINT (CODING_ATTR_CATEGORY (attrs));
7757 if (changed[category])
7758 /* Ignore this coding system because a coding system of the
7759 same category already had a higher priority. */
7760 continue;
7761 changed[category] = 1;
7762 priorities[j++] = category;
7763 if (coding_categories[category].id >= 0
7764 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7765 setup_coding_system (args[i], &coding_categories[category]);
7766 }
7767
7768 /* Now we have decided top J priorities. Reflect the order of the
7769 original priorities to the remaining priorities. */
7770
7771 for (i = j, j = 0; i < coding_category_max; i++, j++)
7772 {
7773 while (j < coding_category_max
7774 && changed[coding_priorities[j]])
7775 j++;
7776 if (j == coding_category_max)
7777 abort ();
7778 priorities[i] = coding_priorities[j];
7779 }
7780
7781 bcopy (priorities, coding_priorities, sizeof priorities);
7782 return Qnil;
7783}
7784
7785DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7786 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7787 doc: /* Return a list of coding systems ordered by their priorities.
7788HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7789 (highestp)
7790 Lisp_Object highestp;
d46c5b12
KH
7791{
7792 int i;
df7492f9 7793 Lisp_Object val;
d46c5b12 7794
df7492f9 7795 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7796 {
df7492f9
KH
7797 enum coding_category category = coding_priorities[i];
7798 int id = coding_categories[category].id;
7799 Lisp_Object attrs;
7800
7801 if (id < 0)
7802 continue;
7803 attrs = CODING_ID_ATTRS (id);
7804 if (! NILP (highestp))
7805 return CODING_ATTR_BASE_NAME (attrs);
7806 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7807 }
7808 return Fnreverse (val);
7809}
7810
f0064e1f
DL
7811static char *suffixes[] = { "-unix", "-dos", "-mac" };
7812
df7492f9
KH
7813static Lisp_Object
7814make_subsidiaries (base)
7815 Lisp_Object base;
7816{
7817 Lisp_Object subsidiaries;
df7492f9
KH
7818 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7819 char *buf = (char *) alloca (base_name_len + 6);
7820 int i;
7821
7822 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7823 subsidiaries = Fmake_vector (make_number (3), Qnil);
7824 for (i = 0; i < 3; i++)
7825 {
7826 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7827 ASET (subsidiaries, i, intern (buf));
7828 }
7829 return subsidiaries;
7830}
7831
7832
7833DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7834 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7835 doc: /* For internal use only.
7836usage: (define-coding-system-internal ...) */)
df7492f9
KH
7837 (nargs, args)
7838 int nargs;
7839 Lisp_Object *args;
7840{
7841 Lisp_Object name;
7842 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7843 Lisp_Object attrs; /* Vector of attributes. */
7844 Lisp_Object eol_type;
7845 Lisp_Object aliases;
7846 Lisp_Object coding_type, charset_list, safe_charsets;
7847 enum coding_category category;
7848 Lisp_Object tail, val;
7849 int max_charset_id = 0;
7850 int i;
7851
7852 if (nargs < coding_arg_max)
7853 goto short_args;
7854
7855 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7856
7857 name = args[coding_arg_name];
7858 CHECK_SYMBOL (name);
7859 CODING_ATTR_BASE_NAME (attrs) = name;
7860
7861 val = args[coding_arg_mnemonic];
7862 if (! STRINGP (val))
7863 CHECK_CHARACTER (val);
7864 CODING_ATTR_MNEMONIC (attrs) = val;
7865
7866 coding_type = args[coding_arg_coding_type];
7867 CHECK_SYMBOL (coding_type);
7868 CODING_ATTR_TYPE (attrs) = coding_type;
7869
7870 charset_list = args[coding_arg_charset_list];
7871 if (SYMBOLP (charset_list))
7872 {
7873 if (EQ (charset_list, Qiso_2022))
7874 {
7875 if (! EQ (coding_type, Qiso_2022))
7876 error ("Invalid charset-list");
7877 charset_list = Viso_2022_charset_list;
7878 }
7879 else if (EQ (charset_list, Qemacs_mule))
7880 {
7881 if (! EQ (coding_type, Qemacs_mule))
7882 error ("Invalid charset-list");
7883 charset_list = Vemacs_mule_charset_list;
7884 }
7885 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7886 if (max_charset_id < XFASTINT (XCAR (tail)))
7887 max_charset_id = XFASTINT (XCAR (tail));
7888 }
7889 else
7890 {
7891 charset_list = Fcopy_sequence (charset_list);
7892 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7893 {
7894 struct charset *charset;
7895
7896 val = Fcar (tail);
7897 CHECK_CHARSET_GET_CHARSET (val, charset);
7898 if (EQ (coding_type, Qiso_2022)
7899 ? CHARSET_ISO_FINAL (charset) < 0
7900 : EQ (coding_type, Qemacs_mule)
7901 ? CHARSET_EMACS_MULE_ID (charset) < 0
7902 : 0)
7903 error ("Can't handle charset `%s'",
7904 XSYMBOL (CHARSET_NAME (charset))->name->data);
7905
7906 XCAR (tail) = make_number (charset->id);
7907 if (max_charset_id < charset->id)
7908 max_charset_id = charset->id;
7909 }
7910 }
7911 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7912
7913 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7914 make_number (255));
7915 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7916 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7917 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7918
584948ac
KH
7919 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7920
df7492f9
KH
7921 val = args[coding_arg_decode_translation_table];
7922 if (! NILP (val))
7923 CHECK_CHAR_TABLE (val);
7924 CODING_ATTR_DECODE_TBL (attrs) = val;
7925
7926 val = args[coding_arg_encode_translation_table];
7927 if (! NILP (val))
7928 CHECK_CHAR_TABLE (val);
7929 CODING_ATTR_ENCODE_TBL (attrs) = val;
7930
7931 val = args[coding_arg_post_read_conversion];
7932 CHECK_SYMBOL (val);
7933 CODING_ATTR_POST_READ (attrs) = val;
7934
7935 val = args[coding_arg_pre_write_conversion];
7936 CHECK_SYMBOL (val);
7937 CODING_ATTR_PRE_WRITE (attrs) = val;
7938
7939 val = args[coding_arg_default_char];
7940 if (NILP (val))
7941 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7942 else
7943 {
7944 CHECK_CHARACTER (val);
7945 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7946 }
7947
7948 val = args[coding_arg_plist];
7949 CHECK_LIST (val);
7950 CODING_ATTR_PLIST (attrs) = val;
7951
7952 if (EQ (coding_type, Qcharset))
7953 {
4187a77d 7954 Lisp_Object list;
c7c66a95
KH
7955 /* Generate a lisp vector of 256 elements. Each element is nil,
7956 integer, or a list of charset IDs.
7957
7958 If Nth element is nil, the byte code N is invalid in this
7959 coding system.
7960
7961 If Nth element is a number NUM, N is the first byte of a
7962 charset whose ID is NUM.
7963
7964 If Nth element is a list of charset IDs, N is the first byte
7965 of one of them. The list is sorted by dimensions of the
7966 charsets. A charset of smaller dimension comes firtst.
7967 */
4187a77d
KH
7968 for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
7969 {
7970 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7971
7972 if (charset->method == CHARSET_METHOD_SUPERSET)
7973 {
7974 val = CHARSET_SUPERSET (charset);
7975 for (; CONSP (val); val = XCDR (val))
7976 list = Fcons (XCAR (XCAR (val)), list);
7977 }
7978 else
7979 list = Fcons (XCAR (tail), list);
7980 }
7981
df7492f9
KH
7982 val = Fmake_vector (make_number (256), Qnil);
7983
4187a77d 7984 for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
df7492f9 7985 {
c7c66a95
KH
7986 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7987 int dim = CHARSET_DIMENSION (charset);
7988 int idx = (dim - 1) * 4;
7989
584948ac
KH
7990 if (CHARSET_ASCII_COMPATIBLE_P (charset))
7991 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7992
15d143f7
KH
7993 for (i = charset->code_space[idx];
7994 i <= charset->code_space[idx + 1]; i++)
7995 {
c7c66a95
KH
7996 Lisp_Object tmp, tmp2;
7997 int dim2;
7998
7999 tmp = AREF (val, i);
8000 if (NILP (tmp))
8001 tmp = XCAR (tail);
8002 else if (NUMBERP (tmp))
8003 {
8004 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8005 if (dim < dim2)
c7c66a95 8006 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8007 else
8008 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8009 }
15d143f7 8010 else
c7c66a95
KH
8011 {
8012 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8013 {
8014 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8015 if (dim < dim2)
8016 break;
8017 }
8018 if (NILP (tmp2))
8019 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8020 else
8021 {
8022 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8023 XSETCAR (tmp2, XCAR (tail));
8024 }
8025 }
8026 ASET (val, i, tmp);
15d143f7 8027 }
df7492f9
KH
8028 }
8029 ASET (attrs, coding_attr_charset_valids, val);
8030 category = coding_category_charset;
8031 }
8032 else if (EQ (coding_type, Qccl))
8033 {
8034 Lisp_Object valids;
8035
8036 if (nargs < coding_arg_ccl_max)
8037 goto short_args;
8038
8039 val = args[coding_arg_ccl_decoder];
8040 CHECK_CCL_PROGRAM (val);
8041 if (VECTORP (val))
8042 val = Fcopy_sequence (val);
8043 ASET (attrs, coding_attr_ccl_decoder, val);
8044
8045 val = args[coding_arg_ccl_encoder];
8046 CHECK_CCL_PROGRAM (val);
8047 if (VECTORP (val))
8048 val = Fcopy_sequence (val);
8049 ASET (attrs, coding_attr_ccl_encoder, val);
8050
8051 val = args[coding_arg_ccl_valids];
8052 valids = Fmake_string (make_number (256), make_number (0));
8053 for (tail = val; !NILP (tail); tail = Fcdr (tail))
8054 {
8dcbea82
KH
8055 int from, to;
8056
df7492f9
KH
8057 val = Fcar (tail);
8058 if (INTEGERP (val))
8dcbea82
KH
8059 {
8060 from = to = XINT (val);
8061 if (from < 0 || from > 255)
8062 args_out_of_range_3 (val, make_number (0), make_number (255));
8063 }
df7492f9
KH
8064 else
8065 {
df7492f9
KH
8066 CHECK_CONS (val);
8067 CHECK_NUMBER (XCAR (val));
8068 CHECK_NUMBER (XCDR (val));
8069 from = XINT (XCAR (val));
8dcbea82
KH
8070 if (from < 0 || from > 255)
8071 args_out_of_range_3 (XCAR (val),
8072 make_number (0), make_number (255));
df7492f9 8073 to = XINT (XCDR (val));
8dcbea82
KH
8074 if (to < from || to > 255)
8075 args_out_of_range_3 (XCDR (val),
8076 XCAR (val), make_number (255));
df7492f9 8077 }
8dcbea82
KH
8078 for (i = from; i <= to; i++)
8079 XSTRING (valids)->data[i] = 1;
df7492f9
KH
8080 }
8081 ASET (attrs, coding_attr_ccl_valids, valids);
8082
8083 category = coding_category_ccl;
8084 }
8085 else if (EQ (coding_type, Qutf_16))
8086 {
8087 Lisp_Object bom, endian;
8088
584948ac
KH
8089 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8090
df7492f9
KH
8091 if (nargs < coding_arg_utf16_max)
8092 goto short_args;
8093
8094 bom = args[coding_arg_utf16_bom];
8095 if (! NILP (bom) && ! EQ (bom, Qt))
8096 {
8097 CHECK_CONS (bom);
8098 CHECK_CODING_SYSTEM (XCAR (bom));
8099 CHECK_CODING_SYSTEM (XCDR (bom));
8100 }
8101 ASET (attrs, coding_attr_utf_16_bom, bom);
8102
8103 endian = args[coding_arg_utf16_endian];
b49a1807
KH
8104 CHECK_SYMBOL (endian);
8105 if (NILP (endian))
8106 endian = Qbig;
8107 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8108 error ("Invalid endian: %s", XSYMBOL (endian)->name->data);
df7492f9
KH
8109 ASET (attrs, coding_attr_utf_16_endian, endian);
8110
8111 category = (CONSP (bom)
8112 ? coding_category_utf_16_auto
8113 : NILP (bom)
b49a1807 8114 ? (EQ (endian, Qbig)
df7492f9
KH
8115 ? coding_category_utf_16_be_nosig
8116 : coding_category_utf_16_le_nosig)
b49a1807 8117 : (EQ (endian, Qbig)
df7492f9
KH
8118 ? coding_category_utf_16_be
8119 : coding_category_utf_16_le));
8120 }
8121 else if (EQ (coding_type, Qiso_2022))
8122 {
8123 Lisp_Object initial, reg_usage, request, flags;
4776e638 8124 int i;
1397dc18 8125
df7492f9
KH
8126 if (nargs < coding_arg_iso2022_max)
8127 goto short_args;
8128
8129 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8130 CHECK_VECTOR (initial);
8131 for (i = 0; i < 4; i++)
8132 {
8133 val = Faref (initial, make_number (i));
8134 if (! NILP (val))
8135 {
584948ac
KH
8136 struct charset *charset;
8137
8138 CHECK_CHARSET_GET_CHARSET (val, charset);
8139 ASET (initial, i, make_number (CHARSET_ID (charset)));
8140 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8141 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8142 }
8143 else
8144 ASET (initial, i, make_number (-1));
8145 }
8146
8147 reg_usage = args[coding_arg_iso2022_reg_usage];
8148 CHECK_CONS (reg_usage);
8149 CHECK_NATNUM (XCAR (reg_usage));
8150 CHECK_NATNUM (XCDR (reg_usage));
8151
8152 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8153 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8154 {
df7492f9
KH
8155 int id;
8156
8157 val = Fcar (tail);
8158 CHECK_CONS (val);
8159 CHECK_CHARSET_GET_ID (XCAR (val), id);
8160 CHECK_NATNUM (XCDR (val));
8161 if (XINT (XCDR (val)) >= 4)
8162 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8163 XCAR (val) = make_number (id);
1397dc18 8164 }
df7492f9
KH
8165
8166 flags = args[coding_arg_iso2022_flags];
8167 CHECK_NATNUM (flags);
8168 i = XINT (flags);
8169 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8170 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8171
8172 ASET (attrs, coding_attr_iso_initial, initial);
8173 ASET (attrs, coding_attr_iso_usage, reg_usage);
8174 ASET (attrs, coding_attr_iso_request, request);
8175 ASET (attrs, coding_attr_iso_flags, flags);
8176 setup_iso_safe_charsets (attrs);
8177
8178 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8179 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8180 | CODING_ISO_FLAG_SINGLE_SHIFT))
8181 ? coding_category_iso_7_else
8182 : EQ (args[coding_arg_charset_list], Qiso_2022)
8183 ? coding_category_iso_7
8184 : coding_category_iso_7_tight);
8185 else
8186 {
8187 int id = XINT (AREF (initial, 1));
8188
c6fb6e98 8189 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8190 || EQ (args[coding_arg_charset_list], Qiso_2022)
8191 || id < 0)
8192 ? coding_category_iso_8_else
8193 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8194 ? coding_category_iso_8_1
8195 : coding_category_iso_8_2);
8196 }
0ce7886f
KH
8197 if (category != coding_category_iso_8_1
8198 && category != coding_category_iso_8_2)
8199 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8200 }
8201 else if (EQ (coding_type, Qemacs_mule))
8202 {
8203 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8204 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8205 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8206 category = coding_category_emacs_mule;
8207 }
8208 else if (EQ (coding_type, Qshift_jis))
8209 {
8210
8211 struct charset *charset;
8212
8213 if (XINT (Flength (charset_list)) != 3)
8214 error ("There should be just three charsets");
8215
8216 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8217 if (CHARSET_DIMENSION (charset) != 1)
8218 error ("Dimension of charset %s is not one",
8219 XSYMBOL (CHARSET_NAME (charset))->name->data);
584948ac
KH
8220 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8221 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8222
8223 charset_list = XCDR (charset_list);
8224 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8225 if (CHARSET_DIMENSION (charset) != 1)
8226 error ("Dimension of charset %s is not one",
8227 XSYMBOL (CHARSET_NAME (charset))->name->data);
8228
8229 charset_list = XCDR (charset_list);
8230 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8231 if (CHARSET_DIMENSION (charset) != 2)
8232 error ("Dimension of charset %s is not two",
8233 XSYMBOL (CHARSET_NAME (charset))->name->data);
8234
8235 category = coding_category_sjis;
8236 Vsjis_coding_system = name;
8237 }
8238 else if (EQ (coding_type, Qbig5))
8239 {
8240 struct charset *charset;
8241
8242 if (XINT (Flength (charset_list)) != 2)
8243 error ("There should be just two charsets");
8244
8245 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8246 if (CHARSET_DIMENSION (charset) != 1)
8247 error ("Dimension of charset %s is not one",
8248 XSYMBOL (CHARSET_NAME (charset))->name->data);
584948ac
KH
8249 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8250 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8251
8252 charset_list = XCDR (charset_list);
8253 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8254 if (CHARSET_DIMENSION (charset) != 2)
8255 error ("Dimension of charset %s is not two",
8256 XSYMBOL (CHARSET_NAME (charset))->name->data);
8257
8258 category = coding_category_big5;
8259 Vbig5_coding_system = name;
8260 }
8261 else if (EQ (coding_type, Qraw_text))
584948ac
KH
8262 {
8263 category = coding_category_raw_text;
8264 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8265 }
df7492f9 8266 else if (EQ (coding_type, Qutf_8))
584948ac
KH
8267 {
8268 category = coding_category_utf_8;
8269 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8270 }
df7492f9
KH
8271 else if (EQ (coding_type, Qundecided))
8272 category = coding_category_undecided;
8273 else
8274 error ("Invalid coding system type: %s",
8275 XSYMBOL (coding_type)->name->data);
8276
8277 CODING_ATTR_CATEGORY (attrs) = make_number (category);
8278
8279 eol_type = args[coding_arg_eol_type];
8280 if (! NILP (eol_type)
8281 && ! EQ (eol_type, Qunix)
8282 && ! EQ (eol_type, Qdos)
8283 && ! EQ (eol_type, Qmac))
8284 error ("Invalid eol-type");
8285
8286 aliases = Fcons (name, Qnil);
8287
8288 if (NILP (eol_type))
8289 {
8290 eol_type = make_subsidiaries (name);
8291 for (i = 0; i < 3; i++)
1397dc18 8292 {
df7492f9
KH
8293 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8294
8295 this_name = AREF (eol_type, i);
8296 this_aliases = Fcons (this_name, Qnil);
8297 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8298 this_spec = Fmake_vector (make_number (3), attrs);
8299 ASET (this_spec, 1, this_aliases);
8300 ASET (this_spec, 2, this_eol_type);
8301 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8302 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8303 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8304 Vcoding_system_alist);
1397dc18 8305 }
d46c5b12 8306 }
1397dc18 8307
df7492f9
KH
8308 spec_vec = Fmake_vector (make_number (3), attrs);
8309 ASET (spec_vec, 1, aliases);
8310 ASET (spec_vec, 2, eol_type);
8311
8312 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8313 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8314 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8315 Vcoding_system_alist);
8316
8317 {
8318 int id = coding_categories[category].id;
8319
8320 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8321 setup_coding_system (name, &coding_categories[category]);
8322 }
8323
d46c5b12 8324 return Qnil;
df7492f9
KH
8325
8326 short_args:
8327 return Fsignal (Qwrong_number_of_arguments,
8328 Fcons (intern ("define-coding-system-internal"),
8329 make_number (nargs)));
d46c5b12
KH
8330}
8331
da7db224 8332/* Fixme: should this record the alias relationships for
e1c23804 8333 diagnostics? Should it update coding-system-list? */
df7492f9
KH
8334DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8335 Sdefine_coding_system_alias, 2, 2, 0,
8336 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8337 (alias, coding_system)
8338 Lisp_Object alias, coding_system;
66cfb530 8339{
df7492f9 8340 Lisp_Object spec, aliases, eol_type;
84d60297 8341
df7492f9
KH
8342 CHECK_SYMBOL (alias);
8343 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8344 aliases = AREF (spec, 1);
8345 while (!NILP (XCDR (aliases)))
8346 aliases = XCDR (aliases);
8347 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 8348
df7492f9
KH
8349 eol_type = AREF (spec, 2);
8350 if (VECTORP (eol_type))
66cfb530 8351 {
df7492f9
KH
8352 Lisp_Object subsidiaries;
8353 int i;
8354
8355 subsidiaries = make_subsidiaries (alias);
8356 for (i = 0; i < 3; i++)
8357 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8358 AREF (eol_type, i));
8359
8360 ASET (spec, 2, subsidiaries);
66cfb530 8361 }
df7492f9
KH
8362
8363 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
8364 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8365 Vcoding_system_alist);
66cfb530
KH
8366
8367 return Qnil;
8368}
8369
df7492f9
KH
8370DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8371 1, 1, 0,
8372 doc: /* Return the base of CODING-SYSTEM.
da7db224 8373Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
8374 (coding_system)
8375 Lisp_Object coding_system;
8376{
8377 Lisp_Object spec, attrs;
8378
8379 if (NILP (coding_system))
8380 return (Qno_conversion);
8381 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8382 attrs = AREF (spec, 0);
8383 return CODING_ATTR_BASE_NAME (attrs);
8384}
8385
8386DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8387 1, 1, 0,
8388 doc: "Return the property list of CODING-SYSTEM.")
8389 (coding_system)
8390 Lisp_Object coding_system;
8391{
8392 Lisp_Object spec, attrs;
8393
8394 if (NILP (coding_system))
8395 coding_system = Qno_conversion;
8396 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8397 attrs = AREF (spec, 0);
8398 return CODING_ATTR_PLIST (attrs);
8399}
8400
8401
8402DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8403 1, 1, 0,
da7db224 8404 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
8405 (coding_system)
8406 Lisp_Object coding_system;
8407{
8408 Lisp_Object spec;
8409
8410 if (NILP (coding_system))
8411 coding_system = Qno_conversion;
8412 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 8413 return AREF (spec, 1);
df7492f9
KH
8414}
8415
8416DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8417 Scoding_system_eol_type, 1, 1, 0,
8418 doc: /* Return eol-type of CODING-SYSTEM.
8419An eol-type is integer 0, 1, 2, or a vector of coding systems.
8420
8421Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8422and CR respectively.
8423
8424A vector value indicates that a format of end-of-line should be
8425detected automatically. Nth element of the vector is the subsidiary
8426coding system whose eol-type is N. */)
8427 (coding_system)
8428 Lisp_Object coding_system;
8429{
8430 Lisp_Object spec, eol_type;
8431 int n;
8432
8433 if (NILP (coding_system))
8434 coding_system = Qno_conversion;
8435 if (! CODING_SYSTEM_P (coding_system))
8436 return Qnil;
8437 spec = CODING_SYSTEM_SPEC (coding_system);
8438 eol_type = AREF (spec, 2);
8439 if (VECTORP (eol_type))
8440 return Fcopy_sequence (eol_type);
8441 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8442 return make_number (n);
8443}
8444
4ed46869
KH
8445#endif /* emacs */
8446
8447\f
1397dc18 8448/*** 9. Post-amble ***/
4ed46869 8449
dfcf069d 8450void
4ed46869
KH
8451init_coding_once ()
8452{
8453 int i;
8454
df7492f9
KH
8455 for (i = 0; i < coding_category_max; i++)
8456 {
8457 coding_categories[i].id = -1;
8458 coding_priorities[i] = i;
8459 }
4ed46869
KH
8460
8461 /* ISO2022 specific initialize routine. */
8462 for (i = 0; i < 0x20; i++)
b73bfc1c 8463 iso_code_class[i] = ISO_control_0;
4ed46869
KH
8464 for (i = 0x21; i < 0x7F; i++)
8465 iso_code_class[i] = ISO_graphic_plane_0;
8466 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 8467 iso_code_class[i] = ISO_control_1;
4ed46869
KH
8468 for (i = 0xA1; i < 0xFF; i++)
8469 iso_code_class[i] = ISO_graphic_plane_1;
8470 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8471 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8472 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8473 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8474 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8475 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8476 iso_code_class[ISO_CODE_ESC] = ISO_escape;
8477 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8478 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8479 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8480
b843d1ae 8481 inhibit_pre_post_conversion = 0;
df7492f9
KH
8482
8483 for (i = 0; i < 256; i++)
8484 {
8485 emacs_mule_bytes[i] = 1;
8486 }
7c78e542
KH
8487 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8488 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8489 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8490 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
8491}
8492
8493#ifdef emacs
8494
dfcf069d 8495void
e0e989f6
KH
8496syms_of_coding ()
8497{
df7492f9
KH
8498 staticpro (&Vcoding_system_hash_table);
8499 Vcoding_system_hash_table = Fmakehash (Qeq);
8500
8501 staticpro (&Vsjis_coding_system);
8502 Vsjis_coding_system = Qnil;
8503
8504 staticpro (&Vbig5_coding_system);
8505 Vbig5_coding_system = Qnil;
8506
8507 staticpro (&Vcode_conversion_work_buf_list);
8508 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 8509
df7492f9
KH
8510 staticpro (&Vcode_conversion_reused_work_buf);
8511 Vcode_conversion_reused_work_buf = Qnil;
8512
8513 DEFSYM (Qcharset, "charset");
8514 DEFSYM (Qtarget_idx, "target-idx");
8515 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
8516 Fset (Qcoding_system_history, Qnil);
8517
9ce27fde 8518 /* Target FILENAME is the first argument. */
e0e989f6 8519 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 8520 /* Target FILENAME is the third argument. */
e0e989f6
KH
8521 Fput (Qwrite_region, Qtarget_idx, make_number (2));
8522
df7492f9 8523 DEFSYM (Qcall_process, "call-process");
9ce27fde 8524 /* Target PROGRAM is the first argument. */
e0e989f6
KH
8525 Fput (Qcall_process, Qtarget_idx, make_number (0));
8526
df7492f9 8527 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 8528 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8529 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8530
df7492f9 8531 DEFSYM (Qstart_process, "start-process");
9ce27fde 8532 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8533 Fput (Qstart_process, Qtarget_idx, make_number (2));
8534
df7492f9 8535 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 8536 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
8537 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8538
df7492f9
KH
8539 DEFSYM (Qcoding_system, "coding-system");
8540 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 8541
df7492f9
KH
8542 DEFSYM (Qeol_type, "eol-type");
8543 DEFSYM (Qunix, "unix");
8544 DEFSYM (Qdos, "dos");
4ed46869 8545
df7492f9
KH
8546 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8547 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8548 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8549 DEFSYM (Qdefault_char, "default-char");
8550 DEFSYM (Qundecided, "undecided");
8551 DEFSYM (Qno_conversion, "no-conversion");
8552 DEFSYM (Qraw_text, "raw-text");
4ed46869 8553
df7492f9 8554 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 8555
df7492f9 8556 DEFSYM (Qutf_8, "utf-8");
27901516 8557
df7492f9 8558 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
8559 DEFSYM (Qbig, "big");
8560 DEFSYM (Qlittle, "little");
27901516 8561
df7492f9
KH
8562 DEFSYM (Qshift_jis, "shift-jis");
8563 DEFSYM (Qbig5, "big5");
4ed46869 8564
df7492f9 8565 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 8566
df7492f9 8567 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
8568 Fput (Qcoding_system_error, Qerror_conditions,
8569 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8570 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 8571 build_string ("Invalid coding system"));
4ed46869 8572
df7492f9
KH
8573 /* Intern this now in case it isn't already done.
8574 Setting this variable twice is harmless.
8575 But don't staticpro it here--that is done in alloc.c. */
8576 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 8577
df7492f9 8578 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8579 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8580 DEFSYM (Qtranslation_table_id, "translation-table-id");
8581 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8582 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 8583
df7492f9 8584 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 8585
df7492f9 8586 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 8587
df7492f9
KH
8588 Vcoding_category_table
8589 = Fmake_vector (make_number (coding_category_max), Qnil);
8590 staticpro (&Vcoding_category_table);
8591 /* Followings are target of code detection. */
8592 ASET (Vcoding_category_table, coding_category_iso_7,
8593 intern ("coding-category-iso-7"));
8594 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8595 intern ("coding-category-iso-7-tight"));
8596 ASET (Vcoding_category_table, coding_category_iso_8_1,
8597 intern ("coding-category-iso-8-1"));
8598 ASET (Vcoding_category_table, coding_category_iso_8_2,
8599 intern ("coding-category-iso-8-2"));
8600 ASET (Vcoding_category_table, coding_category_iso_7_else,
8601 intern ("coding-category-iso-7-else"));
8602 ASET (Vcoding_category_table, coding_category_iso_8_else,
8603 intern ("coding-category-iso-8-else"));
8604 ASET (Vcoding_category_table, coding_category_utf_8,
8605 intern ("coding-category-utf-8"));
8606 ASET (Vcoding_category_table, coding_category_utf_16_be,
8607 intern ("coding-category-utf-16-be"));
8608 ASET (Vcoding_category_table, coding_category_utf_16_le,
8609 intern ("coding-category-utf-16-le"));
8610 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8611 intern ("coding-category-utf-16-be-nosig"));
8612 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8613 intern ("coding-category-utf-16-le-nosig"));
8614 ASET (Vcoding_category_table, coding_category_charset,
8615 intern ("coding-category-charset"));
8616 ASET (Vcoding_category_table, coding_category_sjis,
8617 intern ("coding-category-sjis"));
8618 ASET (Vcoding_category_table, coding_category_big5,
8619 intern ("coding-category-big5"));
8620 ASET (Vcoding_category_table, coding_category_ccl,
8621 intern ("coding-category-ccl"));
8622 ASET (Vcoding_category_table, coding_category_emacs_mule,
8623 intern ("coding-category-emacs-mule"));
8624 /* Followings are NOT target of code detection. */
8625 ASET (Vcoding_category_table, coding_category_raw_text,
8626 intern ("coding-category-raw-text"));
8627 ASET (Vcoding_category_table, coding_category_undecided,
8628 intern ("coding-category-undecided"));
70c22245 8629
4ed46869
KH
8630 defsubr (&Scoding_system_p);
8631 defsubr (&Sread_coding_system);
8632 defsubr (&Sread_non_nil_coding_system);
8633 defsubr (&Scheck_coding_system);
8634 defsubr (&Sdetect_coding_region);
d46c5b12 8635 defsubr (&Sdetect_coding_string);
05e6f5dc 8636 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8637 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8638 defsubr (&Sdecode_coding_region);
8639 defsubr (&Sencode_coding_region);
8640 defsubr (&Sdecode_coding_string);
8641 defsubr (&Sencode_coding_string);
8642 defsubr (&Sdecode_sjis_char);
8643 defsubr (&Sencode_sjis_char);
8644 defsubr (&Sdecode_big5_char);
8645 defsubr (&Sencode_big5_char);
1ba9e4ab 8646 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8647 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8648 defsubr (&Sterminal_coding_system);
1ba9e4ab 8649 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8650 defsubr (&Skeyboard_coding_system);
a5d301df 8651 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8652 defsubr (&Sset_coding_system_priority);
8653 defsubr (&Sdefine_coding_system_internal);
8654 defsubr (&Sdefine_coding_system_alias);
8655 defsubr (&Scoding_system_base);
8656 defsubr (&Scoding_system_plist);
8657 defsubr (&Scoding_system_aliases);
8658 defsubr (&Scoding_system_eol_type);
8659 defsubr (&Scoding_system_priority_list);
4ed46869 8660
4608c386 8661 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8662 doc: /* List of coding systems.
8663
8664Do not alter the value of this variable manually. This variable should be
df7492f9 8665updated by the functions `define-coding-system' and
48b0f3ae 8666`define-coding-system-alias'. */);
4608c386
KH
8667 Vcoding_system_list = Qnil;
8668
8669 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8670 doc: /* Alist of coding system names.
8671Each element is one element list of coding system name.
8672This variable is given to `completing-read' as TABLE argument.
8673
8674Do not alter the value of this variable manually. This variable should be
8675updated by the functions `make-coding-system' and
8676`define-coding-system-alias'. */);
4608c386
KH
8677 Vcoding_system_alist = Qnil;
8678
4ed46869 8679 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8680 doc: /* List of coding-categories (symbols) ordered by priority.
8681
8682On detecting a coding system, Emacs tries code detection algorithms
8683associated with each coding-category one by one in this order. When
8684one algorithm agrees with a byte sequence of source text, the coding
8685system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8686 {
8687 int i;
8688
8689 Vcoding_category_list = Qnil;
df7492f9 8690 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8691 Vcoding_category_list
d46c5b12
KH
8692 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8693 Vcoding_category_list);
4ed46869
KH
8694 }
8695
8696 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8697 doc: /* Specify the coding system for read operations.
8698It is useful to bind this variable with `let', but do not set it globally.
8699If the value is a coding system, it is used for decoding on read operation.
8700If not, an appropriate element is used from one of the coding system alists:
8701There are three such tables, `file-coding-system-alist',
8702`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8703 Vcoding_system_for_read = Qnil;
8704
8705 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8706 doc: /* Specify the coding system for write operations.
8707Programs bind this variable with `let', but you should not set it globally.
8708If the value is a coding system, it is used for encoding of output,
8709when writing it to a file and when sending it to a file or subprocess.
8710
8711If this does not specify a coding system, an appropriate element
8712is used from one of the coding system alists:
8713There are three such tables, `file-coding-system-alist',
8714`process-coding-system-alist', and `network-coding-system-alist'.
8715For output to files, if the above procedure does not specify a coding system,
8716the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8717 Vcoding_system_for_write = Qnil;
8718
8719 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8720 doc: /*
8721Coding system used in the latest file or process I/O. */);
4ed46869
KH
8722 Vlast_coding_system_used = Qnil;
8723
9ce27fde 8724 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8725 doc: /*
8726*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8727See info node `Coding Systems' and info node `Text and Binary' concerning
8728such conversion. */);
9ce27fde
KH
8729 inhibit_eol_conversion = 0;
8730
ed29121d 8731 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8732 doc: /*
8733Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8734Bind it to t if the process output is to be treated as if it were a file
8735read from some filesystem. */);
ed29121d
EZ
8736 inherit_process_coding_system = 0;
8737
02ba4723 8738 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8739 doc: /*
8740Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8741The format is ((PATTERN . VAL) ...),
8742where PATTERN is a regular expression matching a file name,
8743VAL is a coding system, a cons of coding systems, or a function symbol.
8744If VAL is a coding system, it is used for both decoding and encoding
8745the file contents.
8746If VAL is a cons of coding systems, the car part is used for decoding,
8747and the cdr part is used for encoding.
8748If VAL is a function symbol, the function must return a coding system
0192762c
DL
8749or a cons of coding systems which are used as above. The function gets
8750the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8751
8752See also the function `find-operation-coding-system'
8753and the variable `auto-coding-alist'. */);
02ba4723
KH
8754 Vfile_coding_system_alist = Qnil;
8755
8756 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8757 doc: /*
8758Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8759The format is ((PATTERN . VAL) ...),
8760where PATTERN is a regular expression matching a program name,
8761VAL is a coding system, a cons of coding systems, or a function symbol.
8762If VAL is a coding system, it is used for both decoding what received
8763from the program and encoding what sent to the program.
8764If VAL is a cons of coding systems, the car part is used for decoding,
8765and the cdr part is used for encoding.
8766If VAL is a function symbol, the function must return a coding system
8767or a cons of coding systems which are used as above.
8768
8769See also the function `find-operation-coding-system'. */);
02ba4723
KH
8770 Vprocess_coding_system_alist = Qnil;
8771
8772 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8773 doc: /*
8774Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8775The format is ((PATTERN . VAL) ...),
8776where PATTERN is a regular expression matching a network service name
8777or is a port number to connect to,
8778VAL is a coding system, a cons of coding systems, or a function symbol.
8779If VAL is a coding system, it is used for both decoding what received
8780from the network stream and encoding what sent to the network stream.
8781If VAL is a cons of coding systems, the car part is used for decoding,
8782and the cdr part is used for encoding.
8783If VAL is a function symbol, the function must return a coding system
8784or a cons of coding systems which are used as above.
8785
8786See also the function `find-operation-coding-system'. */);
02ba4723 8787 Vnetwork_coding_system_alist = Qnil;
4ed46869 8788
68c45bf0 8789 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8790 doc: /* Coding system to use with system messages.
8791Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8792 Vlocale_coding_system = Qnil;
8793
005f0d35 8794 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8795 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8796 doc: /*
8797*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8798 eol_mnemonic_unix = build_string (":");
4ed46869 8799
7722baf9 8800 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8801 doc: /*
8802*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8803 eol_mnemonic_dos = build_string ("\\");
4ed46869 8804
7722baf9 8805 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8806 doc: /*
8807*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8808 eol_mnemonic_mac = build_string ("/");
4ed46869 8809
7722baf9 8810 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8811 doc: /*
8812*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8813 eol_mnemonic_undecided = build_string (":");
4ed46869 8814
84fbb8a0 8815 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8816 doc: /*
8817*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8818 Venable_character_translation = Qt;
bdd9fb48 8819
f967223b 8820 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8821 &Vstandard_translation_table_for_decode,
8822 doc: /* Table for translating characters while decoding. */);
f967223b 8823 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8824
f967223b 8825 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8826 &Vstandard_translation_table_for_encode,
8827 doc: /* Table for translating characters while encoding. */);
f967223b 8828 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8829
df7492f9 8830 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8831 doc: /* Alist of charsets vs revision numbers.
8832While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8833designate it with the escape sequence identifying revision (cdr part
8834of the element). */);
8835 Vcharset_revision_table = Qnil;
02ba4723
KH
8836
8837 DEFVAR_LISP ("default-process-coding-system",
8838 &Vdefault_process_coding_system,
48b0f3ae
PJ
8839 doc: /* Cons of coding systems used for process I/O by default.
8840The car part is used for decoding a process output,
8841the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8842 Vdefault_process_coding_system = Qnil;
c4825358 8843
3f003981 8844 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8845 doc: /*
8846Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8847This is a vector of length 256.
8848If Nth element is non-nil, the existence of code N in a file
8849\(or output of subprocess) doesn't prevent it to be detected as
8850a coding system of ISO 2022 variant which has a flag
8851`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8852or reading output of a subprocess.
8853Only 128th through 159th elements has a meaning. */);
3f003981 8854 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8855
8856 DEFVAR_LISP ("select-safe-coding-system-function",
8857 &Vselect_safe_coding_system_function,
df7492f9
KH
8858 doc: /*
8859Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8860
8861If set, this function is called to force a user to select a proper
8862coding system which can encode the text in the case that a default
8863coding system used in each operation can't encode the text.
8864
8865The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8866 Vselect_safe_coding_system_function = Qnil;
8867
22ab2303 8868 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8869 &inhibit_iso_escape_detection,
df7492f9
KH
8870 doc: /*
8871If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8872
8873By default, on reading a file, Emacs tries to detect how the text is
8874encoded. This code detection is sensitive to escape sequences. If
8875the sequence is valid as ISO2022, the code is determined as one of
8876the ISO2022 encodings, and the file is decoded by the corresponding
8877coding system (e.g. `iso-2022-7bit').
8878
8879However, there may be a case that you want to read escape sequences in
8880a file as is. In such a case, you can set this variable to non-nil.
8881Then, as the code detection ignores any escape sequences, no file is
8882detected as encoded in some ISO2022 encoding. The result is that all
8883escape sequences become visible in a buffer.
8884
8885The default value is nil, and it is strongly recommended not to change
8886it. That is because many Emacs Lisp source files that contain
8887non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8888in Emacs's distribution, and they won't be decoded correctly on
8889reading if you suppress escape sequence detection.
8890
8891The other way to read escape sequences in a file without decoding is
8892to explicitly specify some coding system that doesn't use ISO2022's
8893escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8894 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8895
8896 {
8897 Lisp_Object args[coding_arg_max];
8898 Lisp_Object plist[14];
8899 int i;
8900
8901 for (i = 0; i < coding_arg_max; i++)
8902 args[i] = Qnil;
8903
8904 plist[0] = intern (":name");
8905 plist[1] = args[coding_arg_name] = Qno_conversion;
8906 plist[2] = intern (":mnemonic");
8907 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8908 plist[4] = intern (":coding-type");
8909 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8910 plist[6] = intern (":ascii-compatible-p");
8911 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8912 plist[8] = intern (":default-char");
8913 plist[9] = args[coding_arg_default_char] = make_number (0);
8914 plist[10] = intern (":docstring");
8915 plist[11] = build_string ("Do no conversion.\n\
8916\n\
8917When you visit a file with this coding, the file is read into a\n\
8918unibyte buffer as is, thus each byte of a file is treated as a\n\
8919character.");
8920 plist[12] = intern (":eol-type");
8921 plist[13] = args[coding_arg_eol_type] = Qunix;
8922 args[coding_arg_plist] = Flist (14, plist);
8923 Fdefine_coding_system_internal (coding_arg_max, args);
8924 }
8925
8926 setup_coding_system (Qno_conversion, &keyboard_coding);
8927 setup_coding_system (Qno_conversion, &terminal_coding);
8928 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8929}
8930
68c45bf0
PE
8931char *
8932emacs_strerror (error_number)
8933 int error_number;
8934{
8935 char *str;
8936
ca9c0567 8937 synchronize_system_messages_locale ();
68c45bf0
PE
8938 str = strerror (error_number);
8939
8940 if (! NILP (Vlocale_coding_system))
8941 {
8942 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8943 Vlocale_coding_system,
8944 0);
8945 str = (char *) XSTRING (dec)->data;
8946 }
8947
8948 return str;
8949}
8950
4ed46869 8951#endif /* emacs */