(decode_coding_ccl, encode_coding_ccl): Call ccl_driver
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
5a936b46 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on the operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we set up a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
#if 0
/* Template of a detector.  Scan the source bytes of CODING and record
   in DETECT_INFO whether they conform to XXX.  Return 1 if the whole
   source conforms to XXX, otherwise return 0.  */
static int
detect_coding_XXX (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  unsigned char *src = coding->source;
  unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int found = 0;
  /* Other locals go here; ONE_MORE_BYTE also needs `c' and
     `src_base'.  */
  ...;

  while (1)
    {
      /* Get one byte from the source.  If the source is exhausted, jump
         to no_more_source:.  */
      ONE_MORE_BYTE (c);

      if (! __C_conforms_to_XXX___ (c))
        break;
      /* Remember the category only when C is strong evidence for XXX.  */
      if (__C_strongly_suggests_XXX__ (c))
        found = CATEGORY_MASK_XXX;
    }
  /* The byte sequence is invalid for XXX.  */
  detect_info->rejected |= CATEGORY_MASK_XXX;
  return 0;

 no_more_source:
  /* The source was exhausted without meeting an invalid byte.  */
  detect_info->found |= found;
  return 1;
}
#endif
188
189/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
df7492f9
KH
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
d46c5b12 195
df7492f9
KH
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
d46c5b12 200
df7492f9 201 Below is the template of these functions. */
d46c5b12 202
#if 0
/* Template of a decoder.  Decode the not-yet-consumed source bytes of
   CODING into CODING->charbuf, then record how much was consumed and
   produced in CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used.  */
static void
decode_coding_XXXX (coding)
     struct coding_system *coding;
{
  unsigned char *src = coding->source + coding->consumed;
  unsigned char *src_end = coding->source + coding->src_bytes;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source code, or
     when there's no room in CHARBUF for a decoded character.  */
  unsigned char *src_base;
  /* A buffer to produce decoded characters.  */
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_size;
  int multibytep = coding->src_multibyte;
  /* ONE_MORE_BYTE stores the fetched byte in C and counts it in
     CONSUMED_CHARS.  */
  int consumed_chars = 0;
  int c;

  while (1)
    {
      src_base = src;
      if (charbuf >= charbuf_end)
        /* No more room to produce a decoded character.  */
        break;
      ONE_MORE_BYTE (c);
      /* Decode it.  */
    }

 no_more_source:
  if (src_base < src_end
      && coding->mode & CODING_MODE_LAST_BLOCK)
    /* If the source ends by partial bytes to construct a character,
       treat them as eight-bit raw data.  */
    while (src_base < src_end && charbuf < charbuf_end)
      *charbuf++ = *src_base++;
  /* Remember how many bytes and characters we consumed.  If the
     source is multibyte, the bytes and chars are not identical.  */
  coding->consumed = coding->consumed_char = src_base - coding->source;
  /* Remember how many characters we produced.  */
  coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
243
244/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
df7492f9
KH
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
d46c5b12 250
df7492f9
KH
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 255
df7492f9
KH
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of the not-yet-encoded source text.
d46c5b12 259
df7492f9 260 Below is a template of these functions. */
#if 0
/* Template of an encoder.  Encode the characters in CODING->charbuf
   (CODING->charbuf_used of them) into CODING->destination, then update
   CODING->produced_char and CODING->produced.
   NOTE(review): the encode_coding_* prototypes in this file are
   declared `static int' while this template is `static void'; confirm
   the intended return convention.  */
static void
encode_coding_XXX (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = coding->charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Stop early enough that one more iteration cannot run past
     DST_END.  */
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  int produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* Encode C into DST, and increment DST.  */
    }
 label_no_more_destination:
  /* How many chars and bytes we produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
}
#endif
285
4ed46869
KH
286\f
287/*** 1. Preamble ***/
288
68c45bf0 289#include <config.h>
4ed46869
KH
290#include <stdio.h>
291
4ed46869
KH
292#include "lisp.h"
293#include "buffer.h"
df7492f9 294#include "character.h"
4ed46869
KH
295#include "charset.h"
296#include "ccl.h"
df7492f9 297#include "composite.h"
4ed46869
KH
298#include "coding.h"
299#include "window.h"
300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
304Lisp_Object Qunix, Qdos;
305extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
306Lisp_Object Qbuffer_file_coding_system;
307Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 308Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
df7492f9
KH
310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
311Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
312Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 313Lisp_Object Qcoding_system_history;
1397dc18 314Lisp_Object Qvalid_codes;
4ed46869
KH
315
316extern Lisp_Object Qinsert_file_contents, Qwrite_region;
317Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
318Lisp_Object Qstart_process, Qopen_network_stream;
319Lisp_Object Qtarget_idx;
320
d46c5b12
KH
321Lisp_Object Vselect_safe_coding_system_function;
322
7722baf9
EZ
323/* Mnemonic string for each format of end-of-line. */
324Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
325/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 326 decided. */
7722baf9 327Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
328
329#ifdef emacs
330
4608c386
KH
331Lisp_Object Vcoding_system_list, Vcoding_system_alist;
332
333Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 334
d46c5b12
KH
335/* Coding system emacs-mule and raw-text are for converting only
336 end-of-line format. */
337Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 338
4ed46869
KH
339/* Coding-systems are handed between Emacs Lisp programs and C internal
340 routines by the following three variables. */
341/* Coding-system for reading files and receiving data from process. */
342Lisp_Object Vcoding_system_for_read;
343/* Coding-system for writing files and sending data to process. */
344Lisp_Object Vcoding_system_for_write;
345/* Coding-system actually used in the latest I/O. */
346Lisp_Object Vlast_coding_system_used;
347
c4825358 348/* A vector of length 256 which contains information about special
94487c4e 349 Latin codes (especially for dealing with Microsoft codes). */
3f003981 350Lisp_Object Vlatin_extra_code_table;
c4825358 351
9ce27fde
KH
352/* Flag to inhibit code conversion of end-of-line format. */
353int inhibit_eol_conversion;
354
74383408
KH
355/* Flag to inhibit ISO2022 escape sequence detection. */
356int inhibit_iso_escape_detection;
357
ed29121d
EZ
358/* Flag to make buffer-file-coding-system inherit from process-coding. */
359int inherit_process_coding_system;
360
c4825358 361/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
362struct coding_system terminal_coding;
363
c4825358
KH
364/* Coding system to be used to encode text for terminal display when
365 terminal coding system is nil. */
366struct coding_system safe_terminal_coding;
367
368/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
369struct coding_system keyboard_coding;
370
02ba4723
KH
371Lisp_Object Vfile_coding_system_alist;
372Lisp_Object Vprocess_coding_system_alist;
373Lisp_Object Vnetwork_coding_system_alist;
4ed46869 374
68c45bf0
PE
375Lisp_Object Vlocale_coding_system;
376
4ed46869
KH
377#endif /* emacs */
378
f967223b
KH
379/* Flag to tell if we look up translation table on character code
380 conversion. */
84fbb8a0 381Lisp_Object Venable_character_translation;
f967223b
KH
382/* Standard translation table to look up on decoding (reading). */
383Lisp_Object Vstandard_translation_table_for_decode;
384/* Standard translation table to look up on encoding (writing). */
385Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 386
f967223b
KH
387Lisp_Object Qtranslation_table;
388Lisp_Object Qtranslation_table_id;
389Lisp_Object Qtranslation_table_for_decode;
390Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
391
392/* Alist of charsets vs revision number. */
df7492f9 393static Lisp_Object Vcharset_revision_table;
4ed46869 394
02ba4723
KH
395/* Default coding systems used for process I/O. */
396Lisp_Object Vdefault_process_coding_system;
397
b843d1ae
KH
398/* Global flag to tell that we can't call post-read-conversion and
399 pre-write-conversion functions. Usually the value is zero, but it
400 is set to 1 temporarily while such functions are running. This is
401 to avoid infinite recursive call. */
402static int inhibit_pre_post_conversion;
403
df7492f9
KH
404/* Two special coding systems. */
405Lisp_Object Vsjis_coding_system;
406Lisp_Object Vbig5_coding_system;
407
408
ff0dacd7
KH
409static int detect_coding_utf_8 P_ ((struct coding_system *,
410 struct coding_detection_info *info));
df7492f9
KH
411static void decode_coding_utf_8 P_ ((struct coding_system *));
412static int encode_coding_utf_8 P_ ((struct coding_system *));
413
ff0dacd7
KH
414static int detect_coding_utf_16 P_ ((struct coding_system *,
415 struct coding_detection_info *info));
df7492f9
KH
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
ff0dacd7
KH
419static int detect_coding_iso_2022 P_ ((struct coding_system *,
420 struct coding_detection_info *info));
df7492f9
KH
421static void decode_coding_iso_2022 P_ ((struct coding_system *));
422static int encode_coding_iso_2022 P_ ((struct coding_system *));
423
ff0dacd7
KH
424static int detect_coding_emacs_mule P_ ((struct coding_system *,
425 struct coding_detection_info *info));
df7492f9
KH
426static void decode_coding_emacs_mule P_ ((struct coding_system *));
427static int encode_coding_emacs_mule P_ ((struct coding_system *));
428
ff0dacd7
KH
429static int detect_coding_sjis P_ ((struct coding_system *,
430 struct coding_detection_info *info));
df7492f9
KH
431static void decode_coding_sjis P_ ((struct coding_system *));
432static int encode_coding_sjis P_ ((struct coding_system *));
433
ff0dacd7
KH
434static int detect_coding_big5 P_ ((struct coding_system *,
435 struct coding_detection_info *info));
df7492f9
KH
436static void decode_coding_big5 P_ ((struct coding_system *));
437static int encode_coding_big5 P_ ((struct coding_system *));
438
ff0dacd7
KH
439static int detect_coding_ccl P_ ((struct coding_system *,
440 struct coding_detection_info *info));
df7492f9
KH
441static void decode_coding_ccl P_ ((struct coding_system *));
442static int encode_coding_ccl P_ ((struct coding_system *));
443
444static void decode_coding_raw_text P_ ((struct coding_system *));
445static int encode_coding_raw_text P_ ((struct coding_system *));
446
447
448/* ISO2022 section */
449
450#define CODING_ISO_INITIAL(coding, reg) \
451 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
452 coding_attr_iso_initial), \
453 reg)))
454
455
/* Return the element of CODING->safe_charsets indexed by CHARSET_ID,
   or -1 if CHARSET_ID is greater than CODING->max_charset_id.
   CHARSET_ID is evaluated twice, so it must not have side effects.  */
#define CODING_ISO_REQUEST(coding, charset_id)          \
  (((charset_id) <= (coding)->max_charset_id            \
    ? (coding)->safe_charsets[(charset_id)]             \
    : -1))
460
461
462#define CODING_ISO_FLAGS(coding) \
463 ((coding)->spec.iso_2022.flags)
464#define CODING_ISO_DESIGNATION(coding, reg) \
465 ((coding)->spec.iso_2022.current_designation[reg])
466#define CODING_ISO_INVOCATION(coding, plane) \
467 ((coding)->spec.iso_2022.current_invocation[plane])
468#define CODING_ISO_SINGLE_SHIFTING(coding) \
469 ((coding)->spec.iso_2022.single_shifting)
470#define CODING_ISO_BOL(coding) \
471 ((coding)->spec.iso_2022.bol)
472#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
473 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
474
475/* Control characters of ISO2022. */
476 /* code */ /* function */
477#define ISO_CODE_LF 0x0A /* line-feed */
478#define ISO_CODE_CR 0x0D /* carriage-return */
479#define ISO_CODE_SO 0x0E /* shift-out */
480#define ISO_CODE_SI 0x0F /* shift-in */
481#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
482#define ISO_CODE_ESC 0x1B /* escape */
483#define ISO_CODE_SS2 0x8E /* single-shift-2 */
484#define ISO_CODE_SS3 0x8F /* single-shift-3 */
485#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
486
487/* Every 1-byte code of ISO2022 is classified into one of the
488 following classes. */
489enum iso_code_class_type
490 {
491 ISO_control_0, /* Control codes in the range
492 0x00..0x1F and 0x7F, except for the
493 following 5 codes. */
494 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
495 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
496 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
497 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
498 ISO_escape, /* ISO_CODE_ESC (0x1B) */
499 ISO_control_1, /* Control codes in the range
500 0x80..0x9F, except for the
501 following 3 codes. */
502 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
503 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
504 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
505 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
506 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
507 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
508 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
509 };
05e6f5dc 510
df7492f9
KH
511/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
512 `iso-flags' attribute of an iso2022 coding system. */
93dec019 513
df7492f9
KH
514/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
515 instead of the correct short-form sequence (e.g. ESC $ A). */
516#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 517
df7492f9
KH
518/* If set, reset graphic planes and registers at end-of-line to the
519 initial state. */
520#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 521
df7492f9
KH
522/* If set, reset graphic planes and registers before any control
523 characters to the initial state. */
524#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 525
df7492f9
KH
526/* If set, encode by 7-bit environment. */
527#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 528
df7492f9
KH
529/* If set, use locking-shift function. */
530#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 531
df7492f9
KH
532/* If set, use single-shift function. Overwrite
533 CODING_ISO_FLAG_LOCKING_SHIFT. */
534#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 535
df7492f9
KH
536/* If set, use designation escape sequence. */
537#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 538
df7492f9
KH
539/* If set, produce revision number sequence. */
540#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 541
df7492f9
KH
542/* If set, produce ISO6429's direction specifying sequence. */
543#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 544
df7492f9
KH
545/* If set, assume designation states are reset at beginning of line on
546 output. */
547#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 548
df7492f9
KH
549/* If set, designation sequence should be placed at beginning of line
550 on output. */
551#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 552
df7492f9
KH
553/* If set, do not encode unsafe characters on output. */
554#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 555
df7492f9
KH
556/* If set, extra latin codes (128..159) are accepted as a valid code
557 on input. */
558#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 559
df7492f9 560#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 561
df7492f9 562#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 563
bf16eb23
KH
564#define CODING_ISO_FLAG_USE_ROMAN 0x8000
565
566#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
567
568#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 569
df7492f9
KH
570/* A character to be produced on output if encoding of the original
571 character is prohibited by CODING_ISO_FLAG_SAFE. */
572#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 573
aa72b389 574
df7492f9
KH
575/* UTF-16 section */
576#define CODING_UTF_16_BOM(coding) \
577 ((coding)->spec.utf_16.bom)
4ed46869 578
df7492f9
KH
579#define CODING_UTF_16_ENDIAN(coding) \
580 ((coding)->spec.utf_16.endian)
4ed46869 581
df7492f9
KH
582#define CODING_UTF_16_SURROGATE(coding) \
583 ((coding)->spec.utf_16.surrogate)
4ed46869 584
4ed46869 585
df7492f9
KH
586/* CCL section */
587#define CODING_CCL_DECODER(coding) \
588 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
589#define CODING_CCL_ENCODER(coding) \
590 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
591#define CODING_CCL_VALIDS(coding) \
592 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
593 ->data)
4ed46869 594
5a936b46 595/* Index for each coding category in `coding_categories' */
4ed46869 596
df7492f9
KH
597enum coding_category
598 {
599 coding_category_iso_7,
600 coding_category_iso_7_tight,
601 coding_category_iso_8_1,
602 coding_category_iso_8_2,
603 coding_category_iso_7_else,
604 coding_category_iso_8_else,
605 coding_category_utf_8,
606 coding_category_utf_16_auto,
607 coding_category_utf_16_be,
608 coding_category_utf_16_le,
609 coding_category_utf_16_be_nosig,
610 coding_category_utf_16_le_nosig,
611 coding_category_charset,
612 coding_category_sjis,
613 coding_category_big5,
614 coding_category_ccl,
615 coding_category_emacs_mule,
616 /* All above are targets of code detection. */
617 coding_category_raw_text,
618 coding_category_undecided,
619 coding_category_max
620 };
621
622/* Definitions of flag bits used in detect_coding_XXXX. */
623#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
624#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
625#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
626#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
627#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
628#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
629#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
630#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
631#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
632#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
633#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
634#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
635#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
636#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
637#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
638#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 639#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
640
641/* This value is returned if detect_coding_mask () finds nothing other
642 than ASCII characters. */
643#define CATEGORY_MASK_ANY \
644 (CATEGORY_MASK_ISO_7 \
645 | CATEGORY_MASK_ISO_7_TIGHT \
646 | CATEGORY_MASK_ISO_8_1 \
647 | CATEGORY_MASK_ISO_8_2 \
648 | CATEGORY_MASK_ISO_7_ELSE \
649 | CATEGORY_MASK_ISO_8_ELSE \
650 | CATEGORY_MASK_UTF_8 \
651 | CATEGORY_MASK_UTF_16_BE \
652 | CATEGORY_MASK_UTF_16_LE \
653 | CATEGORY_MASK_UTF_16_BE_NOSIG \
654 | CATEGORY_MASK_UTF_16_LE_NOSIG \
655 | CATEGORY_MASK_CHARSET \
656 | CATEGORY_MASK_SJIS \
657 | CATEGORY_MASK_BIG5 \
658 | CATEGORY_MASK_CCL \
659 | CATEGORY_MASK_EMACS_MULE)
660
661
662#define CATEGORY_MASK_ISO_7BIT \
663 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
664
665#define CATEGORY_MASK_ISO_8BIT \
666 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
667
668#define CATEGORY_MASK_ISO_ELSE \
669 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
670
671#define CATEGORY_MASK_ISO_ESCAPE \
672 (CATEGORY_MASK_ISO_7 \
673 | CATEGORY_MASK_ISO_7_TIGHT \
674 | CATEGORY_MASK_ISO_7_ELSE \
675 | CATEGORY_MASK_ISO_8_ELSE)
676
677#define CATEGORY_MASK_ISO \
678 ( CATEGORY_MASK_ISO_7BIT \
679 | CATEGORY_MASK_ISO_8BIT \
680 | CATEGORY_MASK_ISO_ELSE)
681
682#define CATEGORY_MASK_UTF_16 \
683 (CATEGORY_MASK_UTF_16_BE \
684 | CATEGORY_MASK_UTF_16_LE \
685 | CATEGORY_MASK_UTF_16_BE_NOSIG \
686 | CATEGORY_MASK_UTF_16_LE_NOSIG)
687
688
689/* List of symbols `coding-category-xxx' ordered by priority. This
690 variable is exposed to Emacs Lisp. */
691static Lisp_Object Vcoding_category_list;
692
693/* Table of coding categories (Lisp symbols). This variable is for
694 internal use only. */
695static Lisp_Object Vcoding_category_table;
696
697/* Table of coding-categories ordered by priority. */
698static enum coding_category coding_priorities[coding_category_max];
699
700/* Nth element is a coding context for the coding system bound to the
701 Nth coding category. */
702static struct coding_system coding_categories[coding_category_max];
703
704static int detected_mask[coding_category_raw_text] =
705 { CATEGORY_MASK_ISO,
706 CATEGORY_MASK_ISO,
707 CATEGORY_MASK_ISO,
708 CATEGORY_MASK_ISO,
709 CATEGORY_MASK_ISO,
710 CATEGORY_MASK_ISO,
711 CATEGORY_MASK_UTF_8,
712 CATEGORY_MASK_UTF_16,
713 CATEGORY_MASK_UTF_16,
714 CATEGORY_MASK_UTF_16,
715 CATEGORY_MASK_UTF_16,
716 CATEGORY_MASK_UTF_16,
717 CATEGORY_MASK_CHARSET,
718 CATEGORY_MASK_SJIS,
719 CATEGORY_MASK_BIG5,
720 CATEGORY_MASK_CCL,
721 CATEGORY_MASK_EMACS_MULE
722 };
723
724/*** Commonly used macros and functions ***/
725
726#ifndef min
727#define min(a, b) ((a) < (b) ? (a) : (b))
728#endif
729#ifndef max
730#define max(a, b) ((a) > (b) ? (a) : (b))
731#endif
4ed46869 732
df7492f9
KH
/* Fetch commonly needed information about CODING:
     ATTRS        <- CODING_ID_ATTRS of CODING's id,
     EOL_TYPE     <- CODING_ID_EOL_TYPE of CODING's id, replaced by
                     Qunix when that value is a vector,
     CHARSET_LIST <- CODING_ATTR_CHARSET_LIST (ATTRS).
   ATTRS, EOL_TYPE and CHARSET_LIST must be lvalues.  CODING is
   evaluated twice, so it must not have side effects.  */
#define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
  do {                                                          \
    (attrs) = CODING_ID_ATTRS ((coding)->id);                   \
    (eol_type) = CODING_ID_EOL_TYPE ((coding)->id);             \
    if (VECTORP (eol_type))                                     \
      (eol_type) = Qunix;                                       \
    (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);          \
  } while (0)
4ed46869 741
4ed46869 742
df7492f9
KH
743/* Safely get one byte from the source text pointed by SRC which ends
744 at SRC_END, and set C to that byte. If there are not enough bytes
745 in the source, it jumps to `no_more_source'. The caller
746 should declare and set these variables appropriately in advance:
747 src, src_base, src_end, multibytep, consumed_chars, coding
748*/
aa72b389 749
df7492f9 750#define ONE_MORE_BYTE(c) \
aa72b389 751 do { \
df7492f9
KH
752 if (src == src_end) \
753 { \
754 if (src_base < src) \
755 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
756 goto no_more_source; \
757 } \
758 c = *src++; \
759 if (multibytep && (c & 0x80)) \
760 { \
761 if ((c & 0xFE) != 0xC0) \
762 error ("Undecodable char found"); \
763 c = ((c & 1) << 6) | *src++; \
764 } \
765 consumed_chars++; \
aa72b389
KH
766 } while (0)
767
aa72b389 768
df7492f9
KH
769#define ONE_MORE_BYTE_NO_CHECK(c) \
770 do { \
771 c = *src++; \
772 if (multibytep && (c & 0x80)) \
773 { \
774 if ((c & 0xFE) != 0xC0) \
775 error ("Undecodable char found"); \
776 c = ((c & 1) << 6) | *src++; \
777 } \
781d7a48 778 consumed_chars++; \
aa72b389
KH
779 } while (0)
780
aa72b389 781
df7492f9
KH
782/* Store a byte C in the place pointed by DST and increment DST to the
783 next free point, and increment PRODUCED_CHARS. The caller should
784 ensure that C is 0..127, and declare and set the variables `dst'
785 and `produced_chars' appropriately in advance.
786*/
aa72b389
KH
787
788
df7492f9
KH
789#define EMIT_ONE_ASCII_BYTE(c) \
790 do { \
791 produced_chars++; \
792 *dst++ = (c); \
793 } while (0)
aa72b389 794
aa72b389 795
df7492f9 796/* Like EMIT_ONE_ASCII_BYTE but store two bytes: C1 and C2. */
aa72b389 797
df7492f9
KH
798#define EMIT_TWO_ASCII_BYTES(c1, c2) \
799 do { \
800 produced_chars += 2; \
801 *dst++ = (c1), *dst++ = (c2); \
802 } while (0)
aa72b389 803
df7492f9
KH
804
805/* Store a byte C in the place pointed by DST and increment DST to the
806 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
807 nonzero, store in an appropriate multibyte form. The caller should
808 declare and set the variables `dst' and `multibytep' appropriately
809 in advance. */
810
811#define EMIT_ONE_BYTE(c) \
812 do { \
813 produced_chars++; \
814 if (multibytep) \
815 { \
816 int ch = (c); \
817 if (ch >= 0x80) \
818 ch = BYTE8_TO_CHAR (ch); \
819 CHAR_STRING_ADVANCE (ch, dst); \
820 } \
821 else \
822 *dst++ = (c); \
aa72b389
KH
823 } while (0)
824
825
/* Like EMIT_ONE_BYTE, but emit two bytes: C1, then C2.
   PRODUCED_CHARS grows by two.  */

#define EMIT_TWO_BYTES(c1, c2)                  \
  do {                                          \
    produced_chars += 2;                        \
    if (! multibytep)                           \
      {                                         \
        *dst++ = (c1);                          \
        *dst++ = (c2);                          \
      }                                         \
    else                                        \
      {                                         \
        int ch = (c1);                          \
                                                \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
        ch = (c2);                              \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
      }                                         \
  } while (0)
850
851
df7492f9
KH
/* Emit the three bytes C1, C2 and C3 in that order, as one
   EMIT_ONE_BYTE followed by one EMIT_TWO_BYTES.  */
852#define EMIT_THREE_BYTES(c1, c2, c3) \
853 do { \
854 EMIT_ONE_BYTE (c1); \
855 EMIT_TWO_BYTES (c2, c3); \
856 } while (0)
aa72b389 857
aa72b389 858
df7492f9
KH
/* Emit the four bytes C1, C2, C3 and C4 in that order, as two
   consecutive EMIT_TWO_BYTES.  */
859#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
860 do { \
861 EMIT_TWO_BYTES (c1, c2); \
862 EMIT_TWO_BYTES (c3, c4); \
863 } while (0)
aa72b389 864
aa72b389 865
df7492f9
KH
/* Decode CODE of CHARSET into the character C with DECODE_CHAR.
   `charset_map_loaded' is cleared before the call; if it is nonzero
   afterwards, CODING->source is recomputed with coding_set_source
   and the pointers SRC, SRC_BASE and SRC_END are shifted by the
   distance CODING->source moved.
   NOTE(review): presumably loading a charset map can relocate the
   source text; confirm against the definition of charset_map_loaded.  */
866#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
867 do { \
868 charset_map_loaded = 0; \
869 c = DECODE_CHAR (charset, code); \
870 if (charset_map_loaded) \
871 { \
872 unsigned char *orig = coding->source; \
873 EMACS_INT offset; \
874 \
875 coding_set_source (coding); \
876 offset = coding->source - orig; \
877 src += offset; \
878 src_base += offset; \
879 src_end += offset; \
880 } \
881 } while (0)
aa72b389 882
aa72b389 883
df7492f9
KH
/* Make room for BYTES more bytes at DST.  If DST + BYTES reaches
   DST_END, call alloc_destination asking for BYTES plus the number of
   elements still pending between CHARBUF and CHARBUF_END, then
   refresh DST and DST_END from the (possibly relocated) destination.
   The caller declares `coding', `dst', `dst_end', `charbuf' and
   `charbuf_end'.  */
884#define ASSURE_DESTINATION(bytes) \
885 do { \
886 if (dst + (bytes) >= dst_end) \
887 { \
888 int more_bytes = charbuf_end - charbuf + (bytes); \
889 \
890 dst = alloc_destination (coding, more_bytes, dst); \
891 dst_end = coding->destination + coding->dst_bytes; \
892 } \
893 } while (0)
b1887814 894
df7492f9
KH
895
896
897static void
898coding_set_source (coding)
899 struct coding_system *coding;
900{
901 if (BUFFERP (coding->src_object))
902 {
903 if (coding->src_pos < 0)
904 coding->source = GAP_END_ADDR + coding->src_pos_byte;
905 else
906 {
e19c3639 907 struct buffer *buf = XBUFFER (coding->src_object);
e19c3639
KH
908 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
909 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
910
911 coding->source = beg_addr + coding->src_pos_byte - 1;
912 if (coding->src_pos_byte >= gpt_byte)
913 coding->source += BUF_GAP_SIZE (buf);
aa72b389
KH
914 }
915 }
df7492f9 916 else if (STRINGP (coding->src_object))
aa72b389 917 {
df7492f9
KH
918 coding->source = (XSTRING (coding->src_object)->data
919 + coding->src_pos_byte);
920 }
921 else
922 /* Otherwise, the source is C string and is never relocated
923 automatically. Thus we don't have to update anything. */
924 ;
925}
926
927static void
928coding_set_destination (coding)
929 struct coding_system *coding;
930{
931 if (BUFFERP (coding->dst_object))
932 {
df7492f9 933 if (coding->src_pos < 0)
28f67a95
KH
934 {
935 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
936 coding->dst_bytes = (GAP_END_ADDR
937 - (coding->src_bytes - coding->consumed)
938 - coding->destination);
939 }
df7492f9 940 else
28f67a95
KH
941 {
942 /* We are sure that coding->dst_pos_byte is before the gap
943 of the buffer. */
944 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
945 + coding->dst_pos_byte - 1);
946 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
947 - coding->destination);
948 }
df7492f9
KH
949 }
950 else
951 /* Otherwise, the destination is C string and is never relocated
952 automatically. Thus we don't have to update anything. */
953 ;
954}
955
956
957static void
958coding_alloc_by_realloc (coding, bytes)
959 struct coding_system *coding;
960 EMACS_INT bytes;
961{
962 coding->destination = (unsigned char *) xrealloc (coding->destination,
963 coding->dst_bytes + bytes);
964 coding->dst_bytes += bytes;
965}
966
967static void
968coding_alloc_by_making_gap (coding, bytes)
969 struct coding_system *coding;
970 EMACS_INT bytes;
971{
2c78b7e1
KH
972 if (BUFFERP (coding->dst_object)
973 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
974 {
975 EMACS_INT add = coding->src_bytes - coding->consumed;
976
977 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
978 make_gap (bytes);
979 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
980 }
981 else
982 {
2c78b7e1
KH
983 Lisp_Object this_buffer;
984
985 this_buffer = Fcurrent_buffer ();
df7492f9
KH
986 set_buffer_internal (XBUFFER (coding->dst_object));
987 make_gap (bytes);
988 set_buffer_internal (XBUFFER (this_buffer));
989 }
990}
991
992
993static unsigned char *
994alloc_destination (coding, nbytes, dst)
995 struct coding_system *coding;
996 int nbytes;
997 unsigned char *dst;
998{
999 EMACS_INT offset = dst - coding->destination;
1000
1001 if (BUFFERP (coding->dst_object))
1002 coding_alloc_by_making_gap (coding, nbytes);
1003 else
1004 coding_alloc_by_realloc (coding, nbytes);
1005 coding->result = CODING_RESULT_SUCCESS;
1006 coding_set_destination (coding);
1007 dst = coding->destination + offset;
1008 return dst;
1009}
aa72b389 1010
ff0dacd7
KH
1011/** Macros for annotations. */
1012
1013/* Maximum length of annotation data: one composition annotation
   (5 leading elements plus at most MAX_COMPOSITION_COMPONENTS * 2 - 1
   components) followed by one charset annotation (5 elements).  */
1015#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1016
1017/* An annotation data is stored in the array coding->charbuf in this
1018 format:
1019 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1020 LENGTH is the number of elements in the annotation.
1021 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1022 FROM and TO specify the range of text annotated. They are relative
1023 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1024
1025 The format of the following elements depend on ANNOTATION_MASK.
1026
1027 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1028 follow:
1029 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1030 METHOD is one of enum composition_method.
1031 Optional COMPOSITION-COMPONENTS are characters and composition
1032 rules.
1033
1034 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1035 follows. */
1036
/* Append the four leading elements of an annotation
   ( -LEN, MASK, FROM, TO ) at BUF, advancing BUF past them, and mark
   CODING as annotated.  The caller must have a variable `coding' in
   scope.  BUF is evaluated several times, so it must be a plain
   lvalue without side effects.

   The expansion deliberately has no trailing semicolon, so the macro
   behaves like a single statement (e.g. in an unbraced if/else).  */

#define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (from);                                  \
    *(buf)++ = (to);                                    \
    coding->annotated = 1;                              \
  } while (0)

/* Append a composition annotation for the range FROM..TO at BUF:
   the common header with length 5, followed by METHOD.  */

#define ADD_COMPOSITION_DATA(buf, from, to, method)                         \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
    *(buf)++ = (method);                                                    \
  } while (0)


/* Append a charset annotation for the range FROM..TO at BUF: the
   common header with length 5, followed by the charset ID.  */

#define ADD_CHARSET_DATA(buf, from, to, id)                             \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
    *(buf)++ = (id);                                                    \
  } while (0)
1058
df7492f9
KH
1059\f
1060/*** 2. Emacs' internal format (emacs-utf-8) ***/
1061
1062
1063
1064\f
1065/*** 3. UTF-8 ***/
1066
1067/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1068 Check if a text is encoded in UTF-8. If it is, return 1, else
1069 return 0. */
df7492f9
KH
1070
/* Byte classification for UTF-8.  UTF_8_1_OCTET_P is true for a byte
   below 0x80; UTF_8_EXTRA_OCTET_P is true for a byte of the form
   10xxxxxx; UTF_8_N_OCTET_LEADING_P is true for a byte whose high bits
   are N ones followed by a zero (110xxxxx for N = 2, up to 111110xx
   for N = 5).  */
1071#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1072#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1073#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1074#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1075#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1076#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1077
1078static int
ff0dacd7 1079detect_coding_utf_8 (coding, detect_info)
df7492f9 1080 struct coding_system *coding;
ff0dacd7 1081 struct coding_detection_info *detect_info;
df7492f9
KH
1082{
1083 unsigned char *src = coding->source, *src_base = src;
1084 unsigned char *src_end = coding->source + coding->src_bytes;
1085 int multibytep = coding->src_multibyte;
1086 int consumed_chars = 0;
1087 int found = 0;
89528eb3 1088 int incomplete;
df7492f9 1089
ff0dacd7 1090 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1091 /* A coding system of this category is always ASCII compatible. */
1092 src += coding->head_ascii;
1093
1094 while (1)
1095 {
1096 int c, c1, c2, c3, c4;
1097
89528eb3 1098 incomplete = 0;
df7492f9
KH
1099 ONE_MORE_BYTE (c);
1100 if (UTF_8_1_OCTET_P (c))
1101 continue;
89528eb3 1102 incomplete = 1;
df7492f9
KH
1103 ONE_MORE_BYTE (c1);
1104 if (! UTF_8_EXTRA_OCTET_P (c1))
1105 break;
1106 if (UTF_8_2_OCTET_LEADING_P (c))
1107 {
ff0dacd7 1108 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1109 continue;
1110 }
1111 ONE_MORE_BYTE (c2);
1112 if (! UTF_8_EXTRA_OCTET_P (c2))
1113 break;
1114 if (UTF_8_3_OCTET_LEADING_P (c))
1115 {
ff0dacd7 1116 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1117 continue;
1118 }
1119 ONE_MORE_BYTE (c3);
1120 if (! UTF_8_EXTRA_OCTET_P (c3))
1121 break;
1122 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1123 {
ff0dacd7 1124 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1125 continue;
1126 }
1127 ONE_MORE_BYTE (c4);
1128 if (! UTF_8_EXTRA_OCTET_P (c4))
1129 break;
1130 if (UTF_8_5_OCTET_LEADING_P (c))
1131 {
ff0dacd7 1132 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1133 continue;
1134 }
1135 break;
1136 }
ff0dacd7 1137 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1138 return 0;
1139
1140 no_more_source:
89528eb3
KH
1141 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1142 {
ff0dacd7 1143 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3
KH
1144 return 0;
1145 }
ff0dacd7
KH
1146 detect_info->found |= found;
1147 return 1;
df7492f9
KH
1148}
1149
1150
1151static void
1152decode_coding_utf_8 (coding)
1153 struct coding_system *coding;
1154{
1155 unsigned char *src = coding->source + coding->consumed;
1156 unsigned char *src_end = coding->source + coding->src_bytes;
1157 unsigned char *src_base;
1158 int *charbuf = coding->charbuf;
1159 int *charbuf_end = charbuf + coding->charbuf_size;
1160 int consumed_chars = 0, consumed_chars_base;
1161 int multibytep = coding->src_multibyte;
1162 Lisp_Object attr, eol_type, charset_list;
1163
1164 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1165
1166 while (1)
1167 {
1168 int c, c1, c2, c3, c4, c5;
1169
1170 src_base = src;
1171 consumed_chars_base = consumed_chars;
1172
1173 if (charbuf >= charbuf_end)
1174 break;
1175
1176 ONE_MORE_BYTE (c1);
1177 if (UTF_8_1_OCTET_P(c1))
1178 {
1179 c = c1;
1180 if (c == '\r')
aa72b389 1181 {
df7492f9
KH
1182 if (EQ (eol_type, Qdos))
1183 {
1184 if (src == src_end)
98725083
KH
1185 {
1186 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1187 goto no_more_source;
1188 }
df7492f9
KH
1189 if (*src == '\n')
1190 ONE_MORE_BYTE (c);
1191 }
1192 else if (EQ (eol_type, Qmac))
1193 c = '\n';
aa72b389 1194 }
aa72b389 1195 }
df7492f9 1196 else
aa72b389 1197 {
df7492f9
KH
1198 ONE_MORE_BYTE (c2);
1199 if (! UTF_8_EXTRA_OCTET_P (c2))
1200 goto invalid_code;
1201 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1202 {
1203 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1204 /* Reject overlong sequences here and below. Encoders
1205 producing them are incorrect, they can be misleading,
1206 and they mess up read/write invariance. */
1207 if (c < 128)
1208 goto invalid_code;
1209 }
df7492f9 1210 else
aa72b389 1211 {
df7492f9
KH
1212 ONE_MORE_BYTE (c3);
1213 if (! UTF_8_EXTRA_OCTET_P (c3))
1214 goto invalid_code;
1215 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1216 {
1217 c = (((c1 & 0xF) << 12)
1218 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1219 if (c < 0x800
1220 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1221 goto invalid_code;
1222 }
df7492f9
KH
1223 else
1224 {
1225 ONE_MORE_BYTE (c4);
1226 if (! UTF_8_EXTRA_OCTET_P (c4))
1227 goto invalid_code;
1228 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1229 {
df7492f9
KH
1230 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1231 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1232 if (c < 0x10000)
1233 goto invalid_code;
1234 }
df7492f9
KH
1235 else
1236 {
1237 ONE_MORE_BYTE (c5);
1238 if (! UTF_8_EXTRA_OCTET_P (c5))
1239 goto invalid_code;
1240 if (UTF_8_5_OCTET_LEADING_P (c1))
1241 {
1242 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1243 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1244 | (c5 & 0x3F));
b0edb2c5 1245 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1246 goto invalid_code;
1247 }
1248 else
1249 goto invalid_code;
1250 }
1251 }
aa72b389 1252 }
aa72b389 1253 }
df7492f9
KH
1254
1255 *charbuf++ = c;
1256 continue;
1257
1258 invalid_code:
1259 src = src_base;
1260 consumed_chars = consumed_chars_base;
1261 ONE_MORE_BYTE (c);
1262 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1263 coding->errors++;
aa72b389
KH
1264 }
1265
df7492f9
KH
1266 no_more_source:
1267 coding->consumed_char += consumed_chars_base;
1268 coding->consumed = src_base - coding->source;
1269 coding->charbuf_used = charbuf - coding->charbuf;
1270}
1271
1272
1273static int
1274encode_coding_utf_8 (coding)
1275 struct coding_system *coding;
1276{
1277 int multibytep = coding->dst_multibyte;
1278 int *charbuf = coding->charbuf;
1279 int *charbuf_end = charbuf + coding->charbuf_used;
1280 unsigned char *dst = coding->destination + coding->produced;
1281 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1282 int produced_chars = 0;
df7492f9
KH
1283 int c;
1284
1285 if (multibytep)
aa72b389 1286 {
df7492f9
KH
1287 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1288
1289 while (charbuf < charbuf_end)
aa72b389 1290 {
df7492f9
KH
1291 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1292
1293 ASSURE_DESTINATION (safe_room);
1294 c = *charbuf++;
28f67a95
KH
1295 if (CHAR_BYTE8_P (c))
1296 {
1297 c = CHAR_TO_BYTE8 (c);
1298 EMIT_ONE_BYTE (c);
1299 }
1300 else
1301 {
1302 CHAR_STRING_ADVANCE (c, pend);
1303 for (p = str; p < pend; p++)
1304 EMIT_ONE_BYTE (*p);
1305 }
aa72b389 1306 }
aa72b389 1307 }
df7492f9
KH
1308 else
1309 {
1310 int safe_room = MAX_MULTIBYTE_LENGTH;
1311
1312 while (charbuf < charbuf_end)
1313 {
1314 ASSURE_DESTINATION (safe_room);
1315 c = *charbuf++;
1316 dst += CHAR_STRING (c, dst);
1317 produced_chars++;
1318 }
1319 }
1320 coding->result = CODING_RESULT_SUCCESS;
1321 coding->produced_char += produced_chars;
1322 coding->produced = dst - coding->destination;
1323 return 0;
aa72b389
KH
1324}
1325
4ed46869 1326
df7492f9 1327/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1328 Check if a text is encoded in one of UTF-16 based coding systems.
1329 If it is, return 1, else return 0. */
df7492f9
KH
1330
/* True if the 16-bit code unit VAL is in the range 0xD800..0xDBFF.  */
1331#define UTF_16_HIGH_SURROGATE_P(val) \
1332 (((val) & 0xFC00) == 0xD800)
1333
/* True if the 16-bit code unit VAL is in the range 0xDC00..0xDFFF.  */
1334#define UTF_16_LOW_SURROGATE_P(val) \
1335 (((val) & 0xFC00) == 0xDC00)
1336
/* True if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
1337#define UTF_16_INVALID_P(val) \
1338 (((val) == 0xFFFE) \
1339 || ((val) == 0xFFFF) \
1340 || UTF_16_LOW_SURROGATE_P (val))
1341
1342
1343static int
ff0dacd7 1344detect_coding_utf_16 (coding, detect_info)
b73bfc1c 1345 struct coding_system *coding;
ff0dacd7 1346 struct coding_detection_info *detect_info;
b73bfc1c 1347{
df7492f9
KH
1348 unsigned char *src = coding->source, *src_base = src;
1349 unsigned char *src_end = coding->source + coding->src_bytes;
1350 int multibytep = coding->src_multibyte;
1351 int consumed_chars = 0;
1352 int c1, c2;
1353
ff0dacd7 1354 detect_info->checked |= CATEGORY_MASK_UTF_16;
89528eb3 1355
ff0dacd7
KH
1356 if (coding->mode & CODING_MODE_LAST_BLOCK
1357 && (coding->src_bytes & 1))
1358 {
1359 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1360 return 0;
1361 }
df7492f9
KH
1362 ONE_MORE_BYTE (c1);
1363 ONE_MORE_BYTE (c2);
4ed46869 1364
df7492f9 1365 if ((c1 == 0xFF) && (c2 == 0xFE))
ff0dacd7
KH
1366 {
1367 detect_info->found |= CATEGORY_MASK_UTF_16_LE;
1368 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1369 }
df7492f9 1370 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7
KH
1371 {
1372 detect_info->found |= CATEGORY_MASK_UTF_16_BE;
1373 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1374 }
df7492f9 1375 no_more_source:
ff0dacd7 1376 return 1;
df7492f9 1377}
ec6d2bb8 1378
df7492f9
KH
1379static void
1380decode_coding_utf_16 (coding)
1381 struct coding_system *coding;
1382{
1383 unsigned char *src = coding->source + coding->consumed;
1384 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1385 unsigned char *src_base;
df7492f9
KH
1386 int *charbuf = coding->charbuf;
1387 int *charbuf_end = charbuf + coding->charbuf_size;
1388 int consumed_chars = 0, consumed_chars_base;
1389 int multibytep = coding->src_multibyte;
1390 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1391 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1392 int surrogate = CODING_UTF_16_SURROGATE (coding);
1393 Lisp_Object attr, eol_type, charset_list;
1394
1395 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1396
1397 if (bom != utf_16_without_bom)
1398 {
1399 int c, c1, c2;
4af310db 1400
df7492f9
KH
1401 src_base = src;
1402 ONE_MORE_BYTE (c1);
1403 ONE_MORE_BYTE (c2);
e19c3639 1404 c = (c1 << 8) | c2;
df7492f9
KH
1405 if (bom == utf_16_with_bom)
1406 {
1407 if (endian == utf_16_big_endian
1408 ? c != 0xFFFE : c != 0xFEFF)
4af310db 1409 {
df7492f9
KH
1410 /* We are sure that there's enouph room at CHARBUF. */
1411 *charbuf++ = c1;
1412 *charbuf++ = c2;
1413 coding->errors++;
4af310db 1414 }
4af310db 1415 }
df7492f9 1416 else
4af310db 1417 {
df7492f9
KH
1418 if (c == 0xFFFE)
1419 CODING_UTF_16_ENDIAN (coding)
1420 = endian = utf_16_big_endian;
1421 else if (c == 0xFEFF)
1422 CODING_UTF_16_ENDIAN (coding)
1423 = endian = utf_16_little_endian;
1424 else
4af310db 1425 {
df7492f9
KH
1426 CODING_UTF_16_ENDIAN (coding)
1427 = endian = utf_16_big_endian;
1428 src = src_base;
4af310db 1429 }
4af310db 1430 }
df7492f9
KH
1431 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1432 }
1433
1434 while (1)
1435 {
1436 int c, c1, c2;
1437
1438 src_base = src;
1439 consumed_chars_base = consumed_chars;
1440
1441 if (charbuf + 2 >= charbuf_end)
1442 break;
1443
1444 ONE_MORE_BYTE (c1);
1445 ONE_MORE_BYTE (c2);
1446 c = (endian == utf_16_big_endian
e19c3639 1447 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1448 if (surrogate)
aa72b389 1449 {
df7492f9 1450 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1451 {
df7492f9
KH
1452 if (endian == utf_16_big_endian)
1453 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1454 else
1455 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1456 *charbuf++ = c1;
1457 *charbuf++ = c2;
1458 coding->errors++;
1459 if (UTF_16_HIGH_SURROGATE_P (c))
1460 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1461 else
1462 *charbuf++ = c;
aa72b389 1463 }
df7492f9
KH
1464 else
1465 {
1466 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1467 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1468 *charbuf++ = c;
1469 }
1470 }
1471 else
1472 {
1473 if (UTF_16_HIGH_SURROGATE_P (c))
1474 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1475 else
1476 *charbuf++ = c;
1477 }
1478 }
1479
1480 no_more_source:
1481 coding->consumed_char += consumed_chars_base;
1482 coding->consumed = src_base - coding->source;
1483 coding->charbuf_used = charbuf - coding->charbuf;
1484}
1485
1486static int
1487encode_coding_utf_16 (coding)
1488 struct coding_system *coding;
1489{
1490 int multibytep = coding->dst_multibyte;
1491 int *charbuf = coding->charbuf;
1492 int *charbuf_end = charbuf + coding->charbuf_used;
1493 unsigned char *dst = coding->destination + coding->produced;
1494 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1495 int safe_room = 8;
1496 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1497 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1498 int produced_chars = 0;
1499 Lisp_Object attrs, eol_type, charset_list;
1500 int c;
1501
1502 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1503
1504 if (bom == utf_16_with_bom)
1505 {
1506 ASSURE_DESTINATION (safe_room);
1507 if (big_endian)
1508 EMIT_TWO_BYTES (0xFF, 0xFE);
1509 else
1510 EMIT_TWO_BYTES (0xFE, 0xFF);
1511 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1512 }
1513
1514 while (charbuf < charbuf_end)
1515 {
1516 ASSURE_DESTINATION (safe_room);
1517 c = *charbuf++;
e19c3639
KH
1518 if (c >= MAX_UNICODE_CHAR)
1519 c = coding->default_char;
df7492f9
KH
1520
1521 if (c < 0x10000)
1522 {
1523 if (big_endian)
1524 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1525 else
1526 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1527 }
1528 else
1529 {
1530 int c1, c2;
1531
1532 c -= 0x10000;
1533 c1 = (c >> 10) + 0xD800;
1534 c2 = (c & 0x3FF) + 0xDC00;
1535 if (big_endian)
1536 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1537 else
1538 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1539 }
1540 }
1541 coding->result = CODING_RESULT_SUCCESS;
1542 coding->produced = dst - coding->destination;
1543 coding->produced_char += produced_chars;
1544 return 0;
1545}
1546
1547\f
1548/*** 6. Old Emacs' internal format (emacs-mule) ***/
1549
1550/* Emacs' internal format for representation of multiple character
1551 sets is a kind of multi-byte encoding, i.e. characters are
1552 represented by variable-length sequences of one-byte codes.
1553
1554 ASCII characters and control characters (e.g. `tab', `newline') are
1555 represented by one-byte sequences which are their ASCII codes, in
1556 the range 0x00 through 0x7F.
1557
1558 8-bit characters of the range 0x80..0x9F are represented by
1559 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1560 code + 0x20).
1561
1562 8-bit characters of the range 0xA0..0xFF are represented by
1563 one-byte sequences which are their 8-bit code.
1564
1565 The other characters are represented by a sequence of `base
1566 leading-code', optional `extended leading-code', and one or two
1567 `position-code's. The length of the sequence is determined by the
1568 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1569 whereas extended leading-code and position-code take the range 0xA0
1570 through 0xFF. See `charset.h' for more details about leading-code
1571 and position-code.
1572
1573 --- CODE RANGE of Emacs' internal format ---
1574 character set range
1575 ------------- -----
1576 ascii 0x00..0x7F
1577 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1578 eight-bit-graphic 0xA0..0xBF
1579 ELSE 0x81..0x9D + [0xA0..0xFF]+
1580 ---------------------------------------------
1581
1582 As this is the internal character representation, the format is
1583 usually not used externally (i.e. in a file or in a data sent to a
1584 process). But, it is possible to have a text externally in this
1585 format (i.e. by encoding by the coding system `emacs-mule').
1586
1587 In that case, a sequence of one-byte codes has a slightly different
1588 form.
1589
1590 At first, all characters in eight-bit-control are represented by
1591 one-byte sequences which are their 8-bit code.
1592
1593 Next, character composition data are represented by the byte
1594 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1595 where,
1596 METHOD is 0xF0 plus one of composition method (enum
1597 composition_method),
1598
1599 BYTES is 0xA0 plus a byte length of this composition data,
1600
1601 CHARS is 0x20 plus a number of characters composed by this
1602 data,
1603
1604 COMPONENTs are characters of multibyte form or composition
1605 rules encoded by two-byte of ASCII codes.
1606
1607 In addition, for backward compatibility, the following formats are
1608 also recognized as composition data on decoding.
1609
1610 0x80 MSEQ ...
1611 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1612
1613 Here,
1614 MSEQ is a multibyte form but in this special format:
1615 ASCII: 0xA0 ASCII_CODE+0x80,
1616 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1617 RULE is a one byte code of the range 0xA0..0xF0 that
1618 represents a composition rule.
1619 */
1620
/* Table indexed by a byte value.  emacs_mule_char dispatches on the
   entry for a leading byte (the values 1 through 4 are handled, any
   other value aborts), and detect_coding_emacs_mule compares the entry
   with the length of the byte sequence starting at that byte.  Not
   initialized here.  */
1621char emacs_mule_bytes[256];
1622
df7492f9 1623int
ff0dacd7 1624emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1625 struct coding_system *coding;
781d7a48 1626 unsigned char *src;
ff0dacd7 1627 int *nbytes, *nchars, *id;
df7492f9 1628{
df7492f9
KH
1629 unsigned char *src_end = coding->source + coding->src_bytes;
1630 int multibytep = coding->src_multibyte;
1631 unsigned char *src_base = src;
1632 struct charset *charset;
1633 unsigned code;
1634 int c;
1635 int consumed_chars = 0;
1636
1637 ONE_MORE_BYTE (c);
df7492f9
KH
1638 switch (emacs_mule_bytes[c])
1639 {
1640 case 2:
1641 if (! (charset = emacs_mule_charset[c]))
1642 goto invalid_code;
1643 ONE_MORE_BYTE (c);
1644 code = c & 0x7F;
1645 break;
1646
1647 case 3:
7c78e542
KH
1648 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1649 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
b73bfc1c 1650 {
df7492f9
KH
1651 ONE_MORE_BYTE (c);
1652 if (! (charset = emacs_mule_charset[c]))
1653 goto invalid_code;
1654 ONE_MORE_BYTE (c);
1655 code = c & 0x7F;
b73bfc1c
KH
1656 }
1657 else
1658 {
df7492f9
KH
1659 if (! (charset = emacs_mule_charset[c]))
1660 goto invalid_code;
1661 ONE_MORE_BYTE (c);
781d7a48 1662 code = (c & 0x7F) << 8;
df7492f9
KH
1663 ONE_MORE_BYTE (c);
1664 code |= c & 0x7F;
1665 }
1666 break;
1667
1668 case 4:
781d7a48 1669 ONE_MORE_BYTE (c);
df7492f9
KH
1670 if (! (charset = emacs_mule_charset[c]))
1671 goto invalid_code;
1672 ONE_MORE_BYTE (c);
781d7a48 1673 code = (c & 0x7F) << 8;
df7492f9
KH
1674 ONE_MORE_BYTE (c);
1675 code |= c & 0x7F;
1676 break;
1677
1678 case 1:
1679 code = c;
9d123124
KH
1680 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1681 ? charset_ascii : charset_eight_bit);
df7492f9
KH
1682 break;
1683
1684 default:
1685 abort ();
1686 }
1687 c = DECODE_CHAR (charset, code);
1688 if (c < 0)
1689 goto invalid_code;
1690 *nbytes = src - src_base;
1691 *nchars = consumed_chars;
ff0dacd7
KH
1692 if (id)
1693 *id = charset->id;
df7492f9
KH
1694 return c;
1695
1696 no_more_source:
1697 return -2;
1698
1699 invalid_code:
1700 return -1;
1701}
1702
1703
1704/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1705 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1706 else return 0. */
df7492f9
KH
1707
1708static int
ff0dacd7 1709detect_coding_emacs_mule (coding, detect_info)
df7492f9 1710 struct coding_system *coding;
ff0dacd7 1711 struct coding_detection_info *detect_info;
df7492f9
KH
1712{
1713 unsigned char *src = coding->source, *src_base = src;
1714 unsigned char *src_end = coding->source + coding->src_bytes;
1715 int multibytep = coding->src_multibyte;
1716 int consumed_chars = 0;
1717 int c;
1718 int found = 0;
89528eb3 1719 int incomplete;
df7492f9 1720
ff0dacd7 1721 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1722 /* A coding system of this category is always ASCII compatible. */
1723 src += coding->head_ascii;
1724
1725 while (1)
1726 {
89528eb3 1727 incomplete = 0;
df7492f9 1728 ONE_MORE_BYTE (c);
89528eb3 1729 incomplete = 1;
df7492f9
KH
1730
1731 if (c == 0x80)
1732 {
1733 /* Perhaps the start of composite character. We simple skip
1734 it because analyzing it is too heavy for detecting. But,
1735 at least, we check that the composite character
1736 constitues of more than 4 bytes. */
1737 unsigned char *src_base;
1738
1739 repeat:
1740 src_base = src;
1741 do
1742 {
1743 ONE_MORE_BYTE (c);
1744 }
1745 while (c >= 0xA0);
1746
1747 if (src - src_base <= 4)
1748 break;
ff0dacd7 1749 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1750 if (c == 0x80)
1751 goto repeat;
b73bfc1c 1752 }
df7492f9
KH
1753
1754 if (c < 0x80)
b73bfc1c 1755 {
df7492f9
KH
1756 if (c < 0x20
1757 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1758 break;
1759 }
1760 else
1761 {
1762 unsigned char *src_base = src - 1;
1763
1764 do
1765 {
1766 ONE_MORE_BYTE (c);
1767 }
1768 while (c >= 0xA0);
1769 if (src - src_base != emacs_mule_bytes[*src_base])
1770 break;
ff0dacd7 1771 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1772 }
1773 }
ff0dacd7 1774 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1775 return 0;
1776
1777 no_more_source:
89528eb3
KH
1778 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1779 {
ff0dacd7 1780 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1781 return 0;
1782 }
ff0dacd7
KH
1783 detect_info->found |= found;
1784 return 1;
4ed46869
KH
1785}
1786
b73bfc1c 1787
df7492f9
KH
1788/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1789
1790/* Decode one character of a composition sequence of Emacs 20/21
   style at SRC with emacs_mule_char and store it at *BUF, advancing
   BUF.  On success SRC is advanced by the length of the sequence and
   CONSUMED_CHARS by the number of characters consumed.

   If SRC has reached SRC_END, or emacs_mule_char reports that the
   source is exhausted (-2), `break' out of the enclosing loop.  For
   any other negative result jump to the label `invalid_code'.

   The expansion has the form `if (1) { ... } else', so the semicolon
   written after the invocation completes the statement; the macro
   must be used where `break' has an enclosing loop to leave.  */
1796
1797#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1798 if (1) \
1799 { \
1800 int c; \
1801 int nbytes, nchars; \
1802 \
1803 if (src == src_end) \
1804 break; \
ff0dacd7 1805 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
df7492f9
KH
1806 if (c < 0) \
1807 { \
1808 if (c == -2) \
1809 break; \
1810 goto invalid_code; \
1811 } \
1812 *buf++ = c; \
1813 src += nbytes; \
1814 consumed_chars += nchars; \
1815 } \
1816 else
1817
1818
1819/* Decode a composition rule of a composition sequence of Emacs 20
   style at SRC.  One byte is read; after subtracting 0x20 it must be
   in the range 0..80, and is split into GREF (value / 9) and NREF
   (value % 9).  The encoded rule is stored at *BUF and BUF is
   advanced.  If no byte is left or the value is out of range, jump to
   the label `invalid_code'.
   NOTE(review): the format description earlier in this file gives
   the range of RULE as 0xA0..0xF0, while 0x20 is subtracted here --
   confirm which offset is intended.  */
df7492f9 1823
781d7a48 1824#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
1825 do { \
1826 int c, gref, nref; \
1827 \
781d7a48 1828 if (src >= src_end) \
df7492f9
KH
1829 goto invalid_code; \
1830 ONE_MORE_BYTE_NO_CHECK (c); \
781d7a48 1831 c -= 0x20; \
df7492f9
KH
1832 if (c < 0 || c >= 81) \
1833 goto invalid_code; \
1834 \
1835 gref = c / 9, nref = c % 9; \
1836 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1837 } while (0)
1838
1839
781d7a48
KH
1840/* Decode a composition rule of a composition sequence of Emacs 21
   style at SRC.  Two bytes are read, GREF and NREF, each stored with
   0x20 added.  The encoded rule is stored at *BUF and BUF is
   advanced.  If fewer than two bytes are left, or either value is
   outside 0..80 after subtracting 0x20, jump to the label
   `invalid_code'.  */
1844
1845#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1846 do { \
1847 int gref, nref; \
1848 \
1849 if (src + 1>= src_end) \
1850 goto invalid_code; \
1851 ONE_MORE_BYTE_NO_CHECK (gref); \
1852 gref -= 0x20; \
1853 ONE_MORE_BYTE_NO_CHECK (nref); \
1854 nref -= 0x20; \
1855 if (gref < 0 || gref >= 81 \
1856 || nref < 0 || nref >= 81) \
1857 goto invalid_code; \
1858 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1859 } while (0)
1860
1861
/* Decode composition data of Emacs 21 style.  C is the METHOD byte
   already read by the caller; the BYTES and CHARS bytes are read
   here.  A composition annotation is appended to CHARBUF and, unless
   the method is COMPOSITION_RELATIVE, followed by the components
   (characters, alternating with rules except for
   COMPOSITION_WITH_ALTCHARS); the annotation length is then adjusted
   by the number of components stored.

   FROM and TO are character positions, so FROM is computed from
   CODING->produced_char -- the same base the Emacs 20 style macros
   use -- and not from the byte count CODING->produced.

   The caller provides `coding', `src', `src_end', `charbuf',
   `char_offset', `consumed_chars', `consumed_chars_base' and the
   labels `invalid_code' and `no_more_source'.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    /* Emacs 21 style format.  The first three bytes at SRC are         \
       (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
       the byte length of this composition information, CHARS is the    \
       number of characters composed by this composition.  */           \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int from, to;                                                       \
    int consumed_chars_limit;                                           \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    from = coding->produced_char + char_offset;                         \
    to = from + nchars;                                                 \
    ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
    consumed_chars_limit = consumed_chars_base + nbytes;                \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int i = 0;                                                      \
        while (consumed_chars < consumed_chars_limit)                   \
          {                                                             \
            if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
            i++;                                                        \
          }                                                             \
        /* The loop was left early: the data is shorter than BYTES.  */ \
        if (consumed_chars < consumed_chars_limit)                      \
          goto invalid_code;                                            \
        charbuf_base[0] -= i;                                           \
      }                                                                 \
  } while (0)
93dec019 1900
aa72b389 1901
df7492f9
KH
/* Decode a relative composition of Emacs 20 style.  SRC is rewound
   to SRC_BASE and one byte (the 0x80 marker) is skipped; then up to
   MAX_COMPOSITION_COMPONENTS characters are decoded with
   DECODE_EMACS_MULE_COMPOSITION_CHAR into a local array.  If fewer
   than two characters were decoded, jump to `invalid_code'.
   Otherwise a composition annotation covering those characters is
   appended to CHARBUF, followed by the characters themselves.  */
1902#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1903 do { \
1904 /* Emacs 20 style format for relative composition. */ \
1905 /* Store multibyte form of characters to be composed. */ \
ff0dacd7 1906 enum composition_method method = COMPOSITION_RELATIVE; \
df7492f9
KH
1907 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1908 int *buf = components; \
1909 int i, j; \
ff0dacd7 1910 int from, to; \
df7492f9
KH
1911 \
1912 src = src_base; \
1913 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1914 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1915 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1916 if (i < 2) \
1917 goto invalid_code; \
ff0dacd7
KH
1918 from = coding->produced_char + char_offset; \
1919 to = from + i; \
1920 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
df7492f9
KH
1921 for (j = 0; j < i; j++) \
1922 *charbuf++ = components[j]; \
1923 } while (0)
1924
1925
#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
  do { \
    /* Emacs 20 style rule-base composition: one character followed \
       by (RULE, CHARACTER) pairs.  The decoded elements are        \
       gathered in COMPONENTS as CHAR RULE CHAR ... CHAR; then the  \
       annotation, all the components, and finally the characters   \
       alone (every other component) are appended to CHARBUF.       \
       NOTE(review): the two DECODE_EMACS_MULE_COMPOSITION_* helper \
       macros are defined outside this chunk; each is presumed to   \
       store one element through its argument and advance it.  */ \
    enum composition_method method = COMPOSITION_WITH_RULE; \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
    int *buf = components; \
    int i, j; \
    int from, to; \
 \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
    /* At most MAX_COMPOSITION_COMPONENTS - 1 pairs may follow the  \
       first character; one more pair would overrun COMPONENTS.  */ \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS - 1; i++) \
      { \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
      } \
    /* Need at least one pair, and the sequence must end with a     \
       character, i.e. hold an odd number of components.  */ \
    if (i < 1 || (buf - components) % 2 == 0) \
      goto invalid_code; \
    /* From here on I is the number of gathered components; the     \
       composition then has I / 2 + 1 characters.  */ \
    i = buf - components; \
    /* Give up only when CHARBUF can NOT hold the components plus   \
       the characters.  */ \
    if (charbuf + i + (i / 2) + 1 > charbuf_end) \
      goto no_more_source; \
    from = coding->produced_char + char_offset; \
    to = from + (i / 2) + 1; \
    /* The annotation goes to the output buffer, not to the local   \
       scratch array.  */ \
    ADD_COMPOSITION_DATA (charbuf, from, to, method); \
    for (j = 0; j < i; j++) \
      *charbuf++ = components[j]; \
    for (j = 0; j < i; j += 2) \
      *charbuf++ = components[j]; \
  } while (0)
1954
aa72b389
KH
1955
1956static void
df7492f9 1957decode_coding_emacs_mule (coding)
aa72b389 1958 struct coding_system *coding;
aa72b389 1959{
df7492f9
KH
1960 unsigned char *src = coding->source + coding->consumed;
1961 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1962 unsigned char *src_base;
df7492f9 1963 int *charbuf = coding->charbuf;
ff0dacd7 1964 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 1965 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
1966 int multibytep = coding->src_multibyte;
1967 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
1968 int char_offset = coding->produced_char;
1969 int last_offset = char_offset;
1970 int last_id = charset_ascii;
aa72b389 1971
df7492f9 1972 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1973
aa72b389
KH
1974 while (1)
1975 {
df7492f9
KH
1976 int c;
1977
aa72b389 1978 src_base = src;
df7492f9
KH
1979 consumed_chars_base = consumed_chars;
1980
1981 if (charbuf >= charbuf_end)
1982 break;
aa72b389 1983
df7492f9
KH
1984 ONE_MORE_BYTE (c);
1985
1986 if (c < 0x80)
aa72b389 1987 {
df7492f9
KH
1988 if (c == '\r')
1989 {
1990 if (EQ (eol_type, Qdos))
1991 {
1992 if (src == src_end)
98725083
KH
1993 {
1994 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1995 goto no_more_source;
1996 }
df7492f9
KH
1997 if (*src == '\n')
1998 ONE_MORE_BYTE (c);
1999 }
2000 else if (EQ (eol_type, Qmac))
2001 c = '\n';
2002 }
2003 *charbuf++ = c;
2004 char_offset++;
aa72b389 2005 }
df7492f9
KH
2006 else if (c == 0x80)
2007 {
df7492f9 2008 ONE_MORE_BYTE (c);
781d7a48
KH
2009 if (c - 0xF2 >= COMPOSITION_RELATIVE
2010 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2011 DECODE_EMACS_MULE_21_COMPOSITION (c);
2012 else if (c < 0xC0)
2013 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2014 else if (c == 0xFF)
2015 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2016 else
2017 goto invalid_code;
2018 }
2019 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2020 {
2021 int nbytes, nchars;
ff0dacd7
KH
2022 int id;
2023
781d7a48
KH
2024 src = src_base;
2025 consumed_chars = consumed_chars_base;
ff0dacd7 2026 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2027 if (c < 0)
2028 {
2029 if (c == -2)
2030 break;
2031 goto invalid_code;
2032 }
ff0dacd7
KH
2033 if (last_id != id)
2034 {
2035 if (last_id != charset_ascii)
2036 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2037 last_id = id;
2038 last_offset = char_offset;
2039 }
df7492f9 2040 *charbuf++ = c;
781d7a48
KH
2041 src += nbytes;
2042 consumed_chars += nchars;
df7492f9
KH
2043 char_offset++;
2044 }
2045 continue;
2046
2047 invalid_code:
2048 src = src_base;
2049 consumed_chars = consumed_chars_base;
2050 ONE_MORE_BYTE (c);
2051 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2052 char_offset++;
df7492f9
KH
2053 coding->errors++;
2054 }
2055
2056 no_more_source:
ff0dacd7
KH
2057 if (last_id != charset_ascii)
2058 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
2059 coding->consumed_char += consumed_chars_base;
2060 coding->consumed = src_base - coding->source;
2061 coding->charbuf_used = charbuf - coding->charbuf;
2062}
2063
2064
/* Store in CODES[0] and CODES[1] the leading byte(s) that introduce a
   character of the charset whose emacs-mule id is ID.  Ids below 0xA0
   are their own single leading byte and CODES[1] is set to 0;
   otherwise CODES[0] is one of 0x9A..0x9D, selected by the range ID
   falls in, and CODES[1] is ID itself.

   The expansion deliberately has no trailing semicolon so that
   `EMACS_MULE_LEADING_CODES (id, codes);' is exactly one statement,
   usable even in front of an `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes) \
  do { \
    if ((id) < 0xA0) \
      (codes)[0] = (id), (codes)[1] = 0; \
    else if ((id) < 0xE0) \
      (codes)[0] = 0x9A, (codes)[1] = (id); \
    else if ((id) < 0xF0) \
      (codes)[0] = 0x9B, (codes)[1] = (id); \
    else if ((id) < 0xF5) \
      (codes)[0] = 0x9C, (codes)[1] = (id); \
    else \
      (codes)[0] = 0x9D, (codes)[1] = (id); \
  } while (0)
2078
aa72b389 2079
df7492f9
KH
2080static int
2081encode_coding_emacs_mule (coding)
2082 struct coding_system *coding;
2083{
2084 int multibytep = coding->dst_multibyte;
2085 int *charbuf = coding->charbuf;
2086 int *charbuf_end = charbuf + coding->charbuf_used;
2087 unsigned char *dst = coding->destination + coding->produced;
2088 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2089 int safe_room = 8;
df7492f9
KH
2090 int produced_chars = 0;
2091 Lisp_Object attrs, eol_type, charset_list;
2092 int c;
ff0dacd7 2093 int preferred_charset_id = -1;
df7492f9
KH
2094
2095 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2096
2097 while (charbuf < charbuf_end)
2098 {
2099 ASSURE_DESTINATION (safe_room);
2100 c = *charbuf++;
ff0dacd7
KH
2101
2102 if (c < 0)
2103 {
2104 /* Handle an annotation. */
2105 switch (*charbuf)
2106 {
2107 case CODING_ANNOTATE_COMPOSITION_MASK:
2108 /* Not yet implemented. */
2109 break;
2110 case CODING_ANNOTATE_CHARSET_MASK:
2111 preferred_charset_id = charbuf[3];
2112 if (preferred_charset_id >= 0
2113 && NILP (Fmemq (make_number (preferred_charset_id),
2114 charset_list)))
2115 preferred_charset_id = -1;
2116 break;
2117 default:
2118 abort ();
2119 }
2120 charbuf += -c - 1;
2121 continue;
2122 }
2123
df7492f9
KH
2124 if (ASCII_CHAR_P (c))
2125 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2126 else if (CHAR_BYTE8_P (c))
2127 {
2128 c = CHAR_TO_BYTE8 (c);
2129 EMIT_ONE_BYTE (c);
2130 }
df7492f9 2131 else
aa72b389 2132 {
df7492f9
KH
2133 struct charset *charset;
2134 unsigned code;
2135 int dimension;
2136 int emacs_mule_id;
2137 unsigned char leading_codes[2];
2138
ff0dacd7
KH
2139 if (preferred_charset_id >= 0)
2140 {
2141 charset = CHARSET_FROM_ID (preferred_charset_id);
2142 if (! CHAR_CHARSET_P (c, charset))
2143 charset = char_charset (c, charset_list, NULL);
2144 }
2145 else
2146 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2147 if (! charset)
2148 {
2149 c = coding->default_char;
2150 if (ASCII_CHAR_P (c))
2151 {
2152 EMIT_ONE_ASCII_BYTE (c);
2153 continue;
2154 }
2155 charset = char_charset (c, charset_list, &code);
2156 }
2157 dimension = CHARSET_DIMENSION (charset);
2158 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2159 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2160 EMIT_ONE_BYTE (leading_codes[0]);
2161 if (leading_codes[1])
2162 EMIT_ONE_BYTE (leading_codes[1]);
2163 if (dimension == 1)
2164 EMIT_ONE_BYTE (code);
aa72b389 2165 else
df7492f9
KH
2166 {
2167 EMIT_ONE_BYTE (code >> 8);
2168 EMIT_ONE_BYTE (code & 0xFF);
2169 }
aa72b389 2170 }
aa72b389 2171 }
df7492f9
KH
2172 coding->result = CODING_RESULT_SUCCESS;
2173 coding->produced_char += produced_chars;
2174 coding->produced = dst - coding->destination;
2175 return 0;
aa72b389 2176}
b73bfc1c 2177
4ed46869 2178\f
df7492f9 2179/*** 7. ISO2022 handlers ***/
4ed46869
KH
2180
2181/* The following note describes the coding system ISO2022 briefly.
39787efd 2182 Since the intention of this note is to help understand the
5a936b46 2183 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2184 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46
DL
2185 original document of ISO2022. This is equivalent to the standard
2186 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2187
2188 ISO2022 provides many mechanisms to encode several character sets
5a936b46 2189 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2190 is encoded using bytes less than 128. This may make the encoded
2191 text a little bit longer, but the text passes more easily through
5a936b46
DL
2192 several types of gateway, some of which strip off the MSB (Most
2193 Significant Bit).
b73bfc1c 2194
5a936b46
DL
2195 There are two kinds of character sets: control character sets and
2196 graphic character sets. The former contain control characters such
4ed46869 2197 as `newline' and `escape' to provide control functions (control
39787efd 2198 functions are also provided by escape sequences). The latter
5a936b46 2199 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2200 two control character sets and many graphic character sets.
2201
2202 Graphic character sets are classified into one of the following
39787efd
KH
2203 four classes, according to the number of bytes (DIMENSION) and
2204 number of characters in one dimension (CHARS) of the set:
2205 - DIMENSION1_CHARS94
2206 - DIMENSION1_CHARS96
2207 - DIMENSION2_CHARS94
2208 - DIMENSION2_CHARS96
2209
2210 In addition, each character set is assigned an identification tag,
5a936b46 2211 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2212 hereafter). The <F> of each character set is decided by ECMA(*)
2213 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2214 (0x30..0x3F are for private use only).
4ed46869
KH
2215
2216 Note (*): ECMA = European Computer Manufacturers Association
2217
5a936b46 2218 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2219 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2220 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2221 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2222 o DIMENSION2_CHARS96 -- none for the moment
2223
39787efd 2224 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2225 C0 [0x00..0x1F] -- control character plane 0
2226 GL [0x20..0x7F] -- graphic character plane 0
2227 C1 [0x80..0x9F] -- control character plane 1
2228 GR [0xA0..0xFF] -- graphic character plane 1
2229
2230 A control character set is directly designated and invoked to C0 or
39787efd
KH
2231 C1 by an escape sequence. The most common case is that:
2232 - ISO646's control character set is designated/invoked to C0, and
2233 - ISO6429's control character set is designated/invoked to C1,
2234 and usually these designations/invocations are omitted in encoded
2235 text. In a 7-bit environment, only C0 can be used, and a control
2236 character for C1 is encoded by an appropriate escape sequence to
2237 fit into the environment. All control characters for C1 are
2238 defined to have corresponding escape sequences.
4ed46869
KH
2239
2240 A graphic character set is at first designated to one of four
2241 graphic registers (G0 through G3), then these graphic registers are
2242 invoked to GL or GR. These designations and invocations can be
2243 done independently. The most common case is that G0 is invoked to
39787efd
KH
2244 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2245 these invocations and designations are omitted in encoded text.
2246 In a 7-bit environment, only GL can be used.
4ed46869 2247
39787efd
KH
2248 When a graphic character set of CHARS94 is invoked to GL, codes
2249 0x20 and 0x7F of the GL area work as control characters SPACE and
2250 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2251 be used.
4ed46869
KH
2252
2253 There are two ways of invocation: locking-shift and single-shift.
2254 With locking-shift, the invocation lasts until the next different
39787efd
KH
2255 invocation, whereas with single-shift, the invocation affects the
2256 following character only and doesn't affect the locking-shift
2257 state. Invocations are done by the following control characters or
2258 escape sequences:
4ed46869
KH
2259
2260 ----------------------------------------------------------------------
39787efd 2261 abbrev function cntrl escape seq description
4ed46869 2262 ----------------------------------------------------------------------
39787efd
KH
2263 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2264 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2265 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2266 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2267 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2268 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2269 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2270 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2271 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2272 ----------------------------------------------------------------------
39787efd
KH
2273 (*) These are not used by any known coding system.
2274
2275 Control characters for these functions are defined by macros
2276 ISO_CODE_XXX in `coding.h'.
4ed46869 2277
39787efd 2278 Designations are done by the following escape sequences:
4ed46869
KH
2279 ----------------------------------------------------------------------
2280 escape sequence description
2281 ----------------------------------------------------------------------
2282 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2283 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2284 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2285 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2286 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2287 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2288 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2289 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2290 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2291 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2292 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2293 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2294 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2295 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2296 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2297 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2298 ----------------------------------------------------------------------
2299
2300 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2301 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2302
2303 Note (*): Although these designations are not allowed in ISO2022,
2304 Emacs accepts them on decoding, and produces them on encoding
39787efd 2305 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2306 7-bit environment, non-locking-shift, and non-single-shift.
2307
2308 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2309 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2310
5a936b46 2311 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2312 same multilingual text in ISO2022. Actually, there exist many
2313 coding systems such as Compound Text (used in X11's inter client
5a936b46
DL
2314 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2315 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2316 localized platforms), and all of these are variants of ISO2022.
2317
2318 In addition to the above, Emacs handles two more kinds of escape
2319 sequences: ISO6429's direction specification and Emacs' private
2320 sequence for specifying character composition.
2321
39787efd 2322 ISO6429's direction specification takes the following form:
4ed46869
KH
2323 o CSI ']' -- end of the current direction
2324 o CSI '0' ']' -- end of the current direction
2325 o CSI '1' ']' -- start of left-to-right text
2326 o CSI '2' ']' -- start of right-to-left text
2327 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2328 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2329
2330 Character composition specification takes the following form:
ec6d2bb8
KH
2331 o ESC '0' -- start relative composition
2332 o ESC '1' -- end composition
2333 o ESC '2' -- start rule-base composition (*)
2334 o ESC '3' -- start relative composition with alternate chars (**)
2335 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2336 Since these are not standard escape sequences of any ISO standard,
5a936b46 2337 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2338
5a936b46
DL
2339 (*) This form is used only in Emacs 20.7 and older versions,
2340 but newer versions can safely decode it.
2341 (**) This form is used only in Emacs 21.1 and newer versions,
2342 and older versions can't decode it.
ec6d2bb8 2343
5a936b46 2344 Here's a list of example usages of these composition escape
b73bfc1c 2345 sequences (categorized by `enum composition_method').
ec6d2bb8 2346
b73bfc1c 2347 COMPOSITION_RELATIVE:
ec6d2bb8 2348 ESC 0 CHAR [ CHAR ] ESC 1
5a936b46 2349 COMPOSITION_WITH_RULE:
ec6d2bb8 2350 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2351 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2352 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2353 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2354 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2355
2356enum iso_code_class_type iso_code_class[256];
2357
df7492f9
KH
/* Nonzero if charset ID may be used with CODING, i.e. ID is a valid
   index into CODING->safe_charsets and its entry is not the 255 filler
   that setup_iso_safe_charsets puts in unused slots.  The entry is
   compared as an unsigned byte because safe_charsets is accessed as
   plain `char', whose signedness is implementation-defined; a test
   for `>= 0' would accept every entry where `char' is unsigned.
   ID is evaluated more than once.  */

#define SAFE_CHARSET_P(coding, id) \
  ((id) >= 0 \
   && (id) <= (coding)->max_charset_id \
   && (unsigned char) (coding)->safe_charsets[id] != 255)
2361
2362
/* Nonzero unless CODING_ISO_INITIAL for register 1 of the coding
   system stored in coding_categories[CATEGORY] is negative.
   NOTE(review): presumably "some charset is initially designated to
   G1, so shift-out is meaningful" -- confirm against coding.h.  */

#define SHIFT_OUT_OK(category) \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2365
2366static void
f0064e1f
DL
2367setup_iso_safe_charsets (attrs)
2368 Lisp_Object attrs;
df7492f9
KH
2369{
2370 Lisp_Object charset_list, safe_charsets;
2371 Lisp_Object request;
2372 Lisp_Object reg_usage;
2373 Lisp_Object tail;
2374 int reg94, reg96;
2375 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2376 int max_charset_id;
2377
2378 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2379 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2380 && ! EQ (charset_list, Viso_2022_charset_list))
2381 {
2382 CODING_ATTR_CHARSET_LIST (attrs)
2383 = charset_list = Viso_2022_charset_list;
2384 ASET (attrs, coding_attr_safe_charsets, Qnil);
2385 }
2386
2387 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2388 return;
2389
2390 max_charset_id = 0;
2391 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2392 {
2393 int id = XINT (XCAR (tail));
2394 if (max_charset_id < id)
2395 max_charset_id = id;
2396 }
2397
2398 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2399 make_number (255));
2400 request = AREF (attrs, coding_attr_iso_request);
2401 reg_usage = AREF (attrs, coding_attr_iso_usage);
2402 reg94 = XINT (XCAR (reg_usage));
2403 reg96 = XINT (XCDR (reg_usage));
2404
2405 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2406 {
2407 Lisp_Object id;
2408 Lisp_Object reg;
2409 struct charset *charset;
2410
2411 id = XCAR (tail);
2412 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2413 reg = Fcdr (Fassq (id, request));
df7492f9
KH
2414 if (! NILP (reg))
2415 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2416 else if (charset->iso_chars_96)
2417 {
2418 if (reg96 < 4)
2419 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2420 }
2421 else
2422 {
2423 if (reg94 < 4)
2424 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2425 }
2426 }
2427 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2428}
d46c5b12 2429
d46c5b12 2430
4ed46869 2431/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2432 Check if a text is encoded in one of the ISO-2022 based coding systems.
2433 If it is, return 1, else return 0. */
4ed46869 2434
0a28aafb 2435static int
ff0dacd7 2436detect_coding_iso_2022 (coding, detect_info)
df7492f9 2437 struct coding_system *coding;
ff0dacd7 2438 struct coding_detection_info *detect_info;
4ed46869 2439{
df7492f9
KH
2440 unsigned char *src = coding->source, *src_base = src;
2441 unsigned char *src_end = coding->source + coding->src_bytes;
2442 int multibytep = coding->src_multibyte;
ff0dacd7 2443 int single_shifting = 0;
df7492f9
KH
2444 int id;
2445 int c, c1;
2446 int consumed_chars = 0;
2447 int i;
ff0dacd7
KH
2448 int rejected = 0;
2449 int found = 0;
2450
2451 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2452
2453 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2454 {
2455 struct coding_system *this = &(coding_categories[i]);
2456 Lisp_Object attrs, val;
2457
2458 attrs = CODING_ID_ATTRS (this->id);
2459 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2460 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2461 setup_iso_safe_charsets (attrs);
2462 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2463 this->max_charset_id = XSTRING (val)->size - 1;
2464 this->safe_charsets = (char *) XSTRING (val)->data;
2465 }
2466
2467 /* A coding system of this category is always ASCII compatible. */
2468 src += coding->head_ascii;
3f003981 2469
ff0dacd7 2470 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2471 {
df7492f9 2472 ONE_MORE_BYTE (c);
4ed46869
KH
2473 switch (c)
2474 {
2475 case ISO_CODE_ESC:
74383408
KH
2476 if (inhibit_iso_escape_detection)
2477 break;
f46869e4 2478 single_shifting = 0;
df7492f9 2479 ONE_MORE_BYTE (c);
d46c5b12 2480 if (c >= '(' && c <= '/')
4ed46869 2481 {
bf9cdd4e 2482 /* Designation sequence for a charset of dimension 1. */
df7492f9 2483 ONE_MORE_BYTE (c1);
d46c5b12 2484 if (c1 < ' ' || c1 >= 0x80
df7492f9 2485 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2486 /* Invalid designation sequence. Just ignore. */
2487 break;
bf9cdd4e
KH
2488 }
2489 else if (c == '$')
2490 {
2491 /* Designation sequence for a charset of dimension 2. */
df7492f9 2492 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2493 if (c >= '@' && c <= 'B')
2494 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2495 id = iso_charset_table[1][0][c];
bf9cdd4e 2496 else if (c >= '(' && c <= '/')
bcf26d6a 2497 {
df7492f9 2498 ONE_MORE_BYTE (c1);
d46c5b12 2499 if (c1 < ' ' || c1 >= 0x80
df7492f9 2500 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2501 /* Invalid designation sequence. Just ignore. */
2502 break;
bcf26d6a 2503 }
bf9cdd4e 2504 else
ff0dacd7 2505 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2506 break;
2507 }
ae9ff118 2508 else if (c == 'N' || c == 'O')
d46c5b12 2509 {
ae9ff118 2510 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2511 single_shifting = 1;
2512 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2513 break;
4ed46869 2514 }
ec6d2bb8
KH
2515 else if (c >= '0' && c <= '4')
2516 {
2517 /* ESC <Fp> for start/end composition. */
ff0dacd7 2518 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2519 break;
2520 }
bf9cdd4e 2521 else
df7492f9 2522 {
ff0dacd7 2523 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2524 break;
2525 }
d46c5b12
KH
2526
2527 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2528 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2529 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2530 id))
ff0dacd7 2531 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2532 else
ff0dacd7 2533 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2534 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2535 id))
ff0dacd7 2536 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2537 else
ff0dacd7 2538 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2539 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2540 id))
ff0dacd7 2541 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2542 else
ff0dacd7 2543 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2544 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2545 id))
ff0dacd7 2546 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2547 else
ff0dacd7 2548 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2549 break;
2550
4ed46869 2551 case ISO_CODE_SO:
d46c5b12 2552 case ISO_CODE_SI:
ff0dacd7 2553 /* Locking shift out/in. */
74383408
KH
2554 if (inhibit_iso_escape_detection)
2555 break;
f46869e4 2556 single_shifting = 0;
ff0dacd7
KH
2557 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2558 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2559 break;
ff0dacd7 2560
4ed46869 2561 case ISO_CODE_CSI:
ff0dacd7 2562 /* Control sequence introducer. */
f46869e4 2563 single_shifting = 0;
ff0dacd7
KH
2564 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2565 found |= CATEGORY_MASK_ISO_8_ELSE;
2566 goto check_extra_latin;
2567
2568
4ed46869
KH
2569 case ISO_CODE_SS2:
2570 case ISO_CODE_SS3:
ff0dacd7
KH
2571 /* Single shift. */
2572 if (inhibit_iso_escape_detection)
2573 break;
2574 single_shifting = 1;
2575 rejected |= CATEGORY_MASK_ISO_7BIT;
2576 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2577 & CODING_ISO_FLAG_SINGLE_SHIFT)
2578 found |= CATEGORY_MASK_ISO_8_1;
2579 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2580 & CODING_ISO_FLAG_SINGLE_SHIFT)
2581 found |= CATEGORY_MASK_ISO_8_2;
2582 goto check_extra_latin;
4ed46869
KH
2583
2584 default:
2585 if (c < 0x80)
f46869e4
KH
2586 {
2587 single_shifting = 0;
2588 break;
2589 }
ff0dacd7 2590 if (c >= 0xA0)
c4825358 2591 {
ff0dacd7
KH
2592 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2593 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2594 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2595 0xA0..0FF. If the byte length is even, we include
2596 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2597 only when we are not single shifting. */
2598 if (! single_shifting
2599 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2600 {
e17de821 2601 int i = 1;
b73bfc1c
KH
2602 while (src < src_end)
2603 {
df7492f9 2604 ONE_MORE_BYTE (c);
b73bfc1c
KH
2605 if (c < 0xA0)
2606 break;
2607 i++;
2608 }
2609
2610 if (i & 1 && src < src_end)
ff0dacd7 2611 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2612 else
ff0dacd7 2613 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2614 }
ff0dacd7 2615 break;
4ed46869 2616 }
ff0dacd7
KH
2617 check_extra_latin:
2618 single_shifting = 0;
2619 if (! VECTORP (Vlatin_extra_code_table)
2620 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2621 {
2622 rejected = CATEGORY_MASK_ISO;
2623 break;
2624 }
2625 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2626 & CODING_ISO_FLAG_LATIN_EXTRA)
2627 found |= CATEGORY_MASK_ISO_8_1;
2628 else
2629 rejected |= CATEGORY_MASK_ISO_8_1;
2630 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2631 & CODING_ISO_FLAG_LATIN_EXTRA)
2632 found |= CATEGORY_MASK_ISO_8_2;
2633 else
2634 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2635 }
2636 }
ff0dacd7
KH
2637 detect_info->rejected |= CATEGORY_MASK_ISO;
2638 return 0;
2639
df7492f9 2640 no_more_source:
ff0dacd7
KH
2641 detect_info->rejected |= rejected;
2642 detect_info->found |= (found & ~rejected);
df7492f9 2643 return 1;
4ed46869
KH
2644}
2645
4ed46869
KH
2646
2647/* Set designation state into CODING. */
df7492f9
KH
#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
  do { \
    /* Look up the charset selected by DIM, CHARS_96 and the final  \
       byte FINAL and record it as the designation of register REG  \
       in CODING.  An unknown or unsafe charset marks REG with -2   \
       and is treated as an invalid code.  */ \
    int id = -1, prev; \
 \
    if (final >= '0' && final < 128) \
      id = ISO_CHARSET_TABLE (dim, chars_96, final); \
    if (id < 0 || ! SAFE_CHARSET_P (coding, id)) \
      { \
        CODING_ISO_DESIGNATION (coding, reg) = -2; \
        goto invalid_code; \
      } \
    prev = CODING_ISO_DESIGNATION (coding, reg); \
    /* The coding system's flags may substitute ASCII for           \
       JISX0201-Roman, and JISX0208 for JISX0208.1978.  */ \
    if (id == charset_jisx0201_roman) \
      id = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
            ? charset_ascii : id); \
    else if (id == charset_jisx0208_1978) \
      id = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
            ? charset_jisx0208 : id); \
    CODING_ISO_DESIGNATION (coding, reg) = id; \
    /* If the previous designation to REG was invalid and this one  \
       designates ASCII, keep this designation sequence in the      \
       decoded text as well.  */ \
    if (prev == -2 && id == charset_ascii) \
      goto invalid_code; \
  } while (0)
2677
d46c5b12 2678
df7492f9
KH
#define MAYBE_FINISH_COMPOSITION() \
  do { \
    /* If a composition is still open, close it and flush what was  \
       gathered in `components' to CHARBUF as plain characters.     \
       Methods without rules store a character in every slot;       \
       the others store characters in the even slots only.  */ \
    if (composition_state != COMPOSING_NO) \
      { \
        int i, step; \
 \
        if (charbuf + component_idx > charbuf_end) \
          goto no_more_source; \
        composition_state = COMPOSING_NO; \
        step = ((method == COMPOSITION_RELATIVE \
                 || method == COMPOSITION_WITH_ALTCHARS) ? 1 : 2); \
        for (i = 0; i < component_idx; i += step) \
          *charbuf++ = components[i]; \
        char_offset += (step == 1 \
                        ? component_idx \
                        : (component_idx / 2) + 1); \
      } \
  } while (0)
2703
d46c5b12 2704
aa72b389
KH
2705/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2706 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2707 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2708 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2709 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2710 */
ec6d2bb8 2711
df7492f9
KH
#define DECODE_COMPOSITION_START(c1) \
  do { \
    if (c1 == '0' \
        && composition_state == COMPOSING_COMPONENT_RULE) \
      { \
        /* ESC 0 inside an alternate-chars composition: the         \
           components are complete, the real characters follow.  */ \
        component_len = component_idx; \
        composition_state = COMPOSING_CHAR; \
      } \
    else \
      { \
        unsigned char *p; \
 \
        MAYBE_FINISH_COMPOSITION (); \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
          goto no_more_source; \
        /* Accept the start only if the terminating ESC 1 is        \
           already in the source.  */ \
        for (p = src; p < src_end - 1; p++) \
          if (*p == ISO_CODE_ESC && p[1] == '1') \
            break; \
        /* The scan stops with P below SRC_END - 1 only when ESC 1  \
           was found.  `>=' rather than `==' also covers a source   \
           with fewer than two bytes left, where the loop above     \
           never runs and P starts at or beyond SRC_END - 1.  */ \
        if (p >= src_end - 1) \
          { \
            if (coding->mode & CODING_MODE_LAST_BLOCK) \
              goto invalid_code; \
            goto no_more_source; \
          } \
 \
        /* This is surely the start of a composition.  */ \
        method = (c1 == '0' ? COMPOSITION_RELATIVE \
                  : c1 == '2' ? COMPOSITION_WITH_RULE \
                  : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
                  : COMPOSITION_WITH_RULE_ALTCHARS); \
        composition_state = (c1 <= '2' ? COMPOSING_CHAR \
                             : COMPOSING_COMPONENT_CHAR); \
        component_idx = component_len = 0; \
      } \
  } while (0)
2747
ec6d2bb8 2748
df7492f9
KH
2749/* Handle composition end sequence ESC 1. */
2750
#define DECODE_COMPOSITION_END() \
  do { \
    /* Close the current composition: append its annotation, then   \
       (unless the method is relative) the gathered components, and \
       finally the composed characters themselves.  */ \
    int *saved_charbuf = charbuf; \
    int from = coding->produced_char + char_offset; \
    int nchars, to, i, limit, step; \
 \
    if (component_len > 0) \
      nchars = component_idx - component_len; \
    else if (method == COMPOSITION_RELATIVE) \
      nchars = component_idx; \
    else \
      nchars = (component_idx + 1) / 2; \
    to = from + nchars; \
 \
    ADD_COMPOSITION_DATA (charbuf, from, to, method); \
    if (method != COMPOSITION_RELATIVE) \
      { \
        limit = (component_len == 0 ? component_idx : component_len); \
        for (i = 0; i < limit; i++) \
          *charbuf++ = components[i]; \
        /* The first annotation element becomes the negated number  \
           of elements written since SAVED_CHARBUF.  */ \
        *saved_charbuf = saved_charbuf - charbuf; \
      } \
    /* With rules, the characters sit in the even slots from 0;     \
       otherwise they are the slots from COMPONENT_LEN on.  */ \
    step = (method == COMPOSITION_WITH_RULE ? 2 : 1); \
    for (i = (step == 2 ? 0 : component_len); i < component_idx; \
         i += step, char_offset++) \
      *charbuf++ = components[i]; \
    coding->annotated = 1; \
    composition_state = COMPOSING_NO; \
  } while (0)
2781
df7492f9 2782
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and, for the new
   format, one more byte read from the source into C2) and replace
   C1 with the result of COMPOSITION_ENCODE_RULE.  C1 becomes 0 when
   the byte is outside both the old and the new format ranges.  C2
   must be a variable of the enclosing function.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if (c1 >= 93)                                                       \
      c1 = 0;                                                           \
    else if (c1 >= 81)          /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
                                                                        \
        if (gref == 4)                                                  \
          gref = 10;                                                    \
        if (nref == 4)                                                  \
          nref = 10;                                                    \
        c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
      }                                                                 \
  } while (0)
88993dfd 2806
d46c5b12 2807
4ed46869
KH
2808/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2809
b73bfc1c 2810static void
df7492f9 2811decode_coding_iso_2022 (coding)
4ed46869 2812 struct coding_system *coding;
4ed46869 2813{
df7492f9
KH
2814 unsigned char *src = coding->source + coding->consumed;
2815 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2816 unsigned char *src_base;
df7492f9 2817 int *charbuf = coding->charbuf;
ff0dacd7
KH
2818 int *charbuf_end
2819 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2820 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
2821 int multibytep = coding->src_multibyte;
2822 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2823 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2824 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2825 struct charset *charset;
2826 int c;
2827 /* For handling composition sequence. */
2828#define COMPOSING_NO 0
2829#define COMPOSING_CHAR 1
2830#define COMPOSING_RULE 2
2831#define COMPOSING_COMPONENT_CHAR 3
2832#define COMPOSING_COMPONENT_RULE 4
2833
2834 int composition_state = COMPOSING_NO;
2835 enum composition_method method;
2836 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2837 int component_idx;
2838 int component_len;
2839 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
2840 int char_offset = coding->produced_char;
2841 int last_offset = char_offset;
2842 int last_id = charset_ascii;
df7492f9
KH
2843
2844 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2845 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2846
2847 while (1)
4ed46869 2848 {
b73bfc1c
KH
2849 int c1, c2;
2850
2851 src_base = src;
df7492f9
KH
2852 consumed_chars_base = consumed_chars;
2853
2854 if (charbuf >= charbuf_end)
2855 break;
2856
b73bfc1c 2857 ONE_MORE_BYTE (c1);
4ed46869 2858
98725083 2859 /* We produce at most one character. */
4ed46869
KH
2860 switch (iso_code_class [c1])
2861 {
2862 case ISO_0x20_or_0x7F:
df7492f9 2863 if (composition_state != COMPOSING_NO)
ec6d2bb8 2864 {
df7492f9
KH
2865 if (composition_state == COMPOSING_RULE
2866 || composition_state == COMPOSING_COMPONENT_RULE)
2867 {
2868 DECODE_COMPOSITION_RULE (c1);
2869 components[component_idx++] = c1;
2870 composition_state--;
2871 continue;
2872 }
ec6d2bb8 2873 }
df7492f9
KH
2874 if (charset_id_0 < 0
2875 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2876 /* This is SPACE or DEL. */
2877 charset = CHARSET_FROM_ID (charset_ascii);
2878 else
2879 charset = CHARSET_FROM_ID (charset_id_0);
2880 break;
4ed46869
KH
2881
2882 case ISO_graphic_plane_0:
781d7a48 2883 if (composition_state != COMPOSING_NO)
b73bfc1c 2884 {
781d7a48
KH
2885 if (composition_state == COMPOSING_RULE
2886 || composition_state == COMPOSING_COMPONENT_RULE)
2887 {
2888 DECODE_COMPOSITION_RULE (c1);
2889 components[component_idx++] = c1;
2890 composition_state--;
2891 continue;
2892 }
b73bfc1c 2893 }
df7492f9 2894 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2895 break;
2896
2897 case ISO_0xA0_or_0xFF:
df7492f9
KH
2898 if (charset_id_1 < 0
2899 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2900 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2901 goto invalid_code;
4ed46869
KH
2902 /* This is a graphic character, we fall down ... */
2903
2904 case ISO_graphic_plane_1:
df7492f9
KH
2905 if (charset_id_1 < 0)
2906 goto invalid_code;
2907 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2908 break;
2909
2910 case ISO_carriage_return:
df7492f9 2911 if (c1 == '\r')
4ed46869 2912 {
df7492f9 2913 if (EQ (eol_type, Qdos))
4ed46869 2914 {
df7492f9 2915 if (src == src_end)
98725083
KH
2916 {
2917 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2918 goto no_more_source;
2919 }
df7492f9
KH
2920 if (*src == '\n')
2921 ONE_MORE_BYTE (c1);
4ed46869 2922 }
df7492f9
KH
2923 else if (EQ (eol_type, Qmac))
2924 c1 = '\n';
4ed46869 2925 }
df7492f9
KH
2926 /* fall through */
2927
2928 case ISO_control_0:
2929 MAYBE_FINISH_COMPOSITION ();
2930 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2931 break;
2932
df7492f9
KH
2933 case ISO_control_1:
2934 MAYBE_FINISH_COMPOSITION ();
2935 goto invalid_code;
2936
4ed46869 2937 case ISO_shift_out:
df7492f9
KH
2938 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2939 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2940 goto invalid_code;
2941 CODING_ISO_INVOCATION (coding, 0) = 1;
2942 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2943 continue;
4ed46869
KH
2944
2945 case ISO_shift_in:
df7492f9
KH
2946 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2947 goto invalid_code;
2948 CODING_ISO_INVOCATION (coding, 0) = 0;
2949 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2950 continue;
4ed46869
KH
2951
2952 case ISO_single_shift_2_7:
2953 case ISO_single_shift_2:
df7492f9
KH
2954 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2955 goto invalid_code;
4ed46869
KH
2956 /* SS2 is handled as an escape sequence of ESC 'N' */
2957 c1 = 'N';
2958 goto label_escape_sequence;
2959
2960 case ISO_single_shift_3:
df7492f9
KH
2961 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2962 goto invalid_code;
4ed46869
KH
2963 /* SS2 is handled as an escape sequence of ESC 'O' */
2964 c1 = 'O';
2965 goto label_escape_sequence;
2966
2967 case ISO_control_sequence_introducer:
2968 /* CSI is handled as an escape sequence of ESC '[' ... */
2969 c1 = '[';
2970 goto label_escape_sequence;
2971
2972 case ISO_escape:
2973 ONE_MORE_BYTE (c1);
2974 label_escape_sequence:
df7492f9 2975 /* Escape sequences handled here are invocation,
4ed46869
KH
2976 designation, direction specification, and character
2977 composition specification. */
2978 switch (c1)
2979 {
2980 case '&': /* revision of following character set */
2981 ONE_MORE_BYTE (c1);
2982 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2983 goto invalid_code;
4ed46869
KH
2984 ONE_MORE_BYTE (c1);
2985 if (c1 != ISO_CODE_ESC)
df7492f9 2986 goto invalid_code;
4ed46869
KH
2987 ONE_MORE_BYTE (c1);
2988 goto label_escape_sequence;
2989
2990 case '$': /* designation of 2-byte character set */
df7492f9
KH
2991 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2992 goto invalid_code;
4ed46869
KH
2993 ONE_MORE_BYTE (c1);
2994 if (c1 >= '@' && c1 <= 'B')
2995 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2996 or JISX0208.1980 */
df7492f9 2997 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2998 }
2999 else if (c1 >= 0x28 && c1 <= 0x2B)
3000 { /* designation of DIMENSION2_CHARS94 character set */
3001 ONE_MORE_BYTE (c2);
df7492f9 3002 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
3003 }
3004 else if (c1 >= 0x2C && c1 <= 0x2F)
3005 { /* designation of DIMENSION2_CHARS96 character set */
3006 ONE_MORE_BYTE (c2);
df7492f9 3007 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
3008 }
3009 else
df7492f9 3010 goto invalid_code;
b73bfc1c 3011 /* We must update these variables now. */
df7492f9
KH
3012 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3013 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3014 continue;
4ed46869
KH
3015
3016 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3017 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3018 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3019 goto invalid_code;
3020 CODING_ISO_INVOCATION (coding, 0) = 2;
3021 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3022 continue;
4ed46869
KH
3023
3024 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3025 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3026 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3027 goto invalid_code;
3028 CODING_ISO_INVOCATION (coding, 0) = 3;
3029 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3030 continue;
4ed46869
KH
3031
3032 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3033 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3034 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3035 goto invalid_code;
3036 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 3037 ONE_MORE_BYTE (c1);
e7046a18 3038 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3039 goto invalid_code;
4ed46869
KH
3040 break;
3041
3042 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3043 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3044 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3045 goto invalid_code;
3046 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 3047 ONE_MORE_BYTE (c1);
e7046a18 3048 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3049 goto invalid_code;
4ed46869
KH
3050 break;
3051
ec6d2bb8 3052 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3053 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3054 goto invalid_code;
ec6d2bb8 3055 DECODE_COMPOSITION_START (c1);
b73bfc1c 3056 continue;
4ed46869 3057
ec6d2bb8 3058 case '1': /* end composition */
df7492f9
KH
3059 if (composition_state == COMPOSING_NO)
3060 goto invalid_code;
3061 DECODE_COMPOSITION_END ();
b73bfc1c 3062 continue;
4ed46869
KH
3063
3064 case '[': /* specification of direction */
df7492f9
KH
3065 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3066 goto invalid_code;
4ed46869 3067 /* For the moment, nested direction is not supported.
d46c5b12 3068 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3069 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3070 ONE_MORE_BYTE (c1);
3071 switch (c1)
3072 {
3073 case ']': /* end of the current direction */
d46c5b12 3074 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3075
3076 case '0': /* end of the current direction */
3077 case '1': /* start of left-to-right direction */
3078 ONE_MORE_BYTE (c1);
3079 if (c1 == ']')
d46c5b12 3080 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3081 else
df7492f9 3082 goto invalid_code;
4ed46869
KH
3083 break;
3084
3085 case '2': /* start of right-to-left direction */
3086 ONE_MORE_BYTE (c1);
3087 if (c1 == ']')
d46c5b12 3088 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3089 else
df7492f9 3090 goto invalid_code;
4ed46869
KH
3091 break;
3092
3093 default:
df7492f9 3094 goto invalid_code;
4ed46869 3095 }
b73bfc1c 3096 continue;
4ed46869
KH
3097
3098 default:
df7492f9
KH
3099 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3100 goto invalid_code;
4ed46869
KH
3101 if (c1 >= 0x28 && c1 <= 0x2B)
3102 { /* designation of DIMENSION1_CHARS94 character set */
3103 ONE_MORE_BYTE (c2);
df7492f9 3104 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3105 }
3106 else if (c1 >= 0x2C && c1 <= 0x2F)
3107 { /* designation of DIMENSION1_CHARS96 character set */
3108 ONE_MORE_BYTE (c2);
df7492f9 3109 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3110 }
3111 else
df7492f9 3112 goto invalid_code;
b73bfc1c 3113 /* We must update these variables now. */
df7492f9
KH
3114 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3115 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3116 continue;
4ed46869 3117 }
b73bfc1c 3118 }
4ed46869 3119
ff0dacd7
KH
3120 if (charset->id != charset_ascii
3121 && last_id != charset->id)
3122 {
3123 if (last_id != charset_ascii)
3124 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3125 last_id = charset->id;
3126 last_offset = char_offset;
3127 }
3128
b73bfc1c 3129 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3130 Produce a decoded character while getting 2nd position code
3131 C2 if necessary. */
3132 c1 &= 0x7F;
3133 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3134 {
3135 ONE_MORE_BYTE (c2);
df7492f9 3136 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3137 /* C2 is not in a valid range. */
df7492f9
KH
3138 goto invalid_code;
3139 c1 = (c1 << 8) | (c2 & 0x7F);
3140 if (CHARSET_DIMENSION (charset) > 2)
3141 {
3142 ONE_MORE_BYTE (c2);
3143 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3144 /* C2 is not in a valid range. */
3145 goto invalid_code;
3146 c1 = (c1 << 8) | (c2 & 0x7F);
3147 }
3148 }
3149
3150 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3151 if (c < 0)
3152 {
3153 MAYBE_FINISH_COMPOSITION ();
3154 for (; src_base < src; src_base++, char_offset++)
3155 {
3156 if (ASCII_BYTE_P (*src_base))
3157 *charbuf++ = *src_base;
3158 else
3159 *charbuf++ = BYTE8_TO_CHAR (*src_base);
ff0dacd7 3160 char_offset++;
df7492f9
KH
3161 }
3162 }
3163 else if (composition_state == COMPOSING_NO)
3164 {
3165 *charbuf++ = c;
3166 char_offset++;
4ed46869 3167 }
df7492f9 3168 else
781d7a48
KH
3169 {
3170 components[component_idx++] = c;
3171 if (method == COMPOSITION_WITH_RULE
3172 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3173 && composition_state == COMPOSING_COMPONENT_CHAR))
3174 composition_state++;
3175 }
4ed46869
KH
3176 continue;
3177
df7492f9
KH
3178 invalid_code:
3179 MAYBE_FINISH_COMPOSITION ();
4ed46869 3180 src = src_base;
df7492f9
KH
3181 consumed_chars = consumed_chars_base;
3182 ONE_MORE_BYTE (c);
3183 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3184 char_offset++;
df7492f9 3185 coding->errors++;
4ed46869 3186 }
fb88bf2d 3187
df7492f9 3188 no_more_source:
ff0dacd7
KH
3189 if (last_id != charset_ascii)
3190 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3191 coding->consumed_char += consumed_chars_base;
3192 coding->consumed = src_base - coding->source;
3193 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3194}
3195
b73bfc1c 3196
f4dee582 3197/* ISO2022 encoding stuff. */
4ed46869
KH
3198
3199/*
f4dee582 3200 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3201 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3202 variant has the following specifications:
df7492f9 3203 1. Initial designation to G0 thru G3.
4ed46869
KH
3204 2. Allows short-form designation?
3205 3. ASCII should be designated to G0 before control characters?
3206 4. ASCII should be designated to G0 at end of line?
3207 5. 7-bit environment or 8-bit environment?
3208 6. Use locking-shift?
3209 7. Use Single-shift?
3210 And the following two are only for Japanese:
3211 8. Use ASCII in place of JIS0201-1976-Roman?
3212 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3213 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3214 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3215 details.
4ed46869
KH
3216*/
3217
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.

   When CODING has CODING_ISO_FLAG_REVISION set and CHARSET has a
   non-negative revision, the sequence is preceded by ESC & and a
   revision byte.  For a multi-dimension 94-charset the intermediate
   byte is left out (the short form) when REG is 0, the final byte is
   '@', 'A' or 'B', and CODING does not have CODING_ISO_FLAG_LONG_FORM
   set.  Output goes through the EMIT_* macros of the enclosing
   function.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_byte = CHARSET_ISO_FINAL (charset);             \
    char *inter_94 = "()*+";                                            \
    char *inter_96 = ",-./";                                            \
    int rev = -1;                                                       \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      rev = CHARSET_ISO_REVISION (charset);                             \
                                                                        \
    if (rev >= 0)                                                       \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + rev);                                      \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (inter_96[reg]);                          \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || final_byte < '@' || final_byte > 'B')               \
          EMIT_ONE_ASCII_BYTE (inter_94[reg]);                          \
      }                                                                 \
    else if (CHARSET_ISO_CHARS_96 (charset))                            \
      EMIT_ONE_ASCII_BYTE (inter_96[reg]);                              \
    else                                                                \
      EMIT_ONE_ASCII_BYTE (inter_94[reg]);                              \
    EMIT_ONE_ASCII_BYTE (final_byte);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3265
df7492f9 3266
4ed46869
KH
/* The two macros ENCODE_SINGLE_SHIFT_2 and ENCODE_SINGLE_SHIFT_3
   produce the code (control character or escape sequence) for the
   ISO2022 single-shift functions single-shift-2 and single-shift-3,
   and mark CODING as being in a single-shifting state.

   Both are built on ENCODE_SINGLE_SHIFT_WITH, which emits ESC
   followed by ESC_FINAL when CODING has CODING_ISO_FLAG_SEVEN_BITS
   set, and the single byte CONTROL_BYTE otherwise.  */

#define ENCODE_SINGLE_SHIFT_WITH(esc_final, control_byte)               \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (control_byte);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, esc_final);                   \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_2                                           \
  ENCODE_SINGLE_SHIFT_WITH ('N', ISO_CODE_SS2)


#define ENCODE_SINGLE_SHIFT_3                                           \
  ENCODE_SINGLE_SHIFT_WITH ('O', ISO_CODE_SS3)
3289
df7492f9 3290
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING which graphic register (0, 1, 2 or 3) is now
   invoked to graphic plane 0.  */

/* Shift-in: emit SI and invoke graphic register 0.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


/* Shift-out: emit SO and invoke graphic register 1.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


/* Locking-shift-2: emit ESC n and invoke graphic register 2.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3: emit ESC o and invoke graphic register 3.  The
   final byte must be 'o'; 'n' is locking-shift-2, and the decoder
   treats ESC o as locking-shift-3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3321
df7492f9 3322
f4dee582
RS
/* Produce the code for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are produced first when CHARSET is not yet invoked.

   When CODING has CODING_ISO_FLAG_USE_ROMAN set, an ASCII CHARSET is
   replaced by charset_jisx0201_roman (CHARSET itself is assigned
   to).  The body loops: while CHARSET is neither single-shifted nor
   invoked to plane 0 or 1, encode_invocation_designation is called
   and the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int cs_id = CHARSET_ID (charset);                                   \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && cs_id == charset_ascii)                                      \
      {                                                                 \
        cs_id = charset_jisx0201_roman;                                 \
        charset = CHARSET_FROM_ID (cs_id);                              \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift covers exactly this one character.  */       \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
        else                                                            \
          EMIT_ONE_BYTE (c1 | 0x80);                                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 0))                \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
        break;                                                          \
      }                                                                 \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        EMIT_ONE_BYTE (c1 | 0x80);                                      \
        break;                                                          \
      }                                                                 \
    /* Since CHARSET is not yet invoked to any graphic planes, we      \
       must invoke it, or, at first, designate it to some graphic      \
       register.  Then repeat the loop to actually produce the         \
       character.  */                                                   \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3365
df7492f9 3366
f4dee582
RS
/* Produce the codes for a DIMENSION2 character whose character set
   is CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are produced first when CHARSET is not yet
   invoked.

   When CODING has CODING_ISO_FLAG_USE_OLDJIS set, charset_jisx0208 is
   replaced by charset_jisx0208_1978 (CHARSET itself is assigned to).
   The body loops: while CHARSET is neither single-shifted nor invoked
   to plane 0 or 1, encode_invocation_designation is called and the
   checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int cs_id = CHARSET_ID (charset);                                   \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
        && cs_id == charset_jisx0208)                                   \
      {                                                                 \
        cs_id = charset_jisx0208_1978;                                  \
        charset = CHARSET_FROM_ID (cs_id);                              \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift covers exactly this one character.  */       \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 0))                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* Since CHARSET is not yet invoked to any graphic planes, we      \
       must invoke it, or, at first, designate it to some graphic      \
       register.  Then repeat the loop to actually produce the         \
       character.  */                                                   \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3409
05e6f5dc 3410
df7492f9
KH
/* Encode character C in CHARSET: obtain its code with ENCODE_CHAR and
   hand it to the one-byte or two-byte producer according to
   CHARSET_DIMENSION (CHARSET).  In the two-byte case the code is
   split into `code >> 8' and `code & 0xFF'.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
bdd9fb48 3420
05e6f5dc 3421
4ed46869 3422/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3423 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3424 Return new DST. */
3425
3426unsigned char *
df7492f9
KH
3427encode_invocation_designation (charset, coding, dst, p_nchars)
3428 struct charset *charset;
4ed46869
KH
3429 struct coding_system *coding;
3430 unsigned char *dst;
df7492f9 3431 int *p_nchars;
4ed46869 3432{
df7492f9
KH
3433 int multibytep = coding->dst_multibyte;
3434 int produced_chars = *p_nchars;
4ed46869 3435 int reg; /* graphic register number */
df7492f9 3436 int id = CHARSET_ID (charset);
4ed46869
KH
3437
3438 /* At first, check designations. */
3439 for (reg = 0; reg < 4; reg++)
df7492f9 3440 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3441 break;
3442
3443 if (reg >= 4)
3444 {
3445 /* CHARSET is not yet designated to any graphic registers. */
3446 /* At first check the requested designation. */
df7492f9
KH
3447 reg = CODING_ISO_REQUEST (coding, id);
3448 if (reg < 0)
1ba9e4ab
KH
3449 /* Since CHARSET requests no special designation, designate it
3450 to graphic register 0. */
4ed46869
KH
3451 reg = 0;
3452
3453 ENCODE_DESIGNATION (charset, reg, coding);
3454 }
3455
df7492f9
KH
3456 if (CODING_ISO_INVOCATION (coding, 0) != reg
3457 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3458 {
3459 /* Since the graphic register REG is not invoked to any graphic
3460 planes, invoke it to graphic plane 0. */
3461 switch (reg)
3462 {
3463 case 0: /* graphic register 0 */
3464 ENCODE_SHIFT_IN;
3465 break;
3466
3467 case 1: /* graphic register 1 */
3468 ENCODE_SHIFT_OUT;
3469 break;
3470
3471 case 2: /* graphic register 2 */
df7492f9 3472 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3473 ENCODE_SINGLE_SHIFT_2;
3474 else
3475 ENCODE_LOCKING_SHIFT_2;
3476 break;
3477
3478 case 3: /* graphic register 3 */
df7492f9 3479 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3480 ENCODE_SINGLE_SHIFT_3;
3481 else
3482 ENCODE_LOCKING_SHIFT_3;
3483 break;
3484 }
3485 }
b73bfc1c 3486
df7492f9 3487 *p_nchars = produced_chars;
4ed46869
KH
3488 return dst;
3489}
3490
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.  */

/* Emit a control sequence introducer: ESC [ when CODING has
   CODING_ISO_FLAG_SEVEN_BITS set, the single byte ISO_CODE_CSI
   otherwise.  The flag is tested with `&', as everywhere else in this
   file; comparing the whole flag word with `==' matches only when no
   other flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


/* Emit CSI 2 ].  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like
   macro, so it is used without an argument list.  */
#define ENCODE_DIRECTION_R2L()                                          \
  do {                                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    EMIT_TWO_ASCII_BYTES ('2', ']');                                    \
  } while (0)


/* Emit CSI 0 ].  */
#define ENCODE_DIRECTION_L2R()                                          \
  do {                                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    EMIT_TWO_ASCII_BYTES ('0', ']');                                    \
  } while (0)
3514
4ed46869
KH
3515
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state: shift in when graphic plane
   0 does not have register 0 invoked, then re-designate every
   register that has a non-negative initial charset differing from its
   current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        if (CODING_ISO_INITIAL (coding, reg) < 0)                       \
          continue;                                                     \
        if (CODING_ISO_DESIGNATION (coding, reg)                        \
            == CODING_ISO_INITIAL (coding, reg))                        \
          continue;                                                     \
        charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));   \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
3534
df7492f9 3535
bdd9fb48 3536/* Produce designation sequences of charsets in the line started from
b73bfc1c 3537 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3538
3539 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3540 find all the necessary designations. */
3541
b73bfc1c 3542static unsigned char *
df7492f9 3543encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3544 struct coding_system *coding;
df7492f9
KH
3545 int *charbuf, *charbuf_end;
3546 unsigned char *dst;
e0e989f6 3547{
df7492f9 3548 struct charset *charset;
bdd9fb48
KH
3549 /* Table of charsets to be designated to each graphic register. */
3550 int r[4];
df7492f9
KH
3551 int c, found = 0, reg;
3552 int produced_chars = 0;
3553 int multibytep = coding->dst_multibyte;
3554 Lisp_Object attrs;
3555 Lisp_Object charset_list;
3556
3557 attrs = CODING_ID_ATTRS (coding->id);
3558 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3559 if (EQ (charset_list, Qiso_2022))
3560 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3561
3562 for (reg = 0; reg < 4; reg++)
3563 r[reg] = -1;
3564
b73bfc1c 3565 while (found < 4)
e0e989f6 3566 {
df7492f9
KH
3567 int id;
3568
3569 c = *charbuf++;
b73bfc1c
KH
3570 if (c == '\n')
3571 break;
df7492f9
KH
3572 charset = char_charset (c, charset_list, NULL);
3573 id = CHARSET_ID (charset);
3574 reg = CODING_ISO_REQUEST (coding, id);
3575 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3576 {
3577 found++;
df7492f9 3578 r[reg] = id;
bdd9fb48 3579 }
bdd9fb48
KH
3580 }
3581
3582 if (found)
3583 {
3584 for (reg = 0; reg < 4; reg++)
3585 if (r[reg] >= 0
df7492f9
KH
3586 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3587 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3588 }
b73bfc1c
KH
3589
3590 return dst;
e0e989f6
KH
3591}
3592
4ed46869
KH
3593/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3594
df7492f9
KH
3595static int
3596encode_coding_iso_2022 (coding)
4ed46869 3597 struct coding_system *coding;
4ed46869 3598{
df7492f9
KH
3599 int multibytep = coding->dst_multibyte;
3600 int *charbuf = coding->charbuf;
3601 int *charbuf_end = charbuf + coding->charbuf_used;
3602 unsigned char *dst = coding->destination + coding->produced;
3603 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3604 int safe_room = 16;
3605 int bol_designation
3606 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3607 && CODING_ISO_BOL (coding));
3608 int produced_chars = 0;
3609 Lisp_Object attrs, eol_type, charset_list;
3610 int ascii_compatible;
b73bfc1c 3611 int c;
ff0dacd7 3612 int preferred_charset_id = -1;
05e6f5dc 3613
df7492f9 3614 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4 3615 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3616 /* Charset list may have been changed. */
3617 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
004068e4
KH
3618 coding->safe_charsets
3619 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3620
df7492f9 3621 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3622
df7492f9 3623 while (charbuf < charbuf_end)
4ed46869 3624 {
df7492f9 3625 ASSURE_DESTINATION (safe_room);
b73bfc1c 3626
df7492f9 3627 if (bol_designation)
b73bfc1c 3628 {
df7492f9 3629 unsigned char *dst_prev = dst;
4ed46869 3630
bdd9fb48 3631 /* We have to produce designation sequences if any now. */
df7492f9
KH
3632 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3633 bol_designation = 0;
3634 /* We are sure that designation sequences are all ASCII bytes. */
3635 produced_chars += dst - dst_prev;
4ed46869 3636 }
ec6d2bb8 3637
df7492f9 3638 c = *charbuf++;
4ed46869 3639
ff0dacd7
KH
3640 if (c < 0)
3641 {
3642 /* Handle an annotation. */
3643 switch (*charbuf)
3644 {
3645 case CODING_ANNOTATE_COMPOSITION_MASK:
3646 /* Not yet implemented. */
3647 break;
3648 case CODING_ANNOTATE_CHARSET_MASK:
3649 preferred_charset_id = charbuf[3];
3650 if (preferred_charset_id >= 0
3651 && NILP (Fmemq (make_number (preferred_charset_id),
3652 charset_list)))
3653 preferred_charset_id = -1;
3654 break;
3655 default:
3656 abort ();
3657 }
3658 charbuf += -c - 1;
3659 continue;
3660 }
3661
b73bfc1c
KH
3662 /* Now encode the character C. */
3663 if (c < 0x20 || c == 0x7F)
3664 {
df7492f9
KH
3665 if (c == '\n'
3666 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3667 {
df7492f9
KH
3668 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3669 ENCODE_RESET_PLANE_AND_REGISTER ();
3670 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3671 {
df7492f9
KH
3672 int i;
3673
3674 for (i = 0; i < 4; i++)
3675 CODING_ISO_DESIGNATION (coding, i)
3676 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3677 }
df7492f9
KH
3678 bol_designation
3679 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3680 }
df7492f9
KH
3681 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3682 ENCODE_RESET_PLANE_AND_REGISTER ();
3683 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3684 }
df7492f9 3685 else if (ASCII_CHAR_P (c))
88993dfd 3686 {
df7492f9
KH
3687 if (ascii_compatible)
3688 EMIT_ONE_ASCII_BYTE (c);
3689 else
bf16eb23
KH
3690 {
3691 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3692 ENCODE_ISO_CHARACTER (charset, c);
3693 }
88993dfd 3694 }
16eafb5d
KH
3695 else if (CHAR_BYTE8_P (c))
3696 {
3697 c = CHAR_TO_BYTE8 (c);
3698 EMIT_ONE_BYTE (c);
3699 }
b73bfc1c 3700 else
df7492f9 3701 {
ff0dacd7 3702 struct charset *charset;
b73bfc1c 3703
ff0dacd7
KH
3704 if (preferred_charset_id >= 0)
3705 {
3706 charset = CHARSET_FROM_ID (preferred_charset_id);
3707 if (! CHAR_CHARSET_P (c, charset))
3708 charset = char_charset (c, charset_list, NULL);
3709 }
3710 else
3711 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3712 if (!charset)
3713 {
41cbe562
KH
3714 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3715 {
3716 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3717 charset = CHARSET_FROM_ID (charset_ascii);
3718 }
3719 else
3720 {
3721 c = coding->default_char;
3722 charset = char_charset (c, charset_list, NULL);
3723 }
df7492f9
KH
3724 }
3725 ENCODE_ISO_CHARACTER (charset, c);
3726 }
84fbb8a0 3727 }
b73bfc1c 3728
df7492f9
KH
3729 if (coding->mode & CODING_MODE_LAST_BLOCK
3730 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3731 {
3732 ASSURE_DESTINATION (safe_room);
3733 ENCODE_RESET_PLANE_AND_REGISTER ();
3734 }
3735 coding->result = CODING_RESULT_SUCCESS;
3736 CODING_ISO_BOL (coding) = bol_designation;
3737 coding->produced_char += produced_chars;
3738 coding->produced = dst - coding->destination;
3739 return 0;
4ed46869
KH
3740}
3741
3742\f
/*** 8. SJIS and BIG5 handlers ***/
4ed46869 3744
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in the bare
   C code.  But, in the future, they may be supported only by CCL.  */

/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.
4ed46869
KH
3755
3756 --- CODE RANGE of SJIS ---
3757 (character set) (range)
3758 ASCII 0x00 .. 0x7F
df7492f9 3759 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3760 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3761 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3762 -------------------------------
3763
3764*/
3765
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set, and each of its characters is encoded in two bytes.
4ed46869
KH
3769
3770 --- CODE RANGE of BIG5 ---
3771 (character set) (range)
3772 ASCII 0x00 .. 0x7F
3773 Big5 (1st byte) 0xA1 .. 0xFE
3774 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3775 --------------------------
3776
df7492f9 3777 */
4ed46869
KH
3778
3779/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3780 Check if a text is encoded in SJIS. If it is, return
df7492f9 3781 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3782
0a28aafb 3783static int
ff0dacd7 3784detect_coding_sjis (coding, detect_info)
df7492f9 3785 struct coding_system *coding;
ff0dacd7 3786 struct coding_detection_info *detect_info;
4ed46869 3787{
df7492f9
KH
3788 unsigned char *src = coding->source, *src_base = src;
3789 unsigned char *src_end = coding->source + coding->src_bytes;
3790 int multibytep = coding->src_multibyte;
3791 int consumed_chars = 0;
3792 int found = 0;
b73bfc1c 3793 int c;
89528eb3 3794 int incomplete;
df7492f9 3795
ff0dacd7 3796 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3797 /* A coding system of this category is always ASCII compatible. */
3798 src += coding->head_ascii;
4ed46869 3799
b73bfc1c 3800 while (1)
4ed46869 3801 {
89528eb3 3802 incomplete = 0;
df7492f9 3803 ONE_MORE_BYTE (c);
89528eb3 3804 incomplete = 1;
682169fe
KH
3805 if (c < 0x80)
3806 continue;
df7492f9 3807 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3808 {
df7492f9 3809 ONE_MORE_BYTE (c);
682169fe 3810 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3811 break;
ff0dacd7 3812 found = CATEGORY_MASK_SJIS;
4ed46869 3813 }
df7492f9 3814 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3815 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3816 else
3817 break;
4ed46869 3818 }
ff0dacd7 3819 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3820 return 0;
3821
3822 no_more_source:
89528eb3
KH
3823 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3824 {
ff0dacd7 3825 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3
KH
3826 return 0;
3827 }
ff0dacd7
KH
3828 detect_info->found |= found;
3829 return 1;
4ed46869
KH
3830}
3831
3832/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3833 Check if a text is encoded in BIG5. If it is, return
df7492f9 3834 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3835
0a28aafb 3836static int
ff0dacd7 3837detect_coding_big5 (coding, detect_info)
df7492f9 3838 struct coding_system *coding;
ff0dacd7 3839 struct coding_detection_info *detect_info;
4ed46869 3840{
df7492f9
KH
3841 unsigned char *src = coding->source, *src_base = src;
3842 unsigned char *src_end = coding->source + coding->src_bytes;
3843 int multibytep = coding->src_multibyte;
3844 int consumed_chars = 0;
3845 int found = 0;
b73bfc1c 3846 int c;
89528eb3 3847 int incomplete;
fa42c37f 3848
ff0dacd7 3849 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
3850 /* A coding system of this category is always ASCII compatible. */
3851 src += coding->head_ascii;
fa42c37f 3852
b73bfc1c 3853 while (1)
fa42c37f 3854 {
89528eb3 3855 incomplete = 0;
df7492f9 3856 ONE_MORE_BYTE (c);
89528eb3 3857 incomplete = 1;
df7492f9 3858 if (c < 0x80)
fa42c37f 3859 continue;
df7492f9 3860 if (c >= 0xA1)
fa42c37f 3861 {
df7492f9
KH
3862 ONE_MORE_BYTE (c);
3863 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3864 return 0;
ff0dacd7 3865 found = CATEGORY_MASK_BIG5;
fa42c37f 3866 }
df7492f9
KH
3867 else
3868 break;
fa42c37f 3869 }
ff0dacd7 3870 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 3871 return 0;
df7492f9
KH
3872
3873 no_more_source:
89528eb3
KH
3874 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3875 {
ff0dacd7 3876 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
3877 return 0;
3878 }
ff0dacd7
KH
3879 detect_info->found |= found;
3880 return 1;
fa42c37f
KH
3881}
3882
4ed46869
KH
3883/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3884 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3885
b73bfc1c 3886static void
df7492f9 3887decode_coding_sjis (coding)
4ed46869 3888 struct coding_system *coding;
4ed46869 3889{
df7492f9
KH
3890 unsigned char *src = coding->source + coding->consumed;
3891 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3892 unsigned char *src_base;
df7492f9 3893 int *charbuf = coding->charbuf;
ff0dacd7 3894 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
3895 int consumed_chars = 0, consumed_chars_base;
3896 int multibytep = coding->src_multibyte;
3897 struct charset *charset_roman, *charset_kanji, *charset_kana;
3898 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
3899 int char_offset = coding->produced_char;
3900 int last_offset = char_offset;
3901 int last_id = charset_ascii;
a5d301df 3902
df7492f9
KH
3903 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3904
3905 val = charset_list;
3906 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3
KH
3907 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3908 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3909
b73bfc1c 3910 while (1)
4ed46869 3911 {
df7492f9 3912 int c, c1;
b73bfc1c
KH
3913
3914 src_base = src;
df7492f9
KH
3915 consumed_chars_base = consumed_chars;
3916
3917 if (charbuf >= charbuf_end)
3918 break;
3919
3920 ONE_MORE_BYTE (c);
b73bfc1c 3921
df7492f9 3922 if (c == '\r')
4ed46869 3923 {
df7492f9 3924 if (EQ (eol_type, Qdos))
4ed46869 3925 {
df7492f9 3926 if (src == src_end)
98725083
KH
3927 {
3928 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3929 goto no_more_source;
3930 }
df7492f9
KH
3931 if (*src == '\n')
3932 ONE_MORE_BYTE (c);
4ed46869 3933 }
df7492f9
KH
3934 else if (EQ (eol_type, Qmac))
3935 c = '\n';
4ed46869 3936 }
54f78171 3937 else
df7492f9
KH
3938 {
3939 struct charset *charset;
3940
3941 if (c < 0x80)
3942 charset = charset_roman;
3943 else
4ed46869 3944 {
df7492f9
KH
3945 if (c >= 0xF0)
3946 goto invalid_code;
3947 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3948 {
54f78171 3949 /* SJIS -> JISX0208 */
df7492f9
KH
3950 ONE_MORE_BYTE (c1);
3951 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3952 goto invalid_code;
3953 c = (c << 8) | c1;
3954 SJIS_TO_JIS (c);
3955 charset = charset_kanji;
5e34de15 3956 }
fb88bf2d 3957 else
89528eb3
KH
3958 {
3959 /* SJIS -> JISX0201-Kana */
3960 c &= 0x7F;
3961 charset = charset_kana;
3962 }
df7492f9 3963 }
ff0dacd7
KH
3964 if (charset->id != charset_ascii
3965 && last_id != charset->id)
3966 {
3967 if (last_id != charset_ascii)
3968 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3969 last_id = charset->id;
3970 last_offset = char_offset;
3971 }
df7492f9
KH
3972 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3973 }
3974 *charbuf++ = c;
ff0dacd7 3975 char_offset++;
df7492f9
KH
3976 continue;
3977
3978 invalid_code:
3979 src = src_base;
3980 consumed_chars = consumed_chars_base;
3981 ONE_MORE_BYTE (c);
3982 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3983 char_offset++;
df7492f9
KH
3984 coding->errors++;
3985 }
3986
3987 no_more_source:
ff0dacd7
KH
3988 if (last_id != charset_ascii)
3989 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3990 coding->consumed_char += consumed_chars_base;
3991 coding->consumed = src_base - coding->source;
3992 coding->charbuf_used = charbuf - coding->charbuf;
3993}
3994
3995static void
3996decode_coding_big5 (coding)
3997 struct coding_system *coding;
3998{
3999 unsigned char *src = coding->source + coding->consumed;
4000 unsigned char *src_end = coding->source + coding->src_bytes;
4001 unsigned char *src_base;
4002 int *charbuf = coding->charbuf;
ff0dacd7 4003 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4004 int consumed_chars = 0, consumed_chars_base;
4005 int multibytep = coding->src_multibyte;
4006 struct charset *charset_roman, *charset_big5;
4007 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
4008 int char_offset = coding->produced_char;
4009 int last_offset = char_offset;
4010 int last_id = charset_ascii;
df7492f9
KH
4011
4012 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4013 val = charset_list;
4014 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4015 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4016
4017 while (1)
4018 {
4019 int c, c1;
4020
4021 src_base = src;
4022 consumed_chars_base = consumed_chars;
4023
4024 if (charbuf >= charbuf_end)
4025 break;
4026
4027 ONE_MORE_BYTE (c);
4028
4029 if (c == '\r')
4030 {
4031 if (EQ (eol_type, Qdos))
4032 {
4033 if (src == src_end)
98725083
KH
4034 {
4035 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4036 goto no_more_source;
4037 }
df7492f9
KH
4038 if (*src == '\n')
4039 ONE_MORE_BYTE (c);
4ed46869 4040 }
df7492f9
KH
4041 else if (EQ (eol_type, Qmac))
4042 c = '\n';
4043 }
4044 else
4045 {
4046 struct charset *charset;
4047 if (c < 0x80)
4048 charset = charset_roman;
fb88bf2d 4049 else
fb88bf2d 4050 {
54f78171 4051 /* BIG5 -> Big5 */
df7492f9
KH
4052 if (c < 0xA1 || c > 0xFE)
4053 goto invalid_code;
4054 ONE_MORE_BYTE (c1);
4055 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4056 goto invalid_code;
4057 c = c << 8 | c1;
4058 charset = charset_big5;
4ed46869 4059 }
ff0dacd7
KH
4060 if (charset->id != charset_ascii
4061 && last_id != charset->id)
4062 {
4063 if (last_id != charset_ascii)
4064 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4065 last_id = charset->id;
4066 last_offset = char_offset;
4067 }
df7492f9 4068 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 4069 }
4ed46869 4070
df7492f9 4071 *charbuf++ = c;
ff0dacd7 4072 char_offset++;
fb88bf2d
KH
4073 continue;
4074
df7492f9 4075 invalid_code:
4ed46869 4076 src = src_base;
df7492f9
KH
4077 consumed_chars = consumed_chars_base;
4078 ONE_MORE_BYTE (c);
4079 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4080 char_offset++;
df7492f9 4081 coding->errors++;
fb88bf2d 4082 }
d46c5b12 4083
df7492f9 4084 no_more_source:
ff0dacd7
KH
4085 if (last_id != charset_ascii)
4086 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4087 coding->consumed_char += consumed_chars_base;
4088 coding->consumed = src_base - coding->source;
4089 coding->charbuf_used = charbuf - coding->charbuf;
4090}
4091
4092/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4093 This function can encode charsets `ascii', `katakana-jisx0201',
4094 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4095 are sure that all these charsets are registered as official charset
4096 (i.e. do not have extended leading-codes). Characters of other
4097 charsets are produced without any encoding. If SJIS_P is 1, encode
4098 SJIS text, else encode BIG5 text. */
4099
4100static int
4101encode_coding_sjis (coding)
4102 struct coding_system *coding;
4103{
4104 int multibytep = coding->dst_multibyte;
4105 int *charbuf = coding->charbuf;
4106 int *charbuf_end = charbuf + coding->charbuf_used;
4107 unsigned char *dst = coding->destination + coding->produced;
4108 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4109 int safe_room = 4;
4110 int produced_chars = 0;
4111 Lisp_Object attrs, eol_type, charset_list, val;
4112 int ascii_compatible;
4113 struct charset *charset_roman, *charset_kanji, *charset_kana;
4114 int c;
4115
4116 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4117 val = charset_list;
4118 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4119 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4120 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4121
4122 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4123
4124 while (charbuf < charbuf_end)
4125 {
4126 ASSURE_DESTINATION (safe_room);
4127 c = *charbuf++;
4128 /* Now encode the character C. */
4129 if (ASCII_CHAR_P (c) && ascii_compatible)
4130 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4131 else if (CHAR_BYTE8_P (c))
4132 {
4133 c = CHAR_TO_BYTE8 (c);
4134 EMIT_ONE_BYTE (c);
4135 }
df7492f9
KH
4136 else
4137 {
4138 unsigned code;
4139 struct charset *charset = char_charset (c, charset_list, &code);
4140
4141 if (!charset)
4142 {
41cbe562
KH
4143 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4144 {
4145 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4146 charset = CHARSET_FROM_ID (charset_ascii);
4147 }
4148 else
4149 {
4150 c = coding->default_char;
4151 charset = char_charset (c, charset_list, &code);
4152 }
df7492f9
KH
4153 }
4154 if (code == CHARSET_INVALID_CODE (charset))
4155 abort ();
4156 if (charset == charset_kanji)
4157 {
4158 int c1, c2;
4159 JIS_TO_SJIS (code);
4160 c1 = code >> 8, c2 = code & 0xFF;
4161 EMIT_TWO_BYTES (c1, c2);
4162 }
4163 else if (charset == charset_kana)
4164 EMIT_ONE_BYTE (code | 0x80);
4165 else
4166 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4167 }
4168 }
4169 coding->result = CODING_RESULT_SUCCESS;
4170 coding->produced_char += produced_chars;
4171 coding->produced = dst - coding->destination;
4172 return 0;
4173}
4174
4175static int
4176encode_coding_big5 (coding)
4177 struct coding_system *coding;
4178{
4179 int multibytep = coding->dst_multibyte;
4180 int *charbuf = coding->charbuf;
4181 int *charbuf_end = charbuf + coding->charbuf_used;
4182 unsigned char *dst = coding->destination + coding->produced;
4183 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4184 int safe_room = 4;
4185 int produced_chars = 0;
4186 Lisp_Object attrs, eol_type, charset_list, val;
4187 int ascii_compatible;
4188 struct charset *charset_roman, *charset_big5;
4189 int c;
4190
4191 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4192 val = charset_list;
4193 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4194 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4195 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4196
4197 while (charbuf < charbuf_end)
4198 {
4199 ASSURE_DESTINATION (safe_room);
4200 c = *charbuf++;
4201 /* Now encode the character C. */
4202 if (ASCII_CHAR_P (c) && ascii_compatible)
4203 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4204 else if (CHAR_BYTE8_P (c))
4205 {
4206 c = CHAR_TO_BYTE8 (c);
4207 EMIT_ONE_BYTE (c);
4208 }
df7492f9
KH
4209 else
4210 {
4211 unsigned code;
4212 struct charset *charset = char_charset (c, charset_list, &code);
4213
4214 if (! charset)
4215 {
41cbe562
KH
4216 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4217 {
4218 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4219 charset = CHARSET_FROM_ID (charset_ascii);
4220 }
4221 else
4222 {
4223 c = coding->default_char;
4224 charset = char_charset (c, charset_list, &code);
4225 }
df7492f9
KH
4226 }
4227 if (code == CHARSET_INVALID_CODE (charset))
4228 abort ();
4229 if (charset == charset_big5)
4230 {
4231 int c1, c2;
4232
4233 c1 = code >> 8, c2 = code & 0xFF;
4234 EMIT_TWO_BYTES (c1, c2);
4235 }
4236 else
4237 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4238 }
4239 }
4240 coding->result = CODING_RESULT_SUCCESS;
4241 coding->produced_char += produced_chars;
4242 coding->produced = dst - coding->destination;
4243 return 0;
4244}
4245
4246\f
/*** 9. CCL handlers ***/
4248
4249/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4250 Check if a text is encoded in a coding system of which
4251 encoder/decoder are written in CCL program. If it is, return
4252 CATEGORY_MASK_CCL, else return 0. */
4253
4254static int
ff0dacd7 4255detect_coding_ccl (coding, detect_info)
df7492f9 4256 struct coding_system *coding;
ff0dacd7 4257 struct coding_detection_info *detect_info;
df7492f9
KH
4258{
4259 unsigned char *src = coding->source, *src_base = src;
4260 unsigned char *src_end = coding->source + coding->src_bytes;
4261 int multibytep = coding->src_multibyte;
4262 int consumed_chars = 0;
4263 int found = 0;
4264 unsigned char *valids = CODING_CCL_VALIDS (coding);
4265 int head_ascii = coding->head_ascii;
4266 Lisp_Object attrs;
4267
ff0dacd7
KH
4268 detect_info->checked |= CATEGORY_MASK_CCL;
4269
df7492f9
KH
4270 coding = &coding_categories[coding_category_ccl];
4271 attrs = CODING_ID_ATTRS (coding->id);
4272 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4273 src += head_ascii;
4274
4275 while (1)
4276 {
4277 int c;
4278 ONE_MORE_BYTE (c);
4279 if (! valids[c])
4280 break;
ff0dacd7
KH
4281 if ((valids[c] > 1))
4282 found = CATEGORY_MASK_CCL;
df7492f9 4283 }
ff0dacd7 4284 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4285 return 0;
4286
4287 no_more_source:
ff0dacd7
KH
4288 detect_info->found |= found;
4289 return 1;
df7492f9
KH
4290}
4291
4292static void
4293decode_coding_ccl (coding)
4294 struct coding_system *coding;
4295{
7c78e542 4296 const unsigned char *src = coding->source + coding->consumed;
df7492f9
KH
4297 unsigned char *src_end = coding->source + coding->src_bytes;
4298 int *charbuf = coding->charbuf;
4299 int *charbuf_end = charbuf + coding->charbuf_size;
4300 int consumed_chars = 0;
4301 int multibytep = coding->src_multibyte;
4302 struct ccl_program ccl;
4303 int source_charbuf[1024];
4304 int source_byteidx[1024];
8dcbea82 4305 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9 4306
8dcbea82 4307 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4308 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4309
4310 while (src < src_end)
4311 {
7c78e542 4312 const unsigned char *p = src;
df7492f9
KH
4313 int *source, *source_end;
4314 int i = 0;
4315
4316 if (multibytep)
4317 while (i < 1024 && p < src_end)
4318 {
4319 source_byteidx[i] = p - src;
4320 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4321 }
4322 else
4323 while (i < 1024 && p < src_end)
4324 source_charbuf[i++] = *p++;
4325
4326 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4327 ccl.last_block = 1;
4328
4329 source = source_charbuf;
4330 source_end = source + i;
4331 while (source < source_end)
4332 {
4333 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4334 source_end - source, charbuf_end - charbuf,
4335 charset_list);
df7492f9
KH
4336 source += ccl.consumed;
4337 charbuf += ccl.produced;
4338 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4339 break;
4340 }
4341 if (source < source_end)
4342 src += source_byteidx[source - source_charbuf];
4343 else
4344 src = p;
4345 consumed_chars += source - source_charbuf;
4346
4347 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4348 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4349 break;
4350 }
4351
4352 switch (ccl.status)
4353 {
4354 case CCL_STAT_SUSPEND_BY_SRC:
4355 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4356 break;
4357 case CCL_STAT_SUSPEND_BY_DST:
4358 break;
4359 case CCL_STAT_QUIT:
4360 case CCL_STAT_INVALID_CMD:
4361 coding->result = CODING_RESULT_INTERRUPT;
4362 break;
4363 default:
4364 coding->result = CODING_RESULT_SUCCESS;
4365 break;
4366 }
4367 coding->consumed_char += consumed_chars;
4368 coding->consumed = src - coding->source;
4369 coding->charbuf_used = charbuf - coding->charbuf;
4370}
4371
4372static int
4373encode_coding_ccl (coding)
4374 struct coding_system *coding;
4375{
4376 struct ccl_program ccl;
4377 int multibytep = coding->dst_multibyte;
4378 int *charbuf = coding->charbuf;
4379 int *charbuf_end = charbuf + coding->charbuf_used;
4380 unsigned char *dst = coding->destination + coding->produced;
4381 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4382 unsigned char *adjusted_dst_end = dst_end - 1;
4383 int destination_charbuf[1024];
4384 int i, produced_chars = 0;
8dcbea82 4385 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4386
8dcbea82 4387 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4388 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4389
4390 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4391 ccl.dst_multibyte = coding->dst_multibyte;
4392
4393 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4394 {
4395 int dst_bytes = dst_end - dst;
4396 if (dst_bytes > 1024)
4397 dst_bytes = 1024;
4398
4399 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4400 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4401 charbuf += ccl.consumed;
4402 if (multibytep)
4403 for (i = 0; i < ccl.produced; i++)
4404 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4405 else
4406 {
4407 for (i = 0; i < ccl.produced; i++)
4408 *dst++ = destination_charbuf[i] & 0xFF;
4409 produced_chars += ccl.produced;
4410 }
4411 }
4412
4413 switch (ccl.status)
4414 {
4415 case CCL_STAT_SUSPEND_BY_SRC:
4416 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4417 break;
4418 case CCL_STAT_SUSPEND_BY_DST:
4419 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4420 break;
4421 case CCL_STAT_QUIT:
4422 case CCL_STAT_INVALID_CMD:
4423 coding->result = CODING_RESULT_INTERRUPT;
4424 break;
4425 default:
4426 coding->result = CODING_RESULT_SUCCESS;
4427 break;
4428 }
4429
4430 coding->produced_char += produced_chars;
4431 coding->produced = dst - coding->destination;
4432 return 0;
4ed46869
KH
4433}
4434
df7492f9
KH
4435
4436\f
4437/*** 10, 11. no-conversion handlers ***/
4438
4439/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4440
b73bfc1c 4441static void
df7492f9 4442decode_coding_raw_text (coding)
4ed46869 4443 struct coding_system *coding;
4ed46869 4444{
df7492f9 4445 coding->chars_at_source = 1;
2c78b7e1
KH
4446 coding->consumed_char = 0;
4447 coding->consumed = 0;
df7492f9
KH
4448 coding->result = CODING_RESULT_SUCCESS;
4449}
4ed46869 4450
df7492f9
KH
4451static int
4452encode_coding_raw_text (coding)
4453 struct coding_system *coding;
4454{
4455 int multibytep = coding->dst_multibyte;
4456 int *charbuf = coding->charbuf;
4457 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4458 unsigned char *dst = coding->destination + coding->produced;
4459 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4460 int produced_chars = 0;
4461 int c;
a5d301df 4462
df7492f9 4463 if (multibytep)
b73bfc1c 4464 {
df7492f9 4465 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4466
df7492f9
KH
4467 if (coding->src_multibyte)
4468 while (charbuf < charbuf_end)
4469 {
4470 ASSURE_DESTINATION (safe_room);
4471 c = *charbuf++;
4472 if (ASCII_CHAR_P (c))
4473 EMIT_ONE_ASCII_BYTE (c);
4474 else if (CHAR_BYTE8_P (c))
4475 {
4476 c = CHAR_TO_BYTE8 (c);
4477 EMIT_ONE_BYTE (c);
4478 }
4479 else
4480 {
4481 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4482
df7492f9
KH
4483 CHAR_STRING_ADVANCE (c, p1);
4484 while (p0 < p1)
9d123124
KH
4485 {
4486 EMIT_ONE_BYTE (*p0);
4487 p0++;
4488 }
df7492f9
KH
4489 }
4490 }
b73bfc1c 4491 else
df7492f9
KH
4492 while (charbuf < charbuf_end)
4493 {
4494 ASSURE_DESTINATION (safe_room);
4495 c = *charbuf++;
4496 EMIT_ONE_BYTE (c);
4497 }
4498 }
4499 else
4500 {
4501 if (coding->src_multibyte)
b73bfc1c 4502 {
df7492f9
KH
4503 int safe_room = MAX_MULTIBYTE_LENGTH;
4504
4505 while (charbuf < charbuf_end)
b73bfc1c 4506 {
df7492f9
KH
4507 ASSURE_DESTINATION (safe_room);
4508 c = *charbuf++;
4509 if (ASCII_CHAR_P (c))
4510 *dst++ = c;
4511 else if (CHAR_BYTE8_P (c))
4512 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4513 else
df7492f9
KH
4514 CHAR_STRING_ADVANCE (c, dst);
4515 produced_chars++;
b73bfc1c 4516 }
4ed46869 4517 }
df7492f9
KH
4518 else
4519 {
4520 ASSURE_DESTINATION (charbuf_end - charbuf);
4521 while (charbuf < charbuf_end && dst < dst_end)
4522 *dst++ = *charbuf++;
4523 produced_chars = dst - (coding->destination + coding->dst_bytes);
4524 }
4ed46869 4525 }
df7492f9
KH
4526 coding->result = CODING_RESULT_SUCCESS;
4527 coding->produced_char += produced_chars;
4528 coding->produced = dst - coding->destination;
4529 return 0;
4ed46869
KH
4530}
4531
ff0dacd7
KH
4532/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4533 Check if a text is encoded in a charset-based coding system. If it
4534 is, return 1, else return 0. */
4535
0a28aafb 4536static int
ff0dacd7 4537detect_coding_charset (coding, detect_info)
df7492f9 4538 struct coding_system *coding;
ff0dacd7 4539 struct coding_detection_info *detect_info;
1397dc18 4540{
df7492f9
KH
4541 unsigned char *src = coding->source, *src_base = src;
4542 unsigned char *src_end = coding->source + coding->src_bytes;
4543 int multibytep = coding->src_multibyte;
4544 int consumed_chars = 0;
4545 Lisp_Object attrs, valids;
584948ac 4546 int found = 0;
1397dc18 4547
ff0dacd7
KH
4548 detect_info->checked |= CATEGORY_MASK_CHARSET;
4549
df7492f9
KH
4550 coding = &coding_categories[coding_category_charset];
4551 attrs = CODING_ID_ATTRS (coding->id);
4552 valids = AREF (attrs, coding_attr_charset_valids);
4553
4554 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4555 src += coding->head_ascii;
1397dc18 4556
b73bfc1c 4557 while (1)
1397dc18 4558 {
df7492f9 4559 int c;
1397dc18 4560
df7492f9
KH
4561 ONE_MORE_BYTE (c);
4562 if (NILP (AREF (valids, c)))
4563 break;
584948ac 4564 if (c >= 0x80)
ff0dacd7 4565 found = CATEGORY_MASK_CHARSET;
df7492f9 4566 }
ff0dacd7 4567 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4568 return 0;
4ed46869 4569
df7492f9 4570 no_more_source:
ff0dacd7
KH
4571 detect_info->found |= found;
4572 return 1;
df7492f9 4573}
4ed46869 4574
b73bfc1c 4575static void
df7492f9 4576decode_coding_charset (coding)
4ed46869 4577 struct coding_system *coding;
4ed46869 4578{
df7492f9
KH
4579 unsigned char *src = coding->source + coding->consumed;
4580 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4581 unsigned char *src_base;
df7492f9 4582 int *charbuf = coding->charbuf;
ff0dacd7 4583 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4584 int consumed_chars = 0, consumed_chars_base;
4585 int multibytep = coding->src_multibyte;
4eb6d3f1 4586 Lisp_Object attrs, eol_type, charset_list, valids;
ff0dacd7
KH
4587 int char_offset = coding->produced_char;
4588 int last_offset = char_offset;
4589 int last_id = charset_ascii;
df7492f9
KH
4590
4591 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4592 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4593
df7492f9 4594 while (1)
4ed46869 4595 {
4eb6d3f1 4596 int c;
df7492f9
KH
4597
4598 src_base = src;
4599 consumed_chars_base = consumed_chars;
b73bfc1c 4600
df7492f9
KH
4601 if (charbuf >= charbuf_end)
4602 break;
4603
4eb6d3f1 4604 ONE_MORE_BYTE (c);
df7492f9 4605 if (c == '\r')
d46c5b12 4606 {
c7c66a95
KH
4607 /* Here we assume that no charset maps '\r' to something
4608 else. */
df7492f9 4609 if (EQ (eol_type, Qdos))
b73bfc1c 4610 {
98725083
KH
4611 if (src == src_end)
4612 {
4613 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4614 goto no_more_source;
4615 }
4616 if (*src == '\n')
df7492f9 4617 ONE_MORE_BYTE (c);
b73bfc1c 4618 }
df7492f9 4619 else if (EQ (eol_type, Qmac))
b73bfc1c 4620 c = '\n';
d46c5b12 4621 }
df7492f9 4622 else
d46c5b12 4623 {
4eb6d3f1
KH
4624 Lisp_Object val;
4625 struct charset *charset;
c7c66a95 4626 int dim;
acb2a965
KH
4627 int len = 1;
4628 unsigned code = c;
4eb6d3f1
KH
4629
4630 val = AREF (valids, c);
4631 if (NILP (val))
4632 goto invalid_code;
c7c66a95 4633 if (INTEGERP (val))
4eb6d3f1 4634 {
c7c66a95
KH
4635 charset = CHARSET_FROM_ID (XFASTINT (val));
4636 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4637 while (len < dim)
4eb6d3f1 4638 {
acb2a965
KH
4639 ONE_MORE_BYTE (c);
4640 code = (code << 8) | c;
f9d71dcd 4641 len++;
4eb6d3f1 4642 }
c7c66a95
KH
4643 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4644 charset, code, c);
4645 }
4646 else
4647 {
4648 /* VAL is a list of charset IDs. It is assured that the
4649 list is sorted by charset dimensions (smaller one
4650 comes first). */
c7c66a95
KH
4651 while (CONSP (val))
4652 {
4653 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4654 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4655 while (len < dim)
c7c66a95 4656 {
acb2a965
KH
4657 ONE_MORE_BYTE (c);
4658 code = (code << 8) | c;
f9d71dcd 4659 len++;
c7c66a95 4660 }
c7c66a95
KH
4661 CODING_DECODE_CHAR (coding, src, src_base,
4662 src_end, charset, code, c);
4663 if (c >= 0)
4664 break;
4665 val = XCDR (val);
4666 }
4eb6d3f1 4667 }
df7492f9
KH
4668 if (c < 0)
4669 goto invalid_code;
ff0dacd7
KH
4670 if (charset->id != charset_ascii
4671 && last_id != charset->id)
4672 {
4673 if (last_id != charset_ascii)
4674 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4675 last_id = charset->id;
4676 last_offset = char_offset;
4677 }
d46c5b12 4678 }
df7492f9 4679 *charbuf++ = c;
ff0dacd7 4680 char_offset++;
df7492f9
KH
4681 continue;
4682
4683 invalid_code:
4684 src = src_base;
4685 consumed_chars = consumed_chars_base;
4686 ONE_MORE_BYTE (c);
4687 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4688 char_offset++;
df7492f9 4689 coding->errors++;
4ed46869
KH
4690 }
4691
df7492f9 4692 no_more_source:
ff0dacd7
KH
4693 if (last_id != charset_ascii)
4694 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4695 coding->consumed_char += consumed_chars_base;
4696 coding->consumed = src_base - coding->source;
4697 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4698}
4699
df7492f9
KH
4700static int
4701encode_coding_charset (coding)
4ed46869 4702 struct coding_system *coding;
4ed46869 4703{
df7492f9
KH
4704 int multibytep = coding->dst_multibyte;
4705 int *charbuf = coding->charbuf;
4706 int *charbuf_end = charbuf + coding->charbuf_used;
4707 unsigned char *dst = coding->destination + coding->produced;
4708 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4709 int safe_room = MAX_MULTIBYTE_LENGTH;
4710 int produced_chars = 0;
df7492f9
KH
4711 Lisp_Object attrs, eol_type, charset_list;
4712 int ascii_compatible;
b73bfc1c 4713 int c;
b73bfc1c 4714
df7492f9 4715 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4716 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4717
df7492f9 4718 while (charbuf < charbuf_end)
4ed46869 4719 {
4eb6d3f1 4720 struct charset *charset;
df7492f9
KH
4721 unsigned code;
4722
4723 ASSURE_DESTINATION (safe_room);
4724 c = *charbuf++;
4725 if (ascii_compatible && ASCII_CHAR_P (c))
4726 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4727 else if (CHAR_BYTE8_P (c))
4728 {
4729 c = CHAR_TO_BYTE8 (c);
4730 EMIT_ONE_BYTE (c);
4731 }
d46c5b12 4732 else
4eb6d3f1
KH
4733 {
4734 charset = char_charset (c, charset_list, &code);
4735 if (charset)
4736 {
4737 if (CHARSET_DIMENSION (charset) == 1)
4738 EMIT_ONE_BYTE (code);
4739 else if (CHARSET_DIMENSION (charset) == 2)
4740 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4741 else if (CHARSET_DIMENSION (charset) == 3)
4742 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4743 else
4744 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4745 (code >> 8) & 0xFF, code & 0xFF);
4746 }
4747 else
41cbe562
KH
4748 {
4749 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4750 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4751 else
4752 c = coding->default_char;
4753 EMIT_ONE_BYTE (c);
4754 }
4eb6d3f1 4755 }
4ed46869
KH
4756 }
4757
df7492f9
KH
4758 coding->result = CODING_RESULT_SUCCESS;
4759 coding->produced_char += produced_chars;
4760 coding->produced = dst - coding->destination;
4761 return 0;
4ed46869
KH
4762}
4763
4764\f
1397dc18 4765/*** 10. C library functions ***/
4ed46869 4766
df7492f9
KH
4767/* Setup coding context CODING from information about CODING_SYSTEM.
4768 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4769 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4770
4771void
df7492f9
KH
4772setup_coding_system (coding_system, coding)
4773 Lisp_Object coding_system;
ec6d2bb8
KH
4774 struct coding_system *coding;
4775{
df7492f9
KH
4776 Lisp_Object attrs;
4777 Lisp_Object eol_type;
4778 Lisp_Object coding_type;
4779 Lisp_Object val;
ec6d2bb8 4780
df7492f9
KH
4781 if (NILP (coding_system))
4782 coding_system = Qno_conversion;
4783
4784 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4785
4786 attrs = CODING_ID_ATTRS (coding->id);
4787 eol_type = CODING_ID_EOL_TYPE (coding->id);
4788
4789 coding->mode = 0;
4790 coding->head_ascii = -1;
4791 coding->common_flags
4792 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4793
4794 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4795 coding->max_charset_id = XSTRING (val)->size - 1;
4796 coding->safe_charsets = (char *) XSTRING (val)->data;
4797 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4798
4799 coding_type = CODING_ATTR_TYPE (attrs);
4800 if (EQ (coding_type, Qundecided))
4801 {
4802 coding->detector = NULL;
4803 coding->decoder = decode_coding_raw_text;
4804 coding->encoder = encode_coding_raw_text;
4805 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4806 }
4807 else if (EQ (coding_type, Qiso_2022))
4808 {
4809 int i;
4810 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
ff0dacd7 4811 enum coding_category category = XINT (CODING_ATTR_CATEGORY (attrs));
df7492f9
KH
4812
4813 /* Invoke graphic register 0 to plane 0. */
4814 CODING_ISO_INVOCATION (coding, 0) = 0;
4815 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4816 CODING_ISO_INVOCATION (coding, 1)
4817 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4818 /* Setup the initial status of designation. */
4819 for (i = 0; i < 4; i++)
4820 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4821 /* Not single shifting initially. */
4822 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4823 /* Beginning of buffer should also be regarded as bol. */
4824 CODING_ISO_BOL (coding) = 1;
4825 coding->detector = detect_coding_iso_2022;
4826 coding->decoder = decode_coding_iso_2022;
4827 coding->encoder = encode_coding_iso_2022;
4828 if (flags & CODING_ISO_FLAG_SAFE)
4829 coding->mode |= CODING_MODE_SAFE_ENCODING;
4830 coding->common_flags
4831 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4832 | CODING_REQUIRE_FLUSHING_MASK);
4833 if (flags & CODING_ISO_FLAG_COMPOSITION)
4834 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4835 if (flags & CODING_ISO_FLAG_DESIGNATION)
4836 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4837 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4838 {
4839 setup_iso_safe_charsets (attrs);
4840 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4841 coding->max_charset_id = XSTRING (val)->size - 1;
4842 coding->safe_charsets = (char *) XSTRING (val)->data;
4843 }
4844 CODING_ISO_FLAGS (coding) = flags;
4845 }
4846 else if (EQ (coding_type, Qcharset))
4847 {
4848 coding->detector = detect_coding_charset;
4849 coding->decoder = decode_coding_charset;
4850 coding->encoder = encode_coding_charset;
4851 coding->common_flags
4852 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4853 }
4854 else if (EQ (coding_type, Qutf_8))
4855 {
4856 coding->detector = detect_coding_utf_8;
4857 coding->decoder = decode_coding_utf_8;
4858 coding->encoder = encode_coding_utf_8;
4859 coding->common_flags
4860 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4861 }
4862 else if (EQ (coding_type, Qutf_16))
4863 {
4864 val = AREF (attrs, coding_attr_utf_16_bom);
4865 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4866 : EQ (val, Qt) ? utf_16_with_bom
4867 : utf_16_without_bom);
4868 val = AREF (attrs, coding_attr_utf_16_endian);
4869 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4870 : utf_16_little_endian);
e19c3639 4871 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4872 coding->detector = detect_coding_utf_16;
4873 coding->decoder = decode_coding_utf_16;
4874 coding->encoder = encode_coding_utf_16;
4875 coding->common_flags
4876 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4877 }
4878 else if (EQ (coding_type, Qccl))
4879 {
4880 coding->detector = detect_coding_ccl;
4881 coding->decoder = decode_coding_ccl;
4882 coding->encoder = encode_coding_ccl;
4883 coding->common_flags
4884 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4885 | CODING_REQUIRE_FLUSHING_MASK);
4886 }
4887 else if (EQ (coding_type, Qemacs_mule))
4888 {
4889 coding->detector = detect_coding_emacs_mule;
4890 coding->decoder = decode_coding_emacs_mule;
4891 coding->encoder = encode_coding_emacs_mule;
4892 coding->common_flags
4893 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4894 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4895 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4896 {
4897 Lisp_Object tail, safe_charsets;
4898 int max_charset_id = 0;
4899
4900 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4901 tail = XCDR (tail))
4902 if (max_charset_id < XFASTINT (XCAR (tail)))
4903 max_charset_id = XFASTINT (XCAR (tail));
4904 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4905 make_number (255));
4906 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4907 tail = XCDR (tail))
4908 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4909 coding->max_charset_id = max_charset_id;
4910 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4911 }
4912 }
4913 else if (EQ (coding_type, Qshift_jis))
4914 {
4915 coding->detector = detect_coding_sjis;
4916 coding->decoder = decode_coding_sjis;
4917 coding->encoder = encode_coding_sjis;
4918 coding->common_flags
4919 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4920 }
4921 else if (EQ (coding_type, Qbig5))
4922 {
4923 coding->detector = detect_coding_big5;
4924 coding->decoder = decode_coding_big5;
4925 coding->encoder = encode_coding_big5;
4926 coding->common_flags
4927 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4928 }
4929 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4930 {
df7492f9
KH
4931 coding->detector = NULL;
4932 coding->decoder = decode_coding_raw_text;
4933 coding->encoder = encode_coding_raw_text;
4934 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4935 }
df7492f9
KH
4936
4937 return;
ec6d2bb8
KH
4938}
4939
df7492f9
KH
4940/* Return raw-text or one of its subsidiaries that has the same
4941 eol_type as CODING-SYSTEM. */
ec6d2bb8 4942
df7492f9
KH
4943Lisp_Object
4944raw_text_coding_system (coding_system)
4945 Lisp_Object coding_system;
ec6d2bb8 4946{
0be8721c 4947 Lisp_Object spec, attrs;
df7492f9
KH
4948 Lisp_Object eol_type, raw_text_eol_type;
4949
4950 spec = CODING_SYSTEM_SPEC (coding_system);
4951 attrs = AREF (spec, 0);
4952
4953 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4954 return coding_system;
ec6d2bb8 4955
df7492f9
KH
4956 eol_type = AREF (spec, 2);
4957 if (VECTORP (eol_type))
4958 return Qraw_text;
4959 spec = CODING_SYSTEM_SPEC (Qraw_text);
4960 raw_text_eol_type = AREF (spec, 2);
4961 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4962 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4963 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4964}
4965
54f78171 4966
df7492f9
KH
4967/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4968 does, return one of the subsidiary that has the same eol-spec as
4969 PARENT. Otherwise, return CODING_SYSTEM. */
4970
4971Lisp_Object
4972coding_inherit_eol_type (coding_system, parent)
b74e4686 4973 Lisp_Object coding_system, parent;
54f78171 4974{
df7492f9 4975 Lisp_Object spec, attrs, eol_type;
54f78171 4976
df7492f9
KH
4977 spec = CODING_SYSTEM_SPEC (coding_system);
4978 attrs = AREF (spec, 0);
4979 eol_type = AREF (spec, 2);
4980 if (VECTORP (eol_type))
4981 {
4982 Lisp_Object parent_spec;
df7492f9
KH
4983 Lisp_Object parent_eol_type;
4984
4985 parent_spec
4986 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4987 parent_eol_type = AREF (parent_spec, 2);
4988 if (EQ (parent_eol_type, Qunix))
4989 coding_system = AREF (eol_type, 0);
4990 else if (EQ (parent_eol_type, Qdos))
4991 coding_system = AREF (eol_type, 1);
4992 else if (EQ (parent_eol_type, Qmac))
4993 coding_system = AREF (eol_type, 2);
54f78171 4994 }
df7492f9 4995 return coding_system;
54f78171
KH
4996}
4997
4ed46869
KH
4998/* Emacs has a mechanism to automatically detect a coding system if it
4999 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5000 it's impossible to distinguish some coding systems accurately
5001 because they use the same range of codes. So, at first, coding
5002 systems are grouped into the following categories:
5003
0ef69138 5004 o coding-category-emacs-mule
4ed46869
KH
5005
5006 The category for a coding system which has the same code range
5007 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5008 symbol) `emacs-mule' by default.
4ed46869
KH
5009
5010 o coding-category-sjis
5011
5012 The category for a coding system which has the same code range
5013 as SJIS. Assigned the coding-system (Lisp
7717c392 5014 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5015
5016 o coding-category-iso-7
5017
5018 The category for a coding system which has the same code range
7717c392 5019 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5020 shift and single shift functions. This can encode/decode all
5021 charsets. Assigned the coding-system (Lisp symbol)
5022 `iso-2022-7bit' by default.
5023
5024 o coding-category-iso-7-tight
5025
5026 Same as coding-category-iso-7 except that this can
5027 encode/decode only the specified charsets.
4ed46869
KH
5028
5029 o coding-category-iso-8-1
5030
5031 The category for a coding system which has the same code range
5032 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5033 for DIMENSION1 charset. This doesn't use any locking shift
5034 and single shift functions. Assigned the coding-system (Lisp
5035 symbol) `iso-latin-1' by default.
4ed46869
KH
5036
5037 o coding-category-iso-8-2
5038
5039 The category for a coding system which has the same code range
5040 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5041 for DIMENSION2 charset. This doesn't use any locking shift
5042 and single shift functions. Assigned the coding-system (Lisp
5043 symbol) `japanese-iso-8bit' by default.
4ed46869 5044
7717c392 5045 o coding-category-iso-7-else
4ed46869
KH
5046
5047 The category for a coding system which has the same code range
df7492f9 5048 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5049 single shift functions. Assigned the coding-system (Lisp
5050 symbol) `iso-2022-7bit-lock' by default.
5051
5052 o coding-category-iso-8-else
5053
5054 The category for a coding system which has the same code range
df7492f9 5055 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5056 single shift functions. Assigned the coding-system (Lisp
5057 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5058
5059 o coding-category-big5
5060
5061 The category for a coding system which has the same code range
5062 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5063 `cn-big5' by default.
4ed46869 5064
fa42c37f
KH
5065 o coding-category-utf-8
5066
5067 The category for a coding system which has the same code range
5068 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5069 symbol) `utf-8' by default.
5070
5071 o coding-category-utf-16-be
5072
5073 The category for a coding system in which a text has an
5074 Unicode signature (cf. Unicode Standard) in the order of BIG
5075 endian at the head. Assigned the coding-system (Lisp symbol)
5076 `utf-16-be' by default.
5077
5078 o coding-category-utf-16-le
5079
5080 The category for a coding system in which a text has an
5081 Unicode signature (cf. Unicode Standard) in the order of
5082 LITTLE endian at the head. Assigned the coding-system (Lisp
5083 symbol) `utf-16-le' by default.
5084
1397dc18
KH
5085 o coding-category-ccl
5086
5087 The category for a coding system of which encoder/decoder is
5088 written in CCL programs. The default value is nil, i.e., no
5089 coding system is assigned.
5090
4ed46869
KH
5091 o coding-category-binary
5092
5093 The category for a coding system not categorized in any of the
5094 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5095 `no-conversion' by default.
4ed46869
KH
5096
5097 Each of them is a Lisp symbol and the value is an actual
df7492f9 5098 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5099 What Emacs does actually is to detect a category of coding system.
5100 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5101 decide only one possible category, it selects a category of the
4ed46869
KH
5102 highest priority. Priorities of categories are also specified by a
5103 user in a Lisp variable `coding-category-list'.
5104
5105*/
5106
df7492f9
KH
5107#define EOL_SEEN_NONE 0
5108#define EOL_SEEN_LF 1
5109#define EOL_SEEN_CR 2
5110#define EOL_SEEN_CRLF 4
4ed46869 5111
ff0dacd7
KH
5112/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5113 SOURCE is encoded. If CATEGORY is one of
5114 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5115 two-byte, else they are encoded by one-byte.
5116
5117 Return one of EOL_SEEN_XXX. */
4ed46869 5118
bc4bc72a
RS
5119#define MAX_EOL_CHECK_COUNT 3
5120
d46c5b12 5121static int
89528eb3 5122detect_eol (source, src_bytes, category)
d46c5b12 5123 unsigned char *source;
df7492f9 5124 EMACS_INT src_bytes;
89528eb3 5125 enum coding_category category;
4ed46869 5126{
d46c5b12 5127 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5128 unsigned char c;
df7492f9
KH
5129 int total = 0;
5130 int eol_seen = EOL_SEEN_NONE;
4ed46869 5131
89528eb3 5132 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5133 {
df7492f9 5134 int msb, lsb;
fa42c37f 5135
89528eb3
KH
5136 msb = category == (coding_category_utf_16_le
5137 | coding_category_utf_16_le_nosig);
df7492f9 5138 lsb = 1 - msb;
fa42c37f 5139
df7492f9 5140 while (src + 1 < src_end)
fa42c37f 5141 {
df7492f9
KH
5142 c = src[lsb];
5143 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5144 {
df7492f9
KH
5145 int this_eol;
5146
5147 if (c == '\n')
5148 this_eol = EOL_SEEN_LF;
5149 else if (src + 3 >= src_end
5150 || src[msb + 2] != 0
5151 || src[lsb + 2] != '\n')
5152 this_eol = EOL_SEEN_CR;
fa42c37f 5153 else
df7492f9
KH
5154 this_eol = EOL_SEEN_CRLF;
5155
5156 if (eol_seen == EOL_SEEN_NONE)
5157 /* This is the first end-of-line. */
5158 eol_seen = this_eol;
5159 else if (eol_seen != this_eol)
fa42c37f 5160 {
df7492f9
KH
5161 /* The found type is different from what found before. */
5162 eol_seen = EOL_SEEN_LF;
5163 break;
fa42c37f 5164 }
df7492f9
KH
5165 if (++total == MAX_EOL_CHECK_COUNT)
5166 break;
fa42c37f 5167 }
df7492f9 5168 src += 2;
fa42c37f 5169 }
df7492f9 5170 }
d46c5b12 5171 else
27901516 5172 {
df7492f9 5173 while (src < src_end)
27901516 5174 {
df7492f9
KH
5175 c = *src++;
5176 if (c == '\n' || c == '\r')
5177 {
5178 int this_eol;
d46c5b12 5179
df7492f9
KH
5180 if (c == '\n')
5181 this_eol = EOL_SEEN_LF;
5182 else if (src >= src_end || *src != '\n')
5183 this_eol = EOL_SEEN_CR;
5184 else
5185 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5186
df7492f9
KH
5187 if (eol_seen == EOL_SEEN_NONE)
5188 /* This is the first end-of-line. */
5189 eol_seen = this_eol;
5190 else if (eol_seen != this_eol)
5191 {
5192 /* The found type is different from what found before. */
5193 eol_seen = EOL_SEEN_LF;
5194 break;
5195 }
5196 if (++total == MAX_EOL_CHECK_COUNT)
5197 break;
5198 }
5199 }
73be902c 5200 }
df7492f9 5201 return eol_seen;
73be902c
KH
5202}
5203
df7492f9 5204
73be902c 5205static void
df7492f9
KH
5206adjust_coding_eol_type (coding, eol_seen)
5207 struct coding_system *coding;
5208 int eol_seen;
73be902c 5209{
0be8721c 5210 Lisp_Object eol_type;
df7492f9
KH
5211
5212 eol_type = CODING_ID_EOL_TYPE (coding->id);
5213 if (eol_seen & EOL_SEEN_LF)
5214 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6f197c07 5215 else if (eol_seen & EOL_SEEN_CRLF)
df7492f9 5216 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6f197c07 5217 else if (eol_seen & EOL_SEEN_CR)
df7492f9 5218 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
5219}
5220
df7492f9
KH
5221/* Detect how a text specified in CODING is encoded. If a coding
5222 system is detected, update fields of CODING by the detected coding
5223 system. */
5224
5225void
5226detect_coding (coding)
d46c5b12 5227 struct coding_system *coding;
d46c5b12 5228{
df7492f9
KH
5229 unsigned char *src, *src_end;
5230 Lisp_Object attrs, coding_type;
d46c5b12 5231
df7492f9
KH
5232 coding->consumed = coding->consumed_char = 0;
5233 coding->produced = coding->produced_char = 0;
5234 coding_set_source (coding);
1c3478b0 5235
df7492f9 5236 src_end = coding->source + coding->src_bytes;
1c3478b0 5237
df7492f9
KH
5238 /* If we have not yet decided the text encoding type, detect it
5239 now. */
5240 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5241 {
df7492f9
KH
5242 int c, i;
5243
5244 for (src = coding->source; src < src_end; src++)
5245 {
5246 c = *src;
5247 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5248 || c == ISO_CODE_SI
5249 || c == ISO_CODE_SO)))
5250 break;
5251 }
5252 coding->head_ascii = src - (coding->source + coding->consumed);
5253
5254 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5255 {
ff0dacd7
KH
5256 struct coding_detection_info detect_info;
5257 enum coding_category category;
5258 struct coding_system *this;
df7492f9 5259
ff0dacd7 5260 detect_info.checked = detect_info.found = detect_info.rejected = 0;
df7492f9 5261 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5262 {
ff0dacd7
KH
5263 category = coding_priorities[i];
5264 this = coding_categories + category;
df7492f9 5265 if (this->id < 0)
1c3478b0 5266 {
df7492f9 5267 /* No coding system of this category is defined. */
ff0dacd7 5268 detect_info.rejected |= (1 << category);
df7492f9 5269 }
ff0dacd7 5270 else if (category >= coding_category_raw_text)
89528eb3 5271 continue;
ff0dacd7 5272 else if (detect_info.checked & (1 << category))
df7492f9 5273 {
ff0dacd7
KH
5274 if (detect_info.found & (1 << category))
5275 break;
1c3478b0 5276 }
ff0dacd7
KH
5277 else if ((*(this->detector)) (coding, &detect_info)
5278 && detect_info.found & (1 << category))
5279 break;
1c3478b0 5280 }
ff0dacd7
KH
5281 if (i < coding_category_raw_text)
5282 setup_coding_system (CODING_ID_NAME (this->id), coding);
5283 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5284 setup_coding_system (Qraw_text, coding);
ff0dacd7 5285 else if (detect_info.rejected)
df7492f9 5286 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5287 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5288 {
5289 this = coding_categories + coding_priorities[i];
5290 setup_coding_system (CODING_ID_NAME (this->id), coding);
5291 break;
5292 }
1c3478b0 5293 }
b73bfc1c 5294 }
69f76525 5295
df7492f9
KH
5296 attrs = CODING_ID_ATTRS (coding->id);
5297 coding_type = CODING_ATTR_TYPE (attrs);
5298
5299 /* If we have not yet decided the EOL type, detect it now. But, the
5300 detection is impossible for a CCL based coding system, in which
5301 case, we detct the EOL type after decoding. */
5302 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5303 && ! EQ (coding_type, Qccl))
d46c5b12 5304 {
89528eb3
KH
5305 int eol_seen = detect_eol (coding->source, coding->src_bytes,
5306 XINT (CODING_ATTR_CATEGORY (attrs)));
df7492f9
KH
5307
5308 if (eol_seen != EOL_SEEN_NONE)
5309 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5310 }
4ed46869
KH
5311}
5312
aaaf0b1e
KH
5313
5314static void
df7492f9 5315decode_eol (coding)
aaaf0b1e 5316 struct coding_system *coding;
aaaf0b1e 5317{
df7492f9 5318 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5319 {
df7492f9
KH
5320 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5321 unsigned char *pend = p + coding->produced;
5322 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5323
df7492f9 5324 for (; p < pend; p++)
aaaf0b1e 5325 {
df7492f9
KH
5326 if (*p == '\n')
5327 eol_seen |= EOL_SEEN_LF;
5328 else if (*p == '\r')
aaaf0b1e 5329 {
df7492f9 5330 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5331 {
df7492f9
KH
5332 eol_seen |= EOL_SEEN_CRLF;
5333 p++;
aaaf0b1e 5334 }
aaaf0b1e 5335 else
df7492f9 5336 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5337 }
aaaf0b1e 5338 }
df7492f9
KH
5339 if (eol_seen != EOL_SEEN_NONE)
5340 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5341 }
aaaf0b1e 5342
df7492f9
KH
5343 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5344 {
5345 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5346 unsigned char *pend = p + coding->produced;
5347
5348 for (; p < pend; p++)
5349 if (*p == '\r')
5350 *p = '\n';
5351 }
5352 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5353 {
5354 unsigned char *p, *pbeg, *pend;
5355 Lisp_Object undo_list;
5356
5357 move_gap_both (coding->dst_pos + coding->produced_char,
5358 coding->dst_pos_byte + coding->produced);
5359 undo_list = current_buffer->undo_list;
5360 current_buffer->undo_list = Qt;
c197f191 5361 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5362 current_buffer->undo_list = undo_list;
5363 pbeg = GPT_ADDR;
5364 pend = pbeg + coding->produced;
5365
5366 for (p = pend - 1; p >= pbeg; p--)
5367 if (*p == '\r')
5368 {
5369 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5370 pend--;
5371 }
5372 coding->produced_char -= coding->produced - (pend - pbeg);
5373 coding->produced = pend - pbeg;
5374 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5375 }
5376}
5377
df7492f9
KH
5378static void
5379translate_chars (coding, table)
4ed46869 5380 struct coding_system *coding;
df7492f9 5381 Lisp_Object table;
4ed46869 5382{
df7492f9
KH
5383 int *charbuf = coding->charbuf;
5384 int *charbuf_end = charbuf + coding->charbuf_used;
5385 int c;
5386
5387 if (coding->chars_at_source)
5388 return;
4ed46869 5389
df7492f9 5390 while (charbuf < charbuf_end)
8844fa83 5391 {
df7492f9
KH
5392 c = *charbuf;
5393 if (c < 0)
5394 charbuf += c;
5395 else
5396 *charbuf++ = translate_char (table, c);
8844fa83 5397 }
df7492f9 5398}
4ed46869 5399
df7492f9
KH
5400static int
5401produce_chars (coding)
5402 struct coding_system *coding;
5403{
5404 unsigned char *dst = coding->destination + coding->produced;
5405 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5406 int produced;
5407 int produced_chars = 0;
b73bfc1c 5408
df7492f9 5409 if (! coding->chars_at_source)
4ed46869 5410 {
df7492f9
KH
5411 /* Characters are in coding->charbuf. */
5412 int *buf = coding->charbuf;
5413 int *buf_end = buf + coding->charbuf_used;
5414 unsigned char *adjusted_dst_end;
4ed46869 5415
df7492f9
KH
5416 if (BUFFERP (coding->src_object)
5417 && EQ (coding->src_object, coding->dst_object))
5418 dst_end = coding->source + coding->consumed;
5419 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5420
df7492f9
KH
5421 while (buf < buf_end)
5422 {
5423 int c = *buf++;
5424
5425 if (dst >= adjusted_dst_end)
5426 {
5427 dst = alloc_destination (coding,
5428 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5429 dst);
5430 dst_end = coding->destination + coding->dst_bytes;
5431 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5432 }
5433 if (c >= 0)
5434 {
5435 if (coding->dst_multibyte
5436 || ! CHAR_BYTE8_P (c))
5437 CHAR_STRING_ADVANCE (c, dst);
5438 else
5439 *dst++ = CHAR_TO_BYTE8 (c);
5440 produced_chars++;
5441 }
5442 else
5443 /* This is an annotation data. */
5444 buf -= c + 1;
5445 }
5446 }
5447 else
5448 {
df7492f9
KH
5449 unsigned char *src = coding->source;
5450 unsigned char *src_end = src + coding->src_bytes;
5451 Lisp_Object eol_type;
b73bfc1c 5452
df7492f9 5453 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5454
df7492f9 5455 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5456 {
df7492f9
KH
5457 if (coding->src_multibyte)
5458 {
71c81426 5459 int multibytep = 1;
df7492f9 5460 int consumed_chars;
d46c5b12 5461
df7492f9
KH
5462 while (1)
5463 {
5464 unsigned char *src_base = src;
5465 int c;
b73bfc1c 5466
df7492f9
KH
5467 ONE_MORE_BYTE (c);
5468 if (c == '\r')
5469 {
5470 if (EQ (eol_type, Qdos))
5471 {
98725083
KH
5472 if (src == src_end)
5473 {
5474 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5475 goto no_more_source;
5476 }
5477 if (*src == '\n')
df7492f9
KH
5478 c = *src++;
5479 }
5480 else if (EQ (eol_type, Qmac))
5481 c = '\n';
5482 }
5483 if (dst == dst_end)
5484 {
2c78b7e1 5485 coding->consumed = src - coding->source;
b73bfc1c 5486
2c78b7e1
KH
5487 if (EQ (coding->src_object, coding->dst_object))
5488 dst_end = src;
5489 if (dst == dst_end)
5490 {
5491 dst = alloc_destination (coding, src_end - src + 1,
5492 dst);
5493 dst_end = coding->destination + coding->dst_bytes;
5494 coding_set_source (coding);
5495 src = coding->source + coding->consumed;
5496 src_end = coding->source + coding->src_bytes;
5497 }
df7492f9
KH
5498 }
5499 *dst++ = c;
5500 produced_chars++;
5501 }
5502 no_more_source:
5503 ;
5504 }
5505 else
5506 while (src < src_end)
5507 {
71c81426 5508 int multibytep = 1;
df7492f9 5509 int c = *src++;
b73bfc1c 5510
df7492f9
KH
5511 if (c == '\r')
5512 {
5513 if (EQ (eol_type, Qdos))
5514 {
5515 if (src < src_end
5516 && *src == '\n')
5517 c = *src++;
5518 }
5519 else if (EQ (eol_type, Qmac))
5520 c = '\n';
5521 }
5522 if (dst >= dst_end - 1)
5523 {
2c78b7e1 5524 coding->consumed = src - coding->source;
df7492f9 5525
2c78b7e1
KH
5526 if (EQ (coding->src_object, coding->dst_object))
5527 dst_end = src;
5528 if (dst >= dst_end - 1)
5529 {
5530 dst = alloc_destination (coding, src_end - src + 2,
5531 dst);
5532 dst_end = coding->destination + coding->dst_bytes;
5533 coding_set_source (coding);
5534 src = coding->source + coding->consumed;
5535 src_end = coding->source + coding->src_bytes;
5536 }
df7492f9
KH
5537 }
5538 EMIT_ONE_BYTE (c);
5539 }
d46c5b12 5540 }
df7492f9
KH
5541 else
5542 {
5543 if (!EQ (coding->src_object, coding->dst_object))
5544 {
5545 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5546
df7492f9
KH
5547 if (require > 0)
5548 {
5549 EMACS_INT offset = src - coding->source;
5550
5551 dst = alloc_destination (coding, require, dst);
5552 coding_set_source (coding);
5553 src = coding->source + offset;
5554 src_end = coding->source + coding->src_bytes;
5555 }
5556 }
5557 produced_chars = coding->src_chars;
5558 while (src < src_end)
5559 {
5560 int c = *src++;
5561
5562 if (c == '\r')
5563 {
5564 if (EQ (eol_type, Qdos))
5565 {
5566 if (src < src_end
5567 && *src == '\n')
5568 c = *src++;
5569 produced_chars--;
5570 }
5571 else if (EQ (eol_type, Qmac))
5572 c = '\n';
5573 }
5574 *dst++ = c;
5575 }
5576 }
2c78b7e1
KH
5577 coding->consumed = coding->src_bytes;
5578 coding->consumed_char = coding->src_chars;
b73bfc1c 5579 }
4ed46869 5580
df7492f9
KH
5581 produced = dst - (coding->destination + coding->produced);
5582 if (BUFFERP (coding->dst_object))
5583 insert_from_gap (produced_chars, produced);
5584 coding->produced += produced;
5585 coding->produced_char += produced_chars;
5586 return produced_chars;
b73bfc1c 5587}
52d41803 5588
ff0dacd7
KH
5589/* Compose text in CODING->object according to the annotation data at
5590 CHARBUF. CHARBUF is an array:
5591 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5592 */
4ed46869 5593
df7492f9
KH
5594static INLINE void
5595produce_composition (coding, charbuf)
4ed46869 5596 struct coding_system *coding;
df7492f9 5597 int *charbuf;
4ed46869 5598{
df7492f9 5599 int len;
ff0dacd7 5600 EMACS_INT from, to;
df7492f9 5601 enum composition_method method;
df7492f9
KH
5602 Lisp_Object components;
5603
df7492f9 5604 len = -charbuf[0];
ff0dacd7
KH
5605 from = coding->dst_pos + charbuf[2];
5606 to = coding->dst_pos + charbuf[3];
5607 method = (enum composition_method) (charbuf[4]);
df7492f9
KH
5608
5609 if (method == COMPOSITION_RELATIVE)
5610 components = Qnil;
5611 else
d46c5b12 5612 {
df7492f9
KH
5613 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5614 int i;
4ed46869 5615
df7492f9
KH
5616 len -= 5;
5617 charbuf += 5;
5618 for (i = 0; i < len; i++)
5619 args[i] = make_number (charbuf[i]);
5620 components = (method == COMPOSITION_WITH_ALTCHARS
5621 ? Fstring (len, args) : Fvector (len, args));
5622 }
ff0dacd7 5623 compose_text (from, to, components, Qnil, coding->dst_object);
df7492f9 5624}
b73bfc1c 5625
d46c5b12 5626
ff0dacd7
KH
5627/* Put `charset' property on text in CODING->object according to
5628 the annotation data at CHARBUF. CHARBUF is an array:
5629 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5630 */
b73bfc1c 5631
ff0dacd7
KH
5632static INLINE void
5633produce_charset (coding, charbuf)
5634 struct coding_system *coding;
5635 int *charbuf;
5636{
5637 EMACS_INT from = coding->dst_pos + charbuf[2];
5638 EMACS_INT to = coding->dst_pos + charbuf[3];
5639 struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
b73bfc1c 5640
ff0dacd7
KH
5641 Fput_text_property (make_number (from), make_number (to),
5642 Qcharset, CHARSET_NAME (charset),
5643 coding->dst_object);
4ed46869
KH
5644}
5645
ff0dacd7 5646
df7492f9
KH
/* Initial number of `int' elements requested for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the character work buffer of CODING on the stack of the
   calling function and record its size in CODING->charbuf_size.

   The request starts at CHARBUF_SIZE elements and is halved on each
   failed attempt while it is still above 1024.  If no buffer could be
   obtained, CODING->result is set to CODING_RESULT_INSUFFICIENT_MEM
   and the *calling function* returns that value, so this macro can
   only be used in functions returning an integer.

   Because the memory comes from alloca, it is valid only until the
   calling function returns.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        (coding)->result = CODING_RESULT_INSUFFICIENT_MEM;              \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
4ed46869 5668
d46c5b12
KH
5669
5670static void
df7492f9 5671produce_annotation (coding)
d46c5b12 5672 struct coding_system *coding;
d46c5b12 5673{
df7492f9
KH
5674 int *charbuf = coding->charbuf;
5675 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5676
ff0dacd7
KH
5677 if (NILP (coding->dst_object))
5678 return;
5679
df7492f9 5680 while (charbuf < charbuf_end)
d46c5b12 5681 {
df7492f9
KH
5682 if (*charbuf >= 0)
5683 charbuf++;
d46c5b12 5684 else
d46c5b12 5685 {
df7492f9 5686 int len = -*charbuf;
ff0dacd7 5687 switch (charbuf[1])
df7492f9
KH
5688 {
5689 case CODING_ANNOTATE_COMPOSITION_MASK:
5690 produce_composition (coding, charbuf);
5691 break;
ff0dacd7
KH
5692 case CODING_ANNOTATE_CHARSET_MASK:
5693 produce_charset (coding, charbuf);
5694 break;
df7492f9
KH
5695 default:
5696 abort ();
5697 }
5698 charbuf += len;
d46c5b12 5699 }
df7492f9
KH
5700 }
5701}
d46c5b12 5702
df7492f9
KH
5703/* Decode the data at CODING->src_object into CODING->dst_object.
5704 CODING->src_object is a buffer, a string, or nil.
5705 CODING->dst_object is a buffer.
de79a6a5 5706
df7492f9
KH
5707 If CODING->src_object is a buffer, it must be the current buffer.
5708 In this case, if CODING->src_pos is positive, it is a position of
5709 the source text in the buffer, otherwise, the source text is in the
5710 gap area of the buffer, and CODING->src_pos specifies the offset of
5711 the text from GPT (which must be the same as PT). If this is the
5712 same buffer as CODING->dst_object, CODING->src_pos must be
5713 negative.
b73bfc1c 5714
df7492f9
KH
5715 If CODING->src_object is a string, CODING->src_pos is an index to
5716 that string.
d46c5b12 5717
df7492f9
KH
5718 If CODING->src_object is nil, CODING->source must already point to
5719 the non-relocatable memory area. In this case, CODING->src_pos is
5720 an offset from CODING->source.
d46c5b12 5721
df7492f9
KH
5722 The decoded data is inserted at the current point of the buffer
5723 CODING->dst_object.
5724*/
5725
5726static int
5727decode_coding (coding)
d46c5b12 5728 struct coding_system *coding;
d46c5b12 5729{
df7492f9 5730 Lisp_Object attrs;
d46c5b12 5731
df7492f9
KH
5732 if (BUFFERP (coding->src_object)
5733 && coding->src_pos > 0
5734 && coding->src_pos < GPT
5735 && coding->src_pos + coding->src_chars > GPT)
5736 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5737
df7492f9 5738 if (BUFFERP (coding->dst_object))
88993dfd 5739 {
df7492f9
KH
5740 if (current_buffer != XBUFFER (coding->dst_object))
5741 set_buffer_internal (XBUFFER (coding->dst_object));
5742 if (GPT != PT)
5743 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5744 }
5745
df7492f9
KH
5746 coding->consumed = coding->consumed_char = 0;
5747 coding->produced = coding->produced_char = 0;
5748 coding->chars_at_source = 0;
5749 coding->result = CODING_RESULT_SUCCESS;
5750 coding->errors = 0;
5751
5752 ALLOC_CONVERSION_WORK_AREA (coding);
5753
5754 attrs = CODING_ID_ATTRS (coding->id);
5755
5756 do
d46c5b12 5757 {
df7492f9
KH
5758 coding_set_source (coding);
5759 coding->annotated = 0;
5760 (*(coding->decoder)) (coding);
5761 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
da4109a9
KH
5762 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5763 else if (!NILP (Vstandard_translation_table_for_decode))
5764 translate_chars (coding, Vstandard_translation_table_for_decode);
df7492f9
KH
5765 coding_set_destination (coding);
5766 produce_chars (coding);
5767 if (coding->annotated)
5768 produce_annotation (coding);
d46c5b12 5769 }
df7492f9
KH
5770 while (coding->consumed < coding->src_bytes
5771 && ! coding->result);
d46c5b12 5772
df7492f9
KH
5773 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5774 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5775 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5776 decode_eol (coding);
d46c5b12 5777
df7492f9
KH
5778 coding->carryover_bytes = 0;
5779 if (coding->consumed < coding->src_bytes)
d46c5b12 5780 {
df7492f9
KH
5781 int nbytes = coding->src_bytes - coding->consumed;
5782 unsigned char *src;
5783
5784 coding_set_source (coding);
5785 coding_set_destination (coding);
5786 src = coding->source + coding->consumed;
5787
5788 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5789 {
df7492f9
KH
5790 /* Flush out unprocessed data as binary chars. We are sure
5791 that the number of data is less than the size of
5792 coding->charbuf. */
df7492f9 5793 while (nbytes-- > 0)
d46c5b12 5794 {
df7492f9 5795 int c = *src++;
98725083
KH
5796
5797 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
d46c5b12 5798 }
df7492f9 5799 produce_chars (coding);
d46c5b12 5800 }
d46c5b12 5801 else
df7492f9
KH
5802 {
5803 /* Record unprocessed bytes in coding->carryover. We are
5804 sure that the number of data is less than the size of
5805 coding->carryover. */
5806 unsigned char *p = coding->carryover;
5807
5808 coding->carryover_bytes = nbytes;
5809 while (nbytes-- > 0)
5810 *p++ = *src++;
5811 }
5812 coding->consumed = coding->src_bytes;
5813 }
b73bfc1c 5814
df7492f9 5815 return coding->result;
d46c5b12
KH
5816}
5817
ff0dacd7
KH
5818
5819/* Extract an annotation data from a composition starting at POS and
5820 ending before LIMIT of CODING->src_object (buffer or string), store
5821 the data in BUF, set *STOP to a starting position of the next
5822 composition (if any) or to LIMIT, and return the address of the
5823 next element of BUF.
5824
5825 If such an annotation is not found, set *STOP to a starting
5826 position of a composition after POS (if any) or to LIMIT, and
5827 return BUF. */
5828
5829static INLINE int *
5830handle_composition_annotation (pos, limit, coding, buf, stop)
5831 EMACS_INT pos, limit;
5832 struct coding_system *coding;
5833 int *buf;
5834 EMACS_INT *stop;
5835{
5836 EMACS_INT start, end;
5837 Lisp_Object prop;
5838
5839 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5840 || end > limit)
5841 *stop = limit;
5842 else if (start > pos)
5843 *stop = start;
5844 else
5845 {
5846 if (start == pos)
5847 {
5848 /* We found a composition. Store the corresponding
5849 annotation data in BUF. */
5850 int *head = buf;
5851 enum composition_method method = COMPOSITION_METHOD (prop);
5852 int nchars = COMPOSITION_LENGTH (prop);
5853
5854 ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5855 if (method != COMPOSITION_RELATIVE)
5856 {
5857 Lisp_Object components;
5858 int len, i, i_byte;
5859
5860 components = COMPOSITION_COMPONENTS (prop);
5861 if (VECTORP (components))
5862 {
5863 len = XVECTOR (components)->size;
5864 for (i = 0; i < len; i++)
5865 *buf++ = XINT (AREF (components, i));
5866 }
5867 else if (STRINGP (components))
5868 {
5869 len = XSTRING (components)->size;
5870 i = i_byte = 0;
5871 while (i < len)
5872 {
5873 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5874 buf++;
5875 }
5876 }
5877 else if (INTEGERP (components))
5878 {
5879 len = 1;
5880 *buf++ = XINT (components);
5881 }
5882 else if (CONSP (components))
5883 {
5884 for (len = 0; CONSP (components);
5885 len++, components = XCDR (components))
5886 *buf++ = XINT (XCAR (components));
5887 }
5888 else
5889 abort ();
5890 *head -= len;
5891 }
5892 }
5893
5894 if (find_composition (end, limit, &start, &end, &prop,
5895 coding->src_object)
5896 && end <= limit)
5897 *stop = start;
5898 else
5899 *stop = limit;
5900 }
5901 return buf;
5902}
5903
5904
5905/* Extract an annotation data from a text property `charset' at POS of
5906 CODING->src_object (buffer of string), store the data in BUF, set
5907 *STOP to the position where the value of `charset' property changes
5908 (limiting by LIMIT), and return the address of the next element of
5909 BUF.
5910
5911 If the property value is nil, set *STOP to the position where the
5912 property value is non-nil (limiting by LIMIT), and return BUF. */
5913
5914static INLINE int *
5915handle_charset_annotation (pos, limit, coding, buf, stop)
5916 EMACS_INT pos, limit;
5917 struct coding_system *coding;
5918 int *buf;
5919 EMACS_INT *stop;
5920{
5921 Lisp_Object val, next;
5922 int id;
5923
5924 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5925 if (! NILP (val) && CHARSETP (val))
5926 id = XINT (CHARSET_SYMBOL_ID (val));
5927 else
5928 id = -1;
5929 ADD_CHARSET_DATA (buf, 0, 0, id);
5930 next = Fnext_single_property_change (make_number (pos), Qcharset,
5931 coding->src_object,
5932 make_number (limit));
5933 *stop = XINT (next);
5934 return buf;
5935}
5936
5937
df7492f9
KH
5938static void
5939consume_chars (coding)
5940 struct coding_system *coding;
5941{
5942 int *buf = coding->charbuf;
ff0dacd7 5943 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 5944 const unsigned char *src = coding->source + coding->consumed;
ff0dacd7
KH
5945 EMACS_INT pos = coding->src_pos + coding->consumed_char;
5946 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
5947 int multibytep = coding->src_multibyte;
5948 Lisp_Object eol_type;
5949 int c;
ff0dacd7
KH
5950 EMACS_INT stop, stop_composition, stop_charset;
5951 int id;
88993dfd 5952
df7492f9
KH
5953 eol_type = CODING_ID_EOL_TYPE (coding->id);
5954 if (VECTORP (eol_type))
5955 eol_type = Qunix;
88993dfd 5956
df7492f9
KH
5957 /* Note: composition handling is not yet implemented. */
5958 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5959
ff0dacd7
KH
5960 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
5961 stop = stop_composition = pos;
5962 else
5963 stop = stop_composition = end_pos;
5964 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
5965 stop = stop_charset = pos;
df7492f9 5966 else
ff0dacd7 5967 stop_charset = end_pos;
ec6d2bb8 5968
ff0dacd7
KH
5969 /* Compensate for CRLF and annotation. */
5970 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 5971 while (buf < buf_end)
ec6d2bb8 5972 {
df7492f9 5973 if (pos == stop)
ec6d2bb8 5974 {
df7492f9 5975 int *p;
ec6d2bb8 5976
df7492f9
KH
5977 if (pos == end_pos)
5978 break;
ff0dacd7
KH
5979 if (pos == stop_composition)
5980 buf = handle_composition_annotation (pos, end_pos, coding,
5981 buf, &stop_composition);
5982 if (pos == stop_charset)
5983 buf = handle_charset_annotation (pos, end_pos, coding,
5984 buf, &stop_charset);
5985 stop = (stop_composition < stop_charset
5986 ? stop_composition : stop_charset);
df7492f9
KH
5987 }
5988
5989 if (! multibytep)
5990 c = *src++;
5991 else
5992 c = STRING_CHAR_ADVANCE (src);
5993 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5994 c = '\n';
5995 if (! EQ (eol_type, Qunix))
5996 {
5997 if (c == '\n')
5998 {
5999 if (EQ (eol_type, Qdos))
6000 *buf++ = '\r';
6001 else
6002 c = '\r';
ec6d2bb8 6003 }
ec6d2bb8 6004 }
df7492f9
KH
6005 *buf++ = c;
6006 pos++;
ec6d2bb8 6007 }
ec6d2bb8 6008
df7492f9
KH
6009 coding->consumed = src - coding->source;
6010 coding->consumed_char = pos - coding->src_pos;
6011 coding->charbuf_used = buf - coding->charbuf;
6012 coding->chars_at_source = 0;
ec6d2bb8
KH
6013}
6014
ec6d2bb8 6015
df7492f9
KH
6016/* Encode the text at CODING->src_object into CODING->dst_object.
6017 CODING->src_object is a buffer or a string.
6018 CODING->dst_object is a buffer or nil.
6019
6020 If CODING->src_object is a buffer, it must be the current buffer.
6021 In this case, if CODING->src_pos is positive, it is a position of
6022 the source text in the buffer, otherwise, the source text is in the
6023 gap area of the buffer, and coding->src_pos specifies the offset of
6024 the text from GPT (which must be the same as PT). If this is the
6025 same buffer as CODING->dst_object, CODING->src_pos must be
6026 negative and CODING should not have `pre-write-conversion'.
6027
6028 If CODING->src_object is a string, CODING should not have
6029 `pre-write-conversion'.
6030
6031 If CODING->dst_object is a buffer, the encoded data is inserted at
6032 the current point of that buffer.
6033
6034 If CODING->dst_object is nil, the encoded data is placed at the
6035 memory area specified by CODING->destination. */
6036
6037static int
6038encode_coding (coding)
ec6d2bb8 6039 struct coding_system *coding;
ec6d2bb8 6040{
df7492f9 6041 Lisp_Object attrs;
ec6d2bb8 6042
df7492f9 6043 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 6044
df7492f9 6045 if (BUFFERP (coding->dst_object))
ec6d2bb8 6046 {
df7492f9
KH
6047 set_buffer_internal (XBUFFER (coding->dst_object));
6048 coding->dst_multibyte
6049 = ! NILP (current_buffer->enable_multibyte_characters);
6050 }
ec6d2bb8 6051
df7492f9
KH
6052 coding->consumed = coding->consumed_char = 0;
6053 coding->produced = coding->produced_char = 0;
6054 coding->result = CODING_RESULT_SUCCESS;
6055 coding->errors = 0;
ec6d2bb8 6056
df7492f9 6057 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 6058
df7492f9
KH
6059 do {
6060 coding_set_source (coding);
6061 consume_chars (coding);
6062
6063 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
da4109a9
KH
6064 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6065 else if (!NILP (Vstandard_translation_table_for_encode))
6066 translate_chars (coding, Vstandard_translation_table_for_encode);
df7492f9
KH
6067
6068 coding_set_destination (coding);
6069 (*(coding->encoder)) (coding);
6070 } while (coding->consumed_char < coding->src_chars);
6071
6072 if (BUFFERP (coding->dst_object))
6073 insert_from_gap (coding->produced_char, coding->produced);
6074
6075 return (coding->result);
ec6d2bb8
KH
6076}
6077
df7492f9 6078/* Work buffer */
fb88bf2d 6079
df7492f9
KH
6080/* List of currently used working buffer. */
6081Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 6082
df7492f9
KH
6083/* A working buffer used by the top level conversion. */
6084Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 6085
4ed46869 6086
df7492f9
KH
6087/* Return a working buffer that can be freely used by the following
6088 code conversion. MULTIBYTEP specifies the multibyteness of the
6089 buffer. */
b73bfc1c 6090
df7492f9
KH
6091Lisp_Object
6092make_conversion_work_buffer (multibytep)
6093 int multibytep;
6094{
6095 struct buffer *current = current_buffer;
6096 Lisp_Object buf;
d46c5b12 6097
df7492f9 6098 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 6099 {
df7492f9
KH
6100 if (NILP (Vcode_conversion_reused_work_buf))
6101 Vcode_conversion_reused_work_buf
6102 = Fget_buffer_create (build_string (" *code-conversion-work*"));
6103 Vcode_conversion_work_buf_list
6104 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 6105 }
df7492f9 6106 else
d46c5b12 6107 {
c197f191 6108 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 6109 char str[128];
e077cc80 6110
df7492f9
KH
6111 sprintf (str, " *code-conversion-work*<%d>", depth);
6112 Vcode_conversion_work_buf_list
6113 = Fcons (Fget_buffer_create (build_string (str)),
6114 Vcode_conversion_work_buf_list);
d46c5b12 6115 }
d46c5b12 6116
df7492f9
KH
6117 buf = XCAR (Vcode_conversion_work_buf_list);
6118 set_buffer_internal (XBUFFER (buf));
6119 current_buffer->undo_list = Qt;
6120 Ferase_buffer ();
9d123124 6121 Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
df7492f9
KH
6122 set_buffer_internal (current);
6123 return buf;
6124}
d46c5b12 6125
df7492f9 6126static struct coding_system *saved_coding;
d46c5b12 6127
df7492f9
KH
6128Lisp_Object
6129code_conversion_restore (info)
6130 Lisp_Object info;
6131{
c197f191 6132 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 6133 Lisp_Object buf;
d46c5b12 6134
df7492f9 6135 if (depth > 0)
d46c5b12 6136 {
df7492f9
KH
6137 buf = XCAR (Vcode_conversion_work_buf_list);
6138 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6139 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
6140 Fkill_buffer (buf);
6141 }
d46c5b12 6142
c197f191 6143 if (EQ (saved_coding->dst_object, Qt)
df7492f9
KH
6144 && saved_coding->destination)
6145 xfree (saved_coding->destination);
b843d1ae 6146
df7492f9
KH
6147 return save_excursion_restore (info);
6148}
d46c5b12 6149
12410ef1 6150
df7492f9
KH
6151int
6152decode_coding_gap (coding, chars, bytes)
6153 struct coding_system *coding;
6154 EMACS_INT chars, bytes;
6155{
6156 int count = specpdl_ptr - specpdl;
fb88bf2d 6157
df7492f9
KH
6158 saved_coding = coding;
6159 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 6160
df7492f9
KH
6161 coding->src_object = Fcurrent_buffer ();
6162 coding->src_chars = chars;
6163 coding->src_bytes = bytes;
6164 coding->src_pos = -chars;
6165 coding->src_pos_byte = -bytes;
6166 coding->src_multibyte = chars < bytes;
6167 coding->dst_object = coding->src_object;
6168 coding->dst_pos = PT;
6169 coding->dst_pos_byte = PT_BYTE;
71c81426 6170 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6171 coding->mode |= CODING_MODE_LAST_BLOCK;
4956c225 6172
df7492f9
KH
6173 if (CODING_REQUIRE_DETECTION (coding))
6174 detect_coding (coding);
6175
6176 decode_coding (coding);
d46c5b12 6177
df7492f9
KH
6178 unbind_to (count, Qnil);
6179 return coding->result;
6180}
d46c5b12 6181
df7492f9
KH
6182int
6183encode_coding_gap (coding, chars, bytes)
6184 struct coding_system *coding;
6185 EMACS_INT chars, bytes;
6186{
6187 int count = specpdl_ptr - specpdl;
6188 Lisp_Object buffer;
d46c5b12 6189
df7492f9
KH
6190 saved_coding = coding;
6191 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 6192
df7492f9
KH
6193 buffer = Fcurrent_buffer ();
6194 coding->src_object = buffer;
6195 coding->src_chars = chars;
6196 coding->src_bytes = bytes;
6197 coding->src_pos = -chars;
6198 coding->src_pos_byte = -bytes;
6199 coding->src_multibyte = chars < bytes;
6200 coding->dst_object = coding->src_object;
6201 coding->dst_pos = PT;
6202 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 6203
df7492f9 6204 encode_coding (coding);
f2558efd 6205
df7492f9
KH
6206 unbind_to (count, Qnil);
6207 return coding->result;
6208}
b73bfc1c 6209
d46c5b12 6210
df7492f9
KH
6211/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6212 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 6213
df7492f9 6214 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 6215
df7492f9
KH
6216 If it is a buffer, the text is at point of the buffer. FROM and TO
6217 are positions in the buffer.
ec6d2bb8 6218
df7492f9
KH
6219 If it is a string, the text is at the beginning of the string.
6220 FROM and TO are indices to the string.
ec6d2bb8 6221
df7492f9
KH
6222 If it is nil, the text is at coding->source. FROM and TO are
6223 indices to coding->source.
ec6d2bb8 6224
df7492f9 6225 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 6226
df7492f9
KH
6227 If it is a buffer, the decoded text is inserted at point of the
6228 buffer. If the buffer is the same as SRC_OBJECT, the source text
6229 is deleted.
d46c5b12 6230
df7492f9
KH
6231 If it is Qt, a string is made from the decoded text, and
6232 set in CODING->dst_object.
d46c5b12 6233
df7492f9
KH
6234 If it is Qnil, the decoded text is stored at CODING->destination.
6235 The caller must allocate CODING->dst_bytes bytes at
6236 CODING->destination by xmalloc. If the decoded text is longer than
6237 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6238 */
d46c5b12 6239
df7492f9
KH
6240void
6241decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6242 dst_object)
6243 struct coding_system *coding;
6244 Lisp_Object src_object;
6245 EMACS_INT from, from_byte, to, to_byte;
6246 Lisp_Object dst_object;
6247{
6248 int count = specpdl_ptr - specpdl;
6249 unsigned char *destination;
6250 EMACS_INT dst_bytes;
6251 EMACS_INT chars = to - from;
6252 EMACS_INT bytes = to_byte - from_byte;
6253 Lisp_Object attrs;
d46c5b12 6254
df7492f9
KH
6255 saved_coding = coding;
6256 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 6257
df7492f9
KH
6258 if (NILP (dst_object))
6259 {
6260 destination = coding->destination;
6261 dst_bytes = coding->dst_bytes;
6262 }
93dec019 6263
df7492f9
KH
6264 coding->src_object = src_object;
6265 coding->src_chars = chars;
6266 coding->src_bytes = bytes;
6267 coding->src_multibyte = chars < bytes;
70ad9fc4 6268
df7492f9
KH
6269 if (STRINGP (src_object))
6270 {
6271 coding->src_pos = from;
6272 coding->src_pos_byte = from_byte;
6273 }
6274 else if (BUFFERP (src_object))
6275 {
6276 set_buffer_internal (XBUFFER (src_object));
6277 if (from != GPT)
6278 move_gap_both (from, from_byte);
6279 if (EQ (src_object, dst_object))
fb88bf2d 6280 {
df7492f9
KH
6281 TEMP_SET_PT_BOTH (from, from_byte);
6282 del_range_both (from, from_byte, to, to_byte, 1);
6283 coding->src_pos = -chars;
6284 coding->src_pos_byte = -bytes;
fb88bf2d 6285 }
df7492f9 6286 else
fb88bf2d 6287 {
df7492f9
KH
6288 coding->src_pos = from;
6289 coding->src_pos_byte = from_byte;
fb88bf2d 6290 }
d46c5b12 6291 }
fb88bf2d 6292
df7492f9
KH
6293 if (CODING_REQUIRE_DETECTION (coding))
6294 detect_coding (coding);
6295 attrs = CODING_ID_ATTRS (coding->id);
6296
6297 if (! NILP (CODING_ATTR_POST_READ (attrs))
6298 || EQ (dst_object, Qt))
b73bfc1c 6299 {
df7492f9
KH
6300 coding->dst_object = make_conversion_work_buffer (1);
6301 coding->dst_pos = BEG;
6302 coding->dst_pos_byte = BEG_BYTE;
6303 coding->dst_multibyte = 1;
b73bfc1c 6304 }
df7492f9 6305 else if (BUFFERP (dst_object))
12410ef1 6306 {
df7492f9
KH
6307 coding->dst_object = dst_object;
6308 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6309 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6310 coding->dst_multibyte
6311 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 6312 }
72d1a715 6313 else
df7492f9
KH
6314 {
6315 coding->dst_object = Qnil;
6316 coding->dst_multibyte = 1;
6317 }
6318
6319 decode_coding (coding);
4ed46869 6320
df7492f9
KH
6321 if (BUFFERP (coding->dst_object))
6322 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6323
df7492f9 6324 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6325 {
df7492f9
KH
6326 struct gcpro gcpro1, gcpro2;
6327 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6328 Lisp_Object val;
4ed46869 6329
c0cc7f7f 6330 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6331 GCPRO2 (coding->src_object, coding->dst_object);
6332 val = call1 (CODING_ATTR_POST_READ (attrs),
6333 make_number (coding->produced_char));
6334 UNGCPRO;
6335 CHECK_NATNUM (val);
6336 coding->produced_char += Z - prev_Z;
6337 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6338 }
4ed46869 6339
df7492f9 6340 if (EQ (dst_object, Qt))
ec6d2bb8 6341 {
df7492f9
KH
6342 coding->dst_object = Fbuffer_string ();
6343 }
6344 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6345 {
6346 set_buffer_internal (XBUFFER (coding->dst_object));
6347 if (dst_bytes < coding->produced)
6348 {
6349 destination
6350 = (unsigned char *) xrealloc (destination, coding->produced);
6351 if (! destination)
6352 {
6353 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6354 unbind_to (count, Qnil);
6355 return;
6356 }
6357 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6358 move_gap_both (BEGV, BEGV_BYTE);
6359 bcopy (BEGV_ADDR, destination, coding->produced);
6360 coding->destination = destination;
6361 }
ec6d2bb8 6362 }
2b4f9037 6363
df7492f9 6364 unbind_to (count, Qnil);
d46c5b12
KH
6365}
6366
df7492f9
KH
6367
6368void
6369encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6370 dst_object)
b73bfc1c 6371 struct coding_system *coding;
df7492f9
KH
6372 Lisp_Object src_object;
6373 EMACS_INT from, from_byte, to, to_byte;
6374 Lisp_Object dst_object;
b73bfc1c
KH
6375{
6376 int count = specpdl_ptr - specpdl;
df7492f9
KH
6377 EMACS_INT chars = to - from;
6378 EMACS_INT bytes = to_byte - from_byte;
6379 Lisp_Object attrs;
6380
6381 saved_coding = coding;
6382 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6383
6384 coding->src_object = src_object;
6385 coding->src_chars = chars;
6386 coding->src_bytes = bytes;
6387 coding->src_multibyte = chars < bytes;
6388
6389 attrs = CODING_ID_ATTRS (coding->id);
6390
6391 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6392 {
df7492f9
KH
6393 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6394 set_buffer_internal (XBUFFER (coding->src_object));
6395 if (STRINGP (src_object))
6396 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6397 else if (BUFFERP (src_object))
6398 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6399 else
6400 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6401
6402 if (EQ (src_object, dst_object))
6403 {
6404 set_buffer_internal (XBUFFER (src_object));
6405 del_range_both (from, from_byte, to, to_byte, 1);
6406 set_buffer_internal (XBUFFER (coding->src_object));
6407 }
6408
ac87bbef
KH
6409 call2 (CODING_ATTR_PRE_WRITE (attrs),
6410 make_number (BEG), make_number (Z));
6411 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6412 if (BEG != GPT)
6413 move_gap_both (BEG, BEG_BYTE);
6414 coding->src_chars = Z - BEG;
6415 coding->src_bytes = Z_BYTE - BEG_BYTE;
6416 coding->src_pos = BEG;
6417 coding->src_pos_byte = BEG_BYTE;
6418 coding->src_multibyte = Z < Z_BYTE;
6419 }
6420 else if (STRINGP (src_object))
6421 {
6422 coding->src_pos = from;
6423 coding->src_pos_byte = from_byte;
6424 }
6425 else if (BUFFERP (src_object))
d46c5b12 6426 {
df7492f9 6427 set_buffer_internal (XBUFFER (src_object));
df7492f9 6428 if (EQ (src_object, dst_object))
d46c5b12 6429 {
ff0dacd7
KH
6430 coding->src_object = del_range_1 (from, to, 1, 1);
6431 coding->src_pos = 0;
6432 coding->src_pos_byte = 0;
d46c5b12 6433 }
df7492f9 6434 else
d46c5b12 6435 {
ff0dacd7
KH
6436 if (from < GPT && to >= GPT)
6437 move_gap_both (from, from_byte);
df7492f9
KH
6438 coding->src_pos = from;
6439 coding->src_pos_byte = from_byte;
d46c5b12
KH
6440 }
6441 }
4ed46869 6442
df7492f9 6443 if (BUFFERP (dst_object))
d46c5b12 6444 {
df7492f9 6445 coding->dst_object = dst_object;
28f67a95
KH
6446 if (EQ (src_object, dst_object))
6447 {
6448 coding->dst_pos = from;
6449 coding->dst_pos_byte = from_byte;
6450 }
6451 else
6452 {
6453 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6454 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6455 }
df7492f9
KH
6456 coding->dst_multibyte
6457 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6458 }
df7492f9 6459 else if (EQ (dst_object, Qt))
4956c225 6460 {
df7492f9 6461 coding->dst_object = Qnil;
df7492f9 6462 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6463 if (coding->dst_bytes == 0)
6464 coding->dst_bytes = 1;
6465 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6466 coding->dst_multibyte = 0;
4956c225 6467 }
df7492f9 6468 else
78108bcd 6469 {
df7492f9
KH
6470 coding->dst_object = Qnil;
6471 coding->dst_multibyte = 0;
78108bcd
KH
6472 }
6473
df7492f9 6474 encode_coding (coding);
4ed46869 6475
df7492f9 6476 if (EQ (dst_object, Qt))
4ed46869 6477 {
df7492f9
KH
6478 if (BUFFERP (coding->dst_object))
6479 coding->dst_object = Fbuffer_string ();
6480 else
73be902c 6481 {
df7492f9
KH
6482 coding->dst_object
6483 = make_unibyte_string ((char *) coding->destination,
6484 coding->produced);
6485 xfree (coding->destination);
73be902c 6486 }
4ed46869 6487 }
d46c5b12 6488
df7492f9 6489 unbind_to (count, Qnil);
b73bfc1c
KH
6490}
6491
df7492f9 6492
b73bfc1c 6493Lisp_Object
df7492f9 6494preferred_coding_system ()
b73bfc1c 6495{
df7492f9 6496 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6497
df7492f9 6498 return CODING_ID_NAME (id);
4ed46869
KH
6499}
6500
6501\f
6502#ifdef emacs
1397dc18 6503/*** 11. Emacs Lisp library functions ***/
4ed46869 6504
4ed46869 6505DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6506 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6507See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6508about coding-system objects. */)
6509 (obj)
4ed46869
KH
6510 Lisp_Object obj;
6511{
df7492f9 6512 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6513}
6514
9d991de8
RS
6515DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6516 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6517 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6518 (prompt)
4ed46869
KH
6519 Lisp_Object prompt;
6520{
e0e989f6 6521 Lisp_Object val;
9d991de8
RS
6522 do
6523 {
4608c386
KH
6524 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6525 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6526 }
6527 while (XSTRING (val)->size == 0);
e0e989f6 6528 return (Fintern (val, Qnil));
4ed46869
KH
6529}
6530
9b787f3e 6531DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6532 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6533If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6534 (prompt, default_coding_system)
9b787f3e 6535 Lisp_Object prompt, default_coding_system;
4ed46869 6536{
f44d27ce 6537 Lisp_Object val;
9b787f3e
RS
6538 if (SYMBOLP (default_coding_system))
6539 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6540 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6541 Qt, Qnil, Qcoding_system_history,
6542 default_coding_system, Qnil);
e0e989f6 6543 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6544}
6545
6546DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6547 1, 1, 0,
48b0f3ae 6548 doc: /* Check validity of CODING-SYSTEM.
b054002f 6549If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
df7492f9 6550 (coding_system)
4ed46869
KH
6551 Lisp_Object coding_system;
6552{
b7826503 6553 CHECK_SYMBOL (coding_system);
4ed46869
KH
6554 if (!NILP (Fcoding_system_p (coding_system)))
6555 return coding_system;
6556 while (1)
02ba4723 6557 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6558}
df7492f9 6559
3a73fa5d 6560\f
89528eb3
KH
6561/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6562 HIGHEST is nonzero, return the coding system of the highest
6563 priority among the detected coding systems. Otherwise return a
6564 list of detected coding systems sorted by their priorities. If
6565 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6566 multibyte form but contains only ASCII and eight-bit chars.
6567 Otherwise, the bytes are raw bytes.
6568
6569 CODING-SYSTEM controls the detection as below:
6570
6571 If it is nil, detect both text-format and eol-format. If the
6572 text-format part of CODING-SYSTEM is already specified
6573 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6574 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6575 detect only text-format. */
6576
d46c5b12 6577Lisp_Object
df7492f9 6578detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6579 unsigned char *src;
6580 int src_bytes, highest;
0a28aafb 6581 int multibytep;
df7492f9 6582 Lisp_Object coding_system;
4ed46869 6583{
df7492f9
KH
6584 unsigned char *src_end = src + src_bytes;
6585 int mask = CATEGORY_MASK_ANY;
df7492f9
KH
6586 Lisp_Object attrs, eol_type;
6587 Lisp_Object val;
6588 struct coding_system coding;
89528eb3 6589 int id;
ff0dacd7 6590 struct coding_detection_info detect_info;
df7492f9
KH
6591
6592 if (NILP (coding_system))
6593 coding_system = Qundecided;
6594 setup_coding_system (coding_system, &coding);
6595 attrs = CODING_ID_ATTRS (coding.id);
6596 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 6597 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 6598
df7492f9
KH
6599 coding.source = src;
6600 coding.src_bytes = src_bytes;
6601 coding.src_multibyte = multibytep;
6602 coding.consumed = 0;
89528eb3 6603 coding.mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6604
ff0dacd7
KH
6605 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6606
89528eb3
KH
6607 /* At first, detect text-format if necessary. */
6608 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
4ed46869 6609 {
ff0dacd7
KH
6610 enum coding_category category;
6611 struct coding_system *this;
6612 int c, i;
6613
df7492f9 6614 for (; src < src_end; src++)
4ed46869 6615 {
df7492f9 6616 c = *src;
89528eb3
KH
6617 if (c & 0x80
6618 || (c < 0x20 && (c == ISO_CODE_ESC
6619 || c == ISO_CODE_SI
584948ac 6620 || c == ISO_CODE_SO)))
d46c5b12 6621 break;
4ed46869 6622 }
df7492f9
KH
6623 coding.head_ascii = src - coding.source;
6624
6625 if (src < src_end)
6626 for (i = 0; i < coding_category_raw_text; i++)
6627 {
ff0dacd7
KH
6628 category = coding_priorities[i];
6629 this = coding_categories + category;
df7492f9 6630
df7492f9
KH
6631 if (this->id < 0)
6632 {
6633 /* No coding system of this category is defined. */
ff0dacd7 6634 detect_info.rejected |= (1 << category);
df7492f9 6635 }
ff0dacd7 6636 else if (category >= coding_category_raw_text)
89528eb3 6637 continue;
ff0dacd7
KH
6638 else if (detect_info.checked & (1 << category))
6639 {
6640 if (highest
6641 && (detect_info.found & (1 << category)))
6642 break;
6643 }
df7492f9
KH
6644 else
6645 {
ff0dacd7 6646 if ((*(this->detector)) (&coding, &detect_info)
89528eb3 6647 && highest
ff0dacd7
KH
6648 && (detect_info.found & (1 << category)))
6649 break;
df7492f9
KH
6650 }
6651 }
4ed46869 6652
ff0dacd7
KH
6653
6654 if (detect_info.rejected == CATEGORY_MASK_ANY)
89528eb3 6655 {
ff0dacd7 6656 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
6657 id = coding_categories[coding_category_raw_text].id;
6658 val = Fcons (make_number (id), Qnil);
6659 }
ff0dacd7 6660 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 6661 {
ff0dacd7 6662 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
6663 id = coding_categories[coding_category_undecided].id;
6664 val = Fcons (make_number (id), Qnil);
6665 }
6666 else if (highest)
6667 {
ff0dacd7
KH
6668 if (detect_info.found)
6669 {
6670 detect_info.found = 1 << category;
6671 val = Fcons (make_number (this->id), Qnil);
6672 }
6673 else
6674 for (i = 0; i < coding_category_raw_text; i++)
6675 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6676 {
6677 detect_info.found = 1 << coding_priorities[i];
6678 id = coding_categories[coding_priorities[i]].id;
6679 val = Fcons (make_number (id), Qnil);
6680 break;
6681 }
6682 }
89528eb3
KH
6683 else
6684 {
ff0dacd7
KH
6685 int mask = detect_info.rejected | detect_info.found;
6686 int found = 0;
89528eb3 6687 val = Qnil;
ff0dacd7 6688
89528eb3 6689 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
6690 {
6691 category = coding_priorities[i];
6692 if (! (mask & (1 << category)))
6693 {
6694 found |= 1 << category;
6695 id = coding_categories[category].id;
6696 val = Fcons (make_number (id), val);
6697 }
6698 }
6699 for (i = coding_category_raw_text - 1; i >= 0; i--)
6700 {
6701 category = coding_priorities[i];
6702 if (detect_info.found & (1 << category))
6703 {
6704 id = coding_categories[category].id;
6705 val = Fcons (make_number (id), val);
6706 }
6707 }
6708 detect_info.found |= found;
89528eb3
KH
6709 }
6710 }
df7492f9
KH
6711 else
6712 {
ff0dacd7 6713 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 6714 val = Fcons (make_number (coding.id), Qnil);
4ed46869 6715 }
df7492f9 6716
89528eb3 6717 /* Then, detect eol-format if necessary. */
df7492f9 6718 {
89528eb3 6719 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
6720 Lisp_Object tail;
6721
89528eb3
KH
6722 if (VECTORP (eol_type))
6723 {
ff0dacd7 6724 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
6725 normal_eol = detect_eol (coding.source, src_bytes,
6726 coding_category_raw_text);
ff0dacd7
KH
6727 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6728 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
6729 utf_16_be_eol = detect_eol (coding.source, src_bytes,
6730 coding_category_utf_16_be);
ff0dacd7
KH
6731 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6732 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
6733 utf_16_le_eol = detect_eol (coding.source, src_bytes,
6734 coding_category_utf_16_le);
6735 }
6736 else
6737 {
6738 if (EQ (eol_type, Qunix))
6739 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6740 else if (EQ (eol_type, Qdos))
6741 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6742 else
6743 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6744 }
6745
df7492f9
KH
6746 for (tail = val; CONSP (tail); tail = XCDR (tail))
6747 {
89528eb3 6748 enum coding_category category;
df7492f9 6749 int this_eol;
89528eb3
KH
6750
6751 id = XINT (XCAR (tail));
6752 attrs = CODING_ID_ATTRS (id);
6753 category = XINT (CODING_ATTR_CATEGORY (attrs));
6754 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
6755 if (VECTORP (eol_type))
6756 {
89528eb3
KH
6757 if (category == coding_category_utf_16_be
6758 || category == coding_category_utf_16_be_nosig)
6759 this_eol = utf_16_be_eol;
6760 else if (category == coding_category_utf_16_le
6761 || category == coding_category_utf_16_le_nosig)
6762 this_eol = utf_16_le_eol;
df7492f9 6763 else
89528eb3
KH
6764 this_eol = normal_eol;
6765
df7492f9
KH
6766 if (this_eol == EOL_SEEN_LF)
6767 XSETCAR (tail, AREF (eol_type, 0));
6768 else if (this_eol == EOL_SEEN_CRLF)
6769 XSETCAR (tail, AREF (eol_type, 1));
6770 else if (this_eol == EOL_SEEN_CR)
6771 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
6772 else
6773 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 6774 }
89528eb3
KH
6775 else
6776 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
6777 }
6778 }
6779
03699b14 6780 return (highest ? XCAR (val) : val);
93dec019 6781}
4ed46869 6782
df7492f9 6783
d46c5b12
KH
6784DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6785 2, 3, 0,
48b0f3ae
PJ
6786 doc: /* Detect coding system of the text in the region between START and END.
6787Return a list of possible coding systems ordered by priority.
6788
6789If only ASCII characters are found, it returns a list of single element
6790`undecided' or its subsidiary coding system according to a detected
6791end-of-line format.
6792
6793If optional argument HIGHEST is non-nil, return the coding system of
6794highest priority. */)
6795 (start, end, highest)
d46c5b12
KH
6796 Lisp_Object start, end, highest;
6797{
6798 int from, to;
6799 int from_byte, to_byte;
6289dd10 6800
b7826503
PJ
6801 CHECK_NUMBER_COERCE_MARKER (start);
6802 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6803
d46c5b12
KH
6804 validate_region (&start, &end);
6805 from = XINT (start), to = XINT (end);
6806 from_byte = CHAR_TO_BYTE (from);
6807 to_byte = CHAR_TO_BYTE (to);
6289dd10 6808
d46c5b12
KH
6809 if (from < GPT && to >= GPT)
6810 move_gap_both (to, to_byte);
c210f766 6811
d46c5b12 6812 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6813 to_byte - from_byte,
0a28aafb
KH
6814 !NILP (highest),
6815 !NILP (current_buffer
df7492f9
KH
6816 ->enable_multibyte_characters),
6817 Qnil);
d46c5b12 6818}
6289dd10 6819
d46c5b12
KH
6820DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6821 1, 2, 0,
48b0f3ae
PJ
6822 doc: /* Detect coding system of the text in STRING.
6823Return a list of possible coding systems ordered by priority.
6824
6825If only ASCII characters are found, it returns a list of single element
6826`undecided' or its subsidiary coding system according to a detected
6827end-of-line format.
6828
6829If optional argument HIGHEST is non-nil, return the coding system of
6830highest priority. */)
6831 (string, highest)
d46c5b12
KH
6832 Lisp_Object string, highest;
6833{
b7826503 6834 CHECK_STRING (string);
4ed46869 6835
d46c5b12 6836 return detect_coding_system (XSTRING (string)->data,
df7492f9 6837 STRING_BYTES (XSTRING (string)),
0a28aafb 6838 !NILP (highest),
df7492f9
KH
6839 STRING_MULTIBYTE (string),
6840 Qnil);
4ed46869
KH
6841}
6842
05e6f5dc 6843
df7492f9
KH
6844static INLINE int
6845char_encodable_p (c, attrs)
6846 int c;
6847 Lisp_Object attrs;
05e6f5dc 6848{
df7492f9 6849 Lisp_Object tail;
df7492f9 6850 struct charset *charset;
05e6f5dc 6851
df7492f9
KH
6852 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6853 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6854 {
df7492f9
KH
6855 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6856 if (CHAR_CHARSET_P (c, charset))
6857 break;
05e6f5dc 6858 }
df7492f9 6859 return (! NILP (tail));
05e6f5dc
KH
6860}
6861
6862
df7492f9
KH
6863/* Return a list of coding systems that safely encode the text between
6864 START and END. If EXCLUDE is non-nil, it is a list of coding
6865 systems not to check. The returned list doesn't contain any such
48468dac 6866 coding systems. In any case, if the text contains only ASCII or is
df7492f9
KH
6867 unibyte, return t. */
6868
6869DEFUN ("find-coding-systems-region-internal",
6870 Ffind_coding_systems_region_internal,
6871 Sfind_coding_systems_region_internal, 2, 3, 0,
6872 doc: /* Internal use only. */)
6873 (start, end, exclude)
6874 Lisp_Object start, end, exclude;
6875{
6876 Lisp_Object coding_attrs_list, safe_codings;
6877 EMACS_INT start_byte, end_byte;
7c78e542 6878 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
6879 int c;
6880 Lisp_Object tail, elt;
05e6f5dc 6881
df7492f9
KH
6882 if (STRINGP (start))
6883 {
6884 if (!STRING_MULTIBYTE (start)
48468dac 6885 || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
df7492f9
KH
6886 return Qt;
6887 start_byte = 0;
6888 end_byte = STRING_BYTES (XSTRING (start));
6889 }
6890 else
6891 {
6892 CHECK_NUMBER_COERCE_MARKER (start);
6893 CHECK_NUMBER_COERCE_MARKER (end);
6894 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6895 args_out_of_range (start, end);
6896 if (NILP (current_buffer->enable_multibyte_characters))
6897 return Qt;
6898 start_byte = CHAR_TO_BYTE (XINT (start));
6899 end_byte = CHAR_TO_BYTE (XINT (end));
6900 if (XINT (end) - XINT (start) == end_byte - start_byte)
6901 return Qt;
05e6f5dc 6902
df7492f9
KH
6903 if (start < GPT && end > GPT)
6904 {
6905 if ((GPT - start) < (end - GPT))
6906 move_gap_both (start, start_byte);
6907 else
6908 move_gap_both (end, end_byte);
6909 }
6910 }
05e6f5dc 6911
df7492f9
KH
6912 coding_attrs_list = Qnil;
6913 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6914 if (NILP (exclude)
6915 || NILP (Fmemq (XCAR (tail), exclude)))
6916 {
6917 Lisp_Object attrs;
05e6f5dc 6918
df7492f9
KH
6919 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6920 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6921 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6922 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6923 }
6924
6925 if (STRINGP (start))
6926 p = pbeg = XSTRING (start)->data;
6927 else
6928 p = pbeg = BYTE_POS_ADDR (start_byte);
6929 pend = p + (end_byte - start_byte);
6930
6931 while (p < pend && ASCII_BYTE_P (*p)) p++;
6932 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6933
6934 while (p < pend)
6935 {
df7492f9
KH
6936 if (ASCII_BYTE_P (*p))
6937 p++;
6938 else
6939 {
6940 c = STRING_CHAR_ADVANCE (p);
6941
6942 charset_map_loaded = 0;
6943 for (tail = coding_attrs_list; CONSP (tail);)
6944 {
6945 elt = XCAR (tail);
6946 if (NILP (elt))
6947 tail = XCDR (tail);
6948 else if (char_encodable_p (c, elt))
6949 tail = XCDR (tail);
6950 else if (CONSP (XCDR (tail)))
6951 {
6952 XSETCAR (tail, XCAR (XCDR (tail)));
6953 XSETCDR (tail, XCDR (XCDR (tail)));
6954 }
6955 else
6956 {
6957 XSETCAR (tail, Qnil);
6958 tail = XCDR (tail);
6959 }
6960 }
6961 if (charset_map_loaded)
6962 {
6963 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6964
df7492f9
KH
6965 if (STRINGP (start))
6966 pbeg = XSTRING (start)->data;
6967 else
6968 pbeg = BYTE_POS_ADDR (start_byte);
6969 p = pbeg + p_offset;
6970 pend = pbeg + pend_offset;
6971 }
6972 }
05e6f5dc 6973 }
df7492f9
KH
6974
6975 safe_codings = Qnil;
6976 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6977 if (! NILP (XCAR (tail)))
6978 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6979
05e6f5dc
KH
6980 return safe_codings;
6981}
6982
6983
df7492f9
KH
6984DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6985 Scheck_coding_systems_region, 3, 3, 0,
6986 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6987
df7492f9
KH
6988START and END are buffer positions specifying the region.
6989CODING-SYSTEM-LIST is a list of coding systems to check.
6990
6991The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6992CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6993whole region, POS0, POS1, ... are buffer positions where non-encodable
6994characters are found.
6995
6996If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6997value is nil.
6998
6999START may be a string. In that case, check if the string is
7000encodable, and the value contains indices to the string instead of
7001buffer positions. END is ignored. */)
7002 (start, end, coding_system_list)
7003 Lisp_Object start, end, coding_system_list;
05e6f5dc 7004{
df7492f9
KH
7005 Lisp_Object list;
7006 EMACS_INT start_byte, end_byte;
7007 int pos;
7c78e542 7008 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7009 int c;
7010 Lisp_Object tail, elt;
05e6f5dc
KH
7011
7012 if (STRINGP (start))
7013 {
df7492f9
KH
7014 if (!STRING_MULTIBYTE (start)
7015 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7016 return Qnil;
7017 start_byte = 0;
7018 end_byte = STRING_BYTES (XSTRING (start));
7019 pos = 0;
05e6f5dc
KH
7020 }
7021 else
7022 {
b7826503
PJ
7023 CHECK_NUMBER_COERCE_MARKER (start);
7024 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7025 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7026 args_out_of_range (start, end);
7027 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7028 return Qnil;
7029 start_byte = CHAR_TO_BYTE (XINT (start));
7030 end_byte = CHAR_TO_BYTE (XINT (end));
7031 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7032 return Qt;
df7492f9
KH
7033
7034 if (start < GPT && end > GPT)
7035 {
7036 if ((GPT - start) < (end - GPT))
7037 move_gap_both (start, start_byte);
7038 else
7039 move_gap_both (end, end_byte);
7040 }
7041 pos = start;
7042 }
7043
7044 list = Qnil;
7045 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7046 {
7047 elt = XCAR (tail);
7048 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7049 Qnil)),
7050 list);
05e6f5dc
KH
7051 }
7052
df7492f9
KH
7053 if (STRINGP (start))
7054 p = pbeg = XSTRING (start)->data;
7055 else
7056 p = pbeg = BYTE_POS_ADDR (start_byte);
7057 pend = p + (end_byte - start_byte);
7058
7059 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7060 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7061
7062 while (p < pend)
05e6f5dc 7063 {
df7492f9
KH
7064 if (ASCII_BYTE_P (*p))
7065 p++;
7066 else
05e6f5dc 7067 {
df7492f9
KH
7068 c = STRING_CHAR_ADVANCE (p);
7069
7070 charset_map_loaded = 0;
7071 for (tail = list; CONSP (tail); tail = XCDR (tail))
7072 {
7073 elt = XCDR (XCAR (tail));
7074 if (! char_encodable_p (c, XCAR (elt)))
7075 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7076 }
7077 if (charset_map_loaded)
7078 {
7079 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7080
7081 if (STRINGP (start))
7082 pbeg = XSTRING (start)->data;
7083 else
7084 pbeg = BYTE_POS_ADDR (start_byte);
7085 p = pbeg + p_offset;
7086 pend = pbeg + pend_offset;
7087 }
05e6f5dc 7088 }
df7492f9 7089 pos++;
05e6f5dc
KH
7090 }
7091
df7492f9
KH
7092 tail = list;
7093 list = Qnil;
7094 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 7095 {
df7492f9
KH
7096 elt = XCAR (tail);
7097 if (CONSP (XCDR (XCDR (elt))))
7098 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7099 list);
05e6f5dc 7100 }
df7492f9
KH
7101
7102 return list;
05e6f5dc
KH
7103}
7104
7105
df7492f9 7106
4031e2bf 7107Lisp_Object
df7492f9
KH
7108code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7109 Lisp_Object start, end, coding_system, dst_object;
7110 int encodep, norecord;
3a73fa5d
RS
7111{
7112 struct coding_system coding;
df7492f9
KH
7113 EMACS_INT from, from_byte, to, to_byte;
7114 Lisp_Object src_object;
3a73fa5d 7115
b7826503
PJ
7116 CHECK_NUMBER_COERCE_MARKER (start);
7117 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7118 if (NILP (coding_system))
7119 coding_system = Qno_conversion;
7120 else
7121 CHECK_CODING_SYSTEM (coding_system);
7122 src_object = Fcurrent_buffer ();
7123 if (NILP (dst_object))
7124 dst_object = src_object;
7125 else if (! EQ (dst_object, Qt))
7126 CHECK_BUFFER (dst_object);
3a73fa5d 7127
d46c5b12
KH
7128 validate_region (&start, &end);
7129 from = XFASTINT (start);
df7492f9 7130 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7131 to = XFASTINT (end);
df7492f9 7132 to_byte = CHAR_TO_BYTE (to);
d46c5b12 7133
df7492f9
KH
7134 setup_coding_system (coding_system, &coding);
7135 coding.mode |= CODING_MODE_LAST_BLOCK;
7136
7137 if (encodep)
7138 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7139 dst_object);
7140 else
7141 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7142 dst_object);
7143 if (! norecord)
7144 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 7145
df7492f9
KH
7146 if (coding.result != CODING_RESULT_SUCCESS)
7147 error ("Code conversion error: %d", coding.result);
3a73fa5d 7148
df7492f9
KH
7149 return (BUFFERP (dst_object)
7150 ? make_number (coding.produced_char)
7151 : coding.dst_object);
4031e2bf
KH
7152}
7153
df7492f9 7154
4031e2bf 7155DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7156 3, 4, "r\nzCoding system: ",
48b0f3ae 7157 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7158When called from a program, takes four arguments:
7159 START, END, CODING-SYSTEM, and DESTINATION.
7160START and END are buffer positions.
7161
7162Optional 4th arguments DESTINATION specifies where the decoded text goes.
7163If nil, the region between START and END is replace by the decoded text.
7164If buffer, the decoded text is inserted in the buffer.
7165If t, the decoded text is returned.
7166
48b0f3ae
PJ
7167This function sets `last-coding-system-used' to the precise coding system
7168used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7169not fully specified.)
7170It returns the length of the decoded text. */)
df7492f9
KH
7171 (start, end, coding_system, destination)
7172 Lisp_Object start, end, coding_system, destination;
4031e2bf 7173{
df7492f9 7174 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
7175}
7176
7177DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7178 3, 4, "r\nzCoding system: ",
7179 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7180When called from a program, takes three arguments:
7181START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
7182
7183Optional 4th arguments DESTINATION specifies where the encoded text goes.
7184If nil, the region between START and END is replace by the encoded text.
7185If buffer, the encoded text is inserted in the buffer.
7186If t, the encoded text is returned.
7187
48b0f3ae
PJ
7188This function sets `last-coding-system-used' to the precise coding system
7189used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7190not fully specified.)
7191It returns the length of the encoded text. */)
df7492f9
KH
7192 (start, end, coding_system, destination)
7193 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7194{
df7492f9 7195 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 7196}
3a73fa5d 7197
4031e2bf 7198Lisp_Object
df7492f9
KH
7199code_convert_string (string, coding_system, dst_object,
7200 encodep, nocopy, norecord)
7201 Lisp_Object string, coding_system, dst_object;
7202 int encodep, nocopy, norecord;
4031e2bf
KH
7203{
7204 struct coding_system coding;
df7492f9 7205 EMACS_INT chars, bytes;
3a73fa5d 7206
b7826503 7207 CHECK_STRING (string);
d46c5b12 7208 if (NILP (coding_system))
df7492f9
KH
7209 {
7210 if (! norecord)
7211 Vlast_coding_system_used = Qno_conversion;
7212 if (NILP (dst_object))
7213 return (nocopy ? Fcopy_sequence (string) : string);
7214 }
4ed46869 7215
df7492f9
KH
7216 if (NILP (coding_system))
7217 coding_system = Qno_conversion;
7218 else
7219 CHECK_CODING_SYSTEM (coding_system);
7220 if (NILP (dst_object))
7221 dst_object = Qt;
7222 else if (! EQ (dst_object, Qt))
7223 CHECK_BUFFER (dst_object);
5f1cd180 7224
df7492f9 7225 setup_coding_system (coding_system, &coding);
d46c5b12 7226 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
7227 chars = XSTRING (string)->size;
7228 bytes = STRING_BYTES (XSTRING (string));
7229 if (encodep)
7230 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7231 else
7232 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7233 if (! norecord)
7234 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7235
df7492f9
KH
7236 if (coding.result != CODING_RESULT_SUCCESS)
7237 error ("Code conversion error: %d", coding.result);
4ed46869 7238
df7492f9
KH
7239 return (BUFFERP (dst_object)
7240 ? make_number (coding.produced_char)
7241 : coding.dst_object);
4ed46869
KH
7242}
7243
4031e2bf 7244
ecec61c1 7245/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
7246 Do not set Vlast_coding_system_used.
7247
7248 This function is called only from macros DECODE_FILE and
7249 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
7250
7251Lisp_Object
7252code_convert_string_norecord (string, coding_system, encodep)
7253 Lisp_Object string, coding_system;
7254 int encodep;
7255{
0be8721c 7256 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 7257}
ecec61c1 7258
ecec61c1 7259
df7492f9
KH
7260DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7261 2, 4, 0,
7262 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7263
7264Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7265if the decoding operation is trivial.
ecec61c1 7266
df7492f9 7267Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 7268inserted in BUFFER instead of returned as a string. In this case,
df7492f9 7269the return value is BUFFER.
ecec61c1 7270
df7492f9
KH
7271This function sets `last-coding-system-used' to the precise coding system
7272used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7273not fully specified. */)
7274 (string, coding_system, nocopy, buffer)
7275 Lisp_Object string, coding_system, nocopy, buffer;
7276{
7277 return code_convert_string (string, coding_system, buffer,
7278 0, ! NILP (nocopy), 0);
7279}
7280
7281DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7282 2, 4, 0,
7283 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7284
7285Optional third arg NOCOPY non-nil means it is OK to return STRING
7286itself if the encoding operation is trivial.
7287
7288Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 7289inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
7290the return value is BUFFER.
7291
7292This function sets `last-coding-system-used' to the precise coding system
7293used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7294not fully specified.) */)
7295 (string, coding_system, nocopy, buffer)
7296 Lisp_Object string, coding_system, nocopy, buffer;
7297{
7298 return code_convert_string (string, coding_system, buffer,
c197f191 7299 1, ! NILP (nocopy), 1);
ecec61c1 7300}
df7492f9 7301
3a73fa5d 7302\f
4ed46869 7303DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7304 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7305Return the corresponding character. */)
7306 (code)
4ed46869
KH
7307 Lisp_Object code;
7308{
df7492f9
KH
7309 Lisp_Object spec, attrs, val;
7310 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7311 int c;
7312
7313 CHECK_NATNUM (code);
7314 c = XFASTINT (code);
7315 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7316 attrs = AREF (spec, 0);
4ed46869 7317
df7492f9
KH
7318 if (ASCII_BYTE_P (c)
7319 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7320 return code;
7321
7322 val = CODING_ATTR_CHARSET_LIST (attrs);
7323 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7324 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7325 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
7326
7327 if (c <= 0x7F)
7328 charset = charset_roman;
7329 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7330 {
df7492f9
KH
7331 charset = charset_kana;
7332 c -= 0x80;
55ab7be3
KH
7333 }
7334 else
7335 {
004068e4 7336 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7337
7338 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7339 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7340 error ("Invalid code: %d", code);
7341 SJIS_TO_JIS (c);
7342 charset = charset_kanji;
55ab7be3 7343 }
df7492f9
KH
7344 c = DECODE_CHAR (charset, c);
7345 if (c < 0)
7346 error ("Invalid code: %d", code);
7347 return make_number (c);
4ed46869
KH
7348}
7349
df7492f9 7350
4ed46869 7351DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7352 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7353Return the corresponding code in SJIS. */)
7354 (ch)
df7492f9 7355 Lisp_Object ch;
4ed46869 7356{
df7492f9
KH
7357 Lisp_Object spec, attrs, charset_list;
7358 int c;
7359 struct charset *charset;
7360 unsigned code;
4ed46869 7361
df7492f9
KH
7362 CHECK_CHARACTER (ch);
7363 c = XFASTINT (ch);
7364 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7365 attrs = AREF (spec, 0);
7366
7367 if (ASCII_CHAR_P (c)
7368 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7369 return ch;
7370
7371 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7372 charset = char_charset (c, charset_list, &code);
7373 if (code == CHARSET_INVALID_CODE (charset))
7374 error ("Can't encode by shift_jis encoding: %d", c);
7375 JIS_TO_SJIS (code);
7376
7377 return make_number (code);
4ed46869
KH
7378}
7379
7380DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7381 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7382Return the corresponding character. */)
7383 (code)
4ed46869
KH
7384 Lisp_Object code;
7385{
df7492f9
KH
7386 Lisp_Object spec, attrs, val;
7387 struct charset *charset_roman, *charset_big5, *charset;
7388 int c;
4ed46869 7389
df7492f9
KH
7390 CHECK_NATNUM (code);
7391 c = XFASTINT (code);
7392 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7393 attrs = AREF (spec, 0);
7394
7395 if (ASCII_BYTE_P (c)
7396 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7397 return code;
7398
7399 val = CODING_ATTR_CHARSET_LIST (attrs);
7400 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7401 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7402
7403 if (c <= 0x7F)
7404 charset = charset_roman;
c28a9453
KH
7405 else
7406 {
df7492f9
KH
7407 int b1 = c >> 8, b2 = c & 0x7F;
7408 if (b1 < 0xA1 || b1 > 0xFE
7409 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7410 error ("Invalid code: %d", code);
7411 charset = charset_big5;
c28a9453 7412 }
df7492f9
KH
7413 c = DECODE_CHAR (charset, (unsigned )c);
7414 if (c < 0)
7415 error ("Invalid code: %d", code);
7416 return make_number (c);
4ed46869
KH
7417}
7418
7419DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7420 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7421Return the corresponding character code in Big5. */)
7422 (ch)
4ed46869
KH
7423 Lisp_Object ch;
7424{
df7492f9
KH
7425 Lisp_Object spec, attrs, charset_list;
7426 struct charset *charset;
7427 int c;
7428 unsigned code;
7429
7430 CHECK_CHARACTER (ch);
7431 c = XFASTINT (ch);
7432 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7433 attrs = AREF (spec, 0);
7434 if (ASCII_CHAR_P (c)
7435 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7436 return ch;
7437
7438 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7439 charset = char_charset (c, charset_list, &code);
7440 if (code == CHARSET_INVALID_CODE (charset))
7441 error ("Can't encode by Big5 encoding: %d", c);
7442
7443 return make_number (code);
4ed46869 7444}
df7492f9 7445
3a73fa5d 7446\f
1ba9e4ab
KH
7447DEFUN ("set-terminal-coding-system-internal",
7448 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7449 Sset_terminal_coding_system_internal, 1, 1, 0,
7450 doc: /* Internal use only. */)
7451 (coding_system)
b74e4686 7452 Lisp_Object coding_system;
4ed46869 7453{
b7826503 7454 CHECK_SYMBOL (coding_system);
df7492f9
KH
7455 setup_coding_system (Fcheck_coding_system (coding_system),
7456 &terminal_coding);
7457
70c22245 7458 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7459 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7460 /* Characer composition should be disabled. */
7461 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7462 terminal_coding.src_multibyte = 1;
7463 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7464 return Qnil;
7465}
7466
c4825358
KH
7467DEFUN ("set-safe-terminal-coding-system-internal",
7468 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7469 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7470 doc: /* Internal use only. */)
48b0f3ae 7471 (coding_system)
b74e4686 7472 Lisp_Object coding_system;
c4825358 7473{
b7826503 7474 CHECK_SYMBOL (coding_system);
c4825358
KH
7475 setup_coding_system (Fcheck_coding_system (coding_system),
7476 &safe_terminal_coding);
df7492f9
KH
7477 /* Characer composition should be disabled. */
7478 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7479 safe_terminal_coding.src_multibyte = 1;
7480 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7481 return Qnil;
7482}
7483
4ed46869
KH
7484DEFUN ("terminal-coding-system",
7485 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7486 doc: /* Return coding system specified for terminal output. */)
7487 ()
4ed46869 7488{
df7492f9 7489 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7490}
7491
1ba9e4ab
KH
7492DEFUN ("set-keyboard-coding-system-internal",
7493 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7494 Sset_keyboard_coding_system_internal, 1, 1, 0,
7495 doc: /* Internal use only. */)
7496 (coding_system)
4ed46869
KH
7497 Lisp_Object coding_system;
7498{
b7826503 7499 CHECK_SYMBOL (coding_system);
df7492f9
KH
7500 setup_coding_system (Fcheck_coding_system (coding_system),
7501 &keyboard_coding);
7502 /* Characer composition should be disabled. */
7503 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7504 return Qnil;
7505}
7506
7507DEFUN ("keyboard-coding-system",
7508 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7509 doc: /* Return coding system specified for decoding keyboard input. */)
7510 ()
4ed46869 7511{
df7492f9 7512 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7513}
7514
7515\f
a5d301df
KH
7516DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7517 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7518 doc: /* Choose a coding system for an operation based on the target name.
7519The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7520DECODING-SYSTEM is the coding system to use for decoding
7521\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7522for encoding (in case OPERATION does encoding).
7523
7524The first argument OPERATION specifies an I/O primitive:
7525 For file I/O, `insert-file-contents' or `write-region'.
7526 For process I/O, `call-process', `call-process-region', or `start-process'.
7527 For network I/O, `open-network-stream'.
7528
7529The remaining arguments should be the same arguments that were passed
7530to the primitive. Depending on which primitive, one of those arguments
7531is selected as the TARGET. For example, if OPERATION does file I/O,
7532whichever argument specifies the file name is TARGET.
7533
7534TARGET has a meaning which depends on OPERATION:
7535 For file I/O, TARGET is a file name.
7536 For process I/O, TARGET is a process name.
7537 For network I/O, TARGET is a service name or a port number
7538
7539This function looks up what specified for TARGET in,
7540`file-coding-system-alist', `process-coding-system-alist',
7541or `network-coding-system-alist' depending on OPERATION.
7542They may specify a coding system, a cons of coding systems,
7543or a function symbol to call.
7544In the last case, we call the function with one argument,
7545which is a list of all the arguments given to this function.
7546
7547usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7548 (nargs, args)
4ed46869
KH
7549 int nargs;
7550 Lisp_Object *args;
7551{
7552 Lisp_Object operation, target_idx, target, val;
7553 register Lisp_Object chain;
7554
7555 if (nargs < 2)
7556 error ("Too few arguments");
7557 operation = args[0];
7558 if (!SYMBOLP (operation)
7559 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7560 error ("Invalid first arguement");
4ed46869
KH
7561 if (nargs < 1 + XINT (target_idx))
7562 error ("Too few arguments for operation: %s",
7563 XSYMBOL (operation)->name->data);
7564 target = args[XINT (target_idx) + 1];
7565 if (!(STRINGP (target)
7566 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7567 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7568
2e34157c
RS
7569 chain = ((EQ (operation, Qinsert_file_contents)
7570 || EQ (operation, Qwrite_region))
02ba4723 7571 ? Vfile_coding_system_alist
2e34157c 7572 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7573 ? Vnetwork_coding_system_alist
7574 : Vprocess_coding_system_alist));
4ed46869
KH
7575 if (NILP (chain))
7576 return Qnil;
7577
03699b14 7578 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7579 {
f44d27ce 7580 Lisp_Object elt;
4ed46869 7581
df7492f9 7582 elt = XCAR (chain);
4ed46869
KH
7583 if (CONSP (elt)
7584 && ((STRINGP (target)
03699b14
KR
7585 && STRINGP (XCAR (elt))
7586 && fast_string_match (XCAR (elt), target) >= 0)
7587 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7588 {
03699b14 7589 val = XCDR (elt);
b19fd4c5
KH
7590 /* Here, if VAL is both a valid coding system and a valid
7591 function symbol, we return VAL as a coding system. */
02ba4723
KH
7592 if (CONSP (val))
7593 return val;
7594 if (! SYMBOLP (val))
7595 return Qnil;
7596 if (! NILP (Fcoding_system_p (val)))
7597 return Fcons (val, val);
b19fd4c5
KH
7598 if (! NILP (Ffboundp (val)))
7599 {
7600 val = call1 (val, Flist (nargs, args));
7601 if (CONSP (val))
7602 return val;
7603 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7604 return Fcons (val, val);
7605 }
02ba4723
KH
7606 return Qnil;
7607 }
4ed46869
KH
7608 }
7609 return Qnil;
7610}
7611
df7492f9 7612DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7613 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7614 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7615usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7616 (nargs, args)
7617 int nargs;
7618 Lisp_Object *args;
7619{
7620 int i, j;
7621 int changed[coding_category_max];
7622 enum coding_category priorities[coding_category_max];
7623
7624 bzero (changed, sizeof changed);
7625
7626 for (i = j = 0; i < nargs; i++)
7627 {
7628 enum coding_category category;
7629 Lisp_Object spec, attrs;
7630
7631 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7632 attrs = AREF (spec, 0);
7633 category = XINT (CODING_ATTR_CATEGORY (attrs));
7634 if (changed[category])
7635 /* Ignore this coding system because a coding system of the
7636 same category already had a higher priority. */
7637 continue;
7638 changed[category] = 1;
7639 priorities[j++] = category;
7640 if (coding_categories[category].id >= 0
7641 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7642 setup_coding_system (args[i], &coding_categories[category]);
7643 }
7644
7645 /* Now we have decided top J priorities. Reflect the order of the
7646 original priorities to the remaining priorities. */
7647
7648 for (i = j, j = 0; i < coding_category_max; i++, j++)
7649 {
7650 while (j < coding_category_max
7651 && changed[coding_priorities[j]])
7652 j++;
7653 if (j == coding_category_max)
7654 abort ();
7655 priorities[i] = coding_priorities[j];
7656 }
7657
7658 bcopy (priorities, coding_priorities, sizeof priorities);
7659 return Qnil;
7660}
7661
7662DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7663 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7664 doc: /* Return a list of coding systems ordered by their priorities.
7665HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7666 (highestp)
7667 Lisp_Object highestp;
d46c5b12
KH
7668{
7669 int i;
df7492f9 7670 Lisp_Object val;
d46c5b12 7671
df7492f9 7672 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7673 {
df7492f9
KH
7674 enum coding_category category = coding_priorities[i];
7675 int id = coding_categories[category].id;
7676 Lisp_Object attrs;
7677
7678 if (id < 0)
7679 continue;
7680 attrs = CODING_ID_ATTRS (id);
7681 if (! NILP (highestp))
7682 return CODING_ATTR_BASE_NAME (attrs);
7683 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7684 }
7685 return Fnreverse (val);
7686}
7687
f0064e1f
DL
7688static char *suffixes[] = { "-unix", "-dos", "-mac" };
7689
df7492f9
KH
7690static Lisp_Object
7691make_subsidiaries (base)
7692 Lisp_Object base;
7693{
7694 Lisp_Object subsidiaries;
df7492f9
KH
7695 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7696 char *buf = (char *) alloca (base_name_len + 6);
7697 int i;
7698
7699 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7700 subsidiaries = Fmake_vector (make_number (3), Qnil);
7701 for (i = 0; i < 3; i++)
7702 {
7703 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7704 ASET (subsidiaries, i, intern (buf));
7705 }
7706 return subsidiaries;
7707}
7708
7709
7710DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7711 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7712 doc: /* For internal use only.
7713usage: (define-coding-system-internal ...) */)
df7492f9
KH
7714 (nargs, args)
7715 int nargs;
7716 Lisp_Object *args;
7717{
7718 Lisp_Object name;
7719 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7720 Lisp_Object attrs; /* Vector of attributes. */
7721 Lisp_Object eol_type;
7722 Lisp_Object aliases;
7723 Lisp_Object coding_type, charset_list, safe_charsets;
7724 enum coding_category category;
7725 Lisp_Object tail, val;
7726 int max_charset_id = 0;
7727 int i;
7728
7729 if (nargs < coding_arg_max)
7730 goto short_args;
7731
7732 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7733
7734 name = args[coding_arg_name];
7735 CHECK_SYMBOL (name);
7736 CODING_ATTR_BASE_NAME (attrs) = name;
7737
7738 val = args[coding_arg_mnemonic];
7739 if (! STRINGP (val))
7740 CHECK_CHARACTER (val);
7741 CODING_ATTR_MNEMONIC (attrs) = val;
7742
7743 coding_type = args[coding_arg_coding_type];
7744 CHECK_SYMBOL (coding_type);
7745 CODING_ATTR_TYPE (attrs) = coding_type;
7746
7747 charset_list = args[coding_arg_charset_list];
7748 if (SYMBOLP (charset_list))
7749 {
7750 if (EQ (charset_list, Qiso_2022))
7751 {
7752 if (! EQ (coding_type, Qiso_2022))
7753 error ("Invalid charset-list");
7754 charset_list = Viso_2022_charset_list;
7755 }
7756 else if (EQ (charset_list, Qemacs_mule))
7757 {
7758 if (! EQ (coding_type, Qemacs_mule))
7759 error ("Invalid charset-list");
7760 charset_list = Vemacs_mule_charset_list;
7761 }
7762 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7763 if (max_charset_id < XFASTINT (XCAR (tail)))
7764 max_charset_id = XFASTINT (XCAR (tail));
7765 }
7766 else
7767 {
7768 charset_list = Fcopy_sequence (charset_list);
7769 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7770 {
7771 struct charset *charset;
7772
7773 val = Fcar (tail);
7774 CHECK_CHARSET_GET_CHARSET (val, charset);
7775 if (EQ (coding_type, Qiso_2022)
7776 ? CHARSET_ISO_FINAL (charset) < 0
7777 : EQ (coding_type, Qemacs_mule)
7778 ? CHARSET_EMACS_MULE_ID (charset) < 0
7779 : 0)
7780 error ("Can't handle charset `%s'",
7781 XSYMBOL (CHARSET_NAME (charset))->name->data);
7782
7783 XCAR (tail) = make_number (charset->id);
7784 if (max_charset_id < charset->id)
7785 max_charset_id = charset->id;
7786 }
7787 }
7788 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7789
7790 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7791 make_number (255));
7792 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7793 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7794 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7795
584948ac
KH
7796 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7797
df7492f9
KH
7798 val = args[coding_arg_decode_translation_table];
7799 if (! NILP (val))
7800 CHECK_CHAR_TABLE (val);
7801 CODING_ATTR_DECODE_TBL (attrs) = val;
7802
7803 val = args[coding_arg_encode_translation_table];
7804 if (! NILP (val))
7805 CHECK_CHAR_TABLE (val);
7806 CODING_ATTR_ENCODE_TBL (attrs) = val;
7807
7808 val = args[coding_arg_post_read_conversion];
7809 CHECK_SYMBOL (val);
7810 CODING_ATTR_POST_READ (attrs) = val;
7811
7812 val = args[coding_arg_pre_write_conversion];
7813 CHECK_SYMBOL (val);
7814 CODING_ATTR_PRE_WRITE (attrs) = val;
7815
7816 val = args[coding_arg_default_char];
7817 if (NILP (val))
7818 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7819 else
7820 {
7821 CHECK_CHARACTER (val);
7822 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7823 }
7824
7825 val = args[coding_arg_plist];
7826 CHECK_LIST (val);
7827 CODING_ATTR_PLIST (attrs) = val;
7828
7829 if (EQ (coding_type, Qcharset))
7830 {
c7c66a95
KH
7831 /* Generate a lisp vector of 256 elements. Each element is nil,
7832 integer, or a list of charset IDs.
7833
7834 If Nth element is nil, the byte code N is invalid in this
7835 coding system.
7836
7837 If Nth element is a number NUM, N is the first byte of a
7838 charset whose ID is NUM.
7839
7840 If Nth element is a list of charset IDs, N is the first byte
7841 of one of them. The list is sorted by dimensions of the
7842 charsets. A charset of smaller dimension comes firtst.
7843 */
df7492f9
KH
7844 val = Fmake_vector (make_number (256), Qnil);
7845
7846 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7847 {
c7c66a95
KH
7848 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7849 int dim = CHARSET_DIMENSION (charset);
7850 int idx = (dim - 1) * 4;
7851
584948ac
KH
7852 if (CHARSET_ASCII_COMPATIBLE_P (charset))
7853 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7854
15d143f7
KH
7855 for (i = charset->code_space[idx];
7856 i <= charset->code_space[idx + 1]; i++)
7857 {
c7c66a95
KH
7858 Lisp_Object tmp, tmp2;
7859 int dim2;
7860
7861 tmp = AREF (val, i);
7862 if (NILP (tmp))
7863 tmp = XCAR (tail);
7864 else if (NUMBERP (tmp))
7865 {
7866 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7867 if (dim < dim2)
c7c66a95 7868 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7869 else
7870 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7871 }
15d143f7 7872 else
c7c66a95
KH
7873 {
7874 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7875 {
7876 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7877 if (dim < dim2)
7878 break;
7879 }
7880 if (NILP (tmp2))
7881 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7882 else
7883 {
7884 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7885 XSETCAR (tmp2, XCAR (tail));
7886 }
7887 }
7888 ASET (val, i, tmp);
15d143f7 7889 }
df7492f9
KH
7890 }
7891 ASET (attrs, coding_attr_charset_valids, val);
7892 category = coding_category_charset;
7893 }
7894 else if (EQ (coding_type, Qccl))
7895 {
7896 Lisp_Object valids;
7897
7898 if (nargs < coding_arg_ccl_max)
7899 goto short_args;
7900
7901 val = args[coding_arg_ccl_decoder];
7902 CHECK_CCL_PROGRAM (val);
7903 if (VECTORP (val))
7904 val = Fcopy_sequence (val);
7905 ASET (attrs, coding_attr_ccl_decoder, val);
7906
7907 val = args[coding_arg_ccl_encoder];
7908 CHECK_CCL_PROGRAM (val);
7909 if (VECTORP (val))
7910 val = Fcopy_sequence (val);
7911 ASET (attrs, coding_attr_ccl_encoder, val);
7912
7913 val = args[coding_arg_ccl_valids];
7914 valids = Fmake_string (make_number (256), make_number (0));
7915 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7916 {
8dcbea82
KH
7917 int from, to;
7918
df7492f9
KH
7919 val = Fcar (tail);
7920 if (INTEGERP (val))
8dcbea82
KH
7921 {
7922 from = to = XINT (val);
7923 if (from < 0 || from > 255)
7924 args_out_of_range_3 (val, make_number (0), make_number (255));
7925 }
df7492f9
KH
7926 else
7927 {
df7492f9
KH
7928 CHECK_CONS (val);
7929 CHECK_NUMBER (XCAR (val));
7930 CHECK_NUMBER (XCDR (val));
7931 from = XINT (XCAR (val));
8dcbea82
KH
7932 if (from < 0 || from > 255)
7933 args_out_of_range_3 (XCAR (val),
7934 make_number (0), make_number (255));
df7492f9 7935 to = XINT (XCDR (val));
8dcbea82
KH
7936 if (to < from || to > 255)
7937 args_out_of_range_3 (XCDR (val),
7938 XCAR (val), make_number (255));
df7492f9 7939 }
8dcbea82
KH
7940 for (i = from; i <= to; i++)
7941 XSTRING (valids)->data[i] = 1;
df7492f9
KH
7942 }
7943 ASET (attrs, coding_attr_ccl_valids, valids);
7944
7945 category = coding_category_ccl;
7946 }
7947 else if (EQ (coding_type, Qutf_16))
7948 {
7949 Lisp_Object bom, endian;
7950
584948ac
KH
7951 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
7952
df7492f9
KH
7953 if (nargs < coding_arg_utf16_max)
7954 goto short_args;
7955
7956 bom = args[coding_arg_utf16_bom];
7957 if (! NILP (bom) && ! EQ (bom, Qt))
7958 {
7959 CHECK_CONS (bom);
7960 CHECK_CODING_SYSTEM (XCAR (bom));
7961 CHECK_CODING_SYSTEM (XCDR (bom));
7962 }
7963 ASET (attrs, coding_attr_utf_16_bom, bom);
7964
7965 endian = args[coding_arg_utf16_endian];
7966 ASET (attrs, coding_attr_utf_16_endian, endian);
7967
7968 category = (CONSP (bom)
7969 ? coding_category_utf_16_auto
7970 : NILP (bom)
7971 ? (NILP (endian)
7972 ? coding_category_utf_16_be_nosig
7973 : coding_category_utf_16_le_nosig)
7974 : (NILP (endian)
7975 ? coding_category_utf_16_be
7976 : coding_category_utf_16_le));
7977 }
7978 else if (EQ (coding_type, Qiso_2022))
7979 {
7980 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7981 int i, id;
1397dc18 7982
df7492f9
KH
7983 if (nargs < coding_arg_iso2022_max)
7984 goto short_args;
7985
7986 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7987 CHECK_VECTOR (initial);
7988 for (i = 0; i < 4; i++)
7989 {
7990 val = Faref (initial, make_number (i));
7991 if (! NILP (val))
7992 {
584948ac
KH
7993 struct charset *charset;
7994
7995 CHECK_CHARSET_GET_CHARSET (val, charset);
7996 ASET (initial, i, make_number (CHARSET_ID (charset)));
7997 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
7998 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
7999 }
8000 else
8001 ASET (initial, i, make_number (-1));
8002 }
8003
8004 reg_usage = args[coding_arg_iso2022_reg_usage];
8005 CHECK_CONS (reg_usage);
8006 CHECK_NATNUM (XCAR (reg_usage));
8007 CHECK_NATNUM (XCDR (reg_usage));
8008
8009 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8010 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8011 {
df7492f9
KH
8012 int id;
8013
8014 val = Fcar (tail);
8015 CHECK_CONS (val);
8016 CHECK_CHARSET_GET_ID (XCAR (val), id);
8017 CHECK_NATNUM (XCDR (val));
8018 if (XINT (XCDR (val)) >= 4)
8019 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8020 XCAR (val) = make_number (id);
1397dc18 8021 }
df7492f9
KH
8022
8023 flags = args[coding_arg_iso2022_flags];
8024 CHECK_NATNUM (flags);
8025 i = XINT (flags);
8026 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8027 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8028
8029 ASET (attrs, coding_attr_iso_initial, initial);
8030 ASET (attrs, coding_attr_iso_usage, reg_usage);
8031 ASET (attrs, coding_attr_iso_request, request);
8032 ASET (attrs, coding_attr_iso_flags, flags);
8033 setup_iso_safe_charsets (attrs);
8034
8035 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8036 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8037 | CODING_ISO_FLAG_SINGLE_SHIFT))
8038 ? coding_category_iso_7_else
8039 : EQ (args[coding_arg_charset_list], Qiso_2022)
8040 ? coding_category_iso_7
8041 : coding_category_iso_7_tight);
8042 else
8043 {
8044 int id = XINT (AREF (initial, 1));
8045
c6fb6e98 8046 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8047 || EQ (args[coding_arg_charset_list], Qiso_2022)
8048 || id < 0)
8049 ? coding_category_iso_8_else
8050 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8051 ? coding_category_iso_8_1
8052 : coding_category_iso_8_2);
8053 }
0ce7886f
KH
8054 if (category != coding_category_iso_8_1
8055 && category != coding_category_iso_8_2)
8056 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8057 }
8058 else if (EQ (coding_type, Qemacs_mule))
8059 {
8060 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8061 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8062 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8063 category = coding_category_emacs_mule;
8064 }
8065 else if (EQ (coding_type, Qshift_jis))
8066 {
8067
8068 struct charset *charset;
8069
8070 if (XINT (Flength (charset_list)) != 3)
8071 error ("There should be just three charsets");
8072
8073 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8074 if (CHARSET_DIMENSION (charset) != 1)
8075 error ("Dimension of charset %s is not one",
8076 XSYMBOL (CHARSET_NAME (charset))->name->data);
584948ac
KH
8077 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8078 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8079
8080 charset_list = XCDR (charset_list);
8081 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8082 if (CHARSET_DIMENSION (charset) != 1)
8083 error ("Dimension of charset %s is not one",
8084 XSYMBOL (CHARSET_NAME (charset))->name->data);
8085
8086 charset_list = XCDR (charset_list);
8087 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8088 if (CHARSET_DIMENSION (charset) != 2)
8089 error ("Dimension of charset %s is not two",
8090 XSYMBOL (CHARSET_NAME (charset))->name->data);
8091
8092 category = coding_category_sjis;
8093 Vsjis_coding_system = name;
8094 }
8095 else if (EQ (coding_type, Qbig5))
8096 {
8097 struct charset *charset;
8098
8099 if (XINT (Flength (charset_list)) != 2)
8100 error ("There should be just two charsets");
8101
8102 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8103 if (CHARSET_DIMENSION (charset) != 1)
8104 error ("Dimension of charset %s is not one",
8105 XSYMBOL (CHARSET_NAME (charset))->name->data);
584948ac
KH
8106 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8107 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8108
8109 charset_list = XCDR (charset_list);
8110 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8111 if (CHARSET_DIMENSION (charset) != 2)
8112 error ("Dimension of charset %s is not two",
8113 XSYMBOL (CHARSET_NAME (charset))->name->data);
8114
8115 category = coding_category_big5;
8116 Vbig5_coding_system = name;
8117 }
8118 else if (EQ (coding_type, Qraw_text))
584948ac
KH
8119 {
8120 category = coding_category_raw_text;
8121 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8122 }
df7492f9 8123 else if (EQ (coding_type, Qutf_8))
584948ac
KH
8124 {
8125 category = coding_category_utf_8;
8126 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8127 }
df7492f9
KH
8128 else if (EQ (coding_type, Qundecided))
8129 category = coding_category_undecided;
8130 else
8131 error ("Invalid coding system type: %s",
8132 XSYMBOL (coding_type)->name->data);
8133
8134 CODING_ATTR_CATEGORY (attrs) = make_number (category);
8135
8136 eol_type = args[coding_arg_eol_type];
8137 if (! NILP (eol_type)
8138 && ! EQ (eol_type, Qunix)
8139 && ! EQ (eol_type, Qdos)
8140 && ! EQ (eol_type, Qmac))
8141 error ("Invalid eol-type");
8142
8143 aliases = Fcons (name, Qnil);
8144
8145 if (NILP (eol_type))
8146 {
8147 eol_type = make_subsidiaries (name);
8148 for (i = 0; i < 3; i++)
1397dc18 8149 {
df7492f9
KH
8150 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8151
8152 this_name = AREF (eol_type, i);
8153 this_aliases = Fcons (this_name, Qnil);
8154 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8155 this_spec = Fmake_vector (make_number (3), attrs);
8156 ASET (this_spec, 1, this_aliases);
8157 ASET (this_spec, 2, this_eol_type);
8158 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8159 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8160 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8161 Vcoding_system_alist);
1397dc18 8162 }
d46c5b12 8163 }
1397dc18 8164
df7492f9
KH
8165 spec_vec = Fmake_vector (make_number (3), attrs);
8166 ASET (spec_vec, 1, aliases);
8167 ASET (spec_vec, 2, eol_type);
8168
8169 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8170 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8171 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8172 Vcoding_system_alist);
8173
8174 {
8175 int id = coding_categories[category].id;
8176
8177 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8178 setup_coding_system (name, &coding_categories[category]);
8179 }
8180
d46c5b12 8181 return Qnil;
df7492f9
KH
8182
8183 short_args:
8184 return Fsignal (Qwrong_number_of_arguments,
8185 Fcons (intern ("define-coding-system-internal"),
8186 make_number (nargs)));
d46c5b12
KH
8187}
8188
da7db224
DL
8189/* Fixme: should this record the alias relationships for
8190 diagnostics? */
df7492f9
KH
8191DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8192 Sdefine_coding_system_alias, 2, 2, 0,
8193 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8194 (alias, coding_system)
8195 Lisp_Object alias, coding_system;
66cfb530 8196{
df7492f9 8197 Lisp_Object spec, aliases, eol_type;
84d60297 8198
df7492f9
KH
8199 CHECK_SYMBOL (alias);
8200 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8201 aliases = AREF (spec, 1);
8202 while (!NILP (XCDR (aliases)))
8203 aliases = XCDR (aliases);
8204 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 8205
df7492f9
KH
8206 eol_type = AREF (spec, 2);
8207 if (VECTORP (eol_type))
66cfb530 8208 {
df7492f9
KH
8209 Lisp_Object subsidiaries;
8210 int i;
8211
8212 subsidiaries = make_subsidiaries (alias);
8213 for (i = 0; i < 3; i++)
8214 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8215 AREF (eol_type, i));
8216
8217 ASET (spec, 2, subsidiaries);
66cfb530 8218 }
df7492f9
KH
8219
8220 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
8221 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8222 Vcoding_system_alist);
66cfb530
KH
8223
8224 return Qnil;
8225}
8226
df7492f9
KH
8227DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8228 1, 1, 0,
8229 doc: /* Return the base of CODING-SYSTEM.
da7db224 8230Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
8231 (coding_system)
8232 Lisp_Object coding_system;
8233{
8234 Lisp_Object spec, attrs;
8235
8236 if (NILP (coding_system))
8237 return (Qno_conversion);
8238 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8239 attrs = AREF (spec, 0);
8240 return CODING_ATTR_BASE_NAME (attrs);
8241}
8242
8243DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8244 1, 1, 0,
8245 doc: "Return the property list of CODING-SYSTEM.")
8246 (coding_system)
8247 Lisp_Object coding_system;
8248{
8249 Lisp_Object spec, attrs;
8250
8251 if (NILP (coding_system))
8252 coding_system = Qno_conversion;
8253 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8254 attrs = AREF (spec, 0);
8255 return CODING_ATTR_PLIST (attrs);
8256}
8257
8258
8259DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8260 1, 1, 0,
da7db224 8261 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
8262 (coding_system)
8263 Lisp_Object coding_system;
8264{
8265 Lisp_Object spec;
8266
8267 if (NILP (coding_system))
8268 coding_system = Qno_conversion;
8269 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 8270 return AREF (spec, 1);
df7492f9
KH
8271}
8272
8273DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8274 Scoding_system_eol_type, 1, 1, 0,
8275 doc: /* Return eol-type of CODING-SYSTEM.
8276An eol-type is integer 0, 1, 2, or a vector of coding systems.
8277
8278Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8279and CR respectively.
8280
8281A vector value indicates that a format of end-of-line should be
8282detected automatically. Nth element of the vector is the subsidiary
8283coding system whose eol-type is N. */)
8284 (coding_system)
8285 Lisp_Object coding_system;
8286{
8287 Lisp_Object spec, eol_type;
8288 int n;
8289
8290 if (NILP (coding_system))
8291 coding_system = Qno_conversion;
8292 if (! CODING_SYSTEM_P (coding_system))
8293 return Qnil;
8294 spec = CODING_SYSTEM_SPEC (coding_system);
8295 eol_type = AREF (spec, 2);
8296 if (VECTORP (eol_type))
8297 return Fcopy_sequence (eol_type);
8298 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8299 return make_number (n);
8300}
8301
4ed46869
KH
8302#endif /* emacs */
8303
8304\f
1397dc18 8305/*** 12. Post-amble ***/
4ed46869 8306
dfcf069d 8307void
4ed46869
KH
8308init_coding_once ()
8309{
8310 int i;
8311
df7492f9
KH
8312 for (i = 0; i < coding_category_max; i++)
8313 {
8314 coding_categories[i].id = -1;
8315 coding_priorities[i] = i;
8316 }
4ed46869
KH
8317
8318 /* ISO2022 specific initialize routine. */
8319 for (i = 0; i < 0x20; i++)
b73bfc1c 8320 iso_code_class[i] = ISO_control_0;
4ed46869
KH
8321 for (i = 0x21; i < 0x7F; i++)
8322 iso_code_class[i] = ISO_graphic_plane_0;
8323 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 8324 iso_code_class[i] = ISO_control_1;
4ed46869
KH
8325 for (i = 0xA1; i < 0xFF; i++)
8326 iso_code_class[i] = ISO_graphic_plane_1;
8327 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8328 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8329 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8330 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8331 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8332 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8333 iso_code_class[ISO_CODE_ESC] = ISO_escape;
8334 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8335 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8336 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8337
b843d1ae 8338 inhibit_pre_post_conversion = 0;
df7492f9
KH
8339
8340 for (i = 0; i < 256; i++)
8341 {
8342 emacs_mule_bytes[i] = 1;
8343 }
7c78e542
KH
8344 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8345 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8346 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8347 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
8348}
8349
8350#ifdef emacs
8351
dfcf069d 8352void
e0e989f6
KH
8353syms_of_coding ()
8354{
df7492f9
KH
8355 staticpro (&Vcoding_system_hash_table);
8356 Vcoding_system_hash_table = Fmakehash (Qeq);
8357
8358 staticpro (&Vsjis_coding_system);
8359 Vsjis_coding_system = Qnil;
8360
8361 staticpro (&Vbig5_coding_system);
8362 Vbig5_coding_system = Qnil;
8363
8364 staticpro (&Vcode_conversion_work_buf_list);
8365 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 8366
df7492f9
KH
8367 staticpro (&Vcode_conversion_reused_work_buf);
8368 Vcode_conversion_reused_work_buf = Qnil;
8369
8370 DEFSYM (Qcharset, "charset");
8371 DEFSYM (Qtarget_idx, "target-idx");
8372 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
8373 Fset (Qcoding_system_history, Qnil);
8374
9ce27fde 8375 /* Target FILENAME is the first argument. */
e0e989f6 8376 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 8377 /* Target FILENAME is the third argument. */
e0e989f6
KH
8378 Fput (Qwrite_region, Qtarget_idx, make_number (2));
8379
df7492f9 8380 DEFSYM (Qcall_process, "call-process");
9ce27fde 8381 /* Target PROGRAM is the first argument. */
e0e989f6
KH
8382 Fput (Qcall_process, Qtarget_idx, make_number (0));
8383
df7492f9 8384 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 8385 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8386 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8387
df7492f9 8388 DEFSYM (Qstart_process, "start-process");
9ce27fde 8389 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8390 Fput (Qstart_process, Qtarget_idx, make_number (2));
8391
df7492f9 8392 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 8393 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
8394 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8395
df7492f9
KH
8396 DEFSYM (Qcoding_system, "coding-system");
8397 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 8398
df7492f9
KH
8399 DEFSYM (Qeol_type, "eol-type");
8400 DEFSYM (Qunix, "unix");
8401 DEFSYM (Qdos, "dos");
4ed46869 8402
df7492f9
KH
8403 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8404 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8405 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8406 DEFSYM (Qdefault_char, "default-char");
8407 DEFSYM (Qundecided, "undecided");
8408 DEFSYM (Qno_conversion, "no-conversion");
8409 DEFSYM (Qraw_text, "raw-text");
4ed46869 8410
df7492f9 8411 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 8412
df7492f9 8413 DEFSYM (Qutf_8, "utf-8");
27901516 8414
df7492f9
KH
8415 DEFSYM (Qutf_16, "utf-16");
8416 DEFSYM (Qutf_16_be, "utf-16-be");
8417 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
8418 DEFSYM (Qutf_16_le, "utf-16-l3");
8419 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
8420 DEFSYM (Qsignature, "signature");
8421 DEFSYM (Qendian, "endian");
8422 DEFSYM (Qbig, "big");
8423 DEFSYM (Qlittle, "little");
27901516 8424
df7492f9
KH
8425 DEFSYM (Qshift_jis, "shift-jis");
8426 DEFSYM (Qbig5, "big5");
4ed46869 8427
df7492f9 8428 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 8429
df7492f9 8430 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
8431 Fput (Qcoding_system_error, Qerror_conditions,
8432 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8433 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 8434 build_string ("Invalid coding system"));
4ed46869 8435
df7492f9
KH
8436 /* Intern this now in case it isn't already done.
8437 Setting this variable twice is harmless.
8438 But don't staticpro it here--that is done in alloc.c. */
8439 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 8440
df7492f9 8441 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8442 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8443 DEFSYM (Qtranslation_table_id, "translation-table-id");
8444 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8445 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 8446
df7492f9 8447 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 8448
df7492f9 8449 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 8450
df7492f9
KH
8451 Vcoding_category_table
8452 = Fmake_vector (make_number (coding_category_max), Qnil);
8453 staticpro (&Vcoding_category_table);
8454 /* Followings are target of code detection. */
8455 ASET (Vcoding_category_table, coding_category_iso_7,
8456 intern ("coding-category-iso-7"));
8457 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8458 intern ("coding-category-iso-7-tight"));
8459 ASET (Vcoding_category_table, coding_category_iso_8_1,
8460 intern ("coding-category-iso-8-1"));
8461 ASET (Vcoding_category_table, coding_category_iso_8_2,
8462 intern ("coding-category-iso-8-2"));
8463 ASET (Vcoding_category_table, coding_category_iso_7_else,
8464 intern ("coding-category-iso-7-else"));
8465 ASET (Vcoding_category_table, coding_category_iso_8_else,
8466 intern ("coding-category-iso-8-else"));
8467 ASET (Vcoding_category_table, coding_category_utf_8,
8468 intern ("coding-category-utf-8"));
8469 ASET (Vcoding_category_table, coding_category_utf_16_be,
8470 intern ("coding-category-utf-16-be"));
8471 ASET (Vcoding_category_table, coding_category_utf_16_le,
8472 intern ("coding-category-utf-16-le"));
8473 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8474 intern ("coding-category-utf-16-be-nosig"));
8475 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8476 intern ("coding-category-utf-16-le-nosig"));
8477 ASET (Vcoding_category_table, coding_category_charset,
8478 intern ("coding-category-charset"));
8479 ASET (Vcoding_category_table, coding_category_sjis,
8480 intern ("coding-category-sjis"));
8481 ASET (Vcoding_category_table, coding_category_big5,
8482 intern ("coding-category-big5"));
8483 ASET (Vcoding_category_table, coding_category_ccl,
8484 intern ("coding-category-ccl"));
8485 ASET (Vcoding_category_table, coding_category_emacs_mule,
8486 intern ("coding-category-emacs-mule"));
8487 /* The following are NOT targets of code detection. */
8488 ASET (Vcoding_category_table, coding_category_raw_text,
8489 intern ("coding-category-raw-text"));
8490 ASET (Vcoding_category_table, coding_category_undecided,
8491 intern ("coding-category-undecided"));
70c22245 8492
4ed46869
KH
8493 defsubr (&Scoding_system_p);
8494 defsubr (&Sread_coding_system);
8495 defsubr (&Sread_non_nil_coding_system);
8496 defsubr (&Scheck_coding_system);
8497 defsubr (&Sdetect_coding_region);
d46c5b12 8498 defsubr (&Sdetect_coding_string);
05e6f5dc 8499 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8500 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8501 defsubr (&Sdecode_coding_region);
8502 defsubr (&Sencode_coding_region);
8503 defsubr (&Sdecode_coding_string);
8504 defsubr (&Sencode_coding_string);
8505 defsubr (&Sdecode_sjis_char);
8506 defsubr (&Sencode_sjis_char);
8507 defsubr (&Sdecode_big5_char);
8508 defsubr (&Sencode_big5_char);
1ba9e4ab 8509 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8510 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8511 defsubr (&Sterminal_coding_system);
1ba9e4ab 8512 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8513 defsubr (&Skeyboard_coding_system);
a5d301df 8514 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8515 defsubr (&Sset_coding_system_priority);
8516 defsubr (&Sdefine_coding_system_internal);
8517 defsubr (&Sdefine_coding_system_alias);
8518 defsubr (&Scoding_system_base);
8519 defsubr (&Scoding_system_plist);
8520 defsubr (&Scoding_system_aliases);
8521 defsubr (&Scoding_system_eol_type);
8522 defsubr (&Scoding_system_priority_list);
4ed46869 8523
4608c386 8524 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8525 doc: /* List of coding systems.
8526
8527Do not alter the value of this variable manually. This variable should be
df7492f9 8528updated by the functions `define-coding-system' and
48b0f3ae 8529`define-coding-system-alias'. */);
4608c386
KH
8530 Vcoding_system_list = Qnil;
8531
8532 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8533 doc: /* Alist of coding system names.
8534Each element is a one-element list containing a coding system name.
8535This variable is given to `completing-read' as TABLE argument.
8536
8537Do not alter the value of this variable manually. This variable should be
8538updated by the functions `make-coding-system' and
8539`define-coding-system-alias'. */);
4608c386
KH
8540 Vcoding_system_alist = Qnil;
8541
4ed46869 8542 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8543 doc: /* List of coding-categories (symbols) ordered by priority.
8544
8545On detecting a coding system, Emacs tries code detection algorithms
8546associated with each coding-category one by one in this order. When
8547one algorithm agrees with a byte sequence of source text, the coding
8548system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8549 {
8550 int i;
8551
8552 Vcoding_category_list = Qnil;
df7492f9 8553 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8554 Vcoding_category_list
d46c5b12
KH
8555 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8556 Vcoding_category_list);
4ed46869
KH
8557 }
8558
8559 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8560 doc: /* Specify the coding system for read operations.
8561It is useful to bind this variable with `let', but do not set it globally.
8562If the value is a coding system, it is used for decoding on read operation.
8563If not, an appropriate element is used from one of the coding system alists:
8564There are three such tables, `file-coding-system-alist',
8565`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8566 Vcoding_system_for_read = Qnil;
8567
8568 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8569 doc: /* Specify the coding system for write operations.
8570Programs bind this variable with `let', but you should not set it globally.
8571If the value is a coding system, it is used for encoding of output,
8572when writing it to a file and when sending it to a file or subprocess.
8573
8574If this does not specify a coding system, an appropriate element
8575is used from one of the coding system alists:
8576There are three such tables, `file-coding-system-alist',
8577`process-coding-system-alist', and `network-coding-system-alist'.
8578For output to files, if the above procedure does not specify a coding system,
8579the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8580 Vcoding_system_for_write = Qnil;
8581
8582 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8583 doc: /*
8584Coding system used in the latest file or process I/O. */);
4ed46869
KH
8585 Vlast_coding_system_used = Qnil;
8586
9ce27fde 8587 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8588 doc: /*
8589*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8590See info node `Coding Systems' and info node `Text and Binary' concerning
8591such conversion. */);
9ce27fde
KH
8592 inhibit_eol_conversion = 0;
8593
ed29121d 8594 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8595 doc: /*
8596Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8597Bind it to t if the process output is to be treated as if it were a file
8598read from some filesystem. */);
ed29121d
EZ
8599 inherit_process_coding_system = 0;
8600
02ba4723 8601 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8602 doc: /*
8603Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8604The format is ((PATTERN . VAL) ...),
8605where PATTERN is a regular expression matching a file name,
8606VAL is a coding system, a cons of coding systems, or a function symbol.
8607If VAL is a coding system, it is used for both decoding and encoding
8608the file contents.
8609If VAL is a cons of coding systems, the car part is used for decoding,
8610and the cdr part is used for encoding.
8611If VAL is a function symbol, the function must return a coding system
0192762c
DL
8612or a cons of coding systems which are used as above. The function gets
8613the arguments with which `find-operation-coding-system' was called.
48b0f3ae
PJ
8614
8615See also the function `find-operation-coding-system'
8616and the variable `auto-coding-alist'. */);
02ba4723
KH
8617 Vfile_coding_system_alist = Qnil;
8618
8619 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8620 doc: /*
8621Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8622The format is ((PATTERN . VAL) ...),
8623where PATTERN is a regular expression matching a program name,
8624VAL is a coding system, a cons of coding systems, or a function symbol.
8625If VAL is a coding system, it is used for both decoding what is received
8626from the program and encoding what is sent to the program.
8627If VAL is a cons of coding systems, the car part is used for decoding,
8628and the cdr part is used for encoding.
8629If VAL is a function symbol, the function must return a coding system
8630or a cons of coding systems which are used as above.
8631
8632See also the function `find-operation-coding-system'. */);
02ba4723
KH
8633 Vprocess_coding_system_alist = Qnil;
8634
8635 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8636 doc: /*
8637Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8638The format is ((PATTERN . VAL) ...),
8639where PATTERN is a regular expression matching a network service name
8640or is a port number to connect to,
8641VAL is a coding system, a cons of coding systems, or a function symbol.
8642If VAL is a coding system, it is used for both decoding what is received
8643from the network stream and encoding what is sent to the network stream.
8644If VAL is a cons of coding systems, the car part is used for decoding,
8645and the cdr part is used for encoding.
8646If VAL is a function symbol, the function must return a coding system
8647or a cons of coding systems which are used as above.
8648
8649See also the function `find-operation-coding-system'. */);
02ba4723 8650 Vnetwork_coding_system_alist = Qnil;
4ed46869 8651
68c45bf0 8652 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8653 doc: /* Coding system to use with system messages.
8654Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8655 Vlocale_coding_system = Qnil;
8656
005f0d35 8657 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8658 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8659 doc: /*
8660*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8661 eol_mnemonic_unix = build_string (":");
4ed46869 8662
7722baf9 8663 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8664 doc: /*
8665*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8666 eol_mnemonic_dos = build_string ("\\");
4ed46869 8667
7722baf9 8668 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8669 doc: /*
8670*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8671 eol_mnemonic_mac = build_string ("/");
4ed46869 8672
7722baf9 8673 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8674 doc: /*
8675*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8676 eol_mnemonic_undecided = build_string (":");
4ed46869 8677
84fbb8a0 8678 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8679 doc: /*
8680*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8681 Venable_character_translation = Qt;
bdd9fb48 8682
f967223b 8683 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8684 &Vstandard_translation_table_for_decode,
8685 doc: /* Table for translating characters while decoding. */);
f967223b 8686 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8687
f967223b 8688 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8689 &Vstandard_translation_table_for_encode,
8690 doc: /* Table for translating characters while encoding. */);
f967223b 8691 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8692
df7492f9 8693 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8694 doc: /* Alist of charsets vs revision numbers.
8695While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8696designate it with the escape sequence identifying revision (cdr part
8697of the element). */);
8698 Vcharset_revision_table = Qnil;
02ba4723
KH
8699
8700 DEFVAR_LISP ("default-process-coding-system",
8701 &Vdefault_process_coding_system,
48b0f3ae
PJ
8702 doc: /* Cons of coding systems used for process I/O by default.
8703The car part is used for decoding a process output,
8704the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8705 Vdefault_process_coding_system = Qnil;
c4825358 8706
3f003981 8707 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8708 doc: /*
8709Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8710This is a vector of length 256.
8711If Nth element is non-nil, the existence of code N in a file
8712\(or output of subprocess) doesn't prevent it from being detected as
8713a coding system of ISO 2022 variant which has a flag
8714`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8715or reading output of a subprocess.
8716Only the 128th through 159th elements have a meaning. */);
3f003981 8717 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8718
8719 DEFVAR_LISP ("select-safe-coding-system-function",
8720 &Vselect_safe_coding_system_function,
df7492f9
KH
8721 doc: /*
8722Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8723
8724If set, this function is called to force a user to select a proper
8725coding system which can encode the text in the case that a default
8726coding system used in each operation can't encode the text.
8727
8728The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8729 Vselect_safe_coding_system_function = Qnil;
8730
22ab2303 8731 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8732 &inhibit_iso_escape_detection,
df7492f9
KH
8733 doc: /*
8734If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8735
8736By default, on reading a file, Emacs tries to detect how the text is
8737encoded. This code detection is sensitive to escape sequences. If
8738the sequence is valid as ISO2022, the code is determined as one of
8739the ISO2022 encodings, and the file is decoded by the corresponding
8740coding system (e.g. `iso-2022-7bit').
8741
8742However, there may be a case that you want to read escape sequences in
8743a file as is. In such a case, you can set this variable to non-nil.
8744Then, as the code detection ignores any escape sequences, no file is
8745detected as encoded in some ISO2022 encoding. The result is that all
8746escape sequences become visible in a buffer.
8747
8748The default value is nil, and it is strongly recommended not to change
8749it. That is because many Emacs Lisp source files that contain
8750non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8751in Emacs's distribution, and they won't be decoded correctly on
8752reading if you suppress escape sequence detection.
8753
8754The other way to read escape sequences in a file without decoding is
8755to explicitly specify some coding system that doesn't use ISO2022's
8756escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8757 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8758
8759 {
8760 Lisp_Object args[coding_arg_max];
8761 Lisp_Object plist[14];
8762 int i;
8763
8764 for (i = 0; i < coding_arg_max; i++)
8765 args[i] = Qnil;
8766
8767 plist[0] = intern (":name");
8768 plist[1] = args[coding_arg_name] = Qno_conversion;
8769 plist[2] = intern (":mnemonic");
8770 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8771 plist[4] = intern (":coding-type");
8772 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8773 plist[6] = intern (":ascii-compatible-p");
8774 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8775 plist[8] = intern (":default-char");
8776 plist[9] = args[coding_arg_default_char] = make_number (0);
8777 plist[10] = intern (":docstring");
8778 plist[11] = build_string ("Do no conversion.\n\
8779\n\
8780When you visit a file with this coding, the file is read into a\n\
8781unibyte buffer as is, thus each byte of a file is treated as a\n\
8782character.");
8783 plist[12] = intern (":eol-type");
8784 plist[13] = args[coding_arg_eol_type] = Qunix;
8785 args[coding_arg_plist] = Flist (14, plist);
8786 Fdefine_coding_system_internal (coding_arg_max, args);
8787 }
8788
8789 setup_coding_system (Qno_conversion, &keyboard_coding);
8790 setup_coding_system (Qno_conversion, &terminal_coding);
8791 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8792}
8793
68c45bf0
PE
8794char *
8795emacs_strerror (error_number)
8796 int error_number;
8797{
8798 char *str;
8799
ca9c0567 8800 synchronize_system_messages_locale ();
68c45bf0
PE
8801 str = strerror (error_number);
8802
8803 if (! NILP (Vlocale_coding_system))
8804 {
8805 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8806 Vlocale_coding_system,
8807 0);
8808 str = (char *) XSTRING (dec)->data;
8809 }
8810
8811 return str;
8812}
8813
4ed46869 8814#endif /* emacs */