(ctext-no-compositions): Remove garbage arguments.
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
8f924df7 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
8f924df7 5 Copyright (C) 2003
df7492f9
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 56 coding system.
4ed46869 57
e19c3639
KH
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
4ed46869 63
e19c3639 64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
4ed46869 66
df7492f9
KH
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
4ed46869
KH
82
83 The most famous coding system for multiple character sets. X's
df7492f9
KH
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
4ed46869 87
df7492f9 88 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
cfb43547 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
4ed46869
KH
138
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
4ed46869 154#if 0
df7492f9 155static int
ff0dacd7 156detect_coding_XXX (coding, detect_info)
df7492f9 157 struct coding_system *coding;
ff0dacd7 158 struct coding_detection_info *detect_info;
4ed46869 159{
df7492f9
KH
160 unsigned char *src = coding->source;
161 unsigned char *src_end = coding->source + coding->src_bytes;
162 int multibytep = coding->src_multibyte;
ff0dacd7 163 int consumed_chars = 0;
df7492f9
KH
164 int found = 0;
165 ...;
166
167 while (1)
168 {
169 /* Get one byte from the source. If the souce is exausted, jump
170 to no_more_source:. */
171 ONE_MORE_BYTE (c);
ff0dacd7
KH
172
173 if (! __C_conforms_to_XXX___ (c))
174 break;
175 if (! __C_strongly_suggests_XXX__ (c))
176 found = CATEGORY_MASK_XXX;
df7492f9 177 }
ff0dacd7
KH
178 /* The byte sequence is invalid for XXX. */
179 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 180 return 0;
ff0dacd7 181
df7492f9 182 no_more_source:
ff0dacd7
KH
183 /* The source exausted successfully. */
184 detect_info->found |= found;
df7492f9 185 return 1;
4ed46869
KH
186}
187#endif
188
189/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
df7492f9
KH
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
d46c5b12 195
df7492f9
KH
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
d46c5b12 200
df7492f9 201 Below is the template of these functions. */
d46c5b12 202
4ed46869 203#if 0
b73bfc1c 204static void
df7492f9 205decode_coding_XXXX (coding)
4ed46869 206 struct coding_system *coding;
4ed46869 207{
df7492f9
KH
208 unsigned char *src = coding->source + coding->consumed;
209 unsigned char *src_end = coding->source + coding->src_bytes;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base;
214 /* A buffer to produce decoded characters. */
215 int *charbuf = coding->charbuf;
216 int *charbuf_end = charbuf + coding->charbuf_size;
217 int multibytep = coding->src_multibyte;
218
219 while (1)
220 {
221 src_base = src;
222 if (charbuf < charbuf_end)
223 /* No more room to produce a decoded character. */
224 break;
225 ONE_MORE_BYTE (c);
226 /* Decode it. */
227 }
228
229 no_more_source:
230 if (src_base < src_end
231 && coding->mode & CODING_MODE_LAST_BLOCK)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base < src_end && charbuf < charbuf_end)
235 *charbuf++ = *src_base++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding->consumed = coding->consumed_char = src_base - coding->source;
239 /* Remember how many characters we produced. */
240 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
241}
242#endif
243
244/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
df7492f9
KH
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
d46c5b12 250
df7492f9
KH
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 255
df7492f9
KH
256 DST_BYTES zero means that the source area and the destination area
257 overlap, which means that we can produce encoded text only until it
258 reaches the head of the not-yet-encoded source text.
d46c5b12 259
df7492f9 260 Below is a template of these functions. */
4ed46869 261#if 0
b73bfc1c 262static void
df7492f9 263encode_coding_XXX (coding)
4ed46869 264 struct coding_system *coding;
4ed46869 265{
df7492f9
KH
266 int multibytep = coding->dst_multibyte;
267 int *charbuf = coding->charbuf;
268 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
269 unsigned char *dst = coding->destination + coding->produced;
270 unsigned char *dst_end = coding->destination + coding->dst_bytes;
271 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
272 int produced_chars = 0;
273
274 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
275 {
276 int c = *charbuf;
277 /* Encode C into DST, and increment DST. */
278 }
279 label_no_more_destination:
280 /* How many chars and bytes we produced. */
281 coding->produced_char += produced_chars;
282 coding->produced = dst - coding->destination;
4ed46869
KH
283}
284#endif
285
4ed46869
KH
286\f
287/*** 1. Preamble ***/
288
68c45bf0 289#include <config.h>
4ed46869
KH
290#include <stdio.h>
291
4ed46869
KH
292#include "lisp.h"
293#include "buffer.h"
df7492f9 294#include "character.h"
4ed46869
KH
295#include "charset.h"
296#include "ccl.h"
df7492f9 297#include "composite.h"
4ed46869
KH
298#include "coding.h"
299#include "window.h"
4ed46869 300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
304Lisp_Object Qunix, Qdos;
305extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
306Lisp_Object Qbuffer_file_coding_system;
307Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 308Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
df7492f9 310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 311Lisp_Object Qbig, Qlittle;
bb0115a2 312Lisp_Object Qcoding_system_history;
1397dc18 313Lisp_Object Qvalid_codes;
01378f49 314Lisp_Object QCcategory;
4ed46869
KH
315
316extern Lisp_Object Qinsert_file_contents, Qwrite_region;
317Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
318Lisp_Object Qstart_process, Qopen_network_stream;
319Lisp_Object Qtarget_idx;
320
5d5bf4d8
KH
321int coding_system_require_warning;
322
d46c5b12
KH
323Lisp_Object Vselect_safe_coding_system_function;
324
7722baf9
EZ
325/* Mnemonic string for each format of end-of-line. */
326Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
327/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 328 decided. */
7722baf9 329Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
330
331#ifdef emacs
332
4608c386
KH
333Lisp_Object Vcoding_system_list, Vcoding_system_alist;
334
335Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 336
d46c5b12
KH
337/* Coding system emacs-mule and raw-text are for converting only
338 end-of-line format. */
339Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 340Lisp_Object Qutf_8_emacs;
ecf488bc 341
4ed46869
KH
342/* Coding-systems are handed between Emacs Lisp programs and C internal
343 routines by the following three variables. */
344/* Coding-system for reading files and receiving data from process. */
345Lisp_Object Vcoding_system_for_read;
346/* Coding-system for writing files and sending data to process. */
347Lisp_Object Vcoding_system_for_write;
348/* Coding-system actually used in the latest I/O. */
349Lisp_Object Vlast_coding_system_used;
350
c4825358 351/* A vector of length 256 which contains information about special
94487c4e 352 Latin codes (especially for dealing with Microsoft codes). */
3f003981 353Lisp_Object Vlatin_extra_code_table;
c4825358 354
9ce27fde
KH
355/* Flag to inhibit code conversion of end-of-line format. */
356int inhibit_eol_conversion;
357
74383408
KH
358/* Flag to inhibit ISO2022 escape sequence detection. */
359int inhibit_iso_escape_detection;
360
ed29121d
EZ
361/* Flag to make buffer-file-coding-system inherit from process-coding. */
362int inherit_process_coding_system;
363
c4825358 364/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
365struct coding_system terminal_coding;
366
c4825358
KH
367/* Coding system to be used to encode text for terminal display when
368 terminal coding system is nil. */
369struct coding_system safe_terminal_coding;
370
371/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
372struct coding_system keyboard_coding;
373
02ba4723
KH
374Lisp_Object Vfile_coding_system_alist;
375Lisp_Object Vprocess_coding_system_alist;
376Lisp_Object Vnetwork_coding_system_alist;
4ed46869 377
68c45bf0
PE
378Lisp_Object Vlocale_coding_system;
379
4ed46869
KH
380#endif /* emacs */
381
f967223b
KH
382/* Flag to tell if we look up translation table on character code
383 conversion. */
84fbb8a0 384Lisp_Object Venable_character_translation;
f967223b
KH
385/* Standard translation table to look up on decoding (reading). */
386Lisp_Object Vstandard_translation_table_for_decode;
387/* Standard translation table to look up on encoding (writing). */
388Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 389
f967223b
KH
390Lisp_Object Qtranslation_table;
391Lisp_Object Qtranslation_table_id;
392Lisp_Object Qtranslation_table_for_decode;
393Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
394
395/* Alist of charsets vs revision number. */
df7492f9 396static Lisp_Object Vcharset_revision_table;
4ed46869 397
02ba4723
KH
398/* Default coding systems used for process I/O. */
399Lisp_Object Vdefault_process_coding_system;
400
002fdb44
DL
401/* Char table for translating Quail and self-inserting input. */
402Lisp_Object Vtranslation_table_for_input;
403
df7492f9
KH
404/* Two special coding systems. */
405Lisp_Object Vsjis_coding_system;
406Lisp_Object Vbig5_coding_system;
407
408
ff0dacd7
KH
409static int detect_coding_utf_8 P_ ((struct coding_system *,
410 struct coding_detection_info *info));
df7492f9
KH
411static void decode_coding_utf_8 P_ ((struct coding_system *));
412static int encode_coding_utf_8 P_ ((struct coding_system *));
413
ff0dacd7
KH
414static int detect_coding_utf_16 P_ ((struct coding_system *,
415 struct coding_detection_info *info));
df7492f9
KH
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
ff0dacd7
KH
419static int detect_coding_iso_2022 P_ ((struct coding_system *,
420 struct coding_detection_info *info));
df7492f9
KH
421static void decode_coding_iso_2022 P_ ((struct coding_system *));
422static int encode_coding_iso_2022 P_ ((struct coding_system *));
423
ff0dacd7
KH
424static int detect_coding_emacs_mule P_ ((struct coding_system *,
425 struct coding_detection_info *info));
df7492f9
KH
426static void decode_coding_emacs_mule P_ ((struct coding_system *));
427static int encode_coding_emacs_mule P_ ((struct coding_system *));
428
ff0dacd7
KH
429static int detect_coding_sjis P_ ((struct coding_system *,
430 struct coding_detection_info *info));
df7492f9
KH
431static void decode_coding_sjis P_ ((struct coding_system *));
432static int encode_coding_sjis P_ ((struct coding_system *));
433
ff0dacd7
KH
434static int detect_coding_big5 P_ ((struct coding_system *,
435 struct coding_detection_info *info));
df7492f9
KH
436static void decode_coding_big5 P_ ((struct coding_system *));
437static int encode_coding_big5 P_ ((struct coding_system *));
438
ff0dacd7
KH
439static int detect_coding_ccl P_ ((struct coding_system *,
440 struct coding_detection_info *info));
df7492f9
KH
441static void decode_coding_ccl P_ ((struct coding_system *));
442static int encode_coding_ccl P_ ((struct coding_system *));
443
444static void decode_coding_raw_text P_ ((struct coding_system *));
445static int encode_coding_raw_text P_ ((struct coding_system *));
446
447
448/* ISO2022 section */
449
450#define CODING_ISO_INITIAL(coding, reg) \
451 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
452 coding_attr_iso_initial), \
453 reg)))
454
455
/* Evaluate to CODING->safe_charsets[CHARSET_ID] when CHARSET_ID does
   not exceed CODING->max_charset_id, otherwise to -1.  CHARSET_ID is
   evaluated twice, so it must not have side effects.  */
#define CODING_ISO_REQUEST(coding, charset_id)		\
  (((charset_id) <= (coding)->max_charset_id		\
    ? (coding)->safe_charsets[(charset_id)]		\
    : -1))
460
461
462#define CODING_ISO_FLAGS(coding) \
463 ((coding)->spec.iso_2022.flags)
464#define CODING_ISO_DESIGNATION(coding, reg) \
465 ((coding)->spec.iso_2022.current_designation[reg])
466#define CODING_ISO_INVOCATION(coding, plane) \
467 ((coding)->spec.iso_2022.current_invocation[plane])
468#define CODING_ISO_SINGLE_SHIFTING(coding) \
469 ((coding)->spec.iso_2022.single_shifting)
470#define CODING_ISO_BOL(coding) \
471 ((coding)->spec.iso_2022.bol)
472#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
473 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
474
475/* Control characters of ISO2022. */
476 /* code */ /* function */
477#define ISO_CODE_LF 0x0A /* line-feed */
478#define ISO_CODE_CR 0x0D /* carriage-return */
479#define ISO_CODE_SO 0x0E /* shift-out */
480#define ISO_CODE_SI 0x0F /* shift-in */
481#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
482#define ISO_CODE_ESC 0x1B /* escape */
483#define ISO_CODE_SS2 0x8E /* single-shift-2 */
484#define ISO_CODE_SS3 0x8F /* single-shift-3 */
485#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
486
487/* All code (1-byte) of ISO2022 is classified into one of the
488 followings. */
489enum iso_code_class_type
490 {
491 ISO_control_0, /* Control codes in the range
492 0x00..0x1F and 0x7F, except for the
493 following 5 codes. */
494 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
495 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
496 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
497 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
498 ISO_escape, /* ISO_CODE_ESC (0x1B) */
499 ISO_control_1, /* Control codes in the range
500 0x80..0x9F, except for the
501 following 3 codes. */
502 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
503 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
504 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
505 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
506 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
507 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
508 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
509 };
05e6f5dc 510
df7492f9
KH
511/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
512 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 513
df7492f9
KH
514/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
515 instead of the correct short-form sequence (e.g. ESC $ A). */
516#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 517
df7492f9
KH
518/* If set, reset graphic planes and registers at end-of-line to the
519 initial state. */
520#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 521
df7492f9
KH
522/* If set, reset graphic planes and registers before any control
523 characters to the initial state. */
524#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 525
df7492f9
KH
526/* If set, encode by 7-bit environment. */
527#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 528
df7492f9
KH
529/* If set, use locking-shift function. */
530#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 531
df7492f9
KH
532/* If set, use single-shift function. Overwrite
533 CODING_ISO_FLAG_LOCKING_SHIFT. */
534#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 535
df7492f9
KH
536/* If set, use designation escape sequence. */
537#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 538
df7492f9
KH
539/* If set, produce revision number sequence. */
540#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 541
df7492f9
KH
542/* If set, produce ISO6429's direction specifying sequence. */
543#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 544
df7492f9
KH
545/* If set, assume designation states are reset at beginning of line on
546 output. */
547#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 548
df7492f9
KH
549/* If set, designation sequence should be placed at beginning of line
550 on output. */
551#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 552
df7492f9
KH
553/* If set, do not encode unsafe characters on output. */
554#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 555
df7492f9
KH
556/* If set, extra latin codes (128..159) are accepted as a valid code
557 on input. */
558#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 559
df7492f9 560#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 561
df7492f9 562#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 563
bf16eb23 564#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 565
bf16eb23 566#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 567
bf16eb23 568#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 569
df7492f9
KH
570/* A character to be produced on output if encoding of the original
571 character is prohibited by CODING_ISO_FLAG_SAFE. */
572#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 573
4ed46869 574
df7492f9
KH
575/* UTF-16 section */
576#define CODING_UTF_16_BOM(coding) \
577 ((coding)->spec.utf_16.bom)
4ed46869 578
df7492f9
KH
579#define CODING_UTF_16_ENDIAN(coding) \
580 ((coding)->spec.utf_16.endian)
4ed46869 581
df7492f9
KH
582#define CODING_UTF_16_SURROGATE(coding) \
583 ((coding)->spec.utf_16.surrogate)
4ed46869 584
4ed46869 585
df7492f9
KH
586/* CCL section */
587#define CODING_CCL_DECODER(coding) \
588 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
589#define CODING_CCL_ENCODER(coding) \
590 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
591#define CODING_CCL_VALIDS(coding) \
8f924df7 592 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 593
5a936b46 594/* Index for each coding category in `coding_categories' */
4ed46869 595
df7492f9
KH
596enum coding_category
597 {
598 coding_category_iso_7,
599 coding_category_iso_7_tight,
600 coding_category_iso_8_1,
601 coding_category_iso_8_2,
602 coding_category_iso_7_else,
603 coding_category_iso_8_else,
604 coding_category_utf_8,
605 coding_category_utf_16_auto,
606 coding_category_utf_16_be,
607 coding_category_utf_16_le,
608 coding_category_utf_16_be_nosig,
609 coding_category_utf_16_le_nosig,
610 coding_category_charset,
611 coding_category_sjis,
612 coding_category_big5,
613 coding_category_ccl,
614 coding_category_emacs_mule,
615 /* All above are targets of code detection. */
616 coding_category_raw_text,
617 coding_category_undecided,
618 coding_category_max
619 };
620
621/* Definitions of flag bits used in detect_coding_XXXX. */
622#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
623#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
624#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
625#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
626#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
627#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
628#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 629#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
630#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
631#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
632#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
633#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
634#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
635#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
636#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
637#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
638#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 639#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
640
641/* This value is returned if detect_coding_mask () finds nothing other
642 than ASCII characters. */
643#define CATEGORY_MASK_ANY \
644 (CATEGORY_MASK_ISO_7 \
645 | CATEGORY_MASK_ISO_7_TIGHT \
646 | CATEGORY_MASK_ISO_8_1 \
647 | CATEGORY_MASK_ISO_8_2 \
648 | CATEGORY_MASK_ISO_7_ELSE \
649 | CATEGORY_MASK_ISO_8_ELSE \
650 | CATEGORY_MASK_UTF_8 \
651 | CATEGORY_MASK_UTF_16_BE \
652 | CATEGORY_MASK_UTF_16_LE \
653 | CATEGORY_MASK_UTF_16_BE_NOSIG \
654 | CATEGORY_MASK_UTF_16_LE_NOSIG \
655 | CATEGORY_MASK_CHARSET \
656 | CATEGORY_MASK_SJIS \
657 | CATEGORY_MASK_BIG5 \
658 | CATEGORY_MASK_CCL \
659 | CATEGORY_MASK_EMACS_MULE)
660
661
662#define CATEGORY_MASK_ISO_7BIT \
663 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
664
665#define CATEGORY_MASK_ISO_8BIT \
666 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
667
668#define CATEGORY_MASK_ISO_ELSE \
669 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
670
671#define CATEGORY_MASK_ISO_ESCAPE \
672 (CATEGORY_MASK_ISO_7 \
673 | CATEGORY_MASK_ISO_7_TIGHT \
674 | CATEGORY_MASK_ISO_7_ELSE \
675 | CATEGORY_MASK_ISO_8_ELSE)
676
677#define CATEGORY_MASK_ISO \
678 ( CATEGORY_MASK_ISO_7BIT \
679 | CATEGORY_MASK_ISO_8BIT \
680 | CATEGORY_MASK_ISO_ELSE)
681
682#define CATEGORY_MASK_UTF_16 \
683 (CATEGORY_MASK_UTF_16_BE \
684 | CATEGORY_MASK_UTF_16_LE \
685 | CATEGORY_MASK_UTF_16_BE_NOSIG \
686 | CATEGORY_MASK_UTF_16_LE_NOSIG)
687
688
689/* List of symbols `coding-category-xxx' ordered by priority. This
690 variable is exposed to Emacs Lisp. */
691static Lisp_Object Vcoding_category_list;
692
693/* Table of coding categories (Lisp symbols). This variable is for
694 internal use only. */
695static Lisp_Object Vcoding_category_table;
696
697/* Table of coding-categories ordered by priority. */
698static enum coding_category coding_priorities[coding_category_max];
699
700/* Nth element is a coding context for the coding system bound to the
701 Nth coding category. */
702static struct coding_system coding_categories[coding_category_max];
703
df7492f9
KH
704/*** Commonly used macros and functions ***/
705
706#ifndef min
707#define min(a, b) ((a) < (b) ? (a) : (b))
708#endif
709#ifndef max
710#define max(a, b) ((a) > (b) ? (a) : (b))
711#endif
4ed46869 712
df7492f9
KH
713#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
714 do { \
715 attrs = CODING_ID_ATTRS (coding->id); \
716 eol_type = CODING_ID_EOL_TYPE (coding->id); \
717 if (VECTORP (eol_type)) \
718 eol_type = Qunix; \
719 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
720 } while (0)
4ed46869 721
4ed46869 722
df7492f9
KH
723/* Safely get one byte from the source text pointed by SRC which ends
724 at SRC_END, and set C to that byte. If there are not enough bytes
725 in the source, it jumps to `no_more_source'. The caller
726 should declare and set these variables appropriately in advance:
727 src, src_base, src_end, multibytep, consumed_chars, coding
728*/
aa72b389 729
df7492f9 730#define ONE_MORE_BYTE(c) \
aa72b389 731 do { \
df7492f9
KH
732 if (src == src_end) \
733 { \
734 if (src_base < src) \
735 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
736 goto no_more_source; \
737 } \
738 c = *src++; \
739 if (multibytep && (c & 0x80)) \
740 { \
741 if ((c & 0xFE) != 0xC0) \
742 error ("Undecodable char found"); \
743 c = ((c & 1) << 6) | *src++; \
744 } \
745 consumed_chars++; \
aa72b389
KH
746 } while (0)
747
aa72b389 748
df7492f9
KH
749#define ONE_MORE_BYTE_NO_CHECK(c) \
750 do { \
751 c = *src++; \
752 if (multibytep && (c & 0x80)) \
753 { \
754 if ((c & 0xFE) != 0xC0) \
755 error ("Undecodable char found"); \
756 c = ((c & 1) << 6) | *src++; \
757 } \
781d7a48 758 consumed_chars++; \
aa72b389
KH
759 } while (0)
760
aa72b389 761
df7492f9
KH
762/* Store a byte C in the place pointed by DST and increment DST to the
763 next free point, and increment PRODUCED_CHARS. The caller should
764 assure that C is 0..127, and declare and set the variable `dst'
765 appropriately in advance.
766*/
aa72b389
KH
767
768
df7492f9
KH
769#define EMIT_ONE_ASCII_BYTE(c) \
770 do { \
771 produced_chars++; \
772 *dst++ = (c); \
b6871cc7 773 } while (0)
aa72b389
KH
774
775
df7492f9 776/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
aa72b389 777
df7492f9
KH
778#define EMIT_TWO_ASCII_BYTES(c1, c2) \
779 do { \
780 produced_chars += 2; \
781 *dst++ = (c1), *dst++ = (c2); \
782 } while (0)
aa72b389
KH
783
784
df7492f9
KH
785/* Store a byte C in the place pointed by DST and increment DST to the
786 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
787 nonzero, store in an appropriate multibyte form. The caller should
788 declare and set the variables `dst' and `multibytep' appropriately
789 in advance. */
790
791#define EMIT_ONE_BYTE(c) \
792 do { \
793 produced_chars++; \
794 if (multibytep) \
795 { \
796 int ch = (c); \
797 if (ch >= 0x80) \
798 ch = BYTE8_TO_CHAR (ch); \
799 CHAR_STRING_ADVANCE (ch, dst); \
800 } \
801 else \
802 *dst++ = (c); \
aa72b389 803 } while (0)
aa72b389 804
aa72b389 805
df7492f9 806/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 807
e19c3639
KH
808#define EMIT_TWO_BYTES(c1, c2) \
809 do { \
810 produced_chars += 2; \
811 if (multibytep) \
812 { \
813 int ch; \
814 \
815 ch = (c1); \
816 if (ch >= 0x80) \
817 ch = BYTE8_TO_CHAR (ch); \
818 CHAR_STRING_ADVANCE (ch, dst); \
819 ch = (c2); \
820 if (ch >= 0x80) \
821 ch = BYTE8_TO_CHAR (ch); \
822 CHAR_STRING_ADVANCE (ch, dst); \
823 } \
824 else \
825 { \
826 *dst++ = (c1); \
827 *dst++ = (c2); \
828 } \
aa72b389
KH
829 } while (0)
830
831
df7492f9
KH
832#define EMIT_THREE_BYTES(c1, c2, c3) \
833 do { \
834 EMIT_ONE_BYTE (c1); \
835 EMIT_TWO_BYTES (c2, c3); \
836 } while (0)
aa72b389 837
aa72b389 838
df7492f9
KH
/* Emit the four bytes C1, C2, C3 and C4, in that order, the way
   EMIT_ONE_BYTE emits a single byte.  */

#define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
  do {                                          \
    EMIT_THREE_BYTES (c1, c2, c3);              \
    EMIT_ONE_BYTE (c4);                         \
  } while (0)
aa72b389 844
aa72b389 845
df7492f9
KH
/* Set C to the result of DECODE_CHAR (CHARSET, CODE).

   `charset_map_loaded' is cleared before the call and examined after
   it.  If it has become nonzero, CODING->source is recomputed with
   coding_set_source, and SRC, SRC_BASE and SRC_END are all shifted by
   the distance CODING->source moved (presumably because loading a
   charset map can relocate the source text -- confirm against
   DECODE_CHAR).  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *source_before = coding->source;                 \
        EMACS_INT shift;                                                     \
                                                                             \
        coding_set_source (coding);                                          \
        shift = coding->source - source_before;                              \
        src += shift;                                                        \
        src_base += shift;                                                   \
        src_end += shift;                                                    \
      }                                                                      \
  } while (0)
862
863
df7492f9
KH
/* Make sure that BYTES more bytes can be stored at DST.  If DST +
   BYTES would reach DST_END, enlarge the destination through
   alloc_destination, asking for BYTES plus the number of elements
   between CHARBUF and CHARBUF_END, then reload DST and DST_END.  The
   caller must have `coding', `dst', `dst_end', `charbuf' and
   `charbuf_end' in scope.  */

#define ASSURE_DESTINATION(bytes)                                       \
  do {                                                                  \
    if (! (dst + (bytes) < dst_end))                                    \
      {                                                                 \
        int more_bytes = (bytes) + (charbuf_end - charbuf);             \
                                                                        \
        dst = alloc_destination (coding, more_bytes, dst);              \
        /* alloc_destination may have moved the destination.  */        \
        dst_end = coding->destination + coding->dst_bytes;              \
      }                                                                 \
  } while (0)
aa72b389 874
aa72b389 875
aa72b389 876
df7492f9
KH
877static void
878coding_set_source (coding)
aa72b389 879 struct coding_system *coding;
aa72b389 880{
df7492f9
KH
881 if (BUFFERP (coding->src_object))
882 {
2cb26057 883 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 884
df7492f9 885 if (coding->src_pos < 0)
2cb26057 886 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 887 else
2cb26057 888 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 889 }
df7492f9 890 else if (STRINGP (coding->src_object))
aa72b389 891 {
8f924df7 892 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 893 }
df7492f9
KH
894 else
895 /* Otherwise, the source is C string and is never relocated
896 automatically. Thus we don't have to update anything. */
897 ;
898}
aa72b389 899
df7492f9
KH
900static void
901coding_set_destination (coding)
902 struct coding_system *coding;
903{
904 if (BUFFERP (coding->dst_object))
aa72b389 905 {
df7492f9 906 if (coding->src_pos < 0)
aa72b389 907 {
28f67a95
KH
908 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
909 coding->dst_bytes = (GAP_END_ADDR
910 - (coding->src_bytes - coding->consumed)
911 - coding->destination);
aa72b389 912 }
df7492f9 913 else
28f67a95
KH
914 {
915 /* We are sure that coding->dst_pos_byte is before the gap
916 of the buffer. */
917 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
918 + coding->dst_pos_byte - 1);
919 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
920 - coding->destination);
921 }
df7492f9
KH
922 }
923 else
924 /* Otherwise, the destination is C string and is never relocated
925 automatically. Thus we don't have to update anything. */
926 ;
927}
928
929
930static void
931coding_alloc_by_realloc (coding, bytes)
932 struct coding_system *coding;
933 EMACS_INT bytes;
934{
935 coding->destination = (unsigned char *) xrealloc (coding->destination,
936 coding->dst_bytes + bytes);
937 coding->dst_bytes += bytes;
938}
939
940static void
941coding_alloc_by_making_gap (coding, bytes)
942 struct coding_system *coding;
943 EMACS_INT bytes;
944{
2c78b7e1
KH
945 if (BUFFERP (coding->dst_object)
946 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
947 {
948 EMACS_INT add = coding->src_bytes - coding->consumed;
949
950 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
951 make_gap (bytes);
952 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
953 }
954 else
955 {
2c78b7e1
KH
956 Lisp_Object this_buffer;
957
958 this_buffer = Fcurrent_buffer ();
df7492f9
KH
959 set_buffer_internal (XBUFFER (coding->dst_object));
960 make_gap (bytes);
961 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 962 }
df7492f9 963}
8f924df7 964
df7492f9
KH
965
966static unsigned char *
967alloc_destination (coding, nbytes, dst)
968 struct coding_system *coding;
3e139625 969 EMACS_INT nbytes;
df7492f9
KH
970 unsigned char *dst;
971{
972 EMACS_INT offset = dst - coding->destination;
973
974 if (BUFFERP (coding->dst_object))
975 coding_alloc_by_making_gap (coding, nbytes);
aa72b389 976 else
df7492f9
KH
977 coding_alloc_by_realloc (coding, nbytes);
978 coding->result = CODING_RESULT_SUCCESS;
979 coding_set_destination (coding);
980 dst = coding->destination + offset;
981 return dst;
982}
aa72b389 983
ff0dacd7
KH
984/** Macros for annotations. */
985
/* Maximum length of annotation data (sum of annotations for
   composition and charset): five elements for a composition
   annotation without components, up to MAX_COMPOSITION_COMPONENTS * 2
   - 1 composition components, and five elements for a charset
   annotation.  */
#define MAX_ANNOTATION_LENGTH \
  (5 + (2 * MAX_COMPOSITION_COMPONENTS - 1) + 5)
989
/* Annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK FROM TO ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   FROM and TO specify the range of text annotated.  They are relative
   to coding->src_pos (on encoding) or coding->dst_pos (on decoding).

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.  */

/* Store the four common annotation elements (-LEN, MASK, FROM, TO) at
   BUF, advance BUF past them, and set coding->annotated.  The caller
   must have `coding' in scope.  The macro expands to a single
   statement without a trailing semicolon, so it is safe in an
   unbraced if/else.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (from);                                  \
    *(buf)++ = (to);                                    \
    coding->annotated = 1;                              \
  } while (0)

/* Store a five-element composition annotation for the range FROM..TO
   at BUF: the common header followed by METHOD.  */

#define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
  do {                                                                        \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
    *(buf)++ = (method);                                                      \
  } while (0)


/* Store a five-element charset annotation for the range FROM..TO at
   BUF: the common header followed by the charset ID.  */

#define ADD_CHARSET_DATA(buf, from, to, id)                               \
  do {                                                                    \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
    *(buf)++ = (id);                                                      \
  } while (0)
1031
df7492f9
KH
1032\f
1033/*** 2. Emacs' internal format (emacs-utf-8) ***/
1034
1035
1036
1037\f
1038/*** 3. UTF-8 ***/
1039
1040/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1041 Check if a text is encoded in UTF-8. If it is, return 1, else
1042 return 0. */
df7492f9
KH
1043
/* Nonzero if the bits of C selected by MASK equal PREFIX.  */
#define UTF_8_PREFIX_P(c, mask, prefix) (((c) & (mask)) == (prefix))

/* C is a complete one-octet sequence (below 0x80).  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* C has the form 10xxxxxx, i.e. it is a trailing octet.  */
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_PREFIX_P (c, 0xC0, 0x80)
/* C has the form 110xxxxx / 1110xxxx / 11110xxx / 111110xx, i.e. it
   leads a sequence of two / three / four / five octets.  */
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xFC, 0xF8)
1050
1051static int
ff0dacd7 1052detect_coding_utf_8 (coding, detect_info)
df7492f9 1053 struct coding_system *coding;
ff0dacd7 1054 struct coding_detection_info *detect_info;
df7492f9 1055{
8f924df7
KH
1056 const unsigned char *src = coding->source, *src_base = src;
1057 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1058 int multibytep = coding->src_multibyte;
1059 int consumed_chars = 0;
1060 int found = 0;
89528eb3 1061 int incomplete;
df7492f9 1062
ff0dacd7 1063 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1064 /* A coding system of this category is always ASCII compatible. */
1065 src += coding->head_ascii;
1066
1067 while (1)
aa72b389 1068 {
df7492f9 1069 int c, c1, c2, c3, c4;
aa72b389 1070
89528eb3 1071 incomplete = 0;
df7492f9
KH
1072 ONE_MORE_BYTE (c);
1073 if (UTF_8_1_OCTET_P (c))
1074 continue;
89528eb3 1075 incomplete = 1;
df7492f9
KH
1076 ONE_MORE_BYTE (c1);
1077 if (! UTF_8_EXTRA_OCTET_P (c1))
1078 break;
1079 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1080 {
ff0dacd7 1081 found = CATEGORY_MASK_UTF_8;
df7492f9 1082 continue;
aa72b389 1083 }
df7492f9
KH
1084 ONE_MORE_BYTE (c2);
1085 if (! UTF_8_EXTRA_OCTET_P (c2))
1086 break;
1087 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1088 {
ff0dacd7 1089 found = CATEGORY_MASK_UTF_8;
df7492f9 1090 continue;
aa72b389 1091 }
df7492f9
KH
1092 ONE_MORE_BYTE (c3);
1093 if (! UTF_8_EXTRA_OCTET_P (c3))
1094 break;
1095 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1096 {
ff0dacd7 1097 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1098 continue;
1099 }
1100 ONE_MORE_BYTE (c4);
1101 if (! UTF_8_EXTRA_OCTET_P (c4))
1102 break;
1103 if (UTF_8_5_OCTET_LEADING_P (c))
1104 {
ff0dacd7 1105 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1106 continue;
1107 }
1108 break;
aa72b389 1109 }
ff0dacd7 1110 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1111 return 0;
aa72b389 1112
df7492f9 1113 no_more_source:
89528eb3 1114 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1115 {
ff0dacd7 1116 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1117 return 0;
aa72b389 1118 }
ff0dacd7
KH
1119 detect_info->found |= found;
1120 return 1;
aa72b389
KH
1121}
1122
4ed46869 1123
b73bfc1c 1124static void
df7492f9 1125decode_coding_utf_8 (coding)
b73bfc1c 1126 struct coding_system *coding;
b73bfc1c 1127{
8f924df7
KH
1128 const unsigned char *src = coding->source + coding->consumed;
1129 const unsigned char *src_end = coding->source + coding->src_bytes;
1130 const unsigned char *src_base;
df7492f9
KH
1131 int *charbuf = coding->charbuf;
1132 int *charbuf_end = charbuf + coding->charbuf_size;
1133 int consumed_chars = 0, consumed_chars_base;
1134 int multibytep = coding->src_multibyte;
1135 Lisp_Object attr, eol_type, charset_list;
4ed46869 1136
df7492f9
KH
1137 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1138
1139 while (1)
b73bfc1c 1140 {
df7492f9 1141 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1142
df7492f9
KH
1143 src_base = src;
1144 consumed_chars_base = consumed_chars;
4af310db 1145
df7492f9
KH
1146 if (charbuf >= charbuf_end)
1147 break;
1148
1149 ONE_MORE_BYTE (c1);
1150 if (UTF_8_1_OCTET_P(c1))
1151 {
1152 c = c1;
1153 if (c == '\r')
4af310db 1154 {
df7492f9 1155 if (EQ (eol_type, Qdos))
4af310db 1156 {
df7492f9 1157 if (src == src_end)
98725083
KH
1158 {
1159 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1160 goto no_more_source;
1161 }
df7492f9
KH
1162 if (*src == '\n')
1163 ONE_MORE_BYTE (c);
4af310db 1164 }
df7492f9
KH
1165 else if (EQ (eol_type, Qmac))
1166 c = '\n';
4af310db 1167 }
4af310db 1168 }
df7492f9 1169 else
4af310db 1170 {
df7492f9
KH
1171 ONE_MORE_BYTE (c2);
1172 if (! UTF_8_EXTRA_OCTET_P (c2))
1173 goto invalid_code;
1174 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1175 {
b0edb2c5
DL
1176 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1177 /* Reject overlong sequences here and below. Encoders
1178 producing them are incorrect, they can be misleading,
1179 and they mess up read/write invariance. */
1180 if (c < 128)
1181 goto invalid_code;
4af310db 1182 }
df7492f9 1183 else
aa72b389 1184 {
df7492f9
KH
1185 ONE_MORE_BYTE (c3);
1186 if (! UTF_8_EXTRA_OCTET_P (c3))
1187 goto invalid_code;
1188 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1189 {
1190 c = (((c1 & 0xF) << 12)
1191 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1192 if (c < 0x800
1193 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1194 goto invalid_code;
1195 }
df7492f9
KH
1196 else
1197 {
1198 ONE_MORE_BYTE (c4);
1199 if (! UTF_8_EXTRA_OCTET_P (c4))
1200 goto invalid_code;
1201 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1202 {
df7492f9
KH
1203 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1204 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1205 if (c < 0x10000)
1206 goto invalid_code;
1207 }
df7492f9
KH
1208 else
1209 {
1210 ONE_MORE_BYTE (c5);
1211 if (! UTF_8_EXTRA_OCTET_P (c5))
1212 goto invalid_code;
1213 if (UTF_8_5_OCTET_LEADING_P (c1))
1214 {
1215 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1216 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1217 | (c5 & 0x3F));
b0edb2c5 1218 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1219 goto invalid_code;
1220 }
1221 else
1222 goto invalid_code;
1223 }
1224 }
aa72b389 1225 }
b73bfc1c 1226 }
df7492f9
KH
1227
1228 *charbuf++ = c;
1229 continue;
1230
1231 invalid_code:
1232 src = src_base;
1233 consumed_chars = consumed_chars_base;
1234 ONE_MORE_BYTE (c);
1235 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1236 coding->errors++;
aa72b389
KH
1237 }
1238
df7492f9
KH
1239 no_more_source:
1240 coding->consumed_char += consumed_chars_base;
1241 coding->consumed = src_base - coding->source;
1242 coding->charbuf_used = charbuf - coding->charbuf;
1243}
1244
1245
1246static int
1247encode_coding_utf_8 (coding)
1248 struct coding_system *coding;
1249{
1250 int multibytep = coding->dst_multibyte;
1251 int *charbuf = coding->charbuf;
1252 int *charbuf_end = charbuf + coding->charbuf_used;
1253 unsigned char *dst = coding->destination + coding->produced;
1254 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1255 int produced_chars = 0;
df7492f9
KH
1256 int c;
1257
1258 if (multibytep)
aa72b389 1259 {
df7492f9
KH
1260 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1261
1262 while (charbuf < charbuf_end)
b73bfc1c 1263 {
df7492f9 1264 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1265
df7492f9
KH
1266 ASSURE_DESTINATION (safe_room);
1267 c = *charbuf++;
28f67a95
KH
1268 if (CHAR_BYTE8_P (c))
1269 {
1270 c = CHAR_TO_BYTE8 (c);
1271 EMIT_ONE_BYTE (c);
1272 }
1273 else
1274 {
1275 CHAR_STRING_ADVANCE (c, pend);
1276 for (p = str; p < pend; p++)
1277 EMIT_ONE_BYTE (*p);
1278 }
b73bfc1c 1279 }
aa72b389 1280 }
df7492f9
KH
1281 else
1282 {
1283 int safe_room = MAX_MULTIBYTE_LENGTH;
1284
1285 while (charbuf < charbuf_end)
b73bfc1c 1286 {
df7492f9
KH
1287 ASSURE_DESTINATION (safe_room);
1288 c = *charbuf++;
1289 dst += CHAR_STRING (c, dst);
1290 produced_chars++;
4ed46869
KH
1291 }
1292 }
df7492f9
KH
1293 coding->result = CODING_RESULT_SUCCESS;
1294 coding->produced_char += produced_chars;
1295 coding->produced = dst - coding->destination;
1296 return 0;
4ed46869
KH
1297}
1298
b73bfc1c 1299
df7492f9 1300/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1301 Check if a text is encoded in one of UTF-16 based coding systems.
1302 If it is, return 1, else return 0. */
aa72b389 1303
df7492f9
KH
/* The bits of a 16-bit code unit that tell a high surrogate
   (0xD800..0xDBFF) from a low surrogate (0xDC00..0xDFFF).  */
#define UTF_16_SURROGATE_BITS(val) ((val) & 0xFC00)

#define UTF_16_HIGH_SURROGATE_P(val) \
  (UTF_16_SURROGATE_BITS (val) == 0xD800)

#define UTF_16_LOW_SURROGATE_P(val) \
  (UTF_16_SURROGATE_BITS (val) == 0xDC00)

/* Nonzero for 0xFFFE, 0xFFFF and for any low surrogate.  */
#define UTF_16_INVALID_P(val)   \
  ((val) == 0xFFFE              \
   || (val) == 0xFFFF           \
   || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1314
aa72b389 1315
df7492f9 1316static int
ff0dacd7 1317detect_coding_utf_16 (coding, detect_info)
aa72b389 1318 struct coding_system *coding;
ff0dacd7 1319 struct coding_detection_info *detect_info;
aa72b389 1320{
8f924df7
KH
1321 const unsigned char *src = coding->source, *src_base = src;
1322 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1323 int multibytep = coding->src_multibyte;
1324 int consumed_chars = 0;
1325 int c1, c2;
aa72b389 1326
ff0dacd7 1327 detect_info->checked |= CATEGORY_MASK_UTF_16;
aa72b389 1328
ff0dacd7
KH
1329 if (coding->mode & CODING_MODE_LAST_BLOCK
1330 && (coding->src_bytes & 1))
1331 {
1332 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1333 return 0;
1334 }
df7492f9
KH
1335 ONE_MORE_BYTE (c1);
1336 ONE_MORE_BYTE (c2);
aa72b389 1337
df7492f9 1338 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1339 {
b49a1807
KH
1340 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1341 | CATEGORY_MASK_UTF_16_AUTO);
ff0dacd7 1342 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
aa72b389 1343 }
df7492f9 1344 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1345 {
b49a1807
KH
1346 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1347 | CATEGORY_MASK_UTF_16_AUTO);
ff0dacd7
KH
1348 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1349 }
df7492f9 1350 no_more_source:
ff0dacd7 1351 return 1;
df7492f9 1352}
aa72b389 1353
df7492f9
KH
1354static void
1355decode_coding_utf_16 (coding)
1356 struct coding_system *coding;
1357{
8f924df7
KH
1358 const unsigned char *src = coding->source + coding->consumed;
1359 const unsigned char *src_end = coding->source + coding->src_bytes;
1360 const unsigned char *src_base;
df7492f9
KH
1361 int *charbuf = coding->charbuf;
1362 int *charbuf_end = charbuf + coding->charbuf_size;
1363 int consumed_chars = 0, consumed_chars_base;
1364 int multibytep = coding->src_multibyte;
1365 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1366 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1367 int surrogate = CODING_UTF_16_SURROGATE (coding);
1368 Lisp_Object attr, eol_type, charset_list;
1369
1370 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1371
b49a1807 1372 if (bom == utf_16_with_bom)
aa72b389 1373 {
df7492f9 1374 int c, c1, c2;
4af310db 1375
aa72b389 1376 src_base = src;
df7492f9
KH
1377 ONE_MORE_BYTE (c1);
1378 ONE_MORE_BYTE (c2);
e19c3639 1379 c = (c1 << 8) | c2;
aa72b389 1380
b49a1807
KH
1381 if (endian == utf_16_big_endian
1382 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1383 {
b49a1807
KH
1384 /* The first two bytes are not BOM. Treat them as bytes
1385 for a normal character. */
1386 src = src_base;
1387 coding->errors++;
aa72b389 1388 }
b49a1807
KH
1389 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1390 }
1391 else if (bom == utf_16_detect_bom)
1392 {
1393 /* We have already tried to detect BOM and failed in
1394 detect_coding. */
1395 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1396 }
aa72b389 1397
df7492f9
KH
1398 while (1)
1399 {
1400 int c, c1, c2;
1401
1402 src_base = src;
1403 consumed_chars_base = consumed_chars;
1404
1405 if (charbuf + 2 >= charbuf_end)
1406 break;
1407
1408 ONE_MORE_BYTE (c1);
1409 ONE_MORE_BYTE (c2);
1410 c = (endian == utf_16_big_endian
e19c3639 1411 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1412 if (surrogate)
fd3ae0b9 1413 {
df7492f9 1414 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1415 {
df7492f9
KH
1416 if (endian == utf_16_big_endian)
1417 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1418 else
1419 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1420 *charbuf++ = c1;
1421 *charbuf++ = c2;
1422 coding->errors++;
1423 if (UTF_16_HIGH_SURROGATE_P (c))
1424 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1425 else
df7492f9 1426 *charbuf++ = c;
fd3ae0b9
KH
1427 }
1428 else
df7492f9
KH
1429 {
1430 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1431 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1432 *charbuf++ = c;
1433 }
fd3ae0b9 1434 }
aa72b389 1435 else
df7492f9
KH
1436 {
1437 if (UTF_16_HIGH_SURROGATE_P (c))
1438 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1439 else
1440 *charbuf++ = c;
8f924df7 1441 }
aa72b389 1442 }
df7492f9
KH
1443
1444 no_more_source:
1445 coding->consumed_char += consumed_chars_base;
1446 coding->consumed = src_base - coding->source;
1447 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1448}
b73bfc1c 1449
df7492f9
KH
1450static int
1451encode_coding_utf_16 (coding)
1452 struct coding_system *coding;
1453{
1454 int multibytep = coding->dst_multibyte;
1455 int *charbuf = coding->charbuf;
1456 int *charbuf_end = charbuf + coding->charbuf_used;
1457 unsigned char *dst = coding->destination + coding->produced;
1458 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1459 int safe_room = 8;
1460 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1461 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1462 int produced_chars = 0;
1463 Lisp_Object attrs, eol_type, charset_list;
1464 int c;
4ed46869 1465
df7492f9
KH
1466 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1467
b49a1807 1468 if (bom != utf_16_without_bom)
df7492f9
KH
1469 {
1470 ASSURE_DESTINATION (safe_room);
1471 if (big_endian)
df7492f9 1472 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1473 else
1474 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1475 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1476 }
1477
1478 while (charbuf < charbuf_end)
1479 {
1480 ASSURE_DESTINATION (safe_room);
1481 c = *charbuf++;
e19c3639
KH
1482 if (c >= MAX_UNICODE_CHAR)
1483 c = coding->default_char;
df7492f9
KH
1484
1485 if (c < 0x10000)
1486 {
1487 if (big_endian)
1488 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1489 else
1490 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1491 }
1492 else
1493 {
1494 int c1, c2;
1495
1496 c -= 0x10000;
1497 c1 = (c >> 10) + 0xD800;
1498 c2 = (c & 0x3FF) + 0xDC00;
1499 if (big_endian)
1500 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1501 else
1502 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1503 }
1504 }
1505 coding->result = CODING_RESULT_SUCCESS;
1506 coding->produced = dst - coding->destination;
1507 coding->produced_char += produced_chars;
1508 return 0;
1509}
1510
1511\f
1512/*** 6. Old Emacs' internal format (emacs-mule) ***/
1513
1514/* Emacs' internal format for representation of multiple character
1515 sets is a kind of multi-byte encoding, i.e. characters are
1516 represented by variable-length sequences of one-byte codes.
1517
1518 ASCII characters and control characters (e.g. `tab', `newline') are
1519 represented by one-byte sequences which are their ASCII codes, in
1520 the range 0x00 through 0x7F.
1521
1522 8-bit characters of the range 0x80..0x9F are represented by
1523 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1524 code + 0x20).
1525
1526 8-bit characters of the range 0xA0..0xFF are represented by
1527 one-byte sequences which are their 8-bit code.
1528
1529 The other characters are represented by a sequence of `base
1530 leading-code', optional `extended leading-code', and one or two
1531 `position-code's. The length of the sequence is determined by the
1532 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1533 whereas extended leading-code and position-code take the range 0xA0
1534 through 0xFF. See `charset.h' for more details about leading-code
1535 and position-code.
1536
1537 --- CODE RANGE of Emacs' internal format ---
1538 character set range
1539 ------------- -----
1540 ascii 0x00..0x7F
1541 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1542 eight-bit-graphic 0xA0..0xBF
1543 ELSE 0x81..0x9D + [0xA0..0xFF]+
1544 ---------------------------------------------
1545
1546 As this is the internal character representation, the format is
1547 usually not used externally (i.e. in a file or in a data sent to a
1548 process). But, it is possible to have a text externally in this
1549 format (i.e. by encoding by the coding system `emacs-mule').
1550
1551 In that case, a sequence of one-byte codes has a slightly different
1552 form.
1553
1554 At first, all characters in eight-bit-control are represented by
1555 one-byte sequences which are their 8-bit code.
1556
   Next, character composition data are represented by a byte
   sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
   where,
	METHOD is 0xF2 plus one of the composition methods (enum
	composition_method),

	BYTES is 0xA0 plus the byte length of this composition data,

	CHARS is 0xA0 plus the number of characters composed by this
	data,

	COMPONENTs are characters in multibyte form or composition
	rules encoded by two bytes of ASCII codes.
1570
1571 In addition, for backward compatibility, the following formats are
1572 also recognized as composition data on decoding.
1573
1574 0x80 MSEQ ...
1575 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1576
1577 Here,
1578 MSEQ is a multibyte form but in these special format:
1579 ASCII: 0xA0 ASCII_CODE+0x80,
1580 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1581 RULE is a one byte code of the range 0xA0..0xF0 that
1582 represents a composition rule.
1583 */
1584
1585char emacs_mule_bytes[256];
1586
df7492f9 1587int
ff0dacd7 1588emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1589 struct coding_system *coding;
781d7a48 1590 unsigned char *src;
ff0dacd7 1591 int *nbytes, *nchars, *id;
df7492f9 1592{
8f924df7
KH
1593 const unsigned char *src_end = coding->source + coding->src_bytes;
1594 const unsigned char *src_base = src;
df7492f9 1595 int multibytep = coding->src_multibyte;
df7492f9
KH
1596 struct charset *charset;
1597 unsigned code;
1598 int c;
1599 int consumed_chars = 0;
1600
1601 ONE_MORE_BYTE (c);
df7492f9
KH
1602 switch (emacs_mule_bytes[c])
1603 {
1604 case 2:
1605 if (! (charset = emacs_mule_charset[c]))
1606 goto invalid_code;
1607 ONE_MORE_BYTE (c);
1608 code = c & 0x7F;
1609 break;
1610
1611 case 3:
7c78e542
KH
1612 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1613 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
b73bfc1c 1614 {
df7492f9
KH
1615 ONE_MORE_BYTE (c);
1616 if (! (charset = emacs_mule_charset[c]))
1617 goto invalid_code;
1618 ONE_MORE_BYTE (c);
1619 code = c & 0x7F;
b73bfc1c
KH
1620 }
1621 else
1622 {
df7492f9
KH
1623 if (! (charset = emacs_mule_charset[c]))
1624 goto invalid_code;
1625 ONE_MORE_BYTE (c);
781d7a48 1626 code = (c & 0x7F) << 8;
df7492f9
KH
1627 ONE_MORE_BYTE (c);
1628 code |= c & 0x7F;
1629 }
1630 break;
1631
1632 case 4:
781d7a48 1633 ONE_MORE_BYTE (c);
df7492f9
KH
1634 if (! (charset = emacs_mule_charset[c]))
1635 goto invalid_code;
1636 ONE_MORE_BYTE (c);
781d7a48 1637 code = (c & 0x7F) << 8;
df7492f9
KH
1638 ONE_MORE_BYTE (c);
1639 code |= c & 0x7F;
1640 break;
1641
1642 case 1:
1643 code = c;
9d123124
KH
1644 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1645 ? charset_ascii : charset_eight_bit);
df7492f9
KH
1646 break;
1647
1648 default:
1649 abort ();
1650 }
1651 c = DECODE_CHAR (charset, code);
1652 if (c < 0)
1653 goto invalid_code;
1654 *nbytes = src - src_base;
1655 *nchars = consumed_chars;
ff0dacd7
KH
1656 if (id)
1657 *id = charset->id;
df7492f9
KH
1658 return c;
1659
1660 no_more_source:
1661 return -2;
1662
1663 invalid_code:
1664 return -1;
1665}
1666
1667
1668/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1669 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1670 else return 0. */
df7492f9
KH
1671
1672static int
ff0dacd7 1673detect_coding_emacs_mule (coding, detect_info)
df7492f9 1674 struct coding_system *coding;
ff0dacd7 1675 struct coding_detection_info *detect_info;
df7492f9 1676{
8f924df7
KH
1677 const unsigned char *src = coding->source, *src_base = src;
1678 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1679 int multibytep = coding->src_multibyte;
1680 int consumed_chars = 0;
1681 int c;
1682 int found = 0;
89528eb3 1683 int incomplete;
df7492f9 1684
ff0dacd7 1685 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1686 /* A coding system of this category is always ASCII compatible. */
1687 src += coding->head_ascii;
1688
1689 while (1)
1690 {
89528eb3 1691 incomplete = 0;
df7492f9 1692 ONE_MORE_BYTE (c);
89528eb3 1693 incomplete = 1;
df7492f9
KH
1694
1695 if (c == 0x80)
1696 {
1697 /* Perhaps the start of composite character. We simple skip
1698 it because analyzing it is too heavy for detecting. But,
1699 at least, we check that the composite character
1700 constitues of more than 4 bytes. */
8f924df7 1701 const unsigned char *src_base;
df7492f9
KH
1702
1703 repeat:
1704 src_base = src;
1705 do
1706 {
1707 ONE_MORE_BYTE (c);
1708 }
1709 while (c >= 0xA0);
1710
1711 if (src - src_base <= 4)
1712 break;
ff0dacd7 1713 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1714 if (c == 0x80)
1715 goto repeat;
b73bfc1c 1716 }
df7492f9
KH
1717
1718 if (c < 0x80)
b73bfc1c 1719 {
df7492f9
KH
1720 if (c < 0x20
1721 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1722 break;
1723 }
1724 else
1725 {
8f924df7 1726 const unsigned char *src_base = src - 1;
df7492f9
KH
1727
1728 do
1729 {
1730 ONE_MORE_BYTE (c);
1731 }
1732 while (c >= 0xA0);
1733 if (src - src_base != emacs_mule_bytes[*src_base])
1734 break;
ff0dacd7 1735 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1736 }
1737 }
ff0dacd7 1738 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1739 return 0;
1740
1741 no_more_source:
89528eb3
KH
1742 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1743 {
ff0dacd7 1744 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1745 return 0;
1746 }
ff0dacd7
KH
1747 detect_info->found |= found;
1748 return 1;
4ed46869
KH
1749}
1750
b73bfc1c 1751
df7492f9
KH
1752/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1753
/* Decode one character of a composition sequence of Emacs 20/21 style
   at SRC with emacs_mule_char, store it in *BUF, and advance BUF, SRC
   and CONSUMED_CHARS past it.

   This macro must be used as the body (or a statement in the body) of
   a loop and must be followed by a semicolon: if SRC has reached
   SRC_END, or emacs_mule_char reports that the source ended (-2), it
   executes `break'.  Any other negative result jumps to the label
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  if (1)                                                                \
    {                                                                   \
      int c;                                                            \
      int nbytes, nchars;                                               \
                                                                        \
      if (src == src_end)                                               \
        break;                                                          \
      c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);        \
      if (c == -2)                                                      \
        break;                                                          \
      if (c < 0)                                                        \
        goto invalid_code;                                              \
      *buf++ = c;                                                       \
      src += nbytes;                                                    \
      consumed_chars += nchars;                                         \
    }                                                                   \
  else
1781
1782
/* Decode a composition rule represented as a component of a
   composition sequence of Emacs 20 style at SRC.  Store the decoded
   rule in *BUF, and increment BUF.

   The rule is a single byte in the range 0xA0..0xF0; after
   subtracting 0xA0 it is GREF * 9 + NREF.  If SRC is at SRC_END or
   the byte is outside that range, jump to the label
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
  do {                                                  \
    int c, gref, nref;                                  \
                                                        \
    if (src >= src_end)                                 \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (c);                         \
    /* Rule bytes start at 0xA0, giving 0..80.  */      \
    c -= 0xA0;                                          \
    if (c < 0 || c >= 81)                               \
      goto invalid_code;                                \
                                                        \
    gref = c / 9, nref = c % 9;                         \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1802
1803
781d7a48
KH
/* Decode a composition rule represented as a component of a
   composition sequence of Emacs 21 style at SRC.  Store the decoded
   rule in *BUF, and increment BUF.

   The rule occupies two bytes, GREF + 0x20 followed by NREF + 0x20.
   If fewer than two bytes remain before SRC_END, or either value is
   outside 0..80, jump to the label `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    if (src + 1 >= src_end)                             \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (gref);                      \
    ONE_MORE_BYTE_NO_CHECK (nref);                      \
    gref -= 0x20;                                       \
    nref -= 0x20;                                       \
    if (gref < 0 || gref >= 81)                         \
      goto invalid_code;                                \
    if (nref < 0 || nref >= 81)                         \
      goto invalid_code;                                \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1824
1825
/* Decode the header of an Emacs 21 style composition and store a
   composition annotation in CHARBUF.  C is the METHOD byte that has
   just been read.  For every method but COMPOSITION_RELATIVE, also
   decode the component characters (and, unless the method is
   COMPOSITION_WITH_ALTCHARS, the rules between them) into CHARBUF and
   extend the annotation length accordingly.  Jumps to `invalid_code'
   on a malformed sequence.  The caller must have `char_offset' and
   `consumed_chars_base' in scope in addition to the usual decoder
   variables.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    /* Emacs 21 style format.  The first three bytes at SRC are         \
       METHOD + 0xF2, BYTES + 0xA0 and CHARS + 0xA0, where BYTES is     \
       the byte length of this composition information, CHARS is the    \
       number of characters composed by this composition.  */           \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int from, to;                                                       \
    int consumed_chars_limit;                                           \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    /* FROM and TO are character positions, so they are based on        \
       produced_char (not on the byte count `produced'), as in the      \
       Emacs 20 composition macros.  */                                 \
    from = coding->produced_char + char_offset;                         \
    to = from + nchars;                                                 \
    ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
    consumed_chars_limit = consumed_chars_base + nbytes;                \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int i = 0;                                                      \
        while (consumed_chars < consumed_chars_limit)                   \
          {                                                             \
            /* Odd components are rules, except for altchars.  */       \
            if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
            i++;                                                        \
          }                                                             \
        /* The loop was left early by a `break' in                      \
           DECODE_EMACS_MULE_COMPOSITION_CHAR.  */                      \
        if (consumed_chars < consumed_chars_limit)                      \
          goto invalid_code;                                            \
        /* Account for the I components in the (negative) length.  */   \
        charbuf_base[0] -= i;                                           \
      }                                                                 \
  } while (0)
93dec019 1864
aa72b389 1865
df7492f9
KH
/* Decode an Emacs 20 style relative composition.  SRC is rewound to
   SRC_BASE, the leading 0x80 is skipped, and up to
   MAX_COMPOSITION_COMPONENTS characters are decoded.  If fewer than
   two characters were decoded, jump to `invalid_code'.  Otherwise a
   COMPOSITION_RELATIVE annotation followed by the decoded characters
   is stored in CHARBUF.  The caller must have `char_offset' in scope
   in addition to the usual decoder variables.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
  do {                                                          \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    enum composition_method method = COMPOSITION_RELATIVE;      \
    int from, to;                                               \
    int i, j;                                                   \
                                                                \
    src = src_base;                                             \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
    i = 0;                                                      \
    while (i < MAX_COMPOSITION_COMPONENTS)                      \
      {                                                         \
        /* Leaves this loop when no more can be decoded.  */    \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
        i++;                                                    \
      }                                                         \
    if (i < 2)                                                  \
      goto invalid_code;                                        \
    from = coding->produced_char + char_offset;                 \
    to = from + i;                                              \
    ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
    for (j = 0; j != i; j++)                                    \
      *charbuf++ = components[j];                               \
  } while (0)
1888
1889
/* Decode an Emacs 20 style rule-base composition.  Characters and
   rules are decoded alternately (CHAR RULE CHAR ...) into the local
   array COMPONENTS, a COMPOSITION_WITH_RULE annotation is added at
   CHARBUF, and the decoded data is appended after it.

   Jumps to `invalid_code' when no rule/character pair was decoded or
   when the component sequence does not end with a character, and to
   `no_more_source' when CHARBUF does not have room for the result.

   The macro uses these names from the expansion site: coding,
   charbuf, charbuf_end, char_offset and the labels `invalid_code' and
   `no_more_source'.

   NOTE(review): the copy loops below still use I (the number of
   rule/character pairs) as their bound, exactly as before; whether
   that covers all decoded components depends on how the helper macros
   advance BUF and leave the loop -- verify against their
   definitions.  */
#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)		\
  do {								\
    /* Emacs 20 style format for rule-base composition.  */	\
    /* Store multibyte form of characters to be composed.  */	\
    enum composition_method method = COMPOSITION_WITH_RULE;	\
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];		\
    int *buf = components;					\
    int i, j;							\
    int from, to;						\
								\
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);			\
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)		\
      {								\
	DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);		\
	DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);		\
      }								\
    if (i < 1 || (buf - components) % 2 == 0)			\
      goto invalid_code;					\
    /* Give up only when the output does NOT fit (the test used	\
       to be inverted and bailed out whenever there was room).  */ \
    if (charbuf + i + (i / 2) + 1 > charbuf_end)		\
      goto no_more_source;					\
    from = coding->produced_char + char_offset;			\
    to = from + i;						\
    /* The annotation belongs in the output buffer, not in the	\
       scratch array BUF points into.  */			\
    ADD_COMPOSITION_DATA (charbuf, from, to, method);		\
    for (j = 0; j < i; j++)					\
      *charbuf++ = components[j];				\
    for (j = 0; j < i; j += 2)					\
      *charbuf++ = components[j];				\
  } while (0)
1918
aa72b389
KH
1919
/* Decode the emacs-mule encoded bytes of CODING, starting at
   coding->source + coding->consumed, into the integer buffer
   coding->charbuf.

   Bytes below 0x80 are stored as they are, except for the CR handling
   that depends on the eol type.  The byte 0x80 introduces a
   composition, handled by one of the DECODE_EMACS_MULE_*_COMPOSITION
   macros.  A byte below 0xA0 whose `emacs_mule_bytes' entry is
   greater than 1 starts a multibyte character that is decoded by
   emacs_mule_char.  No branch handles any other byte: it is consumed
   without producing output.

   A sequence that cannot be decoded is re-read one byte at a time;
   that byte is stored (through BYTE8_TO_CHAR when it is not ASCII)
   and coding->errors is incremented.

   Runs of characters of one non-ASCII charset are recorded with
   ADD_CHARSET_DATA.  On exit coding->consumed_char, coding->consumed
   and coding->charbuf_used are updated to the start of the last item
   that was not completely processed.  */
1920static void
df7492f9 1921decode_coding_emacs_mule (coding)
aa72b389 1922 struct coding_system *coding;
aa72b389 1923{
8f924df7
KH
1924 const unsigned char *src = coding->source + coding->consumed;
1925 const unsigned char *src_end = coding->source + coding->src_bytes;
1926 const unsigned char *src_base;
df7492f9 1927 int *charbuf = coding->charbuf;
/* Stop MAX_ANNOTATION_LENGTH words before the real end of the buffer.  */
ff0dacd7 1928 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 1929 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
1930 int multibytep = coding->src_multibyte;
1931 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
1932 int char_offset = coding->produced_char;
1933 int last_offset = char_offset;
1934 int last_id = charset_ascii;
aa72b389 1935
df7492f9 1936 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1937
aa72b389
KH
1938 while (1)
1939 {
df7492f9
KH
1940 int c;
1941
/* Remember where this item starts so that it can be re-read
   (invalid_code) or left unconsumed (no_more_source).  */
aa72b389 1942 src_base = src;
df7492f9
KH
1943 consumed_chars_base = consumed_chars;
1944
1945 if (charbuf >= charbuf_end)
1946 break;
aa72b389 1947
df7492f9
KH
1948 ONE_MORE_BYTE (c);
1949
1950 if (c < 0x80)
aa72b389 1951 {
df7492f9
KH
1952 if (c == '\r')
1953 {
1954 if (EQ (eol_type, Qdos))
1955 {
/* A CR at the very end of the source cannot be classified yet.  */
1956 if (src == src_end)
98725083
KH
1957 {
1958 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1959 goto no_more_source;
1960 }
/* For CR LF, read the LF so that only it is stored.  */
df7492f9
KH
1961 if (*src == '\n')
1962 ONE_MORE_BYTE (c);
1963 }
1964 else if (EQ (eol_type, Qmac))
1965 c = '\n';
1966 }
1967 *charbuf++ = c;
1968 char_offset++;
aa72b389 1969 }
df7492f9
KH
1970 else if (c == 0x80)
1971 {
/* Composition: the byte after 0x80 selects the format.  */
df7492f9 1972 ONE_MORE_BYTE (c);
781d7a48
KH
1973 if (c - 0xF2 >= COMPOSITION_RELATIVE
1974 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1975 DECODE_EMACS_MULE_21_COMPOSITION (c);
1976 else if (c < 0xC0)
1977 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1978 else if (c == 0xFF)
1979 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1980 else
1981 goto invalid_code;
1982 }
1983 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1984 {
1985 int nbytes, nchars;
ff0dacd7
KH
1986 int id;
1987
/* Rewind and let emacs_mule_char decode the whole sequence.  */
781d7a48
KH
1988 src = src_base;
1989 consumed_chars = consumed_chars_base;
ff0dacd7 1990 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
1991 if (c < 0)
1992 {
/* -2 stops decoding here; any other negative value marks the
   sequence as invalid.  */
1993 if (c == -2)
1994 break;
1995 goto invalid_code;
1996 }
/* The charset changed: close the run of the previous charset.  */
ff0dacd7
KH
1997 if (last_id != id)
1998 {
1999 if (last_id != charset_ascii)
2000 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2001 last_id = id;
2002 last_offset = char_offset;
2003 }
df7492f9 2004 *charbuf++ = c;
781d7a48
KH
2005 src += nbytes;
2006 consumed_chars += nchars;
df7492f9
KH
2007 char_offset++;
2008 }
2009 continue;
2010
/* Re-read the first byte of the item and store it by itself.  */
2011 invalid_code:
2012 src = src_base;
2013 consumed_chars = consumed_chars_base;
2014 ONE_MORE_BYTE (c);
2015 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2016 char_offset++;
df7492f9
KH
2017 coding->errors++;
2018 }
2019
2020 no_more_source:
/* Close the charset run that is still open, then publish progress.  */
ff0dacd7
KH
2021 if (last_id != charset_ascii)
2022 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
2023 coding->consumed_char += consumed_chars_base;
2024 coding->consumed = src_base - coding->source;
2025 coding->charbuf_used = charbuf - coding->charbuf;
2026}
2027
2028
/* Store in CODES, an array of at least two bytes, the leading code(s)
   used for the emacs-mule charset ID.  An ID below 0xA0 is its own
   single leading code and CODES[1] is set to 0.  Any other ID is
   stored in CODES[1], preceded in CODES[0] by the code selected by
   its range:
     0xA0..0xDF -> 0x9A,  0xE0..0xEF -> 0x9B,
     0xF0..0xF4 -> 0x9C,  0xF5 and above -> 0x9D.

   The expansion is a single statement without a trailing semicolon,
   so the macro can be used as the body of an `if' that has an `else'.
   ID is evaluated more than once.  */
#define EMACS_MULE_LEADING_CODES(id, codes)	\
  do {						\
    if ((id) < 0xA0)				\
      (codes)[0] = (id), (codes)[1] = 0;	\
    else if ((id) < 0xE0)			\
      (codes)[0] = 0x9A, (codes)[1] = (id);	\
    else if ((id) < 0xF0)			\
      (codes)[0] = 0x9B, (codes)[1] = (id);	\
    else if ((id) < 0xF5)			\
      (codes)[0] = 0x9C, (codes)[1] = (id);	\
    else					\
      (codes)[0] = 0x9D, (codes)[1] = (id);	\
  } while (0)
2042
aa72b389 2043
df7492f9
KH
2044static int
2045encode_coding_emacs_mule (coding)
2046 struct coding_system *coding;
2047{
2048 int multibytep = coding->dst_multibyte;
2049 int *charbuf = coding->charbuf;
2050 int *charbuf_end = charbuf + coding->charbuf_used;
2051 unsigned char *dst = coding->destination + coding->produced;
2052 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2053 int safe_room = 8;
df7492f9
KH
2054 int produced_chars = 0;
2055 Lisp_Object attrs, eol_type, charset_list;
2056 int c;
ff0dacd7 2057 int preferred_charset_id = -1;
df7492f9
KH
2058
2059 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
eccb6815
KH
2060 if (! EQ (charset_list, Vemacs_mule_charset_list))
2061 {
2062 CODING_ATTR_CHARSET_LIST (attrs)
2063 = charset_list = Vemacs_mule_charset_list;
2064 }
df7492f9
KH
2065
2066 while (charbuf < charbuf_end)
2067 {
2068 ASSURE_DESTINATION (safe_room);
2069 c = *charbuf++;
ff0dacd7
KH
2070
2071 if (c < 0)
2072 {
2073 /* Handle an annotation. */
2074 switch (*charbuf)
2075 {
2076 case CODING_ANNOTATE_COMPOSITION_MASK:
2077 /* Not yet implemented. */
2078 break;
2079 case CODING_ANNOTATE_CHARSET_MASK:
2080 preferred_charset_id = charbuf[3];
2081 if (preferred_charset_id >= 0
2082 && NILP (Fmemq (make_number (preferred_charset_id),
2083 charset_list)))
2084 preferred_charset_id = -1;
2085 break;
2086 default:
2087 abort ();
2088 }
2089 charbuf += -c - 1;
2090 continue;
2091 }
2092
df7492f9
KH
2093 if (ASCII_CHAR_P (c))
2094 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2095 else if (CHAR_BYTE8_P (c))
2096 {
2097 c = CHAR_TO_BYTE8 (c);
2098 EMIT_ONE_BYTE (c);
2099 }
df7492f9 2100 else
aa72b389 2101 {
df7492f9
KH
2102 struct charset *charset;
2103 unsigned code;
2104 int dimension;
2105 int emacs_mule_id;
2106 unsigned char leading_codes[2];
2107
ff0dacd7
KH
2108 if (preferred_charset_id >= 0)
2109 {
2110 charset = CHARSET_FROM_ID (preferred_charset_id);
2111 if (! CHAR_CHARSET_P (c, charset))
2112 charset = char_charset (c, charset_list, NULL);
2113 }
2114 else
2115 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2116 if (! charset)
2117 {
2118 c = coding->default_char;
2119 if (ASCII_CHAR_P (c))
2120 {
2121 EMIT_ONE_ASCII_BYTE (c);
2122 continue;
2123 }
2124 charset = char_charset (c, charset_list, &code);
2125 }
2126 dimension = CHARSET_DIMENSION (charset);
2127 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2128 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2129 EMIT_ONE_BYTE (leading_codes[0]);
2130 if (leading_codes[1])
2131 EMIT_ONE_BYTE (leading_codes[1]);
2132 if (dimension == 1)
1fa663f9 2133 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2134 else
df7492f9 2135 {
1fa663f9 2136 code |= 0x8080;
df7492f9
KH
2137 EMIT_ONE_BYTE (code >> 8);
2138 EMIT_ONE_BYTE (code & 0xFF);
2139 }
aa72b389 2140 }
aa72b389 2141 }
df7492f9
KH
2142 coding->result = CODING_RESULT_SUCCESS;
2143 coding->produced_char += produced_chars;
2144 coding->produced = dst - coding->destination;
2145 return 0;
aa72b389 2146}
b73bfc1c 2147
4ed46869 2148\f
df7492f9 2149/*** 7. ISO2022 handlers ***/
4ed46869
KH
2150
2151/* The following note describes the coding system ISO2022 briefly.
39787efd 2152 Since the intention of this note is to help understand the
5a936b46 2153 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2154 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2155 original document of ISO2022. This is equivalent to the standard
cfb43547 2156 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2157
2158 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2159 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2160 is encoded using bytes less than 128. This may make the encoded
2161 text a little bit longer, but the text passes more easily through
cfb43547 2162 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2163 Significant Bit).
b73bfc1c 2164
cfb43547
DL
2165 There are two kinds of character sets: control character sets and
2166 graphic character sets. The former contain control characters such
4ed46869 2167 as `newline' and `escape' to provide control functions (control
39787efd 2168 functions are also provided by escape sequences). The latter
cfb43547 2169 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2170 two control character sets and many graphic character sets.
2171
2172 Graphic character sets are classified into one of the following
39787efd
KH
2173 four classes, according to the number of bytes (DIMENSION) and
2174 number of characters in one dimension (CHARS) of the set:
2175 - DIMENSION1_CHARS94
2176 - DIMENSION1_CHARS96
2177 - DIMENSION2_CHARS94
2178 - DIMENSION2_CHARS96
2179
2180 In addition, each character set is assigned an identification tag,
cfb43547 2181 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2182 hereafter). The <F> of each character set is decided by ECMA(*)
2183 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2184 (0x30..0x3F are for private use only).
4ed46869
KH
2185
2186 Note (*): ECMA = European Computer Manufacturers Association
2187
cfb43547 2188 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2189 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2190 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2191 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2192 o DIMENSION2_CHARS96 -- none for the moment
2193
39787efd 2194 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2195 C0 [0x00..0x1F] -- control character plane 0
2196 GL [0x20..0x7F] -- graphic character plane 0
2197 C1 [0x80..0x9F] -- control character plane 1
2198 GR [0xA0..0xFF] -- graphic character plane 1
2199
2200 A control character set is directly designated and invoked to C0 or
39787efd
KH
2201 C1 by an escape sequence. The most common case is that:
2202 - ISO646's control character set is designated/invoked to C0, and
2203 - ISO6429's control character set is designated/invoked to C1,
2204 and usually these designations/invocations are omitted in encoded
2205 text. In a 7-bit environment, only C0 can be used, and a control
2206 character for C1 is encoded by an appropriate escape sequence to
2207 fit into the environment. All control characters for C1 are
2208 defined to have corresponding escape sequences.
4ed46869
KH
2209
2210 A graphic character set is at first designated to one of four
2211 graphic registers (G0 through G3), then these graphic registers are
2212 invoked to GL or GR. These designations and invocations can be
2213 done independently. The most common case is that G0 is invoked to
39787efd
KH
2214 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2215 these invocations and designations are omitted in encoded text.
2216 In a 7-bit environment, only GL can be used.
4ed46869 2217
39787efd
KH
2218 When a graphic character set of CHARS94 is invoked to GL, codes
2219 0x20 and 0x7F of the GL area work as control characters SPACE and
2220 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2221 be used.
4ed46869
KH
2222
2223 There are two ways of invocation: locking-shift and single-shift.
2224 With locking-shift, the invocation lasts until the next different
39787efd
KH
2225 invocation, whereas with single-shift, the invocation affects the
2226 following character only and doesn't affect the locking-shift
2227 state. Invocations are done by the following control characters or
2228 escape sequences:
4ed46869
KH
2229
2230 ----------------------------------------------------------------------
39787efd 2231 abbrev function cntrl escape seq description
4ed46869 2232 ----------------------------------------------------------------------
39787efd
KH
2233 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2234 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2235 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2236 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2237 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2238 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2239 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2240 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2241 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2242 ----------------------------------------------------------------------
39787efd
KH
2243 (*) These are not used by any known coding system.
2244
2245 Control characters for these functions are defined by macros
2246 ISO_CODE_XXX in `coding.h'.
4ed46869 2247
39787efd 2248 Designations are done by the following escape sequences:
4ed46869
KH
2249 ----------------------------------------------------------------------
2250 escape sequence description
2251 ----------------------------------------------------------------------
2252 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2253 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2254 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2255 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2256 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2257 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2258 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2259 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2260 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2261 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2262 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2263 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2264 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2265 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2266 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2267 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2268 ----------------------------------------------------------------------
2269
2270 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2271 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2272
2273 Note (*): Although these designations are not allowed in ISO2022,
2274 Emacs accepts them on decoding, and produces them on encoding
39787efd 2275 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2276 7-bit environment, non-locking-shift, and non-single-shift.
2277
2278 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2279 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2280
cfb43547 2281 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2282 same multilingual text in ISO2022. Actually, there exist many
2283 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2284 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2285 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2286 localized platforms), and all of these are variants of ISO2022.
2287
2288 In addition to the above, Emacs handles two more kinds of escape
2289 sequences: ISO6429's direction specification and Emacs' private
2290 sequence for specifying character composition.
2291
39787efd 2292 ISO6429's direction specification takes the following form:
4ed46869
KH
2293 o CSI ']' -- end of the current direction
2294 o CSI '0' ']' -- end of the current direction
2295 o CSI '1' ']' -- start of left-to-right text
2296 o CSI '2' ']' -- start of right-to-left text
2297 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2298 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2299
2300 Character composition specification takes the following form:
ec6d2bb8
KH
2301 o ESC '0' -- start relative composition
2302 o ESC '1' -- end composition
2303 o ESC '2' -- start rule-base composition (*)
2304 o ESC '3' -- start relative composition with alternate chars (**)
2305 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2306 Since these are not standard escape sequences of any ISO standard,
cfb43547 2307 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2308
5a936b46
DL
2309 (*) This form is used only in Emacs 20.7 and older versions,
2310 but newer versions can safely decode it.
cfb43547 2311 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2312 and older versions can't decode it.
ec6d2bb8 2313
cfb43547 2314 Here's a list of example usages of these composition escape
b73bfc1c 2315 sequences (categorized by `enum composition_method').
ec6d2bb8 2316
b73bfc1c 2317 COMPOSITION_RELATIVE:
ec6d2bb8 2318 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2319 COMPOSITION_WITH_RULE:
ec6d2bb8 2320 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2321 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2322 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2323 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2324 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2325
/* Table of 256 `enum iso_code_class_type' values, one per possible
   byte value.  Presumably it gives the ISO 2022 class of each byte;
   it is filled in outside this part of the file -- confirm there.  */
2326enum iso_code_class_type iso_code_class[256];
2327
df7492f9
KH
/* Return nonzero if the charset ID is marked safe in CODING, i.e. ID
   is not beyond CODING->max_charset_id and its entry in
   CODING->safe_charsets does not have the top bit set.

   The table is a Lisp string whose bytes default to 255 and are
   overwritten with a small register number for safe charsets.  It is
   accessed through a plain `char *', whose signedness is
   implementation-defined, so the top bit is tested explicitly instead
   of comparing the element with 0.  */
#define SAFE_CHARSET_P(coding, id)	\
  ((id) <= (coding)->max_charset_id	\
   && ((coding)->safe_charsets[id] & 0x80) == 0)
2331
2332
/* Nonzero if CODING_ISO_INITIAL for register 1 of the coding system
   stored in coding_categories[CATEGORY] is not negative.  Presumably
   that means a charset is initially designated to G1, which shift-out
   would invoke -- confirm against the definition of
   CODING_ISO_INITIAL.  */
2333#define SHIFT_OUT_OK(category) \
2334 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2335
2336static void
f0064e1f
DL
2337setup_iso_safe_charsets (attrs)
2338 Lisp_Object attrs;
df7492f9
KH
2339{
2340 Lisp_Object charset_list, safe_charsets;
2341 Lisp_Object request;
2342 Lisp_Object reg_usage;
2343 Lisp_Object tail;
2344 int reg94, reg96;
2345 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2346 int max_charset_id;
2347
2348 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2349 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2350 && ! EQ (charset_list, Viso_2022_charset_list))
2351 {
2352 CODING_ATTR_CHARSET_LIST (attrs)
2353 = charset_list = Viso_2022_charset_list;
2354 ASET (attrs, coding_attr_safe_charsets, Qnil);
2355 }
2356
2357 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2358 return;
2359
2360 max_charset_id = 0;
2361 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2362 {
2363 int id = XINT (XCAR (tail));
2364 if (max_charset_id < id)
2365 max_charset_id = id;
2366 }
d46c5b12 2367
df7492f9
KH
2368 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2369 make_number (255));
2370 request = AREF (attrs, coding_attr_iso_request);
2371 reg_usage = AREF (attrs, coding_attr_iso_usage);
2372 reg94 = XINT (XCAR (reg_usage));
2373 reg96 = XINT (XCDR (reg_usage));
2374
2375 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2376 {
2377 Lisp_Object id;
2378 Lisp_Object reg;
2379 struct charset *charset;
2380
2381 id = XCAR (tail);
2382 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2383 reg = Fcdr (Fassq (id, request));
df7492f9 2384 if (! NILP (reg))
8f924df7 2385 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2386 else if (charset->iso_chars_96)
2387 {
2388 if (reg96 < 4)
8f924df7 2389 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2390 }
2391 else
2392 {
2393 if (reg94 < 4)
8f924df7 2394 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2395 }
2396 }
2397 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2398}
d46c5b12 2399
b6871cc7 2400
4ed46869 2401/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in one of ISO-2022 based coding systems.
   If it is, return 1, else return 0.

   CATEGORY_MASK_ISO is always added to DETECT_INFO->checked.  The
   scan starts after the first CODING->head_ascii bytes and
   accumulates two masks, `found' and `rejected'.  When every ISO
   category has been rejected, CATEGORY_MASK_ISO is added to
   DETECT_INFO->rejected and 0 is returned.  Otherwise control reaches
   the label `no_more_source' (presumably from ONE_MORE_BYTE at the
   end of the source), `rejected' is added to DETECT_INFO->rejected,
   the categories found and not rejected are added to
   DETECT_INFO->found, and 1 is returned.  */
4ed46869 2404
0a28aafb 2405static int
ff0dacd7 2406detect_coding_iso_2022 (coding, detect_info)
df7492f9 2407 struct coding_system *coding;
ff0dacd7 2408 struct coding_detection_info *detect_info;
4ed46869 2409{
8f924df7
KH
2410 const unsigned char *src = coding->source, *src_base = src;
2411 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2412 int multibytep = coding->src_multibyte;
ff0dacd7 2413 int single_shifting = 0;
df7492f9
KH
2414 int id;
2415 int c, c1;
2416 int consumed_chars = 0;
2417 int i;
ff0dacd7
KH
2418 int rejected = 0;
2419 int found = 0;
2420
2421 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2422
/* Refresh the safe-charsets table of every ISO category, since
   SAFE_CHARSET_P below reads max_charset_id and safe_charsets.  */
2423 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2424 {
2425 struct coding_system *this = &(coding_categories[i]);
2426 Lisp_Object attrs, val;
2427
2428 attrs = CODING_ID_ATTRS (this->id);
2429 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2430 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2431 setup_iso_safe_charsets (attrs);
2432 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2433 this->max_charset_id = SCHARS (val) - 1;
2434 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2435 }
2436
2437 /* A coding system of this category is always ASCII compatible. */
2438 src += coding->head_ascii;
3f003981 2439
ff0dacd7 2440 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2441 {
df7492f9 2442 ONE_MORE_BYTE (c);
4ed46869
KH
2443 switch (c)
2444 {
2445 case ISO_CODE_ESC:
74383408
KH
2446 if (inhibit_iso_escape_detection)
2447 break;
f46869e4 2448 single_shifting = 0;
df7492f9 2449 ONE_MORE_BYTE (c);
d46c5b12 2450 if (c >= '(' && c <= '/')
4ed46869 2451 {
bf9cdd4e 2452 /* Designation sequence for a charset of dimension 1. */
df7492f9 2453 ONE_MORE_BYTE (c1);
d46c5b12 2454 if (c1 < ' ' || c1 >= 0x80
df7492f9 2455 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2456 /* Invalid designation sequence. Just ignore. */
2457 break;
bf9cdd4e
KH
2458 }
2459 else if (c == '$')
2460 {
2461 /* Designation sequence for a charset of dimension 2. */
df7492f9 2462 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2463 if (c >= '@' && c <= 'B')
2464 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2465 id = iso_charset_table[1][0][c];
bf9cdd4e 2466 else if (c >= '(' && c <= '/')
bcf26d6a 2467 {
df7492f9 2468 ONE_MORE_BYTE (c1);
d46c5b12 2469 if (c1 < ' ' || c1 >= 0x80
df7492f9 2470 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2471 /* Invalid designation sequence. Just ignore. */
2472 break;
bcf26d6a 2473 }
bf9cdd4e 2474 else
ff0dacd7 2475 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2476 break;
2477 }
ae9ff118 2478 else if (c == 'N' || c == 'O')
d46c5b12 2479 {
ae9ff118 2480 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2481 single_shifting = 1;
2482 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2483 break;
4ed46869 2484 }
ec6d2bb8
KH
2485 else if (c >= '0' && c <= '4')
2486 {
2487 /* ESC <Fp> for start/end composition. */
ff0dacd7 2488 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2489 break;
2490 }
bf9cdd4e 2491 else
df7492f9 2492 {
ff0dacd7 2493 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2494 break;
2495 }
d46c5b12
KH
2496
2497 /* We found a valid designation sequence for CHARSET. */
/* Each 7-bit and `else' category is found or rejected according to
   whether the designated charset ID is safe for it.  */
ff0dacd7 2498 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2499 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2500 id))
ff0dacd7 2501 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2502 else
ff0dacd7 2503 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2504 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2505 id))
ff0dacd7 2506 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2507 else
ff0dacd7 2508 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2509 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2510 id))
ff0dacd7 2511 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2512 else
ff0dacd7 2513 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2514 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2515 id))
ff0dacd7 2516 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2517 else
ff0dacd7 2518 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2519 break;
2520
4ed46869 2521 case ISO_CODE_SO:
d46c5b12 2522 case ISO_CODE_SI:
ff0dacd7 2523 /* Locking shift out/in. */
74383408
KH
2524 if (inhibit_iso_escape_detection)
2525 break;
f46869e4 2526 single_shifting = 0;
ff0dacd7
KH
2527 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2528 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2529 break;
2530
4ed46869 2531 case ISO_CODE_CSI:
ff0dacd7 2532 /* Control sequence introducer. */
f46869e4 2533 single_shifting = 0;
ff0dacd7
KH
2534 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2535 found |= CATEGORY_MASK_ISO_8_ELSE;
2536 goto check_extra_latin;
2537
2538
4ed46869
KH
2539 case ISO_CODE_SS2:
2540 case ISO_CODE_SS3:
ff0dacd7
KH
2541 /* Single shift. */
2542 if (inhibit_iso_escape_detection)
2543 break;
2544 single_shifting = 1;
2545 rejected |= CATEGORY_MASK_ISO_7BIT;
2546 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2547 & CODING_ISO_FLAG_SINGLE_SHIFT)
2548 found |= CATEGORY_MASK_ISO_8_1;
2549 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2550 & CODING_ISO_FLAG_SINGLE_SHIFT)
2551 found |= CATEGORY_MASK_ISO_8_2;
2552 goto check_extra_latin;
4ed46869
KH
2553
2554 default:
2555 if (c < 0x80)
f46869e4
KH
2556 {
2557 single_shifting = 0;
2558 break;
2559 }
ff0dacd7 2560 if (c >= 0xA0)
c4825358 2561 {
ff0dacd7
KH
2562 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2563 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2564 /* Check the length of succeeding codes of the range
   0xA0..0xFF.  If the byte length is even, we include
   CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
   only when we are not single shifting.  */
2568 if (! single_shifting
2569 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2570 {
e17de821 2571 int i = 1;
b73bfc1c
KH
2572 while (src < src_end)
2573 {
df7492f9 2574 ONE_MORE_BYTE (c);
b73bfc1c
KH
2575 if (c < 0xA0)
2576 break;
2577 i++;
2578 }
2579
2580 if (i & 1 && src < src_end)
ff0dacd7 2581 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2582 else
ff0dacd7 2583 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2584 }
ff0dacd7 2585 break;
4ed46869 2586 }
/* Reached for bytes 0x80..0x9F that are not handled above, and by
   `goto' from the CSI, SS2 and SS3 cases: such a byte is acceptable
   only if Vlatin_extra_code_table has a non-nil entry for it.  */
ff0dacd7
KH
2587 check_extra_latin:
2588 single_shifting = 0;
2589 if (! VECTORP (Vlatin_extra_code_table)
2590 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2591 {
2592 rejected = CATEGORY_MASK_ISO;
2593 break;
2594 }
2595 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2596 & CODING_ISO_FLAG_LATIN_EXTRA)
2597 found |= CATEGORY_MASK_ISO_8_1;
2598 else
2599 rejected |= CATEGORY_MASK_ISO_8_1;
2600 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2601 & CODING_ISO_FLAG_LATIN_EXTRA)
2602 found |= CATEGORY_MASK_ISO_8_2;
2603 else
2604 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2605 }
2606 }
/* The loop ended because every ISO category was rejected.  */
ff0dacd7
KH
2607 detect_info->rejected |= CATEGORY_MASK_ISO;
2608 return 0;
4ed46869 2609
df7492f9 2610 no_more_source:
ff0dacd7
KH
2611 detect_info->rejected |= rejected;
2612 detect_info->found |= (found & ~rejected);
df7492f9 2613 return 1;
4ed46869 2614}
ec6d2bb8 2615
4ed46869
KH
2616
2617/* Set designation state into CODING.

   Look up the charset designated by FINAL for dimension DIM and
   CHARS_96 with ISO_CHARSET_TABLE and record it as the designation of
   graphic register REG.  If FINAL is outside '0'..127, no charset is
   found, or the charset is not safe for CODING (SAFE_CHARSET_P), the
   designation of REG is set to -2 and control jumps to
   `invalid_code'.

   charset_jisx0201_roman is replaced by charset_ascii when CODING has
   CODING_ISO_FLAG_USE_ROMAN, and charset_jisx0208_1978 by
   charset_jisx0208 when it has CODING_ISO_FLAG_USE_OLDJIS.

   The macro uses `coding' and the label `invalid_code' from the
   expansion site.  */
df7492f9
KH
2618#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2619 do { \
2620 int id, prev; \
2621 \
2622 if (final < '0' || final >= 128 \
2623 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2624 || !SAFE_CHARSET_P (coding, id)) \
2625 { \
2626 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2627 goto invalid_code; \
2628 } \
2629 prev = CODING_ISO_DESIGNATION (coding, reg); \
bf16eb23
KH
2630 if (id == charset_jisx0201_roman) \
2631 { \
2632 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2633 id = charset_ascii; \
2634 } \
2635 else if (id == charset_jisx0208_1978) \
2636 { \
2637 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2638 id = charset_jisx0208; \
2639 } \
df7492f9
KH
2640 CODING_ISO_DESIGNATION (coding, reg) = id; \
2641 /* If there was an invalid designation to REG previously, and this \
2642 designation is ASCII to REG, we should keep this designation \
2643 sequence. */ \
2644 if (prev == -2 && id == charset_ascii) \
2645 goto invalid_code; \
4ed46869
KH
2646 } while (0)
2647
d46c5b12 2648
df7492f9
KH
/* If a composition is in progress, abandon it: store the characters
   collected so far in `components' into CHARBUF as ordinary
   characters, advance CHAR_OFFSET by the number of characters stored,
   and set COMPOSITION_STATE to COMPOSING_NO.  Do nothing when no
   composition is in progress.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every
   component is a character.  For the other methods characters and
   rules alternate, so only the even-indexed components are stored.

   Jumps to `no_more_source' when CHARBUF has no room for
   COMPONENT_IDX more elements.  The macro uses these names from the
   expansion site: composition_state, method, components,
   component_idx, charbuf, charbuf_end and char_offset.  */
#define MAYBE_FINISH_COMPOSITION()				\
  do {								\
    int i;							\
    if (composition_state == COMPOSING_NO)			\
      break;							\
    /* Make sure there is room for everything stored in the	\
       table `components'.  */					\
    if (charbuf + component_idx > charbuf_end)			\
      goto no_more_source;					\
    composition_state = COMPOSING_NO;				\
    if (method == COMPOSITION_RELATIVE				\
	|| method == COMPOSITION_WITH_ALTCHARS)			\
      {								\
	for (i = 0; i < component_idx; i++)			\
	  *charbuf++ = components[i];				\
	char_offset += component_idx;				\
      }								\
    else							\
      {								\
	for (i = 0; i < component_idx; i += 2)			\
	  *charbuf++ = components[i];				\
	/* Exactly the number of characters stored by the loop	\
	   above, also when COMPONENT_IDX is even or zero.  */	\
	char_offset += (component_idx + 1) / 2;			\
      }								\
  } while (0)
2673
d46c5b12 2674
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC.  An ESC 0 seen while the state is
   COMPOSING_COMPONENT_RULE only ends the list of alternate
   components: COMPONENT_LEN records how many there were and the state
   becomes COMPOSING_CHAR.

   Otherwise any composition in progress is finished first, and a new
   one is started only if CHARBUF has room for
   MAX_COMPOSITION_COMPONENTS elements and the terminating ESC 1 is
   found in the remaining source.  If there is no room the macro jumps
   to `no_more_source'.  If ESC 1 is missing it jumps to
   `invalid_code' when this is the last block, and to `no_more_source'
   otherwise.

   The macro uses these names from the expansion site: coding, src,
   src_end, charbuf, charbuf_end, composition_state, method,
   component_idx and component_len.  */
#define DECODE_COMPOSITION_START(c1)					\
  do {									\
    if ((c1) == '0'							\
	&& composition_state == COMPOSING_COMPONENT_RULE)		\
      {									\
	component_len = component_idx;					\
	composition_state = COMPOSING_CHAR;				\
      }									\
    else								\
      {									\
	const unsigned char *p;						\
									\
	MAYBE_FINISH_COMPOSITION ();					\
	if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)		\
	  goto no_more_source;						\
	for (p = src; p < src_end - 1; p++)				\
	  if (*p == ISO_CODE_ESC && p[1] == '1')			\
	    break;							\
	/* `>=' rather than `==': when SRC is already at SRC_END the	\
	   loop does not run and P is beyond SRC_END - 1; that must	\
	   count as "ESC 1 not found" too.  */				\
	if (p >= src_end - 1)						\
	  {								\
	    if (coding->mode & CODING_MODE_LAST_BLOCK)			\
	      goto invalid_code;					\
	    goto no_more_source;					\
	  }								\
									\
	/* This is surely the start of a composition.  */		\
	method = ((c1) == '0' ? COMPOSITION_RELATIVE			\
		  : (c1) == '2' ? COMPOSITION_WITH_RULE			\
		  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS		\
		  : COMPOSITION_WITH_RULE_ALTCHARS);			\
	composition_state = ((c1) <= '2' ? COMPOSING_CHAR		\
			     : COMPOSING_COMPONENT_CHAR);		\
	component_idx = component_len = 0;				\
      }									\
  } while (0)
2717
ec6d2bb8 2718
df7492f9
KH
2719/* Handle composition end sequence ESC 1.

   Add a composition annotation for the characters just collected at
   CHARBUF and then store the collected data after it.  The number of
   composed characters is COMPONENT_IDX - COMPONENT_LEN when
   alternate components were given (COMPONENT_LEN > 0), COMPONENT_IDX
   for a relative composition, and (COMPONENT_IDX + 1) / 2 otherwise.

   For every method except COMPOSITION_RELATIVE the components are
   stored first and the (negative) distance from the current position
   back to the start of the annotation is written into the
   annotation's first word.  The characters themselves follow, and
   CHAR_OFFSET is advanced by one for each of them.  Finally
   CODING->annotated is set and the state returns to COMPOSING_NO.

   The macro uses these names from the expansion site: coding,
   charbuf, char_offset, method, components, component_idx,
   component_len and composition_state.  */
2720
2721#define DECODE_COMPOSITION_END() \
ec6d2bb8 2722 do { \
df7492f9
KH
2723 int nchars = (component_len > 0 ? component_idx - component_len \
2724 : method == COMPOSITION_RELATIVE ? component_idx \
2725 : (component_idx + 1) / 2); \
2726 int i; \
2727 int *saved_charbuf = charbuf; \
8f924df7 2728 int from = char_offset; \
ff0dacd7 2729 int to = from + nchars; \
df7492f9 2730 \
ff0dacd7 2731 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
df7492f9 2732 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 2733 { \
df7492f9
KH
2734 if (component_len == 0) \
2735 for (i = 0; i < component_idx; i++) \
2736 *charbuf++ = components[i]; \
2737 else \
2738 for (i = 0; i < component_len; i++) \
2739 *charbuf++ = components[i]; \
2740 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 2741 } \
df7492f9
KH
2742 if (method == COMPOSITION_WITH_RULE) \
2743 for (i = 0; i < component_idx; i += 2, char_offset++) \
2744 *charbuf++ = components[i]; \
ec6d2bb8 2745 else \
df7492f9
KH
2746 for (i = component_len; i < component_idx; i++, char_offset++) \
2747 *charbuf++ = components[i]; \
2748 coding->annotated = 1; \
2749 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
2750 } while (0)
2751
df7492f9 2752
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read with ONE_MORE_BYTE) and store the encoded composition rule
   back into C1.  C1 is set to 0 when the byte is outside both
   recognized formats.

   C1 must be a modifiable lvalue.  The macro also uses the variable
   `c2' of the enclosing scope as scratch for the second byte.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) < 81)              /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
        if (gref == 4) gref = 10;                                       \
        if (nref == 4) nref = 10;                                       \
        (c1) = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
    else if ((c1) < 93)         /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                                                                \
      (c1) = 0;                                                         \
  } while (0)
88993dfd 2776
d46c5b12 2777
4ed46869
KH
2778/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2779
b73bfc1c 2780static void
df7492f9 2781decode_coding_iso_2022 (coding)
4ed46869 2782 struct coding_system *coding;
4ed46869 2783{
8f924df7
KH
2784 const unsigned char *src = coding->source + coding->consumed;
2785 const unsigned char *src_end = coding->source + coding->src_bytes;
2786 const unsigned char *src_base;
df7492f9 2787 int *charbuf = coding->charbuf;
ff0dacd7
KH
2788 int *charbuf_end
2789 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2790 int consumed_chars = 0, consumed_chars_base;
df7492f9 2791 int multibytep = coding->src_multibyte;
4ed46869 2792 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
2793 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2794 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2795 struct charset *charset;
2796 int c;
2797 /* For handling composition sequence. */
2798#define COMPOSING_NO 0
2799#define COMPOSING_CHAR 1
2800#define COMPOSING_RULE 2
2801#define COMPOSING_COMPONENT_CHAR 3
2802#define COMPOSING_COMPONENT_RULE 4
2803
2804 int composition_state = COMPOSING_NO;
2805 enum composition_method method;
2806 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2807 int component_idx;
2808 int component_len;
2809 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
2810 int char_offset = coding->produced_char;
2811 int last_offset = char_offset;
2812 int last_id = charset_ascii;
df7492f9
KH
2813
2814 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2815 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2816
2817 while (1)
4ed46869 2818 {
463f5630 2819 int c1, c2;
b73bfc1c
KH
2820
2821 src_base = src;
df7492f9
KH
2822 consumed_chars_base = consumed_chars;
2823
2824 if (charbuf >= charbuf_end)
2825 break;
2826
b73bfc1c 2827 ONE_MORE_BYTE (c1);
4ed46869 2828
98725083 2829 /* We produce at most one character. */
4ed46869
KH
2830 switch (iso_code_class [c1])
2831 {
2832 case ISO_0x20_or_0x7F:
df7492f9 2833 if (composition_state != COMPOSING_NO)
ec6d2bb8 2834 {
df7492f9
KH
2835 if (composition_state == COMPOSING_RULE
2836 || composition_state == COMPOSING_COMPONENT_RULE)
2837 {
2838 DECODE_COMPOSITION_RULE (c1);
2839 components[component_idx++] = c1;
2840 composition_state--;
2841 continue;
2842 }
4ed46869 2843 }
df7492f9
KH
2844 if (charset_id_0 < 0
2845 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2846 /* This is SPACE or DEL. */
2847 charset = CHARSET_FROM_ID (charset_ascii);
2848 else
2849 charset = CHARSET_FROM_ID (charset_id_0);
2850 break;
4ed46869
KH
2851
2852 case ISO_graphic_plane_0:
781d7a48 2853 if (composition_state != COMPOSING_NO)
b73bfc1c 2854 {
781d7a48
KH
2855 if (composition_state == COMPOSING_RULE
2856 || composition_state == COMPOSING_COMPONENT_RULE)
2857 {
2858 DECODE_COMPOSITION_RULE (c1);
2859 components[component_idx++] = c1;
2860 composition_state--;
2861 continue;
2862 }
b73bfc1c 2863 }
df7492f9 2864 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2865 break;
2866
2867 case ISO_0xA0_or_0xFF:
df7492f9
KH
2868 if (charset_id_1 < 0
2869 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2870 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2871 goto invalid_code;
4ed46869
KH
2872 /* This is a graphic character, we fall down ... */
2873
2874 case ISO_graphic_plane_1:
df7492f9
KH
2875 if (charset_id_1 < 0)
2876 goto invalid_code;
2877 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2878 break;
2879
2880 case ISO_carriage_return:
df7492f9 2881 if (c1 == '\r')
4ed46869 2882 {
df7492f9 2883 if (EQ (eol_type, Qdos))
4ed46869 2884 {
df7492f9 2885 if (src == src_end)
98725083
KH
2886 {
2887 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2888 goto no_more_source;
8f924df7 2889 }
df7492f9
KH
2890 if (*src == '\n')
2891 ONE_MORE_BYTE (c1);
4ed46869 2892 }
df7492f9
KH
2893 else if (EQ (eol_type, Qmac))
2894 c1 = '\n';
4ed46869 2895 }
df7492f9
KH
2896 /* fall through */
2897
2898 case ISO_control_0:
2899 MAYBE_FINISH_COMPOSITION ();
2900 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2901 break;
2902
df7492f9
KH
2903 case ISO_control_1:
2904 MAYBE_FINISH_COMPOSITION ();
2905 goto invalid_code;
2906
4ed46869 2907 case ISO_shift_out:
df7492f9
KH
2908 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2909 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2910 goto invalid_code;
2911 CODING_ISO_INVOCATION (coding, 0) = 1;
2912 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2913 continue;
4ed46869
KH
2914
2915 case ISO_shift_in:
df7492f9
KH
2916 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2917 goto invalid_code;
2918 CODING_ISO_INVOCATION (coding, 0) = 0;
2919 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2920 continue;
4ed46869
KH
2921
2922 case ISO_single_shift_2_7:
2923 case ISO_single_shift_2:
df7492f9
KH
2924 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2925 goto invalid_code;
4ed46869
KH
2926 /* SS2 is handled as an escape sequence of ESC 'N' */
2927 c1 = 'N';
2928 goto label_escape_sequence;
2929
2930 case ISO_single_shift_3:
df7492f9
KH
2931 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2932 goto invalid_code;
4ed46869
KH
2933 /* SS2 is handled as an escape sequence of ESC 'O' */
2934 c1 = 'O';
2935 goto label_escape_sequence;
2936
2937 case ISO_control_sequence_introducer:
2938 /* CSI is handled as an escape sequence of ESC '[' ... */
2939 c1 = '[';
2940 goto label_escape_sequence;
2941
2942 case ISO_escape:
2943 ONE_MORE_BYTE (c1);
2944 label_escape_sequence:
df7492f9 2945 /* Escape sequences handled here are invocation,
4ed46869
KH
2946 designation, direction specification, and character
2947 composition specification. */
2948 switch (c1)
2949 {
2950 case '&': /* revision of following character set */
2951 ONE_MORE_BYTE (c1);
2952 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2953 goto invalid_code;
4ed46869
KH
2954 ONE_MORE_BYTE (c1);
2955 if (c1 != ISO_CODE_ESC)
df7492f9 2956 goto invalid_code;
4ed46869
KH
2957 ONE_MORE_BYTE (c1);
2958 goto label_escape_sequence;
2959
2960 case '$': /* designation of 2-byte character set */
df7492f9
KH
2961 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2962 goto invalid_code;
4ed46869
KH
2963 ONE_MORE_BYTE (c1);
2964 if (c1 >= '@' && c1 <= 'B')
2965 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2966 or JISX0208.1980 */
df7492f9 2967 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2968 }
2969 else if (c1 >= 0x28 && c1 <= 0x2B)
2970 { /* designation of DIMENSION2_CHARS94 character set */
2971 ONE_MORE_BYTE (c2);
df7492f9 2972 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2973 }
2974 else if (c1 >= 0x2C && c1 <= 0x2F)
2975 { /* designation of DIMENSION2_CHARS96 character set */
2976 ONE_MORE_BYTE (c2);
df7492f9 2977 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2978 }
2979 else
df7492f9 2980 goto invalid_code;
b73bfc1c 2981 /* We must update these variables now. */
df7492f9
KH
2982 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2983 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2984 continue;
4ed46869
KH
2985
2986 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2987 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2988 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2989 goto invalid_code;
2990 CODING_ISO_INVOCATION (coding, 0) = 2;
2991 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2992 continue;
4ed46869
KH
2993
2994 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2995 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2996 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2997 goto invalid_code;
2998 CODING_ISO_INVOCATION (coding, 0) = 3;
2999 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3000 continue;
4ed46869
KH
3001
3002 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3003 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3004 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3005 goto invalid_code;
3006 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 3007 ONE_MORE_BYTE (c1);
e7046a18 3008 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3009 goto invalid_code;
4ed46869
KH
3010 break;
3011
3012 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3013 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3014 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3015 goto invalid_code;
3016 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 3017 ONE_MORE_BYTE (c1);
e7046a18 3018 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3019 goto invalid_code;
4ed46869
KH
3020 break;
3021
ec6d2bb8 3022 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3023 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3024 goto invalid_code;
ec6d2bb8 3025 DECODE_COMPOSITION_START (c1);
b73bfc1c 3026 continue;
4ed46869 3027
ec6d2bb8 3028 case '1': /* end composition */
df7492f9
KH
3029 if (composition_state == COMPOSING_NO)
3030 goto invalid_code;
3031 DECODE_COMPOSITION_END ();
b73bfc1c 3032 continue;
4ed46869
KH
3033
3034 case '[': /* specification of direction */
df7492f9
KH
3035 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3036 goto invalid_code;
4ed46869 3037 /* For the moment, nested direction is not supported.
d46c5b12 3038 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3039 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3040 ONE_MORE_BYTE (c1);
3041 switch (c1)
3042 {
3043 case ']': /* end of the current direction */
d46c5b12 3044 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3045
3046 case '0': /* end of the current direction */
3047 case '1': /* start of left-to-right direction */
3048 ONE_MORE_BYTE (c1);
3049 if (c1 == ']')
d46c5b12 3050 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3051 else
df7492f9 3052 goto invalid_code;
4ed46869
KH
3053 break;
3054
3055 case '2': /* start of right-to-left direction */
3056 ONE_MORE_BYTE (c1);
3057 if (c1 == ']')
d46c5b12 3058 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3059 else
df7492f9 3060 goto invalid_code;
4ed46869
KH
3061 break;
3062
3063 default:
df7492f9 3064 goto invalid_code;
4ed46869 3065 }
b73bfc1c 3066 continue;
4ed46869 3067
103e0180 3068 case '%':
103e0180
KH
3069 ONE_MORE_BYTE (c1);
3070 if (c1 == '/')
3071 {
3072 /* CTEXT extended segment:
3073 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3074 We keep these bytes as is for the moment.
3075 They may be decoded by post-read-conversion. */
3076 int dim, M, L;
4776e638 3077 int size;
8f924df7 3078
103e0180
KH
3079 ONE_MORE_BYTE (dim);
3080 ONE_MORE_BYTE (M);
3081 ONE_MORE_BYTE (L);
3082 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3083 if (charbuf + 8 + size > charbuf_end)
3084 goto break_loop;
3085 *charbuf++ = ISO_CODE_ESC;
3086 *charbuf++ = '%';
3087 *charbuf++ = '/';
3088 *charbuf++ = dim;
3089 *charbuf++ = BYTE8_TO_CHAR (M);
3090 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3091 while (size-- > 0)
3092 {
3093 ONE_MORE_BYTE (c1);
4776e638 3094 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3095 }
103e0180
KH
3096 }
3097 else if (c1 == 'G')
3098 {
103e0180
KH
3099 /* XFree86 extension for embedding UTF-8 in CTEXT:
3100 ESC % G --UTF-8-BYTES-- ESC % @
3101 We keep these bytes as is for the moment.
3102 They may be decoded by post-read-conversion. */
4776e638
KH
3103 int *p = charbuf;
3104
3105 if (p + 6 > charbuf_end)
3106 goto break_loop;
3107 *p++ = ISO_CODE_ESC;
3108 *p++ = '%';
3109 *p++ = 'G';
3110 while (p < charbuf_end)
103e0180
KH
3111 {
3112 ONE_MORE_BYTE (c1);
3113 if (c1 == ISO_CODE_ESC
3114 && src + 1 < src_end
3115 && src[0] == '%'
3116 && src[1] == '@')
3117 break;
4776e638 3118 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3119 }
4776e638
KH
3120 if (p + 3 > charbuf_end)
3121 goto break_loop;
3122 *p++ = ISO_CODE_ESC;
3123 *p++ = '%';
3124 *p++ = '@';
3125 charbuf = p;
103e0180
KH
3126 }
3127 else
4776e638 3128 goto invalid_code;
103e0180 3129 continue;
4776e638 3130 break;
103e0180 3131
4ed46869 3132 default:
df7492f9
KH
3133 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3134 goto invalid_code;
4ed46869
KH
3135 if (c1 >= 0x28 && c1 <= 0x2B)
3136 { /* designation of DIMENSION1_CHARS94 character set */
3137 ONE_MORE_BYTE (c2);
df7492f9 3138 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3139 }
3140 else if (c1 >= 0x2C && c1 <= 0x2F)
3141 { /* designation of DIMENSION1_CHARS96 character set */
3142 ONE_MORE_BYTE (c2);
df7492f9 3143 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3144 }
3145 else
df7492f9 3146 goto invalid_code;
b73bfc1c 3147 /* We must update these variables now. */
df7492f9
KH
3148 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3149 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3150 continue;
4ed46869 3151 }
b73bfc1c 3152 }
4ed46869 3153
ff0dacd7
KH
3154 if (charset->id != charset_ascii
3155 && last_id != charset->id)
3156 {
3157 if (last_id != charset_ascii)
3158 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3159 last_id = charset->id;
3160 last_offset = char_offset;
3161 }
3162
b73bfc1c 3163 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3164 Produce a decoded character while getting 2nd position code
3165 C2 if necessary. */
3166 c1 &= 0x7F;
3167 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3168 {
3169 ONE_MORE_BYTE (c2);
df7492f9 3170 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3171 /* C2 is not in a valid range. */
df7492f9
KH
3172 goto invalid_code;
3173 c1 = (c1 << 8) | (c2 & 0x7F);
3174 if (CHARSET_DIMENSION (charset) > 2)
3175 {
3176 ONE_MORE_BYTE (c2);
3177 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3178 /* C2 is not in a valid range. */
3179 goto invalid_code;
3180 c1 = (c1 << 8) | (c2 & 0x7F);
3181 }
3182 }
3183
3184 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3185 if (c < 0)
3186 {
3187 MAYBE_FINISH_COMPOSITION ();
3188 for (; src_base < src; src_base++, char_offset++)
3189 {
3190 if (ASCII_BYTE_P (*src_base))
3191 *charbuf++ = *src_base;
3192 else
3193 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3194 }
3195 }
3196 else if (composition_state == COMPOSING_NO)
3197 {
3198 *charbuf++ = c;
3199 char_offset++;
4ed46869 3200 }
df7492f9 3201 else
781d7a48
KH
3202 {
3203 components[component_idx++] = c;
3204 if (method == COMPOSITION_WITH_RULE
3205 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3206 && composition_state == COMPOSING_COMPONENT_CHAR))
3207 composition_state++;
4ed46869
KH
3208 }
3209 continue;
3210
df7492f9
KH
3211 invalid_code:
3212 MAYBE_FINISH_COMPOSITION ();
4ed46869 3213 src = src_base;
df7492f9
KH
3214 consumed_chars = consumed_chars_base;
3215 ONE_MORE_BYTE (c);
3216 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3217 char_offset++;
df7492f9 3218 coding->errors++;
4776e638
KH
3219 continue;
3220
3221 break_loop:
3222 break;
4ed46869 3223 }
fb88bf2d 3224
df7492f9 3225 no_more_source:
ff0dacd7
KH
3226 if (last_id != charset_ascii)
3227 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3228 coding->consumed_char += consumed_chars_base;
3229 coding->consumed = src_base - coding->source;
3230 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3231}
3232
b73bfc1c 3233
f4dee582 3234/* ISO2022 encoding stuff. */
4ed46869
KH
3235
3236/*
f4dee582 3237 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3238 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3239 variant has the following specifications:
df7492f9 3240 1. Initial designation to G0 thru G3.
4ed46869
KH
3241 2. Allows short-form designation?
3242 3. ASCII should be designated to G0 before control characters?
3243 4. ASCII should be designated to G0 at end of line?
3244 5. 7-bit environment or 8-bit environment?
3245 6. Use locking-shift?
3246 7. Use Single-shift?
3247 And the following two are only for Japanese:
3248 8. Use ASCII in place of JIS0201-1976-Roman?
3249 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3250 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3251 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3252 details.
4ed46869
KH
3253*/
3254
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, using the EMIT_* macros of the enclosing encoder,
   and record the new designation in CODING.

   When the coding system's flags include CODING_ISO_FLAG_REVISION
   and CHARSET has a revision number, the sequence is prefixed with
   `ESC & <'@' + revision>'.  For a multi-byte 94-charset designated
   to register 0 whose <final-char> is '@', 'A', or 'B', the
   intermediate byte is omitted (short form) unless
   CODING_ISO_FLAG_LONG_FORM is set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision = -1;                                                  \
    int c;                                                              \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      revision = CHARSET_ISO_REVISION (charset);                        \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || final_char < '@' || final_char > 'B')               \
          EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        c = (CHARSET_ISO_CHARS_96 (charset)                             \
             ? intermediate_char_96[reg]                                \
             : intermediate_char_94[reg]);                              \
        EMIT_ONE_ASCII_BYTE (c);                                        \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3302
df7492f9 3303
4ed46869
KH
/* Emit the ISO2022 single-shift functions single-shift-2 and
   single-shift-3.  In a 7-bit environment the escape sequence form
   (ESC N / ESC O) is produced, otherwise the single control
   character (ISO_CODE_SS2 / ISO_CODE_SS3).  Either way CODING is
   marked as being in single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3326
df7492f9 3327
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3), and record in
   CODING which graphic register is now invoked to graphic plane 0.

   The bytes emitted here must match what the decoder accepts:
   SI, SO, `ESC n' (locking-shift-2) and `ESC o' (locking-shift-3).  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3 is `ESC o'; `ESC n' would invoke G2 instead.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3358
df7492f9 3359
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when the coding system has
   CODING_ISO_FLAG_USE_ROMAN set, ASCII is replaced by
   charset_jisx0201_roman.  The macro uses `coding', `dst' and
   `produced_chars' of the enclosing scope.

   The body is a loop: when CHARSET is not yet invoked to either
   graphic plane, encode_invocation_designation is called and the
   checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                      \
  do {                                                                    \
    int id = CHARSET_ID (charset);                                        \
                                                                          \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)           \
        && id == charset_ascii)                                           \
      {                                                                   \
        id = charset_jisx0201_roman;                                      \
        charset = CHARSET_FROM_ID (id);                                   \
      }                                                                   \
                                                                          \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                              \
      {                                                                   \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)       \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        else                                                              \
          EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                          \
        break;                                                            \
      }                                                                   \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                \
      {                                                                   \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                                \
        break;                                                            \
      }                                                                   \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                   \
        EMIT_ONE_BYTE ((c1) | 0x80);                                      \
        break;                                                            \
      }                                                                   \
    else                                                                  \
      /* Since CHARSET is not yet invoked to any graphic planes, we       \
         must invoke it, or, at first, designate it to some graphic       \
         register.  Then repeat the loop to actually produce the          \
         character.  */                                                   \
      dst = encode_invocation_designation (charset, coding, dst,          \
                                           &produced_chars);              \
  } while (1)
3402
df7492f9 3403
f4dee582
RS
/* Emit a two-byte character of CHARSET with position codes C1 and
   C2, producing designation and invocation codes first when CHARSET
   is not yet invoked to a graphic plane.

   CHARSET must be a modifiable lvalue: with
   CODING_ISO_FLAG_USE_OLDJIS set, charset_jisx0208 is replaced by
   charset_jisx0208_1978.  Uses `coding', `dst' and `produced_chars'
   of the enclosing scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                  \
  do {                                                                    \
    int id = CHARSET_ID (charset);                                        \
                                                                          \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)          \
        && id == charset_jisx0208)                                        \
      {                                                                   \
        id = charset_jisx0208_1978;                                       \
        charset = CHARSET_FROM_ID (id);                                   \
      }                                                                   \
                                                                          \
    /* A pending single shift applies to this character only.  */         \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                              \
      {                                                                   \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)       \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        else                                                              \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                          \
        break;                                                            \
      }                                                                   \
    /* Invoked to graphic plane 0: 7-bit codes.  */                       \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                     \
      {                                                                   \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
        break;                                                            \
      }                                                                   \
    /* Invoked to graphic plane 1: codes with the 8th bit set.  */        \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                     \
      {                                                                   \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
        break;                                                            \
      }                                                                   \
    /* Not invoked anywhere yet: designate and/or invoke CHARSET,  */     \
    /* then go round again to produce the character itself.        */     \
    dst = encode_invocation_designation (charset, coding, dst,            \
                                         &produced_chars);                \
  } while (1)
3446
05e6f5dc 3447
df7492f9
KH
/* Encode character C of CHARSET: compute its code with ENCODE_CHAR
   and hand it to the one-byte or two-byte emitter according to the
   dimension of CHARSET.  For the two-byte case the code is split
   into its high part (code >> 8) and low byte.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
bdd9fb48 3457
05e6f5dc 3458
4ed46869 3459/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3460 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3461 Return new DST. */
3462
3463unsigned char *
df7492f9
KH
3464encode_invocation_designation (charset, coding, dst, p_nchars)
3465 struct charset *charset;
4ed46869
KH
3466 struct coding_system *coding;
3467 unsigned char *dst;
df7492f9 3468 int *p_nchars;
4ed46869 3469{
df7492f9
KH
3470 int multibytep = coding->dst_multibyte;
3471 int produced_chars = *p_nchars;
4ed46869 3472 int reg; /* graphic register number */
df7492f9 3473 int id = CHARSET_ID (charset);
4ed46869
KH
3474
3475 /* At first, check designations. */
3476 for (reg = 0; reg < 4; reg++)
df7492f9 3477 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3478 break;
3479
3480 if (reg >= 4)
3481 {
3482 /* CHARSET is not yet designated to any graphic registers. */
3483 /* At first check the requested designation. */
df7492f9
KH
3484 reg = CODING_ISO_REQUEST (coding, id);
3485 if (reg < 0)
1ba9e4ab
KH
3486 /* Since CHARSET requests no special designation, designate it
3487 to graphic register 0. */
4ed46869
KH
3488 reg = 0;
3489
3490 ENCODE_DESIGNATION (charset, reg, coding);
3491 }
3492
df7492f9
KH
3493 if (CODING_ISO_INVOCATION (coding, 0) != reg
3494 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3495 {
3496 /* Since the graphic register REG is not invoked to any graphic
3497 planes, invoke it to graphic plane 0. */
3498 switch (reg)
3499 {
3500 case 0: /* graphic register 0 */
3501 ENCODE_SHIFT_IN;
3502 break;
3503
3504 case 1: /* graphic register 1 */
3505 ENCODE_SHIFT_OUT;
3506 break;
3507
3508 case 2: /* graphic register 2 */
df7492f9 3509 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3510 ENCODE_SINGLE_SHIFT_2;
3511 else
3512 ENCODE_LOCKING_SHIFT_2;
3513 break;
3514
3515 case 3: /* graphic register 3 */
df7492f9 3516 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3517 ENCODE_SINGLE_SHIFT_3;
3518 else
3519 ENCODE_LOCKING_SHIFT_3;
3520 break;
3521 }
3522 }
b73bfc1c 3523
df7492f9 3524 *p_nchars = produced_chars;
4ed46869
KH
3525 return dst;
3526}
3527
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits `ESC [' when the coding
   system has CODING_ISO_FLAG_SEVEN_BITS set and the single byte
   ISO_CODE_CSI otherwise.  It is an object-like macro (no argument
   list); like the other ENCODE_* macros it works on `coding' and the
   output state of the enclosing scope.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    /* Test the flag bit; other flags may be set at the same time.  */  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


/* Start of right-to-left text: CSI 2 ].  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


/* End of the current direction (back to left-to-right): CSI 0 ].  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4ed46869 3551
4ed46869
KH
3552
/* Bring graphic plane 0 and the graphic registers back to their
   initial state: emit a shift-in when plane 0 does not currently
   invoke register 0, then re-designate every register whose current
   designation differs from its initial charset.  Registers with no
   initial charset (negative id) are left alone.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        if (initial_id < 0                                              \
            || CODING_ISO_DESIGNATION (coding, reg) == initial_id)      \
          continue;                                                     \
        charset = CHARSET_FROM_ID (initial_id);                         \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
3571
df7492f9 3572
bdd9fb48 3573/* Produce designation sequences of charsets in the line started from
b73bfc1c 3574 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3575
3576 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3577 find all the necessary designations. */
3578
b73bfc1c 3579static unsigned char *
df7492f9 3580encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3581 struct coding_system *coding;
df7492f9
KH
3582 int *charbuf, *charbuf_end;
3583 unsigned char *dst;
e0e989f6 3584{
df7492f9 3585 struct charset *charset;
bdd9fb48
KH
3586 /* Table of charsets to be designated to each graphic register. */
3587 int r[4];
df7492f9
KH
3588 int c, found = 0, reg;
3589 int produced_chars = 0;
3590 int multibytep = coding->dst_multibyte;
3591 Lisp_Object attrs;
3592 Lisp_Object charset_list;
3593
3594 attrs = CODING_ID_ATTRS (coding->id);
3595 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3596 if (EQ (charset_list, Qiso_2022))
3597 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3598
3599 for (reg = 0; reg < 4; reg++)
3600 r[reg] = -1;
3601
b73bfc1c 3602 while (found < 4)
e0e989f6 3603 {
df7492f9
KH
3604 int id;
3605
3606 c = *charbuf++;
b73bfc1c
KH
3607 if (c == '\n')
3608 break;
df7492f9
KH
3609 charset = char_charset (c, charset_list, NULL);
3610 id = CHARSET_ID (charset);
3611 reg = CODING_ISO_REQUEST (coding, id);
3612 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3613 {
3614 found++;
df7492f9 3615 r[reg] = id;
bdd9fb48 3616 }
bdd9fb48
KH
3617 }
3618
3619 if (found)
3620 {
3621 for (reg = 0; reg < 4; reg++)
3622 if (r[reg] >= 0
df7492f9
KH
3623 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3624 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3625 }
b73bfc1c
KH
3626
3627 return dst;
e0e989f6
KH
3628}
3629
4ed46869
KH
3630/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3631
df7492f9
KH
3632static int
3633encode_coding_iso_2022 (coding)
4ed46869 3634 struct coding_system *coding;
4ed46869 3635{
df7492f9
KH
3636 int multibytep = coding->dst_multibyte;
3637 int *charbuf = coding->charbuf;
3638 int *charbuf_end = charbuf + coding->charbuf_used;
3639 unsigned char *dst = coding->destination + coding->produced;
3640 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3641 int safe_room = 16;
3642 int bol_designation
3643 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3644 && CODING_ISO_BOL (coding));
3645 int produced_chars = 0;
3646 Lisp_Object attrs, eol_type, charset_list;
3647 int ascii_compatible;
b73bfc1c 3648 int c;
ff0dacd7 3649 int preferred_charset_id = -1;
05e6f5dc 3650
df7492f9 3651 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4 3652 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3653 /* Charset list may have been changed. */
3654 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
8f924df7 3655 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3656
df7492f9 3657 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3658
df7492f9 3659 while (charbuf < charbuf_end)
4ed46869 3660 {
df7492f9 3661 ASSURE_DESTINATION (safe_room);
b73bfc1c 3662
df7492f9 3663 if (bol_designation)
b73bfc1c 3664 {
df7492f9 3665 unsigned char *dst_prev = dst;
4ed46869 3666
bdd9fb48 3667 /* We have to produce designation sequences if any now. */
df7492f9
KH
3668 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3669 bol_designation = 0;
3670 /* We are sure that designation sequences are all ASCII bytes. */
3671 produced_chars += dst - dst_prev;
e0e989f6
KH
3672 }
3673
df7492f9 3674 c = *charbuf++;
ec6d2bb8 3675
ff0dacd7
KH
3676 if (c < 0)
3677 {
3678 /* Handle an annotation. */
3679 switch (*charbuf)
ec6d2bb8 3680 {
ff0dacd7
KH
3681 case CODING_ANNOTATE_COMPOSITION_MASK:
3682 /* Not yet implemented. */
3683 break;
3684 case CODING_ANNOTATE_CHARSET_MASK:
3685 preferred_charset_id = charbuf[3];
3686 if (preferred_charset_id >= 0
3687 && NILP (Fmemq (make_number (preferred_charset_id),
3688 charset_list)))
3689 preferred_charset_id = -1;
3690 break;
3691 default:
3692 abort ();
4ed46869 3693 }
ff0dacd7
KH
3694 charbuf += -c - 1;
3695 continue;
4ed46869 3696 }
ec6d2bb8 3697
b73bfc1c
KH
3698 /* Now encode the character C. */
3699 if (c < 0x20 || c == 0x7F)
3700 {
df7492f9
KH
3701 if (c == '\n'
3702 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3703 {
df7492f9
KH
3704 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3705 ENCODE_RESET_PLANE_AND_REGISTER ();
3706 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3707 {
df7492f9
KH
3708 int i;
3709
3710 for (i = 0; i < 4; i++)
3711 CODING_ISO_DESIGNATION (coding, i)
3712 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3713 }
df7492f9
KH
3714 bol_designation
3715 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3716 }
df7492f9
KH
3717 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3718 ENCODE_RESET_PLANE_AND_REGISTER ();
3719 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3720 }
df7492f9 3721 else if (ASCII_CHAR_P (c))
88993dfd 3722 {
df7492f9
KH
3723 if (ascii_compatible)
3724 EMIT_ONE_ASCII_BYTE (c);
93dec019 3725 else
19a8d9e0 3726 {
bf16eb23
KH
3727 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3728 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 3729 }
4ed46869 3730 }
16eafb5d 3731 else if (CHAR_BYTE8_P (c))
88993dfd 3732 {
16eafb5d
KH
3733 c = CHAR_TO_BYTE8 (c);
3734 EMIT_ONE_BYTE (c);
88993dfd 3735 }
b73bfc1c 3736 else
df7492f9 3737 {
ff0dacd7 3738 struct charset *charset;
b73bfc1c 3739
ff0dacd7
KH
3740 if (preferred_charset_id >= 0)
3741 {
3742 charset = CHARSET_FROM_ID (preferred_charset_id);
3743 if (! CHAR_CHARSET_P (c, charset))
3744 charset = char_charset (c, charset_list, NULL);
3745 }
3746 else
3747 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3748 if (!charset)
3749 {
41cbe562
KH
3750 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3751 {
3752 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3753 charset = CHARSET_FROM_ID (charset_ascii);
3754 }
3755 else
3756 {
3757 c = coding->default_char;
3758 charset = char_charset (c, charset_list, NULL);
3759 }
df7492f9
KH
3760 }
3761 ENCODE_ISO_CHARACTER (charset, c);
3762 }
84fbb8a0 3763 }
b73bfc1c 3764
df7492f9
KH
3765 if (coding->mode & CODING_MODE_LAST_BLOCK
3766 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3767 {
3768 ASSURE_DESTINATION (safe_room);
3769 ENCODE_RESET_PLANE_AND_REGISTER ();
3770 }
3771 coding->result = CODING_RESULT_SUCCESS;
3772 CODING_ISO_BOL (coding) = bol_designation;
3773 coding->produced_char += produced_chars;
3774 coding->produced = dst - coding->destination;
3775 return 0;
4ed46869
KH
3776}
3777
3778\f
df7492f9 3779/*** 8. SJIS and BIG5 handlers ***/
4ed46869 3780
df7492f9 3781/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3782 quite widely. So, for the moment, Emacs supports them in the bare
3783 C code. But, in the future, they may be supported only by CCL. */
3784
3785/* SJIS is a coding system encoding three character sets: ASCII, right
3786 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3787 as is. A character of charset katakana-jisx0201 is encoded by
3788 "position-code + 0x80". A character of charset japanese-jisx0208
3789 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3790 so that they fit in the ranges below.
4ed46869
KH
3791
3792 --- CODE RANGE of SJIS ---
3793 (character set) (range)
3794 ASCII 0x00 .. 0x7F
df7492f9 3795 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3796 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3797 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3798 -------------------------------
3799
3800*/
3801
3802/* BIG5 is a coding system encoding two character sets: ASCII and
3803 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3804 character set and is encoded in two-byte.
4ed46869
KH
3805
3806 --- CODE RANGE of BIG5 ---
3807 (character set) (range)
3808 ASCII 0x00 .. 0x7F
3809 Big5 (1st byte) 0xA1 .. 0xFE
3810 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3811 --------------------------
3812
df7492f9 3813 */
4ed46869
KH
3814
3815/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
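3816 Check if a text is encoded in SJIS. If no invalid byte sequence is
df7492f9 3817 seen, return 1 (adding CATEGORY_MASK_SJIS to DETECT_INFO->found when a non-ASCII SJIS sequence was seen); otherwise add CATEGORY_MASK_SJIS to DETECT_INFO->rejected and return 0. */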
4ed46869 3818
0a28aafb 3819static int
ff0dacd7 3820detect_coding_sjis (coding, detect_info)
df7492f9 3821 struct coding_system *coding;
ff0dacd7 3822 struct coding_detection_info *detect_info;
4ed46869 3823{
8f924df7
KH
3824 const unsigned char *src = coding->source, *src_base = src;
3825 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
3826 int multibytep = coding->src_multibyte;
3827 int consumed_chars = 0;
3828 int found = 0;
b73bfc1c 3829 int c;
89528eb3 3830 int incomplete;
df7492f9 3831
ff0dacd7 3832 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3833 /* A coding system of this category is always ASCII compatible. */
3834 src += coding->head_ascii;
4ed46869 3835
b73bfc1c 3836 while (1)
4ed46869 3837 {
89528eb3 3838 incomplete = 0;
df7492f9 3839 ONE_MORE_BYTE (c);
89528eb3 3840 incomplete = 1;
682169fe
KH
3841 if (c < 0x80)
3842 continue;
df7492f9 3843 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3844 {
df7492f9 3845 ONE_MORE_BYTE (c);
682169fe 3846 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3847 break;
ff0dacd7 3848 found = CATEGORY_MASK_SJIS;
4ed46869 3849 }
df7492f9 3850 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3851 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3852 else
3853 break;
4ed46869 3854 }
ff0dacd7 3855 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3856 return 0;
3857
3858 no_more_source:
89528eb3
KH
3859 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3860 {
ff0dacd7 3861 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 3862 return 0;
4ed46869 3863 }
ff0dacd7
KH
3864 detect_info->found |= found;
3865 return 1;
4ed46869
KH
3866}
3867
3868/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3869 Check if a text is encoded in BIG5. If it is, return
df7492f9 3870 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3871
0a28aafb 3872static int
ff0dacd7 3873detect_coding_big5 (coding, detect_info)
df7492f9 3874 struct coding_system *coding;
ff0dacd7 3875 struct coding_detection_info *detect_info;
4ed46869 3876{
8f924df7
KH
3877 const unsigned char *src = coding->source, *src_base = src;
3878 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
3879 int multibytep = coding->src_multibyte;
3880 int consumed_chars = 0;
3881 int found = 0;
b73bfc1c 3882 int c;
89528eb3 3883 int incomplete;
fa42c37f 3884
ff0dacd7 3885 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
3886 /* A coding system of this category is always ASCII compatible. */
3887 src += coding->head_ascii;
fa42c37f 3888
b73bfc1c 3889 while (1)
fa42c37f 3890 {
89528eb3 3891 incomplete = 0;
df7492f9 3892 ONE_MORE_BYTE (c);
89528eb3 3893 incomplete = 1;
df7492f9 3894 if (c < 0x80)
fa42c37f 3895 continue;
df7492f9 3896 if (c >= 0xA1)
fa42c37f 3897 {
df7492f9
KH
3898 ONE_MORE_BYTE (c);
3899 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3900 return 0;
ff0dacd7 3901 found = CATEGORY_MASK_BIG5;
fa42c37f 3902 }
df7492f9
KH
3903 else
3904 break;
fa42c37f 3905 }
ff0dacd7 3906 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 3907 return 0;
fa42c37f 3908
df7492f9 3909 no_more_source:
89528eb3
KH
3910 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3911 {
ff0dacd7 3912 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
3913 return 0;
3914 }
ff0dacd7
KH
3915 detect_info->found |= found;
3916 return 1;
fa42c37f
KH
3917}
3918
4ed46869
KH
3919/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3920 decode_coding_sjis decodes SJIS text and decode_coding_big5 decodes BIG5 text. */
fa42c37f 3921
b73bfc1c 3922static void
df7492f9 3923decode_coding_sjis (coding)
4ed46869 3924 struct coding_system *coding;
4ed46869 3925{
8f924df7
KH
3926 const unsigned char *src = coding->source + coding->consumed;
3927 const unsigned char *src_end = coding->source + coding->src_bytes;
3928 const unsigned char *src_base;
df7492f9 3929 int *charbuf = coding->charbuf;
ff0dacd7 3930 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
3931 int consumed_chars = 0, consumed_chars_base;
3932 int multibytep = coding->src_multibyte;
3933 struct charset *charset_roman, *charset_kanji, *charset_kana;
3934 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
3935 int char_offset = coding->produced_char;
3936 int last_offset = char_offset;
3937 int last_id = charset_ascii;
a5d301df 3938
df7492f9
KH
3939 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3940
3941 val = charset_list;
3942 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3
KH
3943 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3944 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 3945
b73bfc1c 3946 while (1)
4ed46869 3947 {
df7492f9 3948 int c, c1;
fa42c37f 3949
b73bfc1c 3950 src_base = src;
df7492f9 3951 consumed_chars_base = consumed_chars;
fa42c37f 3952
df7492f9
KH
3953 if (charbuf >= charbuf_end)
3954 break;
3955
3956 ONE_MORE_BYTE (c);
b73bfc1c 3957
df7492f9 3958 if (c == '\r')
4ed46869 3959 {
df7492f9 3960 if (EQ (eol_type, Qdos))
4ed46869 3961 {
df7492f9 3962 if (src == src_end)
98725083
KH
3963 {
3964 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3965 goto no_more_source;
3966 }
df7492f9
KH
3967 if (*src == '\n')
3968 ONE_MORE_BYTE (c);
4ed46869 3969 }
df7492f9
KH
3970 else if (EQ (eol_type, Qmac))
3971 c = '\n';
4ed46869 3972 }
54f78171 3973 else
df7492f9
KH
3974 {
3975 struct charset *charset;
fa42c37f 3976
df7492f9
KH
3977 if (c < 0x80)
3978 charset = charset_roman;
3979 else
4ed46869 3980 {
df7492f9
KH
3981 if (c >= 0xF0)
3982 goto invalid_code;
3983 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3984 {
54f78171 3985 /* SJIS -> JISX0208 */
df7492f9
KH
3986 ONE_MORE_BYTE (c1);
3987 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3988 goto invalid_code;
3989 c = (c << 8) | c1;
3990 SJIS_TO_JIS (c);
3991 charset = charset_kanji;
5e34de15 3992 }
7487494c 3993 else if (c > 0xA0)
89528eb3
KH
3994 {
3995 /* SJIS -> JISX0201-Kana */
3996 c &= 0x7F;
3997 charset = charset_kana;
3998 }
7487494c
KH
3999 else
4000 goto invalid_code;
df7492f9 4001 }
ff0dacd7
KH
4002 if (charset->id != charset_ascii
4003 && last_id != charset->id)
4004 {
4005 if (last_id != charset_ascii)
4006 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4007 last_id = charset->id;
4008 last_offset = char_offset;
4009 }
df7492f9
KH
4010 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4011 }
4012 *charbuf++ = c;
ff0dacd7 4013 char_offset++;
df7492f9 4014 continue;
b73bfc1c 4015
df7492f9
KH
4016 invalid_code:
4017 src = src_base;
4018 consumed_chars = consumed_chars_base;
4019 ONE_MORE_BYTE (c);
4020 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4021 char_offset++;
df7492f9
KH
4022 coding->errors++;
4023 }
fa42c37f 4024
df7492f9 4025 no_more_source:
ff0dacd7
KH
4026 if (last_id != charset_ascii)
4027 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4028 coding->consumed_char += consumed_chars_base;
4029 coding->consumed = src_base - coding->source;
4030 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4031}
4032
b73bfc1c 4033static void
df7492f9 4034decode_coding_big5 (coding)
4ed46869 4035 struct coding_system *coding;
4ed46869 4036{
8f924df7
KH
4037 const unsigned char *src = coding->source + coding->consumed;
4038 const unsigned char *src_end = coding->source + coding->src_bytes;
4039 const unsigned char *src_base;
df7492f9 4040 int *charbuf = coding->charbuf;
ff0dacd7 4041 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4042 int consumed_chars = 0, consumed_chars_base;
4043 int multibytep = coding->src_multibyte;
4044 struct charset *charset_roman, *charset_big5;
4045 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
4046 int char_offset = coding->produced_char;
4047 int last_offset = char_offset;
4048 int last_id = charset_ascii;
df7492f9
KH
4049
4050 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4051 val = charset_list;
4052 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4053 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4054
b73bfc1c 4055 while (1)
4ed46869 4056 {
df7492f9 4057 int c, c1;
b73bfc1c
KH
4058
4059 src_base = src;
df7492f9
KH
4060 consumed_chars_base = consumed_chars;
4061
4062 if (charbuf >= charbuf_end)
4063 break;
4064
4065 ONE_MORE_BYTE (c);
b73bfc1c 4066
df7492f9 4067 if (c == '\r')
4ed46869 4068 {
df7492f9 4069 if (EQ (eol_type, Qdos))
4ed46869 4070 {
df7492f9 4071 if (src == src_end)
b73bfc1c 4072 {
98725083
KH
4073 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4074 goto no_more_source;
d46c5b12 4075 }
df7492f9
KH
4076 if (*src == '\n')
4077 ONE_MORE_BYTE (c);
4ed46869 4078 }
df7492f9
KH
4079 else if (EQ (eol_type, Qmac))
4080 c = '\n';
4ed46869 4081 }
54f78171 4082 else
df7492f9
KH
4083 {
4084 struct charset *charset;
4085 if (c < 0x80)
4086 charset = charset_roman;
fb88bf2d 4087 else
fb88bf2d 4088 {
54f78171 4089 /* BIG5 -> Big5 */
df7492f9
KH
4090 if (c < 0xA1 || c > 0xFE)
4091 goto invalid_code;
4092 ONE_MORE_BYTE (c1);
4093 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4094 goto invalid_code;
4095 c = c << 8 | c1;
4096 charset = charset_big5;
4ed46869 4097 }
ff0dacd7
KH
4098 if (charset->id != charset_ascii
4099 && last_id != charset->id)
4100 {
4101 if (last_id != charset_ascii)
4102 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4103 last_id = charset->id;
4104 last_offset = char_offset;
4ed46869 4105 }
df7492f9 4106 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 4107 }
4ed46869 4108
df7492f9 4109 *charbuf++ = c;
ff0dacd7 4110 char_offset++;
fb88bf2d
KH
4111 continue;
4112
df7492f9 4113 invalid_code:
4ed46869 4114 src = src_base;
df7492f9
KH
4115 consumed_chars = consumed_chars_base;
4116 ONE_MORE_BYTE (c);
4117 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4118 char_offset++;
df7492f9 4119 coding->errors++;
fb88bf2d 4120 }
d46c5b12 4121
df7492f9 4122 no_more_source:
ff0dacd7
KH
4123 if (last_id != charset_ascii)
4124 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4125 coding->consumed_char += consumed_chars_base;
4126 coding->consumed = src_base - coding->source;
4127 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4128}
4129
4130/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4131 This function can encode charsets `ascii', `katakana-jisx0201',
4132 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4133 are sure that all these charsets are registered as official charset
4ed46869
KH
4134 (i.e. do not have extended leading-codes). Characters of other
4135 charsets are produced without any encoding. If SJIS_P is 1, encode
4136 SJIS text, else encode BIG5 text. */
4137
df7492f9
KH
4138static int
4139encode_coding_sjis (coding)
4ed46869 4140 struct coding_system *coding;
4ed46869 4141{
df7492f9
KH
4142 int multibytep = coding->dst_multibyte;
4143 int *charbuf = coding->charbuf;
4144 int *charbuf_end = charbuf + coding->charbuf_used;
4145 unsigned char *dst = coding->destination + coding->produced;
4146 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4147 int safe_room = 4;
4148 int produced_chars = 0;
4149 Lisp_Object attrs, eol_type, charset_list, val;
4150 int ascii_compatible;
4151 struct charset *charset_roman, *charset_kanji, *charset_kana;
4152 int c;
a5d301df 4153
df7492f9
KH
4154 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4155 val = charset_list;
4156 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4157 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4158 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4159
df7492f9 4160 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4161
df7492f9
KH
4162 while (charbuf < charbuf_end)
4163 {
4164 ASSURE_DESTINATION (safe_room);
4165 c = *charbuf++;
b73bfc1c 4166 /* Now encode the character C. */
df7492f9
KH
4167 if (ASCII_CHAR_P (c) && ascii_compatible)
4168 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4169 else if (CHAR_BYTE8_P (c))
4170 {
4171 c = CHAR_TO_BYTE8 (c);
4172 EMIT_ONE_BYTE (c);
4173 }
df7492f9 4174 else
b73bfc1c 4175 {
df7492f9
KH
4176 unsigned code;
4177 struct charset *charset = char_charset (c, charset_list, &code);
4178
4179 if (!charset)
4ed46869 4180 {
41cbe562 4181 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4182 {
41cbe562
KH
4183 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4184 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4185 }
41cbe562 4186 else
b73bfc1c 4187 {
41cbe562
KH
4188 c = coding->default_char;
4189 charset = char_charset (c, charset_list, &code);
b73bfc1c 4190 }
b73bfc1c 4191 }
df7492f9
KH
4192 if (code == CHARSET_INVALID_CODE (charset))
4193 abort ();
4194 if (charset == charset_kanji)
4195 {
4196 int c1, c2;
4197 JIS_TO_SJIS (code);
4198 c1 = code >> 8, c2 = code & 0xFF;
4199 EMIT_TWO_BYTES (c1, c2);
4200 }
4201 else if (charset == charset_kana)
4202 EMIT_ONE_BYTE (code | 0x80);
4203 else
4204 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4205 }
4206 }
4207 coding->result = CODING_RESULT_SUCCESS;
4208 coding->produced_char += produced_chars;
4209 coding->produced = dst - coding->destination;
4210 return 0;
4211}
4212
4213static int
4214encode_coding_big5 (coding)
4215 struct coding_system *coding;
4216{
4217 int multibytep = coding->dst_multibyte;
4218 int *charbuf = coding->charbuf;
4219 int *charbuf_end = charbuf + coding->charbuf_used;
4220 unsigned char *dst = coding->destination + coding->produced;
4221 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4222 int safe_room = 4;
4223 int produced_chars = 0;
4224 Lisp_Object attrs, eol_type, charset_list, val;
4225 int ascii_compatible;
4226 struct charset *charset_roman, *charset_big5;
4227 int c;
4228
4229 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4230 val = charset_list;
4231 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4232 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4233 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4234
4235 while (charbuf < charbuf_end)
4236 {
4237 ASSURE_DESTINATION (safe_room);
4238 c = *charbuf++;
4239 /* Now encode the character C. */
4240 if (ASCII_CHAR_P (c) && ascii_compatible)
4241 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4242 else if (CHAR_BYTE8_P (c))
4243 {
4244 c = CHAR_TO_BYTE8 (c);
4245 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4246 }
4247 else
4248 {
df7492f9
KH
4249 unsigned code;
4250 struct charset *charset = char_charset (c, charset_list, &code);
4251
4252 if (! charset)
b73bfc1c 4253 {
41cbe562 4254 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4255 {
41cbe562
KH
4256 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4257 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4258 }
41cbe562 4259 else
0eecad43 4260 {
41cbe562
KH
4261 c = coding->default_char;
4262 charset = char_charset (c, charset_list, &code);
0eecad43 4263 }
4ed46869 4264 }
df7492f9
KH
4265 if (code == CHARSET_INVALID_CODE (charset))
4266 abort ();
4267 if (charset == charset_big5)
b73bfc1c 4268 {
df7492f9
KH
4269 int c1, c2;
4270
4271 c1 = code >> 8, c2 = code & 0xFF;
4272 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4273 }
df7492f9
KH
4274 else
4275 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4276 }
4ed46869 4277 }
df7492f9
KH
4278 coding->result = CODING_RESULT_SUCCESS;
4279 coding->produced_char += produced_chars;
4280 coding->produced = dst - coding->destination;
4281 return 0;
4ed46869
KH
4282}
4283
4284\f
df7492f9 4285/*** 9. CCL handlers ***/
1397dc18
KH
4286
4287/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4288 Check if a text is encoded in a coding system of which
4289 encoder/decoder are written in CCL program. If it is, return
df7492f9 4290 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4291
0a28aafb 4292static int
ff0dacd7 4293detect_coding_ccl (coding, detect_info)
df7492f9 4294 struct coding_system *coding;
ff0dacd7 4295 struct coding_detection_info *detect_info;
1397dc18 4296{
8f924df7
KH
4297 const unsigned char *src = coding->source, *src_base = src;
4298 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4299 int multibytep = coding->src_multibyte;
4300 int consumed_chars = 0;
4301 int found = 0;
4302 unsigned char *valids = CODING_CCL_VALIDS (coding);
4303 int head_ascii = coding->head_ascii;
4304 Lisp_Object attrs;
4305
ff0dacd7
KH
4306 detect_info->checked |= CATEGORY_MASK_CCL;
4307
df7492f9
KH
4308 coding = &coding_categories[coding_category_ccl];
4309 attrs = CODING_ID_ATTRS (coding->id);
4310 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4311 src += head_ascii;
1397dc18 4312
b73bfc1c 4313 while (1)
1397dc18 4314 {
df7492f9
KH
4315 int c;
4316 ONE_MORE_BYTE (c);
4317 if (! valids[c])
4318 break;
ff0dacd7
KH
4319 if ((valids[c] > 1))
4320 found = CATEGORY_MASK_CCL;
df7492f9 4321 }
ff0dacd7 4322 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4323 return 0;
4324
4325 no_more_source:
ff0dacd7
KH
4326 detect_info->found |= found;
4327 return 1;
df7492f9
KH
4328}
4329
4330static void
4331decode_coding_ccl (coding)
4332 struct coding_system *coding;
4333{
7c78e542 4334 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4335 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4336 int *charbuf = coding->charbuf;
4337 int *charbuf_end = charbuf + coding->charbuf_size;
4338 int consumed_chars = 0;
4339 int multibytep = coding->src_multibyte;
4340 struct ccl_program ccl;
4341 int source_charbuf[1024];
4342 int source_byteidx[1024];
4776e638 4343 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4344
8dcbea82 4345 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4346 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4347
4348 while (src < src_end)
4349 {
7c78e542 4350 const unsigned char *p = src;
df7492f9
KH
4351 int *source, *source_end;
4352 int i = 0;
4353
4354 if (multibytep)
4355 while (i < 1024 && p < src_end)
4356 {
4357 source_byteidx[i] = p - src;
4358 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4359 }
4360 else
4361 while (i < 1024 && p < src_end)
4362 source_charbuf[i++] = *p++;
8f924df7 4363
df7492f9
KH
4364 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4365 ccl.last_block = 1;
4366
4367 source = source_charbuf;
4368 source_end = source + i;
4369 while (source < source_end)
4370 {
4371 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4372 source_end - source, charbuf_end - charbuf,
4373 charset_list);
df7492f9
KH
4374 source += ccl.consumed;
4375 charbuf += ccl.produced;
4376 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4377 break;
4378 }
4379 if (source < source_end)
4380 src += source_byteidx[source - source_charbuf];
4381 else
4382 src = p;
4383 consumed_chars += source - source_charbuf;
4384
4385 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4386 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4387 break;
4388 }
4389
4390 switch (ccl.status)
4391 {
4392 case CCL_STAT_SUSPEND_BY_SRC:
4393 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4394 break;
4395 case CCL_STAT_SUSPEND_BY_DST:
4396 break;
4397 case CCL_STAT_QUIT:
4398 case CCL_STAT_INVALID_CMD:
4399 coding->result = CODING_RESULT_INTERRUPT;
4400 break;
4401 default:
4402 coding->result = CODING_RESULT_SUCCESS;
4403 break;
4404 }
4405 coding->consumed_char += consumed_chars;
4406 coding->consumed = src - coding->source;
4407 coding->charbuf_used = charbuf - coding->charbuf;
4408}
4409
4410static int
4411encode_coding_ccl (coding)
4412 struct coding_system *coding;
4413{
4414 struct ccl_program ccl;
4415 int multibytep = coding->dst_multibyte;
4416 int *charbuf = coding->charbuf;
4417 int *charbuf_end = charbuf + coding->charbuf_used;
4418 unsigned char *dst = coding->destination + coding->produced;
4419 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4420 unsigned char *adjusted_dst_end = dst_end - 1;
4421 int destination_charbuf[1024];
4422 int i, produced_chars = 0;
8dcbea82 4423 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4424
8dcbea82 4425 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4426 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4427
4428 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4429 ccl.dst_multibyte = coding->dst_multibyte;
4430
4431 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4432 {
4433 int dst_bytes = dst_end - dst;
4434 if (dst_bytes > 1024)
4435 dst_bytes = 1024;
4436
4437 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4438 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4439 charbuf += ccl.consumed;
4440 if (multibytep)
4441 for (i = 0; i < ccl.produced; i++)
4442 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4443 else
4444 {
4445 for (i = 0; i < ccl.produced; i++)
4446 *dst++ = destination_charbuf[i] & 0xFF;
4447 produced_chars += ccl.produced;
4448 }
4449 }
4450
4451 switch (ccl.status)
4452 {
4453 case CCL_STAT_SUSPEND_BY_SRC:
4454 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4455 break;
4456 case CCL_STAT_SUSPEND_BY_DST:
4457 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4458 break;
4459 case CCL_STAT_QUIT:
4460 case CCL_STAT_INVALID_CMD:
4461 coding->result = CODING_RESULT_INTERRUPT;
4462 break;
4463 default:
4464 coding->result = CODING_RESULT_SUCCESS;
4465 break;
1397dc18 4466 }
df7492f9
KH
4467
4468 coding->produced_char += produced_chars;
4469 coding->produced = dst - coding->destination;
4470 return 0;
1397dc18
KH
4471}
4472
df7492f9 4473
1397dc18 4474\f
df7492f9 4475/*** 10, 11. no-conversion handlers ***/
4ed46869 4476
b73bfc1c 4477/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4478
b73bfc1c 4479static void
df7492f9 4480decode_coding_raw_text (coding)
4ed46869 4481 struct coding_system *coding;
4ed46869 4482{
df7492f9 4483 coding->chars_at_source = 1;
2c78b7e1
KH
4484 coding->consumed_char = 0;
4485 coding->consumed = 0;
df7492f9
KH
4486 coding->result = CODING_RESULT_SUCCESS;
4487}
4ed46869 4488
df7492f9
KH
4489static int
4490encode_coding_raw_text (coding)
4491 struct coding_system *coding;
4492{
4493 int multibytep = coding->dst_multibyte;
4494 int *charbuf = coding->charbuf;
4495 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4496 unsigned char *dst = coding->destination + coding->produced;
4497 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4498 int produced_chars = 0;
b73bfc1c
KH
4499 int c;
4500
df7492f9 4501 if (multibytep)
b73bfc1c 4502 {
df7492f9 4503 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4504
df7492f9
KH
4505 if (coding->src_multibyte)
4506 while (charbuf < charbuf_end)
4507 {
4508 ASSURE_DESTINATION (safe_room);
4509 c = *charbuf++;
4510 if (ASCII_CHAR_P (c))
4511 EMIT_ONE_ASCII_BYTE (c);
4512 else if (CHAR_BYTE8_P (c))
4513 {
4514 c = CHAR_TO_BYTE8 (c);
4515 EMIT_ONE_BYTE (c);
4516 }
4517 else
4518 {
4519 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4520
df7492f9
KH
4521 CHAR_STRING_ADVANCE (c, p1);
4522 while (p0 < p1)
9d123124
KH
4523 {
4524 EMIT_ONE_BYTE (*p0);
4525 p0++;
4526 }
df7492f9
KH
4527 }
4528 }
b73bfc1c 4529 else
df7492f9
KH
4530 while (charbuf < charbuf_end)
4531 {
4532 ASSURE_DESTINATION (safe_room);
4533 c = *charbuf++;
4534 EMIT_ONE_BYTE (c);
4535 }
4536 }
4537 else
4ed46869 4538 {
df7492f9 4539 if (coding->src_multibyte)
d46c5b12 4540 {
df7492f9
KH
4541 int safe_room = MAX_MULTIBYTE_LENGTH;
4542
4543 while (charbuf < charbuf_end)
d46c5b12 4544 {
df7492f9
KH
4545 ASSURE_DESTINATION (safe_room);
4546 c = *charbuf++;
4547 if (ASCII_CHAR_P (c))
4548 *dst++ = c;
4549 else if (CHAR_BYTE8_P (c))
4550 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4551 else
df7492f9
KH
4552 CHAR_STRING_ADVANCE (c, dst);
4553 produced_chars++;
d46c5b12
KH
4554 }
4555 }
df7492f9
KH
4556 else
4557 {
4558 ASSURE_DESTINATION (charbuf_end - charbuf);
4559 while (charbuf < charbuf_end && dst < dst_end)
4560 *dst++ = *charbuf++;
4561 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4562 }
4ed46869 4563 }
df7492f9
KH
4564 coding->result = CODING_RESULT_SUCCESS;
4565 coding->produced_char += produced_chars;
4566 coding->produced = dst - coding->destination;
4567 return 0;
4ed46869
KH
4568}
4569
ff0dacd7
KH
4570/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4571 Check if a text is encoded in a charset-based coding system. If it
4572 is, return 1, else return 0. */
4573
0a28aafb 4574static int
ff0dacd7 4575detect_coding_charset (coding, detect_info)
df7492f9 4576 struct coding_system *coding;
ff0dacd7 4577 struct coding_detection_info *detect_info;
1397dc18 4578{
8f924df7
KH
4579 const unsigned char *src = coding->source, *src_base = src;
4580 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4581 int multibytep = coding->src_multibyte;
4582 int consumed_chars = 0;
4583 Lisp_Object attrs, valids;
584948ac 4584 int found = 0;
1397dc18 4585
ff0dacd7
KH
4586 detect_info->checked |= CATEGORY_MASK_CHARSET;
4587
df7492f9
KH
4588 coding = &coding_categories[coding_category_charset];
4589 attrs = CODING_ID_ATTRS (coding->id);
4590 valids = AREF (attrs, coding_attr_charset_valids);
4591
4592 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4593 src += coding->head_ascii;
1397dc18 4594
b73bfc1c 4595 while (1)
1397dc18 4596 {
df7492f9 4597 int c;
1397dc18 4598
df7492f9
KH
4599 ONE_MORE_BYTE (c);
4600 if (NILP (AREF (valids, c)))
4601 break;
584948ac 4602 if (c >= 0x80)
ff0dacd7 4603 found = CATEGORY_MASK_CHARSET;
df7492f9 4604 }
ff0dacd7 4605 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4606 return 0;
4ed46869 4607
df7492f9 4608 no_more_source:
ff0dacd7
KH
4609 detect_info->found |= found;
4610 return 1;
df7492f9 4611}
b73bfc1c 4612
b73bfc1c 4613static void
df7492f9 4614decode_coding_charset (coding)
4ed46869 4615 struct coding_system *coding;
4ed46869 4616{
8f924df7
KH
4617 const unsigned char *src = coding->source + coding->consumed;
4618 const unsigned char *src_end = coding->source + coding->src_bytes;
4619 const unsigned char *src_base;
df7492f9 4620 int *charbuf = coding->charbuf;
ff0dacd7 4621 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4622 int consumed_chars = 0, consumed_chars_base;
4623 int multibytep = coding->src_multibyte;
4eb6d3f1 4624 Lisp_Object attrs, eol_type, charset_list, valids;
ff0dacd7
KH
4625 int char_offset = coding->produced_char;
4626 int last_offset = char_offset;
4627 int last_id = charset_ascii;
df7492f9
KH
4628
4629 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4630 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4631
df7492f9 4632 while (1)
4ed46869 4633 {
4eb6d3f1 4634 int c;
df7492f9
KH
4635
4636 src_base = src;
4637 consumed_chars_base = consumed_chars;
b73bfc1c 4638
df7492f9
KH
4639 if (charbuf >= charbuf_end)
4640 break;
4641
4eb6d3f1 4642 ONE_MORE_BYTE (c);
df7492f9 4643 if (c == '\r')
d46c5b12 4644 {
c7c66a95
KH
4645 /* Here we assume that no charset maps '\r' to something
4646 else. */
df7492f9 4647 if (EQ (eol_type, Qdos))
b73bfc1c 4648 {
98725083 4649 if (src == src_end)
b73bfc1c 4650 {
98725083
KH
4651 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4652 goto no_more_source;
b73bfc1c 4653 }
98725083 4654 if (*src == '\n')
df7492f9 4655 ONE_MORE_BYTE (c);
b73bfc1c 4656 }
df7492f9 4657 else if (EQ (eol_type, Qmac))
b73bfc1c 4658 c = '\n';
d46c5b12 4659 }
df7492f9 4660 else
d46c5b12 4661 {
4eb6d3f1
KH
4662 Lisp_Object val;
4663 struct charset *charset;
c7c66a95 4664 int dim;
acb2a965
KH
4665 int len = 1;
4666 unsigned code = c;
4eb6d3f1
KH
4667
4668 val = AREF (valids, c);
4669 if (NILP (val))
4670 goto invalid_code;
c7c66a95 4671 if (INTEGERP (val))
4eb6d3f1 4672 {
c7c66a95
KH
4673 charset = CHARSET_FROM_ID (XFASTINT (val));
4674 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4675 while (len < dim)
4eb6d3f1 4676 {
acb2a965
KH
4677 ONE_MORE_BYTE (c);
4678 code = (code << 8) | c;
f9d71dcd 4679 len++;
4eb6d3f1 4680 }
c7c66a95
KH
4681 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4682 charset, code, c);
4683 }
4684 else
4685 {
4686 /* VAL is a list of charset IDs. It is assured that the
4687 list is sorted by charset dimensions (smaller one
4688 comes first). */
c7c66a95
KH
4689 while (CONSP (val))
4690 {
4691 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4692 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4693 while (len < dim)
c7c66a95 4694 {
acb2a965
KH
4695 ONE_MORE_BYTE (c);
4696 code = (code << 8) | c;
f9d71dcd 4697 len++;
c7c66a95 4698 }
c7c66a95
KH
4699 CODING_DECODE_CHAR (coding, src, src_base,
4700 src_end, charset, code, c);
4701 if (c >= 0)
4702 break;
4703 val = XCDR (val);
4704 }
4eb6d3f1 4705 }
df7492f9
KH
4706 if (c < 0)
4707 goto invalid_code;
ff0dacd7
KH
4708 if (charset->id != charset_ascii
4709 && last_id != charset->id)
4710 {
4711 if (last_id != charset_ascii)
4712 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4713 last_id = charset->id;
4714 last_offset = char_offset;
4715 }
d46c5b12 4716 }
df7492f9 4717 *charbuf++ = c;
ff0dacd7 4718 char_offset++;
df7492f9
KH
4719 continue;
4720
4721 invalid_code:
4722 src = src_base;
4723 consumed_chars = consumed_chars_base;
4724 ONE_MORE_BYTE (c);
4725 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4726 char_offset++;
df7492f9 4727 coding->errors++;
4ed46869
KH
4728 }
4729
df7492f9 4730 no_more_source:
ff0dacd7
KH
4731 if (last_id != charset_ascii)
4732 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4733 coding->consumed_char += consumed_chars_base;
4734 coding->consumed = src_base - coding->source;
4735 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4736}
4737
df7492f9
KH
4738static int
4739encode_coding_charset (coding)
4ed46869 4740 struct coding_system *coding;
4ed46869 4741{
df7492f9
KH
4742 int multibytep = coding->dst_multibyte;
4743 int *charbuf = coding->charbuf;
4744 int *charbuf_end = charbuf + coding->charbuf_used;
4745 unsigned char *dst = coding->destination + coding->produced;
4746 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4747 int safe_room = MAX_MULTIBYTE_LENGTH;
4748 int produced_chars = 0;
df7492f9
KH
4749 Lisp_Object attrs, eol_type, charset_list;
4750 int ascii_compatible;
b73bfc1c 4751 int c;
b73bfc1c 4752
df7492f9 4753 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4754 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4755
df7492f9 4756 while (charbuf < charbuf_end)
4ed46869 4757 {
4eb6d3f1 4758 struct charset *charset;
df7492f9 4759 unsigned code;
8f924df7 4760
df7492f9
KH
4761 ASSURE_DESTINATION (safe_room);
4762 c = *charbuf++;
4763 if (ascii_compatible && ASCII_CHAR_P (c))
4764 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 4765 else if (CHAR_BYTE8_P (c))
4ed46869 4766 {
16eafb5d
KH
4767 c = CHAR_TO_BYTE8 (c);
4768 EMIT_ONE_BYTE (c);
d46c5b12 4769 }
d46c5b12 4770 else
b73bfc1c 4771 {
4eb6d3f1
KH
4772 charset = char_charset (c, charset_list, &code);
4773 if (charset)
4774 {
4775 if (CHARSET_DIMENSION (charset) == 1)
4776 EMIT_ONE_BYTE (code);
4777 else if (CHARSET_DIMENSION (charset) == 2)
4778 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4779 else if (CHARSET_DIMENSION (charset) == 3)
4780 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4781 else
4782 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4783 (code >> 8) & 0xFF, code & 0xFF);
4784 }
4785 else
41cbe562
KH
4786 {
4787 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4788 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4789 else
4790 c = coding->default_char;
4791 EMIT_ONE_BYTE (c);
4792 }
4ed46869 4793 }
4ed46869
KH
4794 }
4795
df7492f9
KH
4796 coding->result = CODING_RESULT_SUCCESS;
4797 coding->produced_char += produced_chars;
4798 coding->produced = dst - coding->destination;
4799 return 0;
4ed46869
KH
4800}
4801
4802\f
1397dc18 4803/*** 10. C library functions ***/
4ed46869 4804
df7492f9
KH
4805/* Setup coding context CODING from information about CODING_SYSTEM.
4806 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4807 CODING_SYSTEM is invalid, signal an error. */
4ed46869 4808
ec6d2bb8 4809void
e0e989f6
KH
4810setup_coding_system (coding_system, coding)
4811 Lisp_Object coding_system;
4ed46869
KH
4812 struct coding_system *coding;
4813{
df7492f9
KH
4814 Lisp_Object attrs;
4815 Lisp_Object eol_type;
4816 Lisp_Object coding_type;
4608c386 4817 Lisp_Object val;
4ed46869 4818
df7492f9
KH
4819 if (NILP (coding_system))
4820 coding_system = Qno_conversion;
c07c8e12 4821
df7492f9 4822 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 4823
df7492f9
KH
4824 attrs = CODING_ID_ATTRS (coding->id);
4825 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 4826
df7492f9
KH
4827 coding->mode = 0;
4828 coding->head_ascii = -1;
4829 coding->common_flags
4830 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
5e5c78be
KH
4831 if (! NILP (CODING_ATTR_POST_READ (attrs)))
4832 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4833 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4834 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
4835 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4836 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 4837
df7492f9 4838 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4839 coding->max_charset_id = SCHARS (val) - 1;
4840 coding->safe_charsets = (char *) SDATA (val);
df7492f9 4841 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 4842
df7492f9
KH
4843 coding_type = CODING_ATTR_TYPE (attrs);
4844 if (EQ (coding_type, Qundecided))
d46c5b12 4845 {
df7492f9
KH
4846 coding->detector = NULL;
4847 coding->decoder = decode_coding_raw_text;
4848 coding->encoder = encode_coding_raw_text;
4849 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 4850 }
df7492f9 4851 else if (EQ (coding_type, Qiso_2022))
d46c5b12 4852 {
df7492f9
KH
4853 int i;
4854 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4855
4856 /* Invoke graphic register 0 to plane 0. */
4857 CODING_ISO_INVOCATION (coding, 0) = 0;
4858 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4859 CODING_ISO_INVOCATION (coding, 1)
4860 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4861 /* Setup the initial status of designation. */
4862 for (i = 0; i < 4; i++)
4863 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4864 /* Not single shifting initially. */
4865 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4866 /* Beginning of buffer should also be regarded as bol. */
4867 CODING_ISO_BOL (coding) = 1;
4868 coding->detector = detect_coding_iso_2022;
4869 coding->decoder = decode_coding_iso_2022;
4870 coding->encoder = encode_coding_iso_2022;
4871 if (flags & CODING_ISO_FLAG_SAFE)
4872 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 4873 coding->common_flags
df7492f9
KH
4874 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4875 | CODING_REQUIRE_FLUSHING_MASK);
4876 if (flags & CODING_ISO_FLAG_COMPOSITION)
4877 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4878 if (flags & CODING_ISO_FLAG_DESIGNATION)
4879 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4880 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4881 {
4882 setup_iso_safe_charsets (attrs);
4883 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4884 coding->max_charset_id = SCHARS (val) - 1;
4885 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
4886 }
4887 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 4888 }
df7492f9 4889 else if (EQ (coding_type, Qcharset))
d46c5b12 4890 {
df7492f9
KH
4891 coding->detector = detect_coding_charset;
4892 coding->decoder = decode_coding_charset;
4893 coding->encoder = encode_coding_charset;
d46c5b12 4894 coding->common_flags
df7492f9 4895 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 4896 }
df7492f9 4897 else if (EQ (coding_type, Qutf_8))
d46c5b12 4898 {
df7492f9
KH
4899 coding->detector = detect_coding_utf_8;
4900 coding->decoder = decode_coding_utf_8;
4901 coding->encoder = encode_coding_utf_8;
4902 coding->common_flags
4903 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4904 }
4905 else if (EQ (coding_type, Qutf_16))
4906 {
4907 val = AREF (attrs, coding_attr_utf_16_bom);
4908 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4909 : EQ (val, Qt) ? utf_16_with_bom
4910 : utf_16_without_bom);
4911 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 4912 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 4913 : utf_16_little_endian);
e19c3639 4914 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4915 coding->detector = detect_coding_utf_16;
4916 coding->decoder = decode_coding_utf_16;
4917 coding->encoder = encode_coding_utf_16;
4918 coding->common_flags
4919 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
4920 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4921 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 4922 }
df7492f9 4923 else if (EQ (coding_type, Qccl))
4ed46869 4924 {
df7492f9
KH
4925 coding->detector = detect_coding_ccl;
4926 coding->decoder = decode_coding_ccl;
4927 coding->encoder = encode_coding_ccl;
c952af22 4928 coding->common_flags
df7492f9
KH
4929 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4930 | CODING_REQUIRE_FLUSHING_MASK);
4931 }
4932 else if (EQ (coding_type, Qemacs_mule))
4933 {
4934 coding->detector = detect_coding_emacs_mule;
4935 coding->decoder = decode_coding_emacs_mule;
4936 coding->encoder = encode_coding_emacs_mule;
c952af22 4937 coding->common_flags
df7492f9
KH
4938 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4939 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4940 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4941 {
4942 Lisp_Object tail, safe_charsets;
4943 int max_charset_id = 0;
4944
4945 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4946 tail = XCDR (tail))
4947 if (max_charset_id < XFASTINT (XCAR (tail)))
4948 max_charset_id = XFASTINT (XCAR (tail));
4949 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4950 make_number (255));
4951 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4952 tail = XCDR (tail))
8f924df7 4953 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 4954 coding->max_charset_id = max_charset_id;
8f924df7 4955 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
4956 }
4957 }
4958 else if (EQ (coding_type, Qshift_jis))
4959 {
4960 coding->detector = detect_coding_sjis;
4961 coding->decoder = decode_coding_sjis;
4962 coding->encoder = encode_coding_sjis;
c952af22 4963 coding->common_flags
df7492f9
KH
4964 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4965 }
4966 else if (EQ (coding_type, Qbig5))
4967 {
4968 coding->detector = detect_coding_big5;
4969 coding->decoder = decode_coding_big5;
4970 coding->encoder = encode_coding_big5;
c952af22 4971 coding->common_flags
df7492f9
KH
4972 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4973 }
4974 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4975 {
df7492f9
KH
4976 coding->detector = NULL;
4977 coding->decoder = decode_coding_raw_text;
4978 coding->encoder = encode_coding_raw_text;
4ed46869 4979 }
4ed46869 4980
df7492f9 4981 return;
4ed46869
KH
4982}
4983
df7492f9
KH
4984/* Return raw-text or one of its subsidiaries that has the same
4985 eol_type as CODING-SYSTEM. */
ec6d2bb8 4986
df7492f9
KH
4987Lisp_Object
4988raw_text_coding_system (coding_system)
4989 Lisp_Object coding_system;
ec6d2bb8 4990{
0be8721c 4991 Lisp_Object spec, attrs;
df7492f9 4992 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 4993
d3e4cb56
KH
4994 if (NILP (coding_system))
4995 return Qraw_text;
df7492f9
KH
4996 spec = CODING_SYSTEM_SPEC (coding_system);
4997 attrs = AREF (spec, 0);
ec6d2bb8 4998
df7492f9
KH
4999 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5000 return coding_system;
ec6d2bb8 5001
df7492f9
KH
5002 eol_type = AREF (spec, 2);
5003 if (VECTORP (eol_type))
5004 return Qraw_text;
5005 spec = CODING_SYSTEM_SPEC (Qraw_text);
5006 raw_text_eol_type = AREF (spec, 2);
5007 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5008 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5009 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5010}
5011
54f78171 5012
df7492f9
KH
5013/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5014 does, return one of the subsidiary that has the same eol-spec as
5015 PARENT. Otherwise, return CODING_SYSTEM. */
5016
5017Lisp_Object
5018coding_inherit_eol_type (coding_system, parent)
b74e4686 5019 Lisp_Object coding_system, parent;
54f78171 5020{
3e139625 5021 Lisp_Object spec, eol_type;
54f78171 5022
d3e4cb56
KH
5023 if (NILP (coding_system))
5024 coding_system = Qraw_text;
df7492f9 5025 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5026 eol_type = AREF (spec, 2);
d3e4cb56
KH
5027 if (VECTORP (eol_type)
5028 && ! NILP (parent))
df7492f9
KH
5029 {
5030 Lisp_Object parent_spec;
df7492f9
KH
5031 Lisp_Object parent_eol_type;
5032
5033 parent_spec
5034 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5035 parent_eol_type = AREF (parent_spec, 2);
5036 if (EQ (parent_eol_type, Qunix))
5037 coding_system = AREF (eol_type, 0);
5038 else if (EQ (parent_eol_type, Qdos))
5039 coding_system = AREF (eol_type, 1);
5040 else if (EQ (parent_eol_type, Qmac))
5041 coding_system = AREF (eol_type, 2);
54f78171 5042 }
df7492f9 5043 return coding_system;
54f78171
KH
5044}
5045
4ed46869
KH
5046/* Emacs has a mechanism to automatically detect a coding system if it
5047 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5048 it's impossible to distinguish some coding systems accurately
5049 because they use the same range of codes. So, at first, coding
5050 systems are categorized into the following categories:
5051
0ef69138 5052 o coding-category-emacs-mule
4ed46869
KH
5053
5054 The category for a coding system which has the same code range
5055 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5056 symbol) `emacs-mule' by default.
4ed46869
KH
5057
5058 o coding-category-sjis
5059
5060 The category for a coding system which has the same code range
5061 as SJIS. Assigned the coding-system (Lisp
7717c392 5062 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5063
5064 o coding-category-iso-7
5065
5066 The category for a coding system which has the same code range
7717c392 5067 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5068 shift and single shift functions. This can encode/decode all
5069 charsets. Assigned the coding-system (Lisp symbol)
5070 `iso-2022-7bit' by default.
5071
5072 o coding-category-iso-7-tight
5073
5074 Same as coding-category-iso-7 except that this can
5075 encode/decode only the specified charsets.
4ed46869
KH
5076
5077 o coding-category-iso-8-1
5078
5079 The category for a coding system which has the same code range
5080 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5081 for DIMENSION1 charset. This doesn't use any locking shift
5082 and single shift functions. Assigned the coding-system (Lisp
5083 symbol) `iso-latin-1' by default.
4ed46869
KH
5084
5085 o coding-category-iso-8-2
5086
5087 The category for a coding system which has the same code range
5088 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5089 for DIMENSION2 charset. This doesn't use any locking shift
5090 and single shift functions. Assigned the coding-system (Lisp
5091 symbol) `japanese-iso-8bit' by default.
4ed46869 5092
7717c392 5093 o coding-category-iso-7-else
4ed46869
KH
5094
5095 The category for a coding system which has the same code range
df7492f9 5096 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5097 single shift functions. Assigned the coding-system (Lisp
5098 symbol) `iso-2022-7bit-lock' by default.
5099
5100 o coding-category-iso-8-else
5101
5102 The category for a coding system which has the same code range
df7492f9 5103 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5104 single shift functions. Assigned the coding-system (Lisp
5105 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5106
5107 o coding-category-big5
5108
5109 The category for a coding system which has the same code range
5110 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5111 `cn-big5' by default.
4ed46869 5112
fa42c37f
KH
5113 o coding-category-utf-8
5114
5115 The category for a coding system which has the same code range
5116 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5117 symbol) `utf-8' by default.
5118
5119 o coding-category-utf-16-be
5120
5121 The category for a coding system in which a text has an
5122 Unicode signature (cf. Unicode Standard) in the order of BIG
5123 endian at the head. Assigned the coding-system (Lisp symbol)
5124 `utf-16-be' by default.
5125
5126 o coding-category-utf-16-le
5127
5128 The category for a coding system in which a text has an
5129 Unicode signature (cf. Unicode Standard) in the order of
5130 LITTLE endian at the head. Assigned the coding-system (Lisp
5131 symbol) `utf-16-le' by default.
5132
1397dc18
KH
5133 o coding-category-ccl
5134
5135 The category for a coding system of which encoder/decoder is
5136 written in CCL programs. The default value is nil, i.e., no
5137 coding system is assigned.
5138
4ed46869
KH
5139 o coding-category-binary
5140
5141 The category for a coding system not categorized in any of the
5142 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5143 `no-conversion' by default.
4ed46869
KH
5144
5145 Each of them is a Lisp symbol and the value is an actual
df7492f9 5146 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5147 What Emacs does actually is to detect a category of coding system.
5148 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5149 decide only one possible category, it selects a category of the
4ed46869
KH
5150 highest priority. Priorities of categories are also specified by a
5151 user in a Lisp variable `coding-category-list'.
5152
5153*/
5154
df7492f9
KH
5155#define EOL_SEEN_NONE 0
5156#define EOL_SEEN_LF 1
5157#define EOL_SEEN_CR 2
5158#define EOL_SEEN_CRLF 4
66cfb530 5159
ff0dacd7
KH
5160/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5161 SOURCE is encoded. If CATEGORY is one of
5162 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5163 two-byte, else they are encoded by one-byte.
5164
5165 Return one of EOL_SEEN_XXX. */
4ed46869 5166
bc4bc72a 5167#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5168
5169static int
89528eb3 5170detect_eol (source, src_bytes, category)
d46c5b12 5171 unsigned char *source;
df7492f9 5172 EMACS_INT src_bytes;
89528eb3 5173 enum coding_category category;
4ed46869 5174{
d46c5b12 5175 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5176 unsigned char c;
df7492f9
KH
5177 int total = 0;
5178 int eol_seen = EOL_SEEN_NONE;
4ed46869 5179
89528eb3 5180 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5181 {
df7492f9 5182 int msb, lsb;
fa42c37f 5183
89528eb3
KH
5184 msb = category == (coding_category_utf_16_le
5185 | coding_category_utf_16_le_nosig);
df7492f9 5186 lsb = 1 - msb;
fa42c37f 5187
df7492f9 5188 while (src + 1 < src_end)
fa42c37f 5189 {
df7492f9
KH
5190 c = src[lsb];
5191 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5192 {
df7492f9
KH
5193 int this_eol;
5194
5195 if (c == '\n')
5196 this_eol = EOL_SEEN_LF;
5197 else if (src + 3 >= src_end
5198 || src[msb + 2] != 0
5199 || src[lsb + 2] != '\n')
5200 this_eol = EOL_SEEN_CR;
fa42c37f 5201 else
8f924df7 5202 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5203
5204 if (eol_seen == EOL_SEEN_NONE)
5205 /* This is the first end-of-line. */
5206 eol_seen = this_eol;
5207 else if (eol_seen != this_eol)
fa42c37f 5208 {
df7492f9
KH
5209 /* The found type is different from what found before. */
5210 eol_seen = EOL_SEEN_LF;
5211 break;
fa42c37f 5212 }
df7492f9
KH
5213 if (++total == MAX_EOL_CHECK_COUNT)
5214 break;
fa42c37f 5215 }
df7492f9 5216 src += 2;
fa42c37f 5217 }
bcf26d6a 5218 }
d46c5b12 5219 else
c4825358 5220 {
df7492f9 5221 while (src < src_end)
27901516 5222 {
df7492f9
KH
5223 c = *src++;
5224 if (c == '\n' || c == '\r')
5225 {
5226 int this_eol;
d46c5b12 5227
df7492f9
KH
5228 if (c == '\n')
5229 this_eol = EOL_SEEN_LF;
5230 else if (src >= src_end || *src != '\n')
5231 this_eol = EOL_SEEN_CR;
5232 else
5233 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5234
df7492f9
KH
5235 if (eol_seen == EOL_SEEN_NONE)
5236 /* This is the first end-of-line. */
5237 eol_seen = this_eol;
5238 else if (eol_seen != this_eol)
5239 {
5240 /* The found type is different from what found before. */
5241 eol_seen = EOL_SEEN_LF;
5242 break;
5243 }
5244 if (++total == MAX_EOL_CHECK_COUNT)
5245 break;
5246 }
5247 }
73be902c 5248 }
df7492f9 5249 return eol_seen;
73be902c
KH
5250}
5251
df7492f9 5252
73be902c 5253static void
df7492f9
KH
5254adjust_coding_eol_type (coding, eol_seen)
5255 struct coding_system *coding;
5256 int eol_seen;
73be902c 5257{
0be8721c 5258 Lisp_Object eol_type;
8f924df7 5259
df7492f9
KH
5260 eol_type = CODING_ID_EOL_TYPE (coding->id);
5261 if (eol_seen & EOL_SEEN_LF)
5262 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6f197c07 5263 else if (eol_seen & EOL_SEEN_CRLF)
df7492f9 5264 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6f197c07 5265 else if (eol_seen & EOL_SEEN_CR)
df7492f9 5266 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12 5267}
4ed46869 5268
df7492f9
KH
5269/* Detect how a text specified in CODING is encoded. If a coding
5270 system is detected, update fields of CODING by the detected coding
5271 system. */
0a28aafb 5272
df7492f9
KH
5273void
5274detect_coding (coding)
d46c5b12 5275 struct coding_system *coding;
d46c5b12 5276{
8f924df7 5277 const unsigned char *src, *src_end;
df7492f9 5278 Lisp_Object attrs, coding_type;
d46c5b12 5279
df7492f9
KH
5280 coding->consumed = coding->consumed_char = 0;
5281 coding->produced = coding->produced_char = 0;
5282 coding_set_source (coding);
1c3478b0 5283
df7492f9 5284 src_end = coding->source + coding->src_bytes;
1c3478b0 5285
df7492f9
KH
5286 /* If we have not yet decided the text encoding type, detect it
5287 now. */
5288 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5289 {
df7492f9
KH
5290 int c, i;
5291
5292 for (src = coding->source; src < src_end; src++)
d46c5b12 5293 {
df7492f9
KH
5294 c = *src;
5295 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5296 || c == ISO_CODE_SI
5297 || c == ISO_CODE_SO)))
5298 break;
d46c5b12 5299 }
df7492f9
KH
5300 coding->head_ascii = src - (coding->source + coding->consumed);
5301
5302 if (coding->head_ascii < coding->src_bytes)
d46c5b12 5303 {
ff0dacd7
KH
5304 struct coding_detection_info detect_info;
5305 enum coding_category category;
5306 struct coding_system *this;
df7492f9 5307
ff0dacd7 5308 detect_info.checked = detect_info.found = detect_info.rejected = 0;
df7492f9 5309 for (i = 0; i < coding_category_raw_text; i++)
d46c5b12 5310 {
ff0dacd7
KH
5311 category = coding_priorities[i];
5312 this = coding_categories + category;
df7492f9 5313 if (this->id < 0)
fa42c37f 5314 {
df7492f9 5315 /* No coding system of this category is defined. */
ff0dacd7 5316 detect_info.rejected |= (1 << category);
fa42c37f 5317 }
ff0dacd7 5318 else if (category >= coding_category_raw_text)
89528eb3 5319 continue;
ff0dacd7 5320 else if (detect_info.checked & (1 << category))
fa42c37f 5321 {
ff0dacd7
KH
5322 if (detect_info.found & (1 << category))
5323 break;
fa42c37f 5324 }
ff0dacd7
KH
5325 else if ((*(this->detector)) (coding, &detect_info)
5326 && detect_info.found & (1 << category))
5327 break;
d46c5b12 5328 }
ff0dacd7
KH
5329 if (i < coding_category_raw_text)
5330 setup_coding_system (CODING_ID_NAME (this->id), coding);
5331 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5332 setup_coding_system (Qraw_text, coding);
ff0dacd7 5333 else if (detect_info.rejected)
df7492f9 5334 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5335 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5336 {
5337 this = coding_categories + coding_priorities[i];
5338 setup_coding_system (CODING_ID_NAME (this->id), coding);
5339 break;
5340 }
d46c5b12 5341 }
b73bfc1c 5342 }
b49a1807
KH
5343 else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
5344 {
5345 Lisp_Object coding_systems;
5346 struct coding_detection_info detect_info;
5347
5348 coding_systems
5349 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5350 detect_info.found = detect_info.rejected = 0;
5351 if (CONSP (coding_systems)
5352 && detect_coding_utf_16 (coding, &detect_info)
5353 && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5354 | CATEGORY_MASK_UTF_16_BE)))
5355 {
5356 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5357 setup_coding_system (XCAR (coding_systems), coding);
5358 else
5359 setup_coding_system (XCDR (coding_systems), coding);
5360 }
5361 }
4ed46869 5362
df7492f9
KH
5363 attrs = CODING_ID_ATTRS (coding->id);
5364 coding_type = CODING_ATTR_TYPE (attrs);
4ed46869 5365
df7492f9
KH
5366 /* If we have not yet decided the EOL type, detect it now. But, the
5367 detection is impossible for a CCL based coding system, in which
5368 case, we detct the EOL type after decoding. */
5369 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5370 && ! EQ (coding_type, Qccl))
d46c5b12 5371 {
89528eb3 5372 int eol_seen = detect_eol (coding->source, coding->src_bytes,
3e139625 5373 (enum coding_category) XINT (CODING_ATTR_CATEGORY (attrs)));
4ed46869 5374
df7492f9
KH
5375 if (eol_seen != EOL_SEEN_NONE)
5376 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5377 }
4ed46869 5378}
4ed46869 5379
d46c5b12 5380
aaaf0b1e 5381static void
df7492f9 5382decode_eol (coding)
aaaf0b1e 5383 struct coding_system *coding;
aaaf0b1e 5384{
df7492f9 5385 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5386 {
df7492f9
KH
5387 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5388 unsigned char *pend = p + coding->produced;
5389 int eol_seen = EOL_SEEN_NONE;
4ed46869 5390
df7492f9 5391 for (; p < pend; p++)
aaaf0b1e 5392 {
df7492f9
KH
5393 if (*p == '\n')
5394 eol_seen |= EOL_SEEN_LF;
5395 else if (*p == '\r')
aaaf0b1e 5396 {
df7492f9 5397 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5398 {
df7492f9
KH
5399 eol_seen |= EOL_SEEN_CRLF;
5400 p++;
aaaf0b1e 5401 }
aaaf0b1e 5402 else
df7492f9 5403 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5404 }
aaaf0b1e 5405 }
df7492f9
KH
5406 if (eol_seen != EOL_SEEN_NONE)
5407 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5408 }
d46c5b12 5409
df7492f9 5410 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
27901516 5411 {
df7492f9
KH
5412 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5413 unsigned char *pend = p + coding->produced;
d46c5b12 5414
df7492f9
KH
5415 for (; p < pend; p++)
5416 if (*p == '\r')
5417 *p = '\n';
4ed46869 5418 }
df7492f9
KH
5419 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5420 {
5421 unsigned char *p, *pbeg, *pend;
5422 Lisp_Object undo_list;
b73bfc1c 5423
df7492f9
KH
5424 move_gap_both (coding->dst_pos + coding->produced_char,
5425 coding->dst_pos_byte + coding->produced);
5426 undo_list = current_buffer->undo_list;
5427 current_buffer->undo_list = Qt;
c197f191 5428 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5429 current_buffer->undo_list = undo_list;
5430 pbeg = GPT_ADDR;
5431 pend = pbeg + coding->produced;
b73bfc1c 5432
df7492f9
KH
5433 for (p = pend - 1; p >= pbeg; p--)
5434 if (*p == '\r')
5435 {
5436 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5437 pend--;
5438 }
5439 coding->produced_char -= coding->produced - (pend - pbeg);
5440 coding->produced = pend - pbeg;
5441 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e 5442 }
4ed46869
KH
5443}
5444
df7492f9
KH
5445static void
5446translate_chars (coding, table)
4ed46869 5447 struct coding_system *coding;
df7492f9 5448 Lisp_Object table;
4ed46869 5449{
df7492f9
KH
5450 int *charbuf = coding->charbuf;
5451 int *charbuf_end = charbuf + coding->charbuf_used;
5452 int c;
d46c5b12 5453
df7492f9
KH
5454 if (coding->chars_at_source)
5455 return;
4ed46869 5456
df7492f9 5457 while (charbuf < charbuf_end)
8844fa83 5458 {
df7492f9
KH
5459 c = *charbuf;
5460 if (c < 0)
5461 charbuf += c;
5462 else
5463 *charbuf++ = translate_char (table, c);
8844fa83 5464 }
df7492f9 5465}
bc4bc72a 5466
d46c5b12 5467static int
df7492f9
KH
5468produce_chars (coding)
5469 struct coding_system *coding;
4ed46869 5470{
df7492f9
KH
5471 unsigned char *dst = coding->destination + coding->produced;
5472 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5473 int produced;
5474 int produced_chars = 0;
4ed46869 5475
df7492f9 5476 if (! coding->chars_at_source)
4ed46869 5477 {
df7492f9 5478 /* Characters are in coding->charbuf. */
fba4576f
AS
5479 int *buf = coding->charbuf;
5480 int *buf_end = buf + coding->charbuf_used;
df7492f9 5481 unsigned char *adjusted_dst_end;
4ed46869 5482
df7492f9
KH
5483 if (BUFFERP (coding->src_object)
5484 && EQ (coding->src_object, coding->dst_object))
8f924df7 5485 dst_end = ((unsigned char *) coding->source) + coding->consumed;
df7492f9 5486 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5487
df7492f9 5488 while (buf < buf_end)
4ed46869 5489 {
df7492f9 5490 int c = *buf++;
bc4bc72a 5491
df7492f9 5492 if (dst >= adjusted_dst_end)
d46c5b12 5493 {
df7492f9
KH
5494 dst = alloc_destination (coding,
5495 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5496 dst);
5497 dst_end = coding->destination + coding->dst_bytes;
5498 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5499 }
5500 if (c >= 0)
5501 {
5502 if (coding->dst_multibyte
5503 || ! CHAR_BYTE8_P (c))
5504 CHAR_STRING_ADVANCE (c, dst);
5505 else
5506 *dst++ = CHAR_TO_BYTE8 (c);
5507 produced_chars++;
d46c5b12 5508 }
df7492f9 5509 else
d3e4cb56
KH
5510 /* This is an annotation datum. (-C) is the length of
5511 it. */
5512 buf += -c - 1;
4ed46869
KH
5513 }
5514 }
fa42c37f 5515 else
fa42c37f 5516 {
8f924df7
KH
5517 const unsigned char *src = coding->source;
5518 const unsigned char *src_end = src + coding->src_bytes;
df7492f9 5519 Lisp_Object eol_type;
fa42c37f 5520
df7492f9 5521 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5522
df7492f9 5523 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 5524 {
df7492f9 5525 if (coding->src_multibyte)
fa42c37f 5526 {
71c81426 5527 int multibytep = 1;
df7492f9 5528 int consumed_chars;
d46c5b12 5529
df7492f9
KH
5530 while (1)
5531 {
8f924df7 5532 const unsigned char *src_base = src;
df7492f9 5533 int c;
b73bfc1c 5534
df7492f9
KH
5535 ONE_MORE_BYTE (c);
5536 if (c == '\r')
5537 {
5538 if (EQ (eol_type, Qdos))
5539 {
98725083
KH
5540 if (src == src_end)
5541 {
5542 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5543 goto no_more_source;
5544 }
5545 if (*src == '\n')
df7492f9
KH
5546 c = *src++;
5547 }
5548 else if (EQ (eol_type, Qmac))
5549 c = '\n';
5550 }
5551 if (dst == dst_end)
5552 {
2c78b7e1 5553 coding->consumed = src - coding->source;
b73bfc1c 5554
2c78b7e1 5555 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5556 dst_end = (unsigned char *) src;
2c78b7e1
KH
5557 if (dst == dst_end)
5558 {
5559 dst = alloc_destination (coding, src_end - src + 1,
5560 dst);
5561 dst_end = coding->destination + coding->dst_bytes;
5562 coding_set_source (coding);
5563 src = coding->source + coding->consumed;
5564 src_end = coding->source + coding->src_bytes;
5565 }
df7492f9
KH
5566 }
5567 *dst++ = c;
5568 produced_chars++;
5569 }
5570 no_more_source:
5571 ;
fa42c37f
KH
5572 }
5573 else
df7492f9
KH
5574 while (src < src_end)
5575 {
71c81426 5576 int multibytep = 1;
df7492f9 5577 int c = *src++;
b73bfc1c 5578
df7492f9
KH
5579 if (c == '\r')
5580 {
5581 if (EQ (eol_type, Qdos))
5582 {
5583 if (src < src_end
5584 && *src == '\n')
5585 c = *src++;
5586 }
5587 else if (EQ (eol_type, Qmac))
5588 c = '\n';
5589 }
5590 if (dst >= dst_end - 1)
5591 {
2c78b7e1 5592 coding->consumed = src - coding->source;
df7492f9 5593
2c78b7e1 5594 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5595 dst_end = (unsigned char *) src;
2c78b7e1
KH
5596 if (dst >= dst_end - 1)
5597 {
5598 dst = alloc_destination (coding, src_end - src + 2,
5599 dst);
5600 dst_end = coding->destination + coding->dst_bytes;
5601 coding_set_source (coding);
5602 src = coding->source + coding->consumed;
5603 src_end = coding->source + coding->src_bytes;
5604 }
df7492f9
KH
5605 }
5606 EMIT_ONE_BYTE (c);
5607 }
d46c5b12 5608 }
df7492f9
KH
5609 else
5610 {
5611 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 5612 {
df7492f9 5613 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5614
df7492f9 5615 if (require > 0)
fa42c37f 5616 {
df7492f9
KH
5617 EMACS_INT offset = src - coding->source;
5618
5619 dst = alloc_destination (coding, require, dst);
5620 coding_set_source (coding);
5621 src = coding->source + offset;
5622 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
5623 }
5624 }
df7492f9
KH
5625 produced_chars = coding->src_chars;
5626 while (src < src_end)
fa42c37f 5627 {
df7492f9
KH
5628 int c = *src++;
5629
5630 if (c == '\r')
5631 {
5632 if (EQ (eol_type, Qdos))
5633 {
5634 if (src < src_end
5635 && *src == '\n')
5636 c = *src++;
5637 produced_chars--;
5638 }
5639 else if (EQ (eol_type, Qmac))
5640 c = '\n';
5641 }
5642 *dst++ = c;
fa42c37f
KH
5643 }
5644 }
2c78b7e1
KH
5645 coding->consumed = coding->src_bytes;
5646 coding->consumed_char = coding->src_chars;
fa42c37f
KH
5647 }
5648
df7492f9
KH
5649 produced = dst - (coding->destination + coding->produced);
5650 if (BUFFERP (coding->dst_object))
5651 insert_from_gap (produced_chars, produced);
5652 coding->produced += produced;
5653 coding->produced_char += produced_chars;
5654 return produced_chars;
fa42c37f
KH
5655}
5656
ff0dacd7
KH
5657/* Compose text in CODING->object according to the annotation data at
5658 CHARBUF. CHARBUF is an array:
5659 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5660 */
4ed46869 5661
df7492f9
KH
5662static INLINE void
5663produce_composition (coding, charbuf)
4ed46869 5664 struct coding_system *coding;
df7492f9 5665 int *charbuf;
4ed46869 5666{
df7492f9 5667 int len;
ff0dacd7 5668 EMACS_INT from, to;
df7492f9 5669 enum composition_method method;
df7492f9 5670 Lisp_Object components;
fa42c37f 5671
df7492f9 5672 len = -charbuf[0];
ff0dacd7
KH
5673 from = coding->dst_pos + charbuf[2];
5674 to = coding->dst_pos + charbuf[3];
5675 method = (enum composition_method) (charbuf[4]);
d46c5b12 5676
df7492f9
KH
5677 if (method == COMPOSITION_RELATIVE)
5678 components = Qnil;
d46c5b12 5679 else
d46c5b12 5680 {
df7492f9
KH
5681 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5682 int i;
b73bfc1c 5683
df7492f9
KH
5684 len -= 5;
5685 charbuf += 5;
5686 for (i = 0; i < len; i++)
5687 args[i] = make_number (charbuf[i]);
5688 components = (method == COMPOSITION_WITH_ALTCHARS
5689 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 5690 }
ff0dacd7 5691 compose_text (from, to, components, Qnil, coding->dst_object);
d46c5b12
KH
5692}
5693
d46c5b12 5694
ff0dacd7
KH
5695/* Put `charset' property on text in CODING->object according to
5696 the annotation data at CHARBUF. CHARBUF is an array:
5697 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5698 */
d46c5b12 5699
ff0dacd7
KH
5700static INLINE void
5701produce_charset (coding, charbuf)
d46c5b12 5702 struct coding_system *coding;
ff0dacd7 5703 int *charbuf;
d46c5b12 5704{
ff0dacd7
KH
5705 EMACS_INT from = coding->dst_pos + charbuf[2];
5706 EMACS_INT to = coding->dst_pos + charbuf[3];
5707 struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
b73bfc1c 5708
ff0dacd7
KH
5709 Fput_text_property (make_number (from), make_number (to),
5710 Qcharset, CHARSET_NAME (charset),
5711 coding->dst_object);
d46c5b12
KH
5712}
5713
d46c5b12 5714
/* Initial number of elements tried for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the character work buffer of CODING on the stack of the
   calling function with alloca, starting at CHARBUF_SIZE elements
   and halving the size (down to a floor of 1024) whenever alloca
   yields a null pointer.  On success CODING->charbuf and
   CODING->charbuf_size are set.  If no buffer could be obtained,
   CODING->result is set to CODING_RESULT_INSUFFICIENT_MEM and the
   *calling function* returns that value, so this macro may only be
   used in functions returning int.

   CODING is parenthesized at every use so that any pointer
   expression (e.g. `&cs') may be passed.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        (coding)->result = CODING_RESULT_INSUFFICIENT_MEM;              \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
4ed46869 5736
d46c5b12
KH
5737
5738static void
df7492f9 5739produce_annotation (coding)
d46c5b12 5740 struct coding_system *coding;
d46c5b12 5741{
df7492f9
KH
5742 int *charbuf = coding->charbuf;
5743 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5744
ff0dacd7
KH
5745 if (NILP (coding->dst_object))
5746 return;
d46c5b12 5747
df7492f9 5748 while (charbuf < charbuf_end)
a84f1519 5749 {
df7492f9
KH
5750 if (*charbuf >= 0)
5751 charbuf++;
d46c5b12 5752 else
d46c5b12 5753 {
df7492f9 5754 int len = -*charbuf;
ff0dacd7 5755 switch (charbuf[1])
df7492f9
KH
5756 {
5757 case CODING_ANNOTATE_COMPOSITION_MASK:
5758 produce_composition (coding, charbuf);
5759 break;
ff0dacd7
KH
5760 case CODING_ANNOTATE_CHARSET_MASK:
5761 produce_charset (coding, charbuf);
5762 break;
df7492f9
KH
5763 default:
5764 abort ();
5765 }
5766 charbuf += len;
d46c5b12 5767 }
a84f1519 5768 }
d46c5b12
KH
5769}
5770
df7492f9
KH
5771/* Decode the data at CODING->src_object into CODING->dst_object.
5772 CODING->src_object is a buffer, a string, or nil.
5773 CODING->dst_object is a buffer.
d46c5b12 5774
df7492f9
KH
5775 If CODING->src_object is a buffer, it must be the current buffer.
5776 In this case, if CODING->src_pos is positive, it is a position of
5777 the source text in the buffer, otherwise, the source text is in the
5778 gap area of the buffer, and CODING->src_pos specifies the offset of
5779 the text from GPT (which must be the same as PT). If this is the
5780 same buffer as CODING->dst_object, CODING->src_pos must be
5781 negative.
d46c5b12 5782
df7492f9
KH
5783 If CODING->src_object is a string, CODING->src_pos in an index to
5784 that string.
d46c5b12 5785
df7492f9
KH
5786 If CODING->src_object is nil, CODING->source must already point to
5787 the non-relocatable memory area. In this case, CODING->src_pos is
5788 an offset from CODING->source.
73be902c 5789
df7492f9
KH
5790 The decoded data is inserted at the current point of the buffer
5791 CODING->dst_object.
5792*/
d46c5b12 5793
df7492f9
KH
5794static int
5795decode_coding (coding)
d46c5b12 5796 struct coding_system *coding;
d46c5b12 5797{
df7492f9 5798 Lisp_Object attrs;
d46c5b12 5799
df7492f9
KH
5800 if (BUFFERP (coding->src_object)
5801 && coding->src_pos > 0
5802 && coding->src_pos < GPT
5803 && coding->src_pos + coding->src_chars > GPT)
5804 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5805
df7492f9 5806 if (BUFFERP (coding->dst_object))
1c3478b0 5807 {
df7492f9
KH
5808 if (current_buffer != XBUFFER (coding->dst_object))
5809 set_buffer_internal (XBUFFER (coding->dst_object));
5810 if (GPT != PT)
5811 move_gap_both (PT, PT_BYTE);
1c3478b0
KH
5812 }
5813
df7492f9
KH
5814 coding->consumed = coding->consumed_char = 0;
5815 coding->produced = coding->produced_char = 0;
5816 coding->chars_at_source = 0;
5817 coding->result = CODING_RESULT_SUCCESS;
5818 coding->errors = 0;
1c3478b0 5819
df7492f9
KH
5820 ALLOC_CONVERSION_WORK_AREA (coding);
5821
5822 attrs = CODING_ID_ATTRS (coding->id);
5823
5824 do
b73bfc1c 5825 {
df7492f9
KH
5826 coding_set_source (coding);
5827 coding->annotated = 0;
5828 (*(coding->decoder)) (coding);
5829 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
da4109a9
KH
5830 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5831 else if (!NILP (Vstandard_translation_table_for_decode))
5832 translate_chars (coding, Vstandard_translation_table_for_decode);
df7492f9
KH
5833 coding_set_destination (coding);
5834 produce_chars (coding);
5835 if (coding->annotated)
5836 produce_annotation (coding);
d46c5b12 5837 }
df7492f9
KH
5838 while (coding->consumed < coding->src_bytes
5839 && ! coding->result);
d46c5b12 5840
df7492f9
KH
5841 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5842 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5843 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5844 decode_eol (coding);
d46c5b12 5845
df7492f9
KH
5846 coding->carryover_bytes = 0;
5847 if (coding->consumed < coding->src_bytes)
d46c5b12 5848 {
df7492f9 5849 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 5850 const unsigned char *src;
df7492f9
KH
5851
5852 coding_set_source (coding);
5853 coding_set_destination (coding);
5854 src = coding->source + coding->consumed;
5855
5856 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 5857 {
df7492f9
KH
5858 /* Flush out unprocessed data as binary chars. We are sure
5859 that the number of data is less than the size of
5860 coding->charbuf. */
df7492f9 5861 while (nbytes-- > 0)
1c3478b0 5862 {
df7492f9 5863 int c = *src++;
98725083
KH
5864
5865 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
1c3478b0 5866 }
df7492f9 5867 produce_chars (coding);
d46c5b12 5868 }
d46c5b12 5869 else
df7492f9
KH
5870 {
5871 /* Record unprocessed bytes in coding->carryover. We are
5872 sure that the number of data is less than the size of
5873 coding->carryover. */
5874 unsigned char *p = coding->carryover;
5875
5876 coding->carryover_bytes = nbytes;
5877 while (nbytes-- > 0)
5878 *p++ = *src++;
1c3478b0 5879 }
df7492f9 5880 coding->consumed = coding->src_bytes;
b73bfc1c 5881 }
69f76525 5882
73be902c 5883 return coding->result;
4ed46869
KH
5884}
5885
aaaf0b1e 5886
e1c23804 5887/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
5888 ending before LIMIT of CODING->src_object (buffer or string), store
5889 the data in BUF, set *STOP to a starting position of the next
5890 composition (if any) or to LIMIT, and return the address of the
5891 next element of BUF.
5892
5893 If such an annotation is not found, set *STOP to a starting
5894 position of a composition after POS (if any) or to LIMIT, and
5895 return BUF. */
5896
5897static INLINE int *
5898handle_composition_annotation (pos, limit, coding, buf, stop)
5899 EMACS_INT pos, limit;
aaaf0b1e 5900 struct coding_system *coding;
ff0dacd7
KH
5901 int *buf;
5902 EMACS_INT *stop;
aaaf0b1e 5903{
ff0dacd7
KH
5904 EMACS_INT start, end;
5905 Lisp_Object prop;
aaaf0b1e 5906
ff0dacd7
KH
5907 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5908 || end > limit)
5909 *stop = limit;
5910 else if (start > pos)
5911 *stop = start;
5912 else
aaaf0b1e 5913 {
ff0dacd7 5914 if (start == pos)
aaaf0b1e 5915 {
ff0dacd7
KH
5916 /* We found a composition. Store the corresponding
5917 annotation data in BUF. */
5918 int *head = buf;
5919 enum composition_method method = COMPOSITION_METHOD (prop);
5920 int nchars = COMPOSITION_LENGTH (prop);
5921
5922 ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5923 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 5924 {
ff0dacd7
KH
5925 Lisp_Object components;
5926 int len, i, i_byte;
5927
5928 components = COMPOSITION_COMPONENTS (prop);
5929 if (VECTORP (components))
aaaf0b1e 5930 {
ff0dacd7
KH
5931 len = XVECTOR (components)->size;
5932 for (i = 0; i < len; i++)
5933 *buf++ = XINT (AREF (components, i));
aaaf0b1e 5934 }
ff0dacd7 5935 else if (STRINGP (components))
aaaf0b1e 5936 {
8f924df7 5937 len = SCHARS (components);
ff0dacd7
KH
5938 i = i_byte = 0;
5939 while (i < len)
5940 {
5941 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5942 buf++;
5943 }
5944 }
5945 else if (INTEGERP (components))
5946 {
5947 len = 1;
5948 *buf++ = XINT (components);
5949 }
5950 else if (CONSP (components))
5951 {
5952 for (len = 0; CONSP (components);
5953 len++, components = XCDR (components))
5954 *buf++ = XINT (XCAR (components));
aaaf0b1e 5955 }
aaaf0b1e 5956 else
ff0dacd7
KH
5957 abort ();
5958 *head -= len;
aaaf0b1e 5959 }
aaaf0b1e 5960 }
ff0dacd7
KH
5961
5962 if (find_composition (end, limit, &start, &end, &prop,
5963 coding->src_object)
5964 && end <= limit)
5965 *stop = start;
5966 else
5967 *stop = limit;
aaaf0b1e 5968 }
ff0dacd7
KH
5969 return buf;
5970}
5971
5972
e1c23804 5973/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
5974 CODING->src_object (buffer of string), store the data in BUF, set
5975 *STOP to the position where the value of `charset' property changes
5976 (limiting by LIMIT), and return the address of the next element of
5977 BUF.
5978
5979 If the property value is nil, set *STOP to the position where the
5980 property value is non-nil (limiting by LIMIT), and return BUF. */
5981
5982static INLINE int *
5983handle_charset_annotation (pos, limit, coding, buf, stop)
5984 EMACS_INT pos, limit;
5985 struct coding_system *coding;
5986 int *buf;
5987 EMACS_INT *stop;
5988{
5989 Lisp_Object val, next;
5990 int id;
5991
5992 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5993 if (! NILP (val) && CHARSETP (val))
5994 id = XINT (CHARSET_SYMBOL_ID (val));
5995 else
5996 id = -1;
5997 ADD_CHARSET_DATA (buf, 0, 0, id);
5998 next = Fnext_single_property_change (make_number (pos), Qcharset,
5999 coding->src_object,
6000 make_number (limit));
6001 *stop = XINT (next);
6002 return buf;
6003}
6004
6005
df7492f9
KH
6006static void
6007consume_chars (coding)
6008 struct coding_system *coding;
6009{
6010 int *buf = coding->charbuf;
ff0dacd7 6011 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6012 const unsigned char *src = coding->source + coding->consumed;
4776e638 6013 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6014 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6015 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6016 int multibytep = coding->src_multibyte;
6017 Lisp_Object eol_type;
6018 int c;
ff0dacd7 6019 EMACS_INT stop, stop_composition, stop_charset;
88993dfd 6020
df7492f9
KH
6021 eol_type = CODING_ID_EOL_TYPE (coding->id);
6022 if (VECTORP (eol_type))
6023 eol_type = Qunix;
88993dfd 6024
df7492f9
KH
6025 /* Note: composition handling is not yet implemented. */
6026 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6027
0b5670c9
KH
6028 if (NILP (coding->src_object))
6029 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6030 else
0b5670c9
KH
6031 {
6032 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6033 stop = stop_composition = pos;
6034 else
6035 stop = stop_composition = end_pos;
6036 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6037 stop = stop_charset = pos;
6038 else
6039 stop_charset = end_pos;
6040 }
ec6d2bb8 6041
ff0dacd7
KH
6042 /* Compensate for CRLF and annotation. */
6043 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6044 while (buf < buf_end)
aaaf0b1e 6045 {
df7492f9 6046 if (pos == stop)
ec6d2bb8 6047 {
df7492f9
KH
6048 if (pos == end_pos)
6049 break;
ff0dacd7
KH
6050 if (pos == stop_composition)
6051 buf = handle_composition_annotation (pos, end_pos, coding,
6052 buf, &stop_composition);
6053 if (pos == stop_charset)
6054 buf = handle_charset_annotation (pos, end_pos, coding,
6055 buf, &stop_charset);
6056 stop = (stop_composition < stop_charset
6057 ? stop_composition : stop_charset);
df7492f9
KH
6058 }
6059
6060 if (! multibytep)
4776e638 6061 {
d3e4cb56 6062 EMACS_INT bytes;
aaaf0b1e 6063
d3e4cb56
KH
6064 if (! CODING_FOR_UNIBYTE (coding)
6065 && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
4776e638
KH
6066 c = STRING_CHAR_ADVANCE (src), pos += bytes;
6067 else
6068 c = *src++, pos++;
6069 }
df7492f9 6070 else
4776e638 6071 c = STRING_CHAR_ADVANCE (src), pos++;
df7492f9
KH
6072 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6073 c = '\n';
6074 if (! EQ (eol_type, Qunix))
aaaf0b1e 6075 {
df7492f9 6076 if (c == '\n')
aaaf0b1e 6077 {
df7492f9
KH
6078 if (EQ (eol_type, Qdos))
6079 *buf++ = '\r';
6080 else
6081 c = '\r';
aaaf0b1e
KH
6082 }
6083 }
df7492f9 6084 *buf++ = c;
aaaf0b1e 6085 }
ec6d2bb8 6086
df7492f9
KH
6087 coding->consumed = src - coding->source;
6088 coding->consumed_char = pos - coding->src_pos;
6089 coding->charbuf_used = buf - coding->charbuf;
6090 coding->chars_at_source = 0;
aaaf0b1e
KH
6091}
6092
4ed46869 6093
df7492f9
KH
6094/* Encode the text at CODING->src_object into CODING->dst_object.
6095 CODING->src_object is a buffer or a string.
6096 CODING->dst_object is a buffer or nil.
6097
6098 If CODING->src_object is a buffer, it must be the current buffer.
6099 In this case, if CODING->src_pos is positive, it is a position of
6100 the source text in the buffer, otherwise. the source text is in the
6101 gap area of the buffer, and coding->src_pos specifies the offset of
6102 the text from GPT (which must be the same as PT). If this is the
6103 same buffer as CODING->dst_object, CODING->src_pos must be
6104 negative and CODING should not have `pre-write-conversion'.
6105
6106 If CODING->src_object is a string, CODING should not have
6107 `pre-write-conversion'.
6108
6109 If CODING->dst_object is a buffer, the encoded data is inserted at
6110 the current point of that buffer.
6111
6112 If CODING->dst_object is nil, the encoded data is placed at the
6113 memory area specified by CODING->destination. */
6114
6115static int
6116encode_coding (coding)
4ed46869 6117 struct coding_system *coding;
4ed46869 6118{
df7492f9 6119 Lisp_Object attrs;
9861e777 6120
df7492f9 6121 attrs = CODING_ID_ATTRS (coding->id);
4ed46869 6122
df7492f9 6123 if (BUFFERP (coding->dst_object))
8844fa83 6124 {
df7492f9
KH
6125 set_buffer_internal (XBUFFER (coding->dst_object));
6126 coding->dst_multibyte
6127 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6128 }
4ed46869 6129
b73bfc1c 6130 coding->consumed = coding->consumed_char = 0;
df7492f9
KH
6131 coding->produced = coding->produced_char = 0;
6132 coding->result = CODING_RESULT_SUCCESS;
b73bfc1c 6133 coding->errors = 0;
b73bfc1c 6134
df7492f9 6135 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6136
df7492f9
KH
6137 do {
6138 coding_set_source (coding);
6139 consume_chars (coding);
4ed46869 6140
df7492f9 6141 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
da4109a9
KH
6142 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6143 else if (!NILP (Vstandard_translation_table_for_encode))
6144 translate_chars (coding, Vstandard_translation_table_for_encode);
b73bfc1c 6145
df7492f9
KH
6146 coding_set_destination (coding);
6147 (*(coding->encoder)) (coding);
6148 } while (coding->consumed_char < coding->src_chars);
6149
6150 if (BUFFERP (coding->dst_object))
6151 insert_from_gap (coding->produced_char, coding->produced);
6152
6153 return (coding->result);
ec6d2bb8
KH
6154}
6155
fb88bf2d 6156
/* Stack of working buffers used in code conversion.  A nil element
   means that the code conversion of that level is not using a working
   buffer.  */
df7492f9 6160Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 6161
df7492f9
KH
6162/* A working buffer used by the top level conversion. */
6163Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 6164
4ed46869 6165
df7492f9
KH
6166/* Return a working buffer that can be freely used by the following
6167 code conversion. MULTIBYTEP specifies the multibyteness of the
6168 buffer. */
b73bfc1c 6169
df7492f9 6170Lisp_Object
4776e638
KH
6171make_conversion_work_buffer (multibytep, depth)
6172 int multibytep, depth;
df7492f9
KH
6173{
6174 struct buffer *current = current_buffer;
4776e638 6175 Lisp_Object buf, name;
4ed46869 6176
4776e638 6177 if (depth == 0)
e133c8fa 6178 {
df7492f9
KH
6179 if (NILP (Vcode_conversion_reused_work_buf))
6180 Vcode_conversion_reused_work_buf
857dccb0 6181 = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
4776e638 6182 buf = Vcode_conversion_reused_work_buf;
e133c8fa 6183 }
df7492f9 6184 else
d46c5b12 6185 {
4776e638 6186 if (depth < 0)
aaaf0b1e 6187 {
857dccb0 6188 name = build_string (" *code-converting-work*");
4776e638 6189 name = Fgenerate_new_buffer_name (name, Qnil);
aaaf0b1e 6190 }
4776e638 6191 else
9861e777 6192 {
4776e638 6193 char str[128];
d46c5b12 6194
857dccb0 6195 sprintf (str, " *code-converting-work*<%d>", depth);
4776e638
KH
6196 name = build_string (str);
6197 }
6198 buf = Fget_buffer_create (name);
b73bfc1c 6199 }
df7492f9
KH
6200 set_buffer_internal (XBUFFER (buf));
6201 current_buffer->undo_list = Qt;
6202 Ferase_buffer ();
8f924df7 6203 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
df7492f9
KH
6204 set_buffer_internal (current);
6205 return buf;
6206}
d46c5b12 6207
4776e638
KH
6208static Lisp_Object
6209code_conversion_restore (buffer)
6210 Lisp_Object buffer;
6211{
6212 Lisp_Object workbuf;
6213
6214 workbuf = XCAR (Vcode_conversion_work_buf_list);
6215 if (! NILP (workbuf)
6216 && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
6217 && ! NILP (Fbuffer_live_p (workbuf)))
6218 Fkill_buffer (workbuf);
6219 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6220 set_buffer_internal (XBUFFER (buffer));
6221 return Qnil;
6222}
b73bfc1c 6223
4776e638
KH
6224static Lisp_Object
6225code_conversion_save (buffer, with_work_buf, multibyte)
6226 Lisp_Object buffer;
6227 int with_work_buf, multibyte;
df7492f9 6228{
4776e638 6229 Lisp_Object workbuf;
b73bfc1c 6230
4776e638 6231 if (with_work_buf)
b73bfc1c 6232 {
4776e638 6233 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
b73bfc1c 6234
4776e638 6235 workbuf = make_conversion_work_buffer (multibyte, depth);
4ed46869 6236 }
4776e638
KH
6237 else
6238 workbuf = Qnil;
6239 Vcode_conversion_work_buf_list
6240 = Fcons (workbuf, Vcode_conversion_work_buf_list);
6241 record_unwind_protect (code_conversion_restore, buffer);
6242 return workbuf;
df7492f9 6243}
d46c5b12 6244
df7492f9
KH
6245int
6246decode_coding_gap (coding, chars, bytes)
6247 struct coding_system *coding;
6248 EMACS_INT chars, bytes;
6249{
6250 int count = specpdl_ptr - specpdl;
5e5c78be 6251 Lisp_Object attrs;
4776e638 6252 Lisp_Object buffer;
fb88bf2d 6253
4776e638
KH
6254 buffer = Fcurrent_buffer ();
6255 code_conversion_save (buffer, 0, 0);
ec6d2bb8 6256
4776e638 6257 coding->src_object = buffer;
df7492f9
KH
6258 coding->src_chars = chars;
6259 coding->src_bytes = bytes;
6260 coding->src_pos = -chars;
6261 coding->src_pos_byte = -bytes;
6262 coding->src_multibyte = chars < bytes;
5e5c78be 6263 coding->dst_object = buffer;
df7492f9
KH
6264 coding->dst_pos = PT;
6265 coding->dst_pos_byte = PT_BYTE;
71c81426 6266 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6267 coding->mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6268
df7492f9
KH
6269 if (CODING_REQUIRE_DETECTION (coding))
6270 detect_coding (coding);
8f924df7 6271
df7492f9 6272 decode_coding (coding);
d46c5b12 6273
5e5c78be
KH
6274 attrs = CODING_ID_ATTRS (coding->id);
6275 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6276 {
5e5c78be
KH
6277 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6278 Lisp_Object val;
6279
6280 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6281 val = call1 (CODING_ATTR_POST_READ (attrs),
6282 make_number (coding->produced_char));
5e5c78be
KH
6283 CHECK_NATNUM (val);
6284 coding->produced_char += Z - prev_Z;
6285 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6286 }
4ed46869 6287
df7492f9 6288 unbind_to (count, Qnil);
b73bfc1c
KH
6289 return coding->result;
6290}
52d41803 6291
4ed46869 6292int
df7492f9 6293encode_coding_gap (coding, chars, bytes)
4ed46869 6294 struct coding_system *coding;
df7492f9 6295 EMACS_INT chars, bytes;
4ed46869 6296{
df7492f9
KH
6297 int count = specpdl_ptr - specpdl;
6298 Lisp_Object buffer;
4ed46869 6299
df7492f9 6300 buffer = Fcurrent_buffer ();
4776e638 6301 code_conversion_save (buffer, 0, 0);
4ed46869 6302
df7492f9
KH
6303 coding->src_object = buffer;
6304 coding->src_chars = chars;
6305 coding->src_bytes = bytes;
6306 coding->src_pos = -chars;
6307 coding->src_pos_byte = -bytes;
6308 coding->src_multibyte = chars < bytes;
6309 coding->dst_object = coding->src_object;
6310 coding->dst_pos = PT;
6311 coding->dst_pos_byte = PT_BYTE;
4ed46869 6312
df7492f9 6313 encode_coding (coding);
b73bfc1c 6314
df7492f9
KH
6315 unbind_to (count, Qnil);
6316 return coding->result;
6317}
4ed46869 6318
d46c5b12 6319
df7492f9
KH
6320/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6321 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6322
df7492f9 6323 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 6324
df7492f9
KH
6325 If it is a buffer, the text is at point of the buffer. FROM and TO
6326 are positions in the buffer.
b73bfc1c 6327
df7492f9
KH
6328 If it is a string, the text is at the beginning of the string.
6329 FROM and TO are indices to the string.
4ed46869 6330
df7492f9
KH
6331 If it is nil, the text is at coding->source. FROM and TO are
6332 indices to coding->source.
bb10be8b 6333
df7492f9 6334 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 6335
df7492f9
KH
6336 If it is a buffer, the decoded text is inserted at point of the
6337 buffer. If the buffer is the same as SRC_OBJECT, the source text
6338 is deleted.
4ed46869 6339
df7492f9
KH
6340 If it is Qt, a string is made from the decoded text, and
6341 set in CODING->dst_object.
d46c5b12 6342
df7492f9 6343 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6344 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6345 CODING->destination by xmalloc. If the decoded text is longer than
6346 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6347 */
d46c5b12 6348
df7492f9
KH
6349void
6350decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6351 dst_object)
d46c5b12 6352 struct coding_system *coding;
df7492f9
KH
6353 Lisp_Object src_object;
6354 EMACS_INT from, from_byte, to, to_byte;
6355 Lisp_Object dst_object;
d46c5b12 6356{
df7492f9
KH
6357 int count = specpdl_ptr - specpdl;
6358 unsigned char *destination;
6359 EMACS_INT dst_bytes;
6360 EMACS_INT chars = to - from;
6361 EMACS_INT bytes = to_byte - from_byte;
6362 Lisp_Object attrs;
4776e638
KH
6363 Lisp_Object buffer;
6364 int saved_pt = -1, saved_pt_byte;
d46c5b12 6365
4776e638 6366 buffer = Fcurrent_buffer ();
93dec019 6367
df7492f9 6368 if (NILP (dst_object))
d46c5b12 6369 {
df7492f9
KH
6370 destination = coding->destination;
6371 dst_bytes = coding->dst_bytes;
d46c5b12 6372 }
93dec019 6373
df7492f9
KH
6374 coding->src_object = src_object;
6375 coding->src_chars = chars;
6376 coding->src_bytes = bytes;
6377 coding->src_multibyte = chars < bytes;
70ad9fc4 6378
df7492f9 6379 if (STRINGP (src_object))
d46c5b12 6380 {
df7492f9
KH
6381 coding->src_pos = from;
6382 coding->src_pos_byte = from_byte;
d46c5b12 6383 }
df7492f9 6384 else if (BUFFERP (src_object))
88993dfd 6385 {
df7492f9
KH
6386 set_buffer_internal (XBUFFER (src_object));
6387 if (from != GPT)
6388 move_gap_both (from, from_byte);
6389 if (EQ (src_object, dst_object))
fb88bf2d 6390 {
4776e638 6391 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6392 TEMP_SET_PT_BOTH (from, from_byte);
6393 del_range_both (from, from_byte, to, to_byte, 1);
6394 coding->src_pos = -chars;
6395 coding->src_pos_byte = -bytes;
fb88bf2d 6396 }
df7492f9 6397 else
fb88bf2d 6398 {
df7492f9
KH
6399 coding->src_pos = from;
6400 coding->src_pos_byte = from_byte;
fb88bf2d 6401 }
88993dfd
KH
6402 }
6403
df7492f9
KH
6404 if (CODING_REQUIRE_DETECTION (coding))
6405 detect_coding (coding);
6406 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 6407
2cb26057
KH
6408 if (EQ (dst_object, Qt)
6409 || (! NILP (CODING_ATTR_POST_READ (attrs))
6410 && NILP (dst_object)))
b73bfc1c 6411 {
4776e638 6412 coding->dst_object = code_conversion_save (buffer, 1, 1);
df7492f9
KH
6413 coding->dst_pos = BEG;
6414 coding->dst_pos_byte = BEG_BYTE;
6415 coding->dst_multibyte = 1;
b73bfc1c 6416 }
df7492f9 6417 else if (BUFFERP (dst_object))
d46c5b12 6418 {
4776e638 6419 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6420 coding->dst_object = dst_object;
6421 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6422 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6423 coding->dst_multibyte
6424 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
6425 }
6426 else
6427 {
4776e638 6428 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6429 coding->dst_object = Qnil;
6430 coding->dst_multibyte = 1;
d46c5b12
KH
6431 }
6432
df7492f9 6433 decode_coding (coding);
fa46990e 6434
df7492f9
KH
6435 if (BUFFERP (coding->dst_object))
6436 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 6437
df7492f9 6438 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6439 {
df7492f9
KH
6440 struct gcpro gcpro1, gcpro2;
6441 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6442 Lisp_Object val;
d46c5b12 6443
c0cc7f7f 6444 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6445 GCPRO2 (coding->src_object, coding->dst_object);
6446 val = call1 (CODING_ATTR_POST_READ (attrs),
6447 make_number (coding->produced_char));
6448 UNGCPRO;
6449 CHECK_NATNUM (val);
6450 coding->produced_char += Z - prev_Z;
6451 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6452 }
de79a6a5 6453
df7492f9 6454 if (EQ (dst_object, Qt))
ec6d2bb8 6455 {
df7492f9
KH
6456 coding->dst_object = Fbuffer_string ();
6457 }
6458 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6459 {
6460 set_buffer_internal (XBUFFER (coding->dst_object));
6461 if (dst_bytes < coding->produced)
6462 {
6463 destination
6464 = (unsigned char *) xrealloc (destination, coding->produced);
6465 if (! destination)
6466 {
6467 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6468 unbind_to (count, Qnil);
6469 return;
6470 }
6471 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6472 move_gap_both (BEGV, BEGV_BYTE);
6473 bcopy (BEGV_ADDR, destination, coding->produced);
6474 coding->destination = destination;
d46c5b12 6475 }
ec6d2bb8 6476 }
b73bfc1c 6477
4776e638
KH
6478 if (saved_pt >= 0)
6479 {
6480 /* This is the case of:
6481 (BUFFERP (src_object) && EQ (src_object, dst_object))
6482 As we have moved PT while replacing the original buffer
6483 contents, we must recover it now. */
6484 set_buffer_internal (XBUFFER (src_object));
6485 if (saved_pt < from)
6486 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6487 else if (saved_pt < from + chars)
6488 TEMP_SET_PT_BOTH (from, from_byte);
6489 else if (! NILP (current_buffer->enable_multibyte_characters))
6490 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6491 saved_pt_byte + (coding->produced - bytes));
6492 else
6493 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6494 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6495 }
4776e638 6496
df7492f9 6497 unbind_to (count, Qnil);
d46c5b12
KH
6498}
6499
d46c5b12 6500
df7492f9
KH
6501void
6502encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6503 dst_object)
d46c5b12 6504 struct coding_system *coding;
df7492f9
KH
6505 Lisp_Object src_object;
6506 EMACS_INT from, from_byte, to, to_byte;
6507 Lisp_Object dst_object;
d46c5b12 6508{
b73bfc1c 6509 int count = specpdl_ptr - specpdl;
df7492f9
KH
6510 EMACS_INT chars = to - from;
6511 EMACS_INT bytes = to_byte - from_byte;
6512 Lisp_Object attrs;
4776e638
KH
6513 Lisp_Object buffer;
6514 int saved_pt = -1, saved_pt_byte;
df7492f9 6515
4776e638 6516 buffer = Fcurrent_buffer ();
df7492f9
KH
6517
6518 coding->src_object = src_object;
6519 coding->src_chars = chars;
6520 coding->src_bytes = bytes;
6521 coding->src_multibyte = chars < bytes;
6522
6523 attrs = CODING_ID_ATTRS (coding->id);
6524
6525 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6526 {
4776e638
KH
6527 coding->src_object = code_conversion_save (buffer, 1,
6528 coding->src_multibyte);
df7492f9
KH
6529 set_buffer_internal (XBUFFER (coding->src_object));
6530 if (STRINGP (src_object))
6531 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6532 else if (BUFFERP (src_object))
6533 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6534 else
6535 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 6536
df7492f9
KH
6537 if (EQ (src_object, dst_object))
6538 {
6539 set_buffer_internal (XBUFFER (src_object));
4776e638 6540 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6541 del_range_both (from, from_byte, to, to_byte, 1);
6542 set_buffer_internal (XBUFFER (coding->src_object));
6543 }
6544
ac87bbef
KH
6545 call2 (CODING_ATTR_PRE_WRITE (attrs),
6546 make_number (BEG), make_number (Z));
6547 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6548 if (BEG != GPT)
6549 move_gap_both (BEG, BEG_BYTE);
6550 coding->src_chars = Z - BEG;
6551 coding->src_bytes = Z_BYTE - BEG_BYTE;
6552 coding->src_pos = BEG;
6553 coding->src_pos_byte = BEG_BYTE;
6554 coding->src_multibyte = Z < Z_BYTE;
6555 }
6556 else if (STRINGP (src_object))
d46c5b12 6557 {
4776e638 6558 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6559 coding->src_pos = from;
6560 coding->src_pos_byte = from_byte;
b73bfc1c 6561 }
df7492f9 6562 else if (BUFFERP (src_object))
b73bfc1c 6563 {
4776e638 6564 code_conversion_save (buffer, 0, 0);
df7492f9 6565 set_buffer_internal (XBUFFER (src_object));
df7492f9 6566 if (EQ (src_object, dst_object))
d46c5b12 6567 {
4776e638 6568 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
6569 coding->src_object = del_range_1 (from, to, 1, 1);
6570 coding->src_pos = 0;
6571 coding->src_pos_byte = 0;
d46c5b12 6572 }
df7492f9 6573 else
d46c5b12 6574 {
ff0dacd7
KH
6575 if (from < GPT && to >= GPT)
6576 move_gap_both (from, from_byte);
df7492f9
KH
6577 coding->src_pos = from;
6578 coding->src_pos_byte = from_byte;
d46c5b12 6579 }
d46c5b12 6580 }
4776e638
KH
6581 else
6582 code_conversion_save (buffer, 0, 0);
d46c5b12 6583
df7492f9 6584 if (BUFFERP (dst_object))
88993dfd 6585 {
df7492f9 6586 coding->dst_object = dst_object;
28f67a95
KH
6587 if (EQ (src_object, dst_object))
6588 {
6589 coding->dst_pos = from;
6590 coding->dst_pos_byte = from_byte;
6591 }
6592 else
6593 {
6594 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6595 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6596 }
df7492f9
KH
6597 coding->dst_multibyte
6598 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 6599 }
df7492f9 6600 else if (EQ (dst_object, Qt))
d46c5b12 6601 {
df7492f9 6602 coding->dst_object = Qnil;
df7492f9 6603 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6604 if (coding->dst_bytes == 0)
6605 coding->dst_bytes = 1;
6606 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6607 coding->dst_multibyte = 0;
d46c5b12
KH
6608 }
6609 else
6610 {
df7492f9
KH
6611 coding->dst_object = Qnil;
6612 coding->dst_multibyte = 0;
d46c5b12
KH
6613 }
6614
df7492f9 6615 encode_coding (coding);
d46c5b12 6616
df7492f9 6617 if (EQ (dst_object, Qt))
d46c5b12 6618 {
df7492f9
KH
6619 if (BUFFERP (coding->dst_object))
6620 coding->dst_object = Fbuffer_string ();
6621 else
d46c5b12 6622 {
df7492f9
KH
6623 coding->dst_object
6624 = make_unibyte_string ((char *) coding->destination,
6625 coding->produced);
6626 xfree (coding->destination);
d46c5b12 6627 }
4ed46869 6628 }
d46c5b12 6629
4776e638
KH
6630 if (saved_pt >= 0)
6631 {
6632 /* This is the case of:
6633 (BUFFERP (src_object) && EQ (src_object, dst_object))
6634 As we have moved PT while replacing the original buffer
6635 contents, we must recover it now. */
6636 set_buffer_internal (XBUFFER (src_object));
6637 if (saved_pt < from)
6638 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6639 else if (saved_pt < from + chars)
6640 TEMP_SET_PT_BOTH (from, from_byte);
6641 else if (! NILP (current_buffer->enable_multibyte_characters))
6642 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6643 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6644 else
4776e638
KH
6645 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6646 saved_pt_byte + (coding->produced - bytes));
6647 }
6648
df7492f9 6649 unbind_to (count, Qnil);
b73bfc1c
KH
6650}
6651
df7492f9 6652
b73bfc1c 6653Lisp_Object
df7492f9 6654preferred_coding_system ()
b73bfc1c 6655{
df7492f9 6656 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6657
df7492f9 6658 return CODING_ID_NAME (id);
4ed46869
KH
6659}
6660
6661\f
6662#ifdef emacs
1397dc18 6663/*** 11. Emacs Lisp library functions ***/
4ed46869 6664
4ed46869 6665DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6666 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6667See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6668about coding-system objects. */)
6669 (obj)
4ed46869
KH
6670 Lisp_Object obj;
6671{
df7492f9 6672 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6673}
6674
9d991de8
RS
6675DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6676 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6677 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6678 (prompt)
4ed46869
KH
6679 Lisp_Object prompt;
6680{
e0e989f6 6681 Lisp_Object val;
9d991de8
RS
6682 do
6683 {
4608c386
KH
6684 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6685 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 6686 }
8f924df7 6687 while (SCHARS (val) == 0);
e0e989f6 6688 return (Fintern (val, Qnil));
4ed46869
KH
6689}
6690
9b787f3e 6691DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6692 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6693If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6694 (prompt, default_coding_system)
9b787f3e 6695 Lisp_Object prompt, default_coding_system;
4ed46869 6696{
f44d27ce 6697 Lisp_Object val;
9b787f3e 6698 if (SYMBOLP (default_coding_system))
a3181084 6699 XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
4608c386 6700 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6701 Qt, Qnil, Qcoding_system_history,
6702 default_coding_system, Qnil);
8f924df7 6703 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6704}
6705
6706DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6707 1, 1, 0,
48b0f3ae 6708 doc: /* Check validity of CODING-SYSTEM.
b054002f 6709If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
df7492f9 6710 (coding_system)
4ed46869
KH
6711 Lisp_Object coding_system;
6712{
b7826503 6713 CHECK_SYMBOL (coding_system);
4ed46869
KH
6714 if (!NILP (Fcoding_system_p (coding_system)))
6715 return coding_system;
6716 while (1)
02ba4723 6717 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6718}
df7492f9 6719
3a73fa5d 6720\f
89528eb3
KH
6721/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6722 HIGHEST is nonzero, return the coding system of the highest
6723 priority among the detected coding systems. Otherwize return a
6724 list of detected coding systems sorted by their priorities. If
6725 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6726 multibyte form but contains only ASCII and eight-bit chars.
6727 Otherwise, the bytes are raw bytes.
6728
6729 CODING-SYSTEM controls the detection as below:
6730
6731 If it is nil, detect both text-format and eol-format. If the
6732 text-format part of CODING-SYSTEM is already specified
6733 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6734 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6735 detect only text-format. */
6736
d46c5b12 6737Lisp_Object
df7492f9 6738detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
8f924df7 6739 const unsigned char *src;
d46c5b12 6740 int src_bytes, highest;
0a28aafb 6741 int multibytep;
df7492f9 6742 Lisp_Object coding_system;
4ed46869 6743{
8f924df7 6744 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
6745 Lisp_Object attrs, eol_type;
6746 Lisp_Object val;
6747 struct coding_system coding;
89528eb3 6748 int id;
ff0dacd7 6749 struct coding_detection_info detect_info;
b73bfc1c 6750
df7492f9
KH
6751 if (NILP (coding_system))
6752 coding_system = Qundecided;
6753 setup_coding_system (coding_system, &coding);
6754 attrs = CODING_ID_ATTRS (coding.id);
6755 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 6756 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 6757
df7492f9
KH
6758 coding.source = src;
6759 coding.src_bytes = src_bytes;
6760 coding.src_multibyte = multibytep;
6761 coding.consumed = 0;
89528eb3 6762 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 6763
ff0dacd7 6764 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 6765
89528eb3
KH
6766 /* At first, detect text-format if necessary. */
6767 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
4ed46869 6768 {
ff0dacd7
KH
6769 enum coding_category category;
6770 struct coding_system *this;
6771 int c, i;
88993dfd 6772
df7492f9 6773 for (; src < src_end; src++)
4ed46869 6774 {
df7492f9 6775 c = *src;
89528eb3
KH
6776 if (c & 0x80
6777 || (c < 0x20 && (c == ISO_CODE_ESC
6778 || c == ISO_CODE_SI
584948ac 6779 || c == ISO_CODE_SO)))
d46c5b12 6780 break;
4ed46869 6781 }
df7492f9 6782 coding.head_ascii = src - coding.source;
88993dfd 6783
df7492f9
KH
6784 if (src < src_end)
6785 for (i = 0; i < coding_category_raw_text; i++)
6786 {
ff0dacd7
KH
6787 category = coding_priorities[i];
6788 this = coding_categories + category;
b843d1ae 6789
df7492f9
KH
6790 if (this->id < 0)
6791 {
6792 /* No coding system of this category is defined. */
ff0dacd7 6793 detect_info.rejected |= (1 << category);
df7492f9 6794 }
ff0dacd7 6795 else if (category >= coding_category_raw_text)
89528eb3 6796 continue;
ff0dacd7
KH
6797 else if (detect_info.checked & (1 << category))
6798 {
6799 if (highest
6800 && (detect_info.found & (1 << category)))
6801 break;
6802 }
df7492f9
KH
6803 else
6804 {
ff0dacd7 6805 if ((*(this->detector)) (&coding, &detect_info)
89528eb3 6806 && highest
ff0dacd7
KH
6807 && (detect_info.found & (1 << category)))
6808 break;
df7492f9
KH
6809 }
6810 }
ec6d2bb8 6811
ec6d2bb8 6812
ff0dacd7 6813 if (detect_info.rejected == CATEGORY_MASK_ANY)
ec6d2bb8 6814 {
ff0dacd7 6815 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
6816 id = coding_categories[coding_category_raw_text].id;
6817 val = Fcons (make_number (id), Qnil);
6818 }
ff0dacd7 6819 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 6820 {
ff0dacd7 6821 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
6822 id = coding_categories[coding_category_undecided].id;
6823 val = Fcons (make_number (id), Qnil);
6824 }
6825 else if (highest)
6826 {
ff0dacd7 6827 if (detect_info.found)
ec6d2bb8 6828 {
ff0dacd7
KH
6829 detect_info.found = 1 << category;
6830 val = Fcons (make_number (this->id), Qnil);
6831 }
6832 else
6833 for (i = 0; i < coding_category_raw_text; i++)
6834 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6835 {
6836 detect_info.found = 1 << coding_priorities[i];
6837 id = coding_categories[coding_priorities[i]].id;
6838 val = Fcons (make_number (id), Qnil);
6839 break;
6840 }
6841 }
89528eb3
KH
6842 else
6843 {
ff0dacd7
KH
6844 int mask = detect_info.rejected | detect_info.found;
6845 int found = 0;
89528eb3 6846 val = Qnil;
ec6d2bb8 6847
89528eb3 6848 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
6849 {
6850 category = coding_priorities[i];
6851 if (! (mask & (1 << category)))
ec6d2bb8 6852 {
ff0dacd7
KH
6853 found |= 1 << category;
6854 id = coding_categories[category].id;
6855 val = Fcons (make_number (id), val);
6856 }
6857 }
6858 for (i = coding_category_raw_text - 1; i >= 0; i--)
6859 {
6860 category = coding_priorities[i];
6861 if (detect_info.found & (1 << category))
6862 {
6863 id = coding_categories[category].id;
6864 val = Fcons (make_number (id), val);
ec6d2bb8 6865 }
ec6d2bb8 6866 }
ff0dacd7 6867 detect_info.found |= found;
ec6d2bb8 6868 }
ec6d2bb8 6869 }
df7492f9
KH
6870 else
6871 {
ff0dacd7 6872 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 6873 val = Fcons (make_number (coding.id), Qnil);
4ed46869 6874 }
df7492f9 6875
89528eb3 6876 /* Then, detect eol-format if necessary. */
df7492f9 6877 {
89528eb3 6878 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
6879 Lisp_Object tail;
6880
89528eb3
KH
6881 if (VECTORP (eol_type))
6882 {
ff0dacd7 6883 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
6884 normal_eol = detect_eol (coding.source, src_bytes,
6885 coding_category_raw_text);
ff0dacd7
KH
6886 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6887 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
6888 utf_16_be_eol = detect_eol (coding.source, src_bytes,
6889 coding_category_utf_16_be);
ff0dacd7
KH
6890 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6891 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
6892 utf_16_le_eol = detect_eol (coding.source, src_bytes,
6893 coding_category_utf_16_le);
6894 }
6895 else
6896 {
6897 if (EQ (eol_type, Qunix))
6898 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6899 else if (EQ (eol_type, Qdos))
6900 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6901 else
6902 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6903 }
6904
df7492f9
KH
6905 for (tail = val; CONSP (tail); tail = XCDR (tail))
6906 {
89528eb3 6907 enum coding_category category;
df7492f9 6908 int this_eol;
89528eb3
KH
6909
6910 id = XINT (XCAR (tail));
6911 attrs = CODING_ID_ATTRS (id);
6912 category = XINT (CODING_ATTR_CATEGORY (attrs));
6913 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
6914 if (VECTORP (eol_type))
6915 {
89528eb3
KH
6916 if (category == coding_category_utf_16_be
6917 || category == coding_category_utf_16_be_nosig)
6918 this_eol = utf_16_be_eol;
6919 else if (category == coding_category_utf_16_le
6920 || category == coding_category_utf_16_le_nosig)
6921 this_eol = utf_16_le_eol;
df7492f9 6922 else
89528eb3
KH
6923 this_eol = normal_eol;
6924
df7492f9
KH
6925 if (this_eol == EOL_SEEN_LF)
6926 XSETCAR (tail, AREF (eol_type, 0));
6927 else if (this_eol == EOL_SEEN_CRLF)
6928 XSETCAR (tail, AREF (eol_type, 1));
6929 else if (this_eol == EOL_SEEN_CR)
6930 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
6931 else
6932 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 6933 }
89528eb3
KH
6934 else
6935 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
6936 }
6937 }
ec6d2bb8 6938
03699b14 6939 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
6940}
6941
ec6d2bb8 6942
d46c5b12
KH
6943DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6944 2, 3, 0,
48b0f3ae
PJ
6945 doc: /* Detect coding system of the text in the region between START and END.
6946Return a list of possible coding systems ordered by priority.
ec6d2bb8 6947
48b0f3ae
PJ
6948If only ASCII characters are found, it returns a list of single element
6949`undecided' or its subsidiary coding system according to a detected
6950end-of-line format.
ec6d2bb8 6951
48b0f3ae
PJ
6952If optional argument HIGHEST is non-nil, return the coding system of
6953highest priority. */)
6954 (start, end, highest)
d46c5b12
KH
6955 Lisp_Object start, end, highest;
6956{
6957 int from, to;
6958 int from_byte, to_byte;
ec6d2bb8 6959
b7826503
PJ
6960 CHECK_NUMBER_COERCE_MARKER (start);
6961 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 6962
d46c5b12
KH
6963 validate_region (&start, &end);
6964 from = XINT (start), to = XINT (end);
6965 from_byte = CHAR_TO_BYTE (from);
6966 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 6967
d46c5b12
KH
6968 if (from < GPT && to >= GPT)
6969 move_gap_both (to, to_byte);
c210f766 6970
d46c5b12 6971 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6972 to_byte - from_byte,
0a28aafb
KH
6973 !NILP (highest),
6974 !NILP (current_buffer
df7492f9
KH
6975 ->enable_multibyte_characters),
6976 Qnil);
ec6d2bb8
KH
6977}
6978
d46c5b12
KH
6979DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6980 1, 2, 0,
48b0f3ae
PJ
6981 doc: /* Detect coding system of the text in STRING.
6982Return a list of possible coding systems ordered by priority.
fb88bf2d 6983
48b0f3ae
PJ
6984If only ASCII characters are found, it returns a list of single element
6985`undecided' or its subsidiary coding system according to a detected
6986end-of-line format.
d46c5b12 6987
48b0f3ae
PJ
6988If optional argument HIGHEST is non-nil, return the coding system of
6989highest priority. */)
6990 (string, highest)
d46c5b12
KH
6991 Lisp_Object string, highest;
6992{
b7826503 6993 CHECK_STRING (string);
b73bfc1c 6994
8f924df7
KH
6995 return detect_coding_system (SDATA (string), SBYTES (string),
6996 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 6997 Qnil);
4ed46869 6998}
4ed46869 6999
b73bfc1c 7000
df7492f9
KH
7001static INLINE int
7002char_encodable_p (c, attrs)
7003 int c;
7004 Lisp_Object attrs;
05e6f5dc 7005{
df7492f9 7006 Lisp_Object tail;
df7492f9 7007 struct charset *charset;
d46c5b12 7008
df7492f9
KH
7009 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7010 CONSP (tail); tail = XCDR (tail))
e133c8fa 7011 {
df7492f9
KH
7012 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7013 if (CHAR_CHARSET_P (c, charset))
7014 break;
e133c8fa 7015 }
df7492f9 7016 return (! NILP (tail));
05e6f5dc 7017}
83fa074f 7018
fb88bf2d 7019
df7492f9
KH
7020/* Return a list of coding systems that safely encode the text between
7021 START and END. If EXCLUDE is non-nil, it is a list of coding
7022 systems not to check. The returned list doesn't contain any such
48468dac 7023 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7024 unibyte, return t. */
e077cc80 7025
df7492f9
KH
7026DEFUN ("find-coding-systems-region-internal",
7027 Ffind_coding_systems_region_internal,
7028 Sfind_coding_systems_region_internal, 2, 3, 0,
7029 doc: /* Internal use only. */)
7030 (start, end, exclude)
7031 Lisp_Object start, end, exclude;
7032{
7033 Lisp_Object coding_attrs_list, safe_codings;
7034 EMACS_INT start_byte, end_byte;
7c78e542 7035 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7036 int c;
7037 Lisp_Object tail, elt;
d46c5b12 7038
df7492f9
KH
7039 if (STRINGP (start))
7040 {
7041 if (!STRING_MULTIBYTE (start)
8f924df7 7042 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7043 return Qt;
7044 start_byte = 0;
8f924df7 7045 end_byte = SBYTES (start);
df7492f9
KH
7046 }
7047 else
d46c5b12 7048 {
df7492f9
KH
7049 CHECK_NUMBER_COERCE_MARKER (start);
7050 CHECK_NUMBER_COERCE_MARKER (end);
7051 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7052 args_out_of_range (start, end);
7053 if (NILP (current_buffer->enable_multibyte_characters))
7054 return Qt;
7055 start_byte = CHAR_TO_BYTE (XINT (start));
7056 end_byte = CHAR_TO_BYTE (XINT (end));
7057 if (XINT (end) - XINT (start) == end_byte - start_byte)
7058 return Qt;
d46c5b12 7059
e1c23804 7060 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7061 {
e1c23804
DL
7062 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7063 move_gap_both (XINT (start), start_byte);
df7492f9 7064 else
e1c23804 7065 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7066 }
7067 }
7068
df7492f9
KH
7069 coding_attrs_list = Qnil;
7070 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7071 if (NILP (exclude)
7072 || NILP (Fmemq (XCAR (tail), exclude)))
7073 {
7074 Lisp_Object attrs;
d46c5b12 7075
df7492f9
KH
7076 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7077 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7078 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7079 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7080 }
d46c5b12 7081
df7492f9 7082 if (STRINGP (start))
8f924df7 7083 p = pbeg = SDATA (start);
df7492f9
KH
7084 else
7085 p = pbeg = BYTE_POS_ADDR (start_byte);
7086 pend = p + (end_byte - start_byte);
b843d1ae 7087
df7492f9
KH
7088 while (p < pend && ASCII_BYTE_P (*p)) p++;
7089 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7090
05e6f5dc 7091 while (p < pend)
72d1a715 7092 {
df7492f9
KH
7093 if (ASCII_BYTE_P (*p))
7094 p++;
72d1a715
RS
7095 else
7096 {
df7492f9 7097 c = STRING_CHAR_ADVANCE (p);
12410ef1 7098
df7492f9
KH
7099 charset_map_loaded = 0;
7100 for (tail = coding_attrs_list; CONSP (tail);)
7101 {
7102 elt = XCAR (tail);
7103 if (NILP (elt))
7104 tail = XCDR (tail);
7105 else if (char_encodable_p (c, elt))
7106 tail = XCDR (tail);
7107 else if (CONSP (XCDR (tail)))
7108 {
7109 XSETCAR (tail, XCAR (XCDR (tail)));
7110 XSETCDR (tail, XCDR (XCDR (tail)));
7111 }
7112 else
7113 {
7114 XSETCAR (tail, Qnil);
7115 tail = XCDR (tail);
7116 }
7117 }
7118 if (charset_map_loaded)
7119 {
7120 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7121
df7492f9 7122 if (STRINGP (start))
8f924df7 7123 pbeg = SDATA (start);
df7492f9
KH
7124 else
7125 pbeg = BYTE_POS_ADDR (start_byte);
7126 p = pbeg + p_offset;
7127 pend = pbeg + pend_offset;
7128 }
7129 }
ec6d2bb8 7130 }
fb88bf2d 7131
df7492f9
KH
7132 safe_codings = Qnil;
7133 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7134 if (! NILP (XCAR (tail)))
7135 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 7136
05e6f5dc
KH
7137 return safe_codings;
7138}
4956c225 7139
d46c5b12 7140
8f924df7
KH
7141DEFUN ("unencodable-char-position", Funencodable_char_position,
7142 Sunencodable_char_position, 3, 5, 0,
7143 doc: /*
7144Return position of first un-encodable character in a region.
7145START and END specfiy the region and CODING-SYSTEM specifies the
7146encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 7147
8f924df7
KH
7148If optional 4th argument COUNT is non-nil, it specifies at most how
7149many un-encodable characters to search. In this case, the value is a
7150list of positions.
d46c5b12 7151
8f924df7
KH
7152If optional 5th argument STRING is non-nil, it is a string to search
7153for un-encodable characters. In that case, START and END are indexes
7154to the string. */)
7155 (start, end, coding_system, count, string)
7156 Lisp_Object start, end, coding_system, count, string;
7157{
7158 int n;
7159 struct coding_system coding;
7160 Lisp_Object attrs, charset_list;
7161 Lisp_Object positions;
7162 int from, to;
7163 const unsigned char *p, *stop, *pend;
7164 int ascii_compatible;
fb88bf2d 7165
8f924df7
KH
7166 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7167 attrs = CODING_ID_ATTRS (coding.id);
7168 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7169 return Qnil;
7170 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7171 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
fb88bf2d 7172
8f924df7
KH
7173 if (NILP (string))
7174 {
7175 validate_region (&start, &end);
7176 from = XINT (start);
7177 to = XINT (end);
7178 if (NILP (current_buffer->enable_multibyte_characters)
7179 || (ascii_compatible
7180 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7181 return Qnil;
7182 p = CHAR_POS_ADDR (from);
7183 pend = CHAR_POS_ADDR (to);
7184 if (from < GPT && to >= GPT)
7185 stop = GPT_ADDR;
7186 else
7187 stop = pend;
7188 }
7189 else
7190 {
7191 CHECK_STRING (string);
7192 CHECK_NATNUM (start);
7193 CHECK_NATNUM (end);
7194 from = XINT (start);
7195 to = XINT (end);
7196 if (from > to
7197 || to > SCHARS (string))
7198 args_out_of_range_3 (string, start, end);
7199 if (! STRING_MULTIBYTE (string))
7200 return Qnil;
7201 p = SDATA (string) + string_char_to_byte (string, from);
7202 stop = pend = SDATA (string) + string_char_to_byte (string, to);
7203 if (ascii_compatible && (to - from) == (pend - p))
7204 return Qnil;
7205 }
f2558efd 7206
8f924df7
KH
7207 if (NILP (count))
7208 n = 1;
7209 else
b73bfc1c 7210 {
8f924df7
KH
7211 CHECK_NATNUM (count);
7212 n = XINT (count);
b73bfc1c
KH
7213 }
7214
8f924df7
KH
7215 positions = Qnil;
7216 while (1)
d46c5b12 7217 {
8f924df7 7218 int c;
ec6d2bb8 7219
8f924df7
KH
7220 if (ascii_compatible)
7221 while (p < stop && ASCII_BYTE_P (*p))
7222 p++, from++;
7223 if (p >= stop)
0e79d667 7224 {
8f924df7
KH
7225 if (p >= pend)
7226 break;
7227 stop = pend;
7228 p = GAP_END_ADDR;
0e79d667 7229 }
ec6d2bb8 7230
8f924df7
KH
7231 c = STRING_CHAR_ADVANCE (p);
7232 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7233 && ! char_charset (c, charset_list, NULL))
ec6d2bb8 7234 {
8f924df7
KH
7235 positions = Fcons (make_number (from), positions);
7236 n--;
7237 if (n == 0)
7238 break;
ec6d2bb8
KH
7239 }
7240
8f924df7
KH
7241 from++;
7242 }
d46c5b12 7243
8f924df7
KH
7244 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7245}
d46c5b12 7246
d46c5b12 7247
df7492f9
KH
7248DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7249 Scheck_coding_systems_region, 3, 3, 0,
7250 doc: /* Check if the region is encodable by coding systems.
d46c5b12 7251
df7492f9
KH
7252START and END are buffer positions specifying the region.
7253CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 7254
df7492f9
KH
7255The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7256CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7257whole region, POS0, POS1, ... are buffer positions where non-encodable
7258characters are found.
93dec019 7259
df7492f9
KH
7260If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7261value is nil.
93dec019 7262
df7492f9
KH
7263START may be a string. In that case, check if the string is
7264encodable, and the value contains indices to the string instead of
7265buffer positions. END is ignored. */)
7266 (start, end, coding_system_list)
7267 Lisp_Object start, end, coding_system_list;
05e6f5dc 7268{
df7492f9
KH
7269 Lisp_Object list;
7270 EMACS_INT start_byte, end_byte;
7271 int pos;
7c78e542 7272 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7273 int c;
7274 Lisp_Object tail, elt;
70ad9fc4 7275
05e6f5dc
KH
7276 if (STRINGP (start))
7277 {
df7492f9 7278 if (!STRING_MULTIBYTE (start)
8f924df7 7279 && SCHARS (start) != SBYTES (start))
df7492f9
KH
7280 return Qnil;
7281 start_byte = 0;
8f924df7 7282 end_byte = SBYTES (start);
df7492f9 7283 pos = 0;
d46c5b12 7284 }
05e6f5dc 7285 else
b73bfc1c 7286 {
b7826503
PJ
7287 CHECK_NUMBER_COERCE_MARKER (start);
7288 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7289 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7290 args_out_of_range (start, end);
7291 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7292 return Qnil;
7293 start_byte = CHAR_TO_BYTE (XINT (start));
7294 end_byte = CHAR_TO_BYTE (XINT (end));
7295 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7296 return Qt;
df7492f9 7297
e1c23804 7298 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 7299 {
e1c23804
DL
7300 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7301 move_gap_both (XINT (start), start_byte);
df7492f9 7302 else
e1c23804 7303 move_gap_both (XINT (end), end_byte);
b73bfc1c 7304 }
e1c23804 7305 pos = XINT (start);
b73bfc1c 7306 }
7553d0e1 7307
df7492f9
KH
7308 list = Qnil;
7309 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 7310 {
df7492f9
KH
7311 elt = XCAR (tail);
7312 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7313 Qnil)),
7314 list);
12410ef1
KH
7315 }
7316
df7492f9 7317 if (STRINGP (start))
8f924df7 7318 p = pbeg = SDATA (start);
72d1a715 7319 else
df7492f9
KH
7320 p = pbeg = BYTE_POS_ADDR (start_byte);
7321 pend = p + (end_byte - start_byte);
4ed46869 7322
df7492f9
KH
7323 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7324 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 7325
df7492f9 7326 while (p < pend)
d46c5b12 7327 {
df7492f9
KH
7328 if (ASCII_BYTE_P (*p))
7329 p++;
e133c8fa 7330 else
05e6f5dc 7331 {
df7492f9
KH
7332 c = STRING_CHAR_ADVANCE (p);
7333
7334 charset_map_loaded = 0;
7335 for (tail = list; CONSP (tail); tail = XCDR (tail))
7336 {
7337 elt = XCDR (XCAR (tail));
7338 if (! char_encodable_p (c, XCAR (elt)))
7339 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7340 }
7341 if (charset_map_loaded)
7342 {
7343 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7344
7345 if (STRINGP (start))
8f924df7 7346 pbeg = SDATA (start);
df7492f9
KH
7347 else
7348 pbeg = BYTE_POS_ADDR (start_byte);
7349 p = pbeg + p_offset;
7350 pend = pbeg + pend_offset;
7351 }
05e6f5dc 7352 }
df7492f9 7353 pos++;
d46c5b12 7354 }
4ed46869 7355
df7492f9
KH
7356 tail = list;
7357 list = Qnil;
7358 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 7359 {
df7492f9
KH
7360 elt = XCAR (tail);
7361 if (CONSP (XCDR (XCDR (elt))))
7362 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7363 list);
ec6d2bb8 7364 }
2b4f9037 7365
df7492f9 7366 return list;
d46c5b12
KH
7367}
7368
3fd9494b 7369
b73bfc1c
KH
7370
7371Lisp_Object
df7492f9
KH
7372code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7373 Lisp_Object start, end, coding_system, dst_object;
7374 int encodep, norecord;
4ed46869 7375{
3a73fa5d 7376 struct coding_system coding;
df7492f9
KH
7377 EMACS_INT from, from_byte, to, to_byte;
7378 Lisp_Object src_object;
4ed46869 7379
b7826503
PJ
7380 CHECK_NUMBER_COERCE_MARKER (start);
7381 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7382 if (NILP (coding_system))
7383 coding_system = Qno_conversion;
7384 else
7385 CHECK_CODING_SYSTEM (coding_system);
7386 src_object = Fcurrent_buffer ();
7387 if (NILP (dst_object))
7388 dst_object = src_object;
7389 else if (! EQ (dst_object, Qt))
7390 CHECK_BUFFER (dst_object);
3a73fa5d 7391
d46c5b12
KH
7392 validate_region (&start, &end);
7393 from = XFASTINT (start);
df7492f9 7394 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7395 to = XFASTINT (end);
df7492f9 7396 to_byte = CHAR_TO_BYTE (to);
764ca8da 7397
df7492f9
KH
7398 setup_coding_system (coding_system, &coding);
7399 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 7400
df7492f9
KH
7401 if (encodep)
7402 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7403 dst_object);
7404 else
7405 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7406 dst_object);
7407 if (! norecord)
7408 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7409
df7492f9
KH
7410 if (coding.result != CODING_RESULT_SUCCESS)
7411 error ("Code conversion error: %d", coding.result);
b73bfc1c 7412
df7492f9
KH
7413 return (BUFFERP (dst_object)
7414 ? make_number (coding.produced_char)
7415 : coding.dst_object);
4031e2bf 7416}
78108bcd 7417
4ed46869 7418
4031e2bf 7419DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7420 3, 4, "r\nzCoding system: ",
48b0f3ae 7421 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7422When called from a program, takes four arguments:
7423 START, END, CODING-SYSTEM, and DESTINATION.
7424START and END are buffer positions.
8844fa83 7425
df7492f9
KH
7426Optional 4th arguments DESTINATION specifies where the decoded text goes.
7427If nil, the region between START and END is replace by the decoded text.
7428If buffer, the decoded text is inserted in the buffer.
7429If t, the decoded text is returned.
8844fa83 7430
48b0f3ae
PJ
7431This function sets `last-coding-system-used' to the precise coding system
7432used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7433not fully specified.)
7434It returns the length of the decoded text. */)
df7492f9
KH
7435 (start, end, coding_system, destination)
7436 Lisp_Object start, end, coding_system, destination;
4031e2bf 7437{
df7492f9 7438 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 7439}
8844fa83 7440
3a73fa5d 7441DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7442 3, 4, "r\nzCoding system: ",
7443 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7444When called from a program, takes three arguments:
7445START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 7446
df7492f9
KH
7447Optional 4th arguments DESTINATION specifies where the encoded text goes.
7448If nil, the region between START and END is replace by the encoded text.
7449If buffer, the encoded text is inserted in the buffer.
7450If t, the encoded text is returned.
2391eaa4 7451
48b0f3ae
PJ
7452This function sets `last-coding-system-used' to the precise coding system
7453used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7454not fully specified.)
7455It returns the length of the encoded text. */)
df7492f9
KH
7456 (start, end, coding_system, destination)
7457 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7458{
df7492f9 7459 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
7460}
7461
7462Lisp_Object
df7492f9
KH
7463code_convert_string (string, coding_system, dst_object,
7464 encodep, nocopy, norecord)
7465 Lisp_Object string, coding_system, dst_object;
7466 int encodep, nocopy, norecord;
b73bfc1c 7467{
4031e2bf 7468 struct coding_system coding;
df7492f9 7469 EMACS_INT chars, bytes;
ec6d2bb8 7470
b7826503 7471 CHECK_STRING (string);
d46c5b12 7472 if (NILP (coding_system))
4956c225 7473 {
df7492f9
KH
7474 if (! norecord)
7475 Vlast_coding_system_used = Qno_conversion;
7476 if (NILP (dst_object))
7477 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 7478 }
b73bfc1c 7479
df7492f9
KH
7480 if (NILP (coding_system))
7481 coding_system = Qno_conversion;
7482 else
7483 CHECK_CODING_SYSTEM (coding_system);
7484 if (NILP (dst_object))
7485 dst_object = Qt;
7486 else if (! EQ (dst_object, Qt))
7487 CHECK_BUFFER (dst_object);
73be902c 7488
df7492f9 7489 setup_coding_system (coding_system, &coding);
d46c5b12 7490 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
7491 chars = SCHARS (string);
7492 bytes = SBYTES (string);
df7492f9
KH
7493 if (encodep)
7494 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7495 else
7496 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7497 if (! norecord)
7498 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 7499
df7492f9
KH
7500 if (coding.result != CODING_RESULT_SUCCESS)
7501 error ("Code conversion error: %d", coding.result);
2391eaa4 7502
df7492f9
KH
7503 return (BUFFERP (dst_object)
7504 ? make_number (coding.produced_char)
7505 : coding.dst_object);
4ed46869 7506}
73be902c 7507
b73bfc1c 7508
ecec61c1 7509/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 7510 Do not set Vlast_coding_system_used.
4ed46869 7511
ec6d2bb8
KH
7512 This function is called only from macros DECODE_FILE and
7513 ENCODE_FILE, thus we ignore character composition. */
4ed46869 7514
ecec61c1
KH
7515Lisp_Object
7516code_convert_string_norecord (string, coding_system, encodep)
7517 Lisp_Object string, coding_system;
7518 int encodep;
4ed46869 7519{
0be8721c 7520 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
7521}
7522
4ed46869 7523
df7492f9
KH
7524DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7525 2, 4, 0,
7526 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7527
7528Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7529if the decoding operation is trivial.
ecec61c1 7530
df7492f9 7531Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 7532inserted in BUFFER instead of returned as a string. In this case,
df7492f9 7533the return value is BUFFER.
ecec61c1 7534
df7492f9
KH
7535This function sets `last-coding-system-used' to the precise coding system
7536used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7537not fully specified. */)
7538 (string, coding_system, nocopy, buffer)
7539 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 7540{
df7492f9
KH
7541 return code_convert_string (string, coding_system, buffer,
7542 0, ! NILP (nocopy), 0);
4ed46869
KH
7543}
7544
df7492f9
KH
7545DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7546 2, 4, 0,
7547 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7548
7549Optional third arg NOCOPY non-nil means it is OK to return STRING
7550itself if the encoding operation is trivial.
7551
7552Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 7553inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
7554the return value is BUFFER.
7555
7556This function sets `last-coding-system-used' to the precise coding system
7557used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7558not fully specified.) */)
7559 (string, coding_system, nocopy, buffer)
7560 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 7561{
df7492f9 7562 return code_convert_string (string, coding_system, buffer,
c197f191 7563 1, ! NILP (nocopy), 1);
4ed46869 7564}
df7492f9 7565
3a73fa5d 7566\f
4ed46869 7567DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7568 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7569Return the corresponding character. */)
7570 (code)
4ed46869 7571 Lisp_Object code;
4ed46869 7572{
df7492f9
KH
7573 Lisp_Object spec, attrs, val;
7574 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7575 int c;
4ed46869 7576
df7492f9
KH
7577 CHECK_NATNUM (code);
7578 c = XFASTINT (code);
7579 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7580 attrs = AREF (spec, 0);
4ed46869 7581
df7492f9
KH
7582 if (ASCII_BYTE_P (c)
7583 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7584 return code;
4ed46869 7585
df7492f9
KH
7586 val = CODING_ATTR_CHARSET_LIST (attrs);
7587 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7588 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7589 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 7590
df7492f9
KH
7591 if (c <= 0x7F)
7592 charset = charset_roman;
7593 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7594 {
df7492f9
KH
7595 charset = charset_kana;
7596 c -= 0x80;
4ed46869 7597 }
55ab7be3 7598 else
4ed46869 7599 {
004068e4 7600 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7601
7602 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7603 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7604 error ("Invalid code: %d", code);
7605 SJIS_TO_JIS (c);
7606 charset = charset_kanji;
4ed46869 7607 }
df7492f9
KH
7608 c = DECODE_CHAR (charset, c);
7609 if (c < 0)
7610 error ("Invalid code: %d", code);
7611 return make_number (c);
93dec019 7612}
4ed46869 7613
48b0f3ae 7614
4ed46869 7615DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7616 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7617Return the corresponding code in SJIS. */)
7618 (ch)
df7492f9 7619 Lisp_Object ch;
4ed46869 7620{
df7492f9
KH
7621 Lisp_Object spec, attrs, charset_list;
7622 int c;
7623 struct charset *charset;
7624 unsigned code;
48b0f3ae 7625
df7492f9
KH
7626 CHECK_CHARACTER (ch);
7627 c = XFASTINT (ch);
7628 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7629 attrs = AREF (spec, 0);
7630
7631 if (ASCII_CHAR_P (c)
7632 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7633 return ch;
7634
7635 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7636 charset = char_charset (c, charset_list, &code);
7637 if (code == CHARSET_INVALID_CODE (charset))
7638 error ("Can't encode by shift_jis encoding: %d", c);
7639 JIS_TO_SJIS (code);
7640
7641 return make_number (code);
4ed46869
KH
7642}
7643
7644DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7645 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7646Return the corresponding character. */)
7647 (code)
4ed46869 7648 Lisp_Object code;
d46c5b12 7649{
df7492f9
KH
7650 Lisp_Object spec, attrs, val;
7651 struct charset *charset_roman, *charset_big5, *charset;
7652 int c;
6289dd10 7653
df7492f9
KH
7654 CHECK_NATNUM (code);
7655 c = XFASTINT (code);
7656 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7657 attrs = AREF (spec, 0);
4ed46869 7658
df7492f9
KH
7659 if (ASCII_BYTE_P (c)
7660 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7661 return code;
6289dd10 7662
df7492f9
KH
7663 val = CODING_ATTR_CHARSET_LIST (attrs);
7664 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7665 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 7666
df7492f9
KH
7667 if (c <= 0x7F)
7668 charset = charset_roman;
c28a9453
KH
7669 else
7670 {
df7492f9
KH
7671 int b1 = c >> 8, b2 = c & 0x7F;
7672 if (b1 < 0xA1 || b1 > 0xFE
7673 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7674 error ("Invalid code: %d", code);
7675 charset = charset_big5;
c28a9453 7676 }
df7492f9
KH
7677 c = DECODE_CHAR (charset, (unsigned )c);
7678 if (c < 0)
7679 error ("Invalid code: %d", code);
7680 return make_number (c);
d46c5b12 7681}
6289dd10 7682
4ed46869 7683DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7684 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7685Return the corresponding character code in Big5. */)
7686 (ch)
4ed46869
KH
7687 Lisp_Object ch;
7688{
df7492f9
KH
7689 Lisp_Object spec, attrs, charset_list;
7690 struct charset *charset;
7691 int c;
7692 unsigned code;
7693
7694 CHECK_CHARACTER (ch);
7695 c = XFASTINT (ch);
7696 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7697 attrs = AREF (spec, 0);
7698 if (ASCII_CHAR_P (c)
7699 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7700 return ch;
7701
7702 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7703 charset = char_charset (c, charset_list, &code);
7704 if (code == CHARSET_INVALID_CODE (charset))
7705 error ("Can't encode by Big5 encoding: %d", c);
7706
7707 return make_number (code);
4ed46869 7708}
48b0f3ae 7709
3a73fa5d 7710\f
1ba9e4ab
KH
7711DEFUN ("set-terminal-coding-system-internal",
7712 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7713 Sset_terminal_coding_system_internal, 1, 1, 0,
7714 doc: /* Internal use only. */)
7715 (coding_system)
b74e4686 7716 Lisp_Object coding_system;
4ed46869 7717{
b7826503 7718 CHECK_SYMBOL (coding_system);
df7492f9
KH
7719 setup_coding_system (Fcheck_coding_system (coding_system),
7720 &terminal_coding);
48b0f3ae 7721
70c22245 7722 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7723 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7724 /* Characer composition should be disabled. */
7725 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7726 terminal_coding.src_multibyte = 1;
7727 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7728 return Qnil;
7729}
7730
c4825358
KH
7731DEFUN ("set-safe-terminal-coding-system-internal",
7732 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7733 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7734 doc: /* Internal use only. */)
48b0f3ae 7735 (coding_system)
b74e4686 7736 Lisp_Object coding_system;
d46c5b12 7737{
b7826503 7738 CHECK_SYMBOL (coding_system);
c4825358
KH
7739 setup_coding_system (Fcheck_coding_system (coding_system),
7740 &safe_terminal_coding);
df7492f9
KH
7741 /* Characer composition should be disabled. */
7742 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7743 safe_terminal_coding.src_multibyte = 1;
7744 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7745 return Qnil;
7746}
4ed46869 7747
4ed46869
KH
7748DEFUN ("terminal-coding-system",
7749 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7750 doc: /* Return coding system specified for terminal output. */)
7751 ()
4ed46869 7752{
df7492f9 7753 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7754}
7755
1ba9e4ab
KH
7756DEFUN ("set-keyboard-coding-system-internal",
7757 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7758 Sset_keyboard_coding_system_internal, 1, 1, 0,
7759 doc: /* Internal use only. */)
7760 (coding_system)
4ed46869
KH
7761 Lisp_Object coding_system;
7762{
b7826503 7763 CHECK_SYMBOL (coding_system);
df7492f9
KH
7764 setup_coding_system (Fcheck_coding_system (coding_system),
7765 &keyboard_coding);
7766 /* Characer composition should be disabled. */
7767 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7768 return Qnil;
7769}
7770
7771DEFUN ("keyboard-coding-system",
7772 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7773 doc: /* Return coding system specified for decoding keyboard input. */)
7774 ()
4ed46869 7775{
df7492f9 7776 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7777}
7778
4ed46869 7779\f
a5d301df
KH
7780DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7781 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7782 doc: /* Choose a coding system for an operation based on the target name.
7783The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7784DECODING-SYSTEM is the coding system to use for decoding
7785\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7786for encoding (in case OPERATION does encoding).
05e6f5dc 7787
48b0f3ae
PJ
7788The first argument OPERATION specifies an I/O primitive:
7789 For file I/O, `insert-file-contents' or `write-region'.
7790 For process I/O, `call-process', `call-process-region', or `start-process'.
7791 For network I/O, `open-network-stream'.
05e6f5dc 7792
48b0f3ae
PJ
7793The remaining arguments should be the same arguments that were passed
7794to the primitive. Depending on which primitive, one of those arguments
7795is selected as the TARGET. For example, if OPERATION does file I/O,
7796whichever argument specifies the file name is TARGET.
05e6f5dc 7797
48b0f3ae
PJ
7798TARGET has a meaning which depends on OPERATION:
7799 For file I/O, TARGET is a file name.
7800 For process I/O, TARGET is a process name.
7801 For network I/O, TARGET is a service name or a port number
05e6f5dc 7802
48b0f3ae
PJ
7803This function looks up what specified for TARGET in,
7804`file-coding-system-alist', `process-coding-system-alist',
7805or `network-coding-system-alist' depending on OPERATION.
7806They may specify a coding system, a cons of coding systems,
7807or a function symbol to call.
7808In the last case, we call the function with one argument,
7809which is a list of all the arguments given to this function.
7810
7811usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7812 (nargs, args)
4ed46869
KH
7813 int nargs;
7814 Lisp_Object *args;
6b89e3aa 7815{
4ed46869
KH
7816 Lisp_Object operation, target_idx, target, val;
7817 register Lisp_Object chain;
177c0ea7 7818
4ed46869
KH
7819 if (nargs < 2)
7820 error ("Too few arguments");
7821 operation = args[0];
7822 if (!SYMBOLP (operation)
7823 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7824 error ("Invalid first arguement");
4ed46869
KH
7825 if (nargs < 1 + XINT (target_idx))
7826 error ("Too few arguments for operation: %s",
8f924df7 7827 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
7828 target = args[XINT (target_idx) + 1];
7829 if (!(STRINGP (target)
7830 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7831 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7832
2e34157c
RS
7833 chain = ((EQ (operation, Qinsert_file_contents)
7834 || EQ (operation, Qwrite_region))
02ba4723 7835 ? Vfile_coding_system_alist
2e34157c 7836 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7837 ? Vnetwork_coding_system_alist
7838 : Vprocess_coding_system_alist));
4ed46869
KH
7839 if (NILP (chain))
7840 return Qnil;
7841
03699b14 7842 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 7843 {
f44d27ce 7844 Lisp_Object elt;
6b89e3aa 7845
df7492f9 7846 elt = XCAR (chain);
4ed46869
KH
7847 if (CONSP (elt)
7848 && ((STRINGP (target)
03699b14
KR
7849 && STRINGP (XCAR (elt))
7850 && fast_string_match (XCAR (elt), target) >= 0)
7851 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 7852 {
03699b14 7853 val = XCDR (elt);
b19fd4c5
KH
7854 /* Here, if VAL is both a valid coding system and a valid
7855 function symbol, we return VAL as a coding system. */
02ba4723
KH
7856 if (CONSP (val))
7857 return val;
7858 if (! SYMBOLP (val))
7859 return Qnil;
7860 if (! NILP (Fcoding_system_p (val)))
7861 return Fcons (val, val);
b19fd4c5 7862 if (! NILP (Ffboundp (val)))
6b89e3aa 7863 {
b19fd4c5
KH
7864 val = call1 (val, Flist (nargs, args));
7865 if (CONSP (val))
7866 return val;
7867 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7868 return Fcons (val, val);
6b89e3aa 7869 }
02ba4723 7870 return Qnil;
6b89e3aa
KH
7871 }
7872 }
4ed46869 7873 return Qnil;
6b89e3aa
KH
7874}
7875
df7492f9 7876DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7877 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7878 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 7879If multiple coding systems belongs to the same category,
a3181084
DL
7880all but the first one are ignored.
7881
7882usage: (set-coding-system-priority ...) */)
df7492f9
KH
7883 (nargs, args)
7884 int nargs;
7885 Lisp_Object *args;
7886{
7887 int i, j;
7888 int changed[coding_category_max];
7889 enum coding_category priorities[coding_category_max];
7890
7891 bzero (changed, sizeof changed);
6b89e3aa 7892
df7492f9 7893 for (i = j = 0; i < nargs; i++)
6b89e3aa 7894 {
df7492f9
KH
7895 enum coding_category category;
7896 Lisp_Object spec, attrs;
6b89e3aa 7897
df7492f9
KH
7898 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7899 attrs = AREF (spec, 0);
7900 category = XINT (CODING_ATTR_CATEGORY (attrs));
7901 if (changed[category])
7902 /* Ignore this coding system because a coding system of the
7903 same category already had a higher priority. */
7904 continue;
7905 changed[category] = 1;
7906 priorities[j++] = category;
7907 if (coding_categories[category].id >= 0
7908 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7909 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 7910 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 7911 }
6b89e3aa 7912
df7492f9
KH
7913 /* Now we have decided top J priorities. Reflect the order of the
7914 original priorities to the remaining priorities. */
6b89e3aa 7915
df7492f9 7916 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 7917 {
df7492f9
KH
7918 while (j < coding_category_max
7919 && changed[coding_priorities[j]])
7920 j++;
7921 if (j == coding_category_max)
7922 abort ();
7923 priorities[i] = coding_priorities[j];
7924 }
6b89e3aa 7925
df7492f9 7926 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 7927
ff563fce
KH
7928 /* Update `coding-category-list'. */
7929 Vcoding_category_list = Qnil;
7930 for (i = coding_category_max - 1; i >= 0; i--)
7931 Vcoding_category_list
7932 = Fcons (AREF (Vcoding_category_table, priorities[i]),
7933 Vcoding_category_list);
6b89e3aa 7934
df7492f9 7935 return Qnil;
6b89e3aa
KH
7936}
7937
df7492f9
KH
7938DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7939 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7940 doc: /* Return a list of coding systems ordered by their priorities.
7941HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7942 (highestp)
7943 Lisp_Object highestp;
d46c5b12
KH
7944{
7945 int i;
df7492f9 7946 Lisp_Object val;
6b89e3aa 7947
df7492f9 7948 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7949 {
df7492f9
KH
7950 enum coding_category category = coding_priorities[i];
7951 int id = coding_categories[category].id;
7952 Lisp_Object attrs;
068a9dbd 7953
df7492f9
KH
7954 if (id < 0)
7955 continue;
7956 attrs = CODING_ID_ATTRS (id);
7957 if (! NILP (highestp))
7958 return CODING_ATTR_BASE_NAME (attrs);
7959 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7960 }
7961 return Fnreverse (val);
7962}
068a9dbd 7963
f0064e1f 7964static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
7965
7966static Lisp_Object
df7492f9
KH
7967make_subsidiaries (base)
7968 Lisp_Object base;
068a9dbd 7969{
df7492f9 7970 Lisp_Object subsidiaries;
8f924df7 7971 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
7972 char *buf = (char *) alloca (base_name_len + 6);
7973 int i;
068a9dbd 7974
8f924df7 7975 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
7976 subsidiaries = Fmake_vector (make_number (3), Qnil);
7977 for (i = 0; i < 3; i++)
068a9dbd 7978 {
df7492f9
KH
7979 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7980 ASET (subsidiaries, i, intern (buf));
068a9dbd 7981 }
df7492f9 7982 return subsidiaries;
068a9dbd
KH
7983}
7984
7985
df7492f9
KH
7986DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7987 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7988 doc: /* For internal use only.
7989usage: (define-coding-system-internal ...) */)
df7492f9
KH
7990 (nargs, args)
7991 int nargs;
7992 Lisp_Object *args;
068a9dbd 7993{
df7492f9
KH
7994 Lisp_Object name;
7995 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7996 Lisp_Object attrs; /* Vector of attributes. */
7997 Lisp_Object eol_type;
7998 Lisp_Object aliases;
7999 Lisp_Object coding_type, charset_list, safe_charsets;
8000 enum coding_category category;
8001 Lisp_Object tail, val;
8002 int max_charset_id = 0;
8003 int i;
068a9dbd 8004
df7492f9
KH
8005 if (nargs < coding_arg_max)
8006 goto short_args;
068a9dbd 8007
df7492f9 8008 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 8009
df7492f9
KH
8010 name = args[coding_arg_name];
8011 CHECK_SYMBOL (name);
8012 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8013
df7492f9
KH
8014 val = args[coding_arg_mnemonic];
8015 if (! STRINGP (val))
8016 CHECK_CHARACTER (val);
8017 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8018
df7492f9
KH
8019 coding_type = args[coding_arg_coding_type];
8020 CHECK_SYMBOL (coding_type);
8021 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8022
df7492f9
KH
8023 charset_list = args[coding_arg_charset_list];
8024 if (SYMBOLP (charset_list))
8025 {
8026 if (EQ (charset_list, Qiso_2022))
8027 {
8028 if (! EQ (coding_type, Qiso_2022))
8029 error ("Invalid charset-list");
8030 charset_list = Viso_2022_charset_list;
8031 }
8032 else if (EQ (charset_list, Qemacs_mule))
8033 {
8034 if (! EQ (coding_type, Qemacs_mule))
8035 error ("Invalid charset-list");
8036 charset_list = Vemacs_mule_charset_list;
8037 }
8038 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8039 if (max_charset_id < XFASTINT (XCAR (tail)))
8040 max_charset_id = XFASTINT (XCAR (tail));
8041 }
068a9dbd
KH
8042 else
8043 {
df7492f9
KH
8044 charset_list = Fcopy_sequence (charset_list);
8045 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
068a9dbd 8046 {
df7492f9
KH
8047 struct charset *charset;
8048
8049 val = Fcar (tail);
8050 CHECK_CHARSET_GET_CHARSET (val, charset);
8051 if (EQ (coding_type, Qiso_2022)
8052 ? CHARSET_ISO_FINAL (charset) < 0
8053 : EQ (coding_type, Qemacs_mule)
8054 ? CHARSET_EMACS_MULE_ID (charset) < 0
8055 : 0)
8056 error ("Can't handle charset `%s'",
8f924df7 8057 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8058
8f924df7 8059 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8060 if (max_charset_id < charset->id)
8061 max_charset_id = charset->id;
068a9dbd
KH
8062 }
8063 }
df7492f9 8064 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8065
df7492f9
KH
8066 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8067 make_number (255));
8068 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8069 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8070 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8071
584948ac 8072 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8073
df7492f9
KH
8074 val = args[coding_arg_decode_translation_table];
8075 if (! NILP (val))
8076 CHECK_CHAR_TABLE (val);
8077 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8078
df7492f9
KH
8079 val = args[coding_arg_encode_translation_table];
8080 if (! NILP (val))
8081 CHECK_CHAR_TABLE (val);
8082 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8083
df7492f9
KH
8084 val = args[coding_arg_post_read_conversion];
8085 CHECK_SYMBOL (val);
8086 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8087
df7492f9
KH
8088 val = args[coding_arg_pre_write_conversion];
8089 CHECK_SYMBOL (val);
8090 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 8091
df7492f9
KH
8092 val = args[coding_arg_default_char];
8093 if (NILP (val))
8094 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8095 else
8096 {
8f924df7 8097 CHECK_CHARACTER (val);
df7492f9
KH
8098 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8099 }
4031e2bf 8100
8f924df7
KH
8101 val = args[coding_arg_for_unibyte];
8102 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 8103
df7492f9
KH
8104 val = args[coding_arg_plist];
8105 CHECK_LIST (val);
8106 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 8107
df7492f9
KH
8108 if (EQ (coding_type, Qcharset))
8109 {
4187a77d 8110 Lisp_Object list;
c7c66a95
KH
8111 /* Generate a lisp vector of 256 elements. Each element is nil,
8112 integer, or a list of charset IDs.
3a73fa5d 8113
c7c66a95
KH
8114 If Nth element is nil, the byte code N is invalid in this
8115 coding system.
4ed46869 8116
c7c66a95
KH
8117 If Nth element is a number NUM, N is the first byte of a
8118 charset whose ID is NUM.
4ed46869 8119
c7c66a95
KH
8120 If Nth element is a list of charset IDs, N is the first byte
8121 of one of them. The list is sorted by dimensions of the
8122 charsets. A charset of smaller dimension comes firtst.
8123 */
4187a77d
KH
8124 for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
8125 {
8126 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
5f1cd180 8127
4187a77d
KH
8128 if (charset->method == CHARSET_METHOD_SUPERSET)
8129 {
8130 val = CHARSET_SUPERSET (charset);
8131 for (; CONSP (val); val = XCDR (val))
8f924df7 8132 list = Fcons (XCAR (XCAR (val)), list);
4187a77d
KH
8133 }
8134 else
8135 list = Fcons (XCAR (tail), list);
8136 }
ec6d2bb8 8137
df7492f9 8138 val = Fmake_vector (make_number (256), Qnil);
4ed46869 8139
4187a77d 8140 for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
df7492f9 8141 {
c7c66a95
KH
8142 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8143 int dim = CHARSET_DIMENSION (charset);
8144 int idx = (dim - 1) * 4;
4ed46869 8145
584948ac
KH
8146 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8147 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 8148
15d143f7
KH
8149 for (i = charset->code_space[idx];
8150 i <= charset->code_space[idx + 1]; i++)
8151 {
c7c66a95
KH
8152 Lisp_Object tmp, tmp2;
8153 int dim2;
ec6d2bb8 8154
c7c66a95
KH
8155 tmp = AREF (val, i);
8156 if (NILP (tmp))
8157 tmp = XCAR (tail);
8158 else if (NUMBERP (tmp))
8159 {
8160 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8161 if (dim < dim2)
c7c66a95 8162 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8163 else
8164 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8165 }
15d143f7 8166 else
c7c66a95
KH
8167 {
8168 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8169 {
8170 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8171 if (dim < dim2)
8172 break;
8173 }
8174 if (NILP (tmp2))
8175 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8176 else
8177 {
8178 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8179 XSETCAR (tmp2, XCAR (tail));
8180 }
8181 }
8182 ASET (val, i, tmp);
15d143f7 8183 }
df7492f9
KH
8184 }
8185 ASET (attrs, coding_attr_charset_valids, val);
8186 category = coding_category_charset;
8187 }
8188 else if (EQ (coding_type, Qccl))
8189 {
8190 Lisp_Object valids;
ecec61c1 8191
df7492f9
KH
8192 if (nargs < coding_arg_ccl_max)
8193 goto short_args;
ecec61c1 8194
df7492f9
KH
8195 val = args[coding_arg_ccl_decoder];
8196 CHECK_CCL_PROGRAM (val);
8197 if (VECTORP (val))
8198 val = Fcopy_sequence (val);
8199 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 8200
df7492f9
KH
8201 val = args[coding_arg_ccl_encoder];
8202 CHECK_CCL_PROGRAM (val);
8203 if (VECTORP (val))
8204 val = Fcopy_sequence (val);
8205 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 8206
df7492f9
KH
8207 val = args[coding_arg_ccl_valids];
8208 valids = Fmake_string (make_number (256), make_number (0));
8209 for (tail = val; !NILP (tail); tail = Fcdr (tail))
8210 {
8dcbea82 8211 int from, to;
ecec61c1 8212
df7492f9
KH
8213 val = Fcar (tail);
8214 if (INTEGERP (val))
8dcbea82
KH
8215 {
8216 from = to = XINT (val);
8217 if (from < 0 || from > 255)
8218 args_out_of_range_3 (val, make_number (0), make_number (255));
8219 }
df7492f9
KH
8220 else
8221 {
df7492f9 8222 CHECK_CONS (val);
8f924df7
KH
8223 CHECK_NATNUM_CAR (val);
8224 CHECK_NATNUM_CDR (val);
df7492f9 8225 from = XINT (XCAR (val));
8f924df7 8226 if (from > 255)
8dcbea82
KH
8227 args_out_of_range_3 (XCAR (val),
8228 make_number (0), make_number (255));
df7492f9 8229 to = XINT (XCDR (val));
8dcbea82
KH
8230 if (to < from || to > 255)
8231 args_out_of_range_3 (XCDR (val),
8232 XCAR (val), make_number (255));
df7492f9 8233 }
8dcbea82 8234 for (i = from; i <= to; i++)
8f924df7 8235 SSET (valids, i, 1);
df7492f9
KH
8236 }
8237 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 8238
df7492f9 8239 category = coding_category_ccl;
55ab7be3 8240 }
df7492f9 8241 else if (EQ (coding_type, Qutf_16))
55ab7be3 8242 {
df7492f9 8243 Lisp_Object bom, endian;
4ed46869 8244
584948ac 8245 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 8246
df7492f9
KH
8247 if (nargs < coding_arg_utf16_max)
8248 goto short_args;
4ed46869 8249
df7492f9
KH
8250 bom = args[coding_arg_utf16_bom];
8251 if (! NILP (bom) && ! EQ (bom, Qt))
8252 {
8253 CHECK_CONS (bom);
8f924df7
KH
8254 val = XCAR (bom);
8255 CHECK_CODING_SYSTEM (val);
8256 val = XCDR (bom);
8257 CHECK_CODING_SYSTEM (val);
df7492f9
KH
8258 }
8259 ASET (attrs, coding_attr_utf_16_bom, bom);
8260
8261 endian = args[coding_arg_utf16_endian];
b49a1807
KH
8262 CHECK_SYMBOL (endian);
8263 if (NILP (endian))
8264 endian = Qbig;
8265 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 8266 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
8267 ASET (attrs, coding_attr_utf_16_endian, endian);
8268
8269 category = (CONSP (bom)
8270 ? coding_category_utf_16_auto
8271 : NILP (bom)
b49a1807 8272 ? (EQ (endian, Qbig)
df7492f9
KH
8273 ? coding_category_utf_16_be_nosig
8274 : coding_category_utf_16_le_nosig)
b49a1807 8275 : (EQ (endian, Qbig)
df7492f9
KH
8276 ? coding_category_utf_16_be
8277 : coding_category_utf_16_le));
8278 }
8279 else if (EQ (coding_type, Qiso_2022))
8280 {
8281 Lisp_Object initial, reg_usage, request, flags;
4776e638 8282 int i;
1397dc18 8283
df7492f9
KH
8284 if (nargs < coding_arg_iso2022_max)
8285 goto short_args;
8286
8287 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8288 CHECK_VECTOR (initial);
8289 for (i = 0; i < 4; i++)
8290 {
8291 val = Faref (initial, make_number (i));
8292 if (! NILP (val))
8293 {
584948ac
KH
8294 struct charset *charset;
8295
8296 CHECK_CHARSET_GET_CHARSET (val, charset);
8297 ASET (initial, i, make_number (CHARSET_ID (charset)));
8298 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8299 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8300 }
8301 else
8302 ASET (initial, i, make_number (-1));
8303 }
8304
8305 reg_usage = args[coding_arg_iso2022_reg_usage];
8306 CHECK_CONS (reg_usage);
8f924df7
KH
8307 CHECK_NUMBER_CAR (reg_usage);
8308 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
8309
8310 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8311 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8312 {
df7492f9 8313 int id;
8f924df7 8314 Lisp_Object tmp;
df7492f9
KH
8315
8316 val = Fcar (tail);
8317 CHECK_CONS (val);
8f924df7
KH
8318 tmp = XCAR (val);
8319 CHECK_CHARSET_GET_ID (tmp, id);
8320 CHECK_NATNUM_CDR (val);
df7492f9
KH
8321 if (XINT (XCDR (val)) >= 4)
8322 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 8323 XSETCAR (val, make_number (id));
1397dc18 8324 }
4ed46869 8325
df7492f9
KH
8326 flags = args[coding_arg_iso2022_flags];
8327 CHECK_NATNUM (flags);
8328 i = XINT (flags);
8329 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8330 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8331
8332 ASET (attrs, coding_attr_iso_initial, initial);
8333 ASET (attrs, coding_attr_iso_usage, reg_usage);
8334 ASET (attrs, coding_attr_iso_request, request);
8335 ASET (attrs, coding_attr_iso_flags, flags);
8336 setup_iso_safe_charsets (attrs);
8337
8338 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8339 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8340 | CODING_ISO_FLAG_SINGLE_SHIFT))
8341 ? coding_category_iso_7_else
8342 : EQ (args[coding_arg_charset_list], Qiso_2022)
8343 ? coding_category_iso_7
8344 : coding_category_iso_7_tight);
8345 else
8346 {
8347 int id = XINT (AREF (initial, 1));
8348
c6fb6e98 8349 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8350 || EQ (args[coding_arg_charset_list], Qiso_2022)
8351 || id < 0)
8352 ? coding_category_iso_8_else
8353 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8354 ? coding_category_iso_8_1
8355 : coding_category_iso_8_2);
8356 }
0ce7886f
KH
8357 if (category != coding_category_iso_8_1
8358 && category != coding_category_iso_8_2)
8359 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8360 }
8361 else if (EQ (coding_type, Qemacs_mule))
c28a9453 8362 {
df7492f9
KH
8363 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8364 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8365 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 8366 category = coding_category_emacs_mule;
c28a9453 8367 }
df7492f9 8368 else if (EQ (coding_type, Qshift_jis))
c28a9453 8369 {
df7492f9
KH
8370
8371 struct charset *charset;
8372
8373 if (XINT (Flength (charset_list)) != 3)
8374 error ("There should be just three charsets");
8375
8376 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8377 if (CHARSET_DIMENSION (charset) != 1)
8378 error ("Dimension of charset %s is not one",
8f924df7 8379 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8380 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8381 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8382
8383 charset_list = XCDR (charset_list);
8384 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8385 if (CHARSET_DIMENSION (charset) != 1)
8386 error ("Dimension of charset %s is not one",
8f924df7 8387 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
8388
8389 charset_list = XCDR (charset_list);
8390 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8391 if (CHARSET_DIMENSION (charset) != 2)
8392 error ("Dimension of charset %s is not two",
8f924df7 8393 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
8394
8395 category = coding_category_sjis;
8396 Vsjis_coding_system = name;
c28a9453 8397 }
df7492f9
KH
8398 else if (EQ (coding_type, Qbig5))
8399 {
8400 struct charset *charset;
4ed46869 8401
df7492f9
KH
8402 if (XINT (Flength (charset_list)) != 2)
8403 error ("There should be just two charsets");
8404
8405 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8406 if (CHARSET_DIMENSION (charset) != 1)
8407 error ("Dimension of charset %s is not one",
8f924df7 8408 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8409 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8410 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8411
8412 charset_list = XCDR (charset_list);
8413 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8414 if (CHARSET_DIMENSION (charset) != 2)
8415 error ("Dimension of charset %s is not two",
8f924df7 8416 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 8417
df7492f9
KH
8418 category = coding_category_big5;
8419 Vbig5_coding_system = name;
8420 }
8421 else if (EQ (coding_type, Qraw_text))
c28a9453 8422 {
584948ac
KH
8423 category = coding_category_raw_text;
8424 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 8425 }
df7492f9 8426 else if (EQ (coding_type, Qutf_8))
4ed46869 8427 {
584948ac
KH
8428 category = coding_category_utf_8;
8429 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 8430 }
df7492f9
KH
8431 else if (EQ (coding_type, Qundecided))
8432 category = coding_category_undecided;
4ed46869 8433 else
df7492f9 8434 error ("Invalid coding system type: %s",
8f924df7 8435 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 8436
df7492f9 8437 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
8438 CODING_ATTR_PLIST (attrs)
8439 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8440 CODING_ATTR_PLIST (attrs)));
c4825358 8441
df7492f9
KH
8442 eol_type = args[coding_arg_eol_type];
8443 if (! NILP (eol_type)
8444 && ! EQ (eol_type, Qunix)
8445 && ! EQ (eol_type, Qdos)
8446 && ! EQ (eol_type, Qmac))
8447 error ("Invalid eol-type");
4ed46869 8448
df7492f9 8449 aliases = Fcons (name, Qnil);
4ed46869 8450
df7492f9
KH
8451 if (NILP (eol_type))
8452 {
8453 eol_type = make_subsidiaries (name);
8454 for (i = 0; i < 3; i++)
1397dc18 8455 {
df7492f9
KH
8456 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8457
8458 this_name = AREF (eol_type, i);
8459 this_aliases = Fcons (this_name, Qnil);
8460 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8461 this_spec = Fmake_vector (make_number (3), attrs);
8462 ASET (this_spec, 1, this_aliases);
8463 ASET (this_spec, 2, this_eol_type);
8464 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8465 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8466 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8467 Vcoding_system_alist);
1397dc18 8468 }
d46c5b12 8469 }
4ed46869 8470
df7492f9
KH
8471 spec_vec = Fmake_vector (make_number (3), attrs);
8472 ASET (spec_vec, 1, aliases);
8473 ASET (spec_vec, 2, eol_type);
48b0f3ae 8474
df7492f9
KH
8475 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8476 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8477 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8478 Vcoding_system_alist);
48b0f3ae 8479
df7492f9
KH
8480 {
8481 int id = coding_categories[category].id;
48b0f3ae 8482
df7492f9
KH
8483 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8484 setup_coding_system (name, &coding_categories[category]);
8485 }
48b0f3ae 8486
d46c5b12 8487 return Qnil;
48b0f3ae 8488
df7492f9
KH
8489 short_args:
8490 return Fsignal (Qwrong_number_of_arguments,
8491 Fcons (intern ("define-coding-system-internal"),
8492 make_number (nargs)));
d46c5b12 8493}
4ed46869 8494
d6925f38 8495
df7492f9
KH
8496DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8497 Sdefine_coding_system_alias, 2, 2, 0,
8498 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8499 (alias, coding_system)
8500 Lisp_Object alias, coding_system;
66cfb530 8501{
df7492f9 8502 Lisp_Object spec, aliases, eol_type;
4ed46869 8503
df7492f9
KH
8504 CHECK_SYMBOL (alias);
8505 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8506 aliases = AREF (spec, 1);
d6925f38
KH
8507 /* ALISES should be a list of length more than zero, and the first
8508 element is a base coding system. Append ALIAS at the tail of the
8509 list. */
df7492f9
KH
8510 while (!NILP (XCDR (aliases)))
8511 aliases = XCDR (aliases);
8f924df7 8512 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 8513
df7492f9
KH
8514 eol_type = AREF (spec, 2);
8515 if (VECTORP (eol_type))
4ed46869 8516 {
df7492f9
KH
8517 Lisp_Object subsidiaries;
8518 int i;
4ed46869 8519
df7492f9
KH
8520 subsidiaries = make_subsidiaries (alias);
8521 for (i = 0; i < 3; i++)
8522 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8523 AREF (eol_type, i));
4ed46869 8524 }
df7492f9
KH
8525
8526 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 8527 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
5bad0796
DL
8528 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8529 Vcoding_system_alist);
66cfb530 8530
4ed46869
KH
8531 return Qnil;
8532}
8533
df7492f9
KH
8534DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8535 1, 1, 0,
8536 doc: /* Return the base of CODING-SYSTEM.
da7db224 8537Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
8538 (coding_system)
8539 Lisp_Object coding_system;
d46c5b12 8540{
df7492f9 8541 Lisp_Object spec, attrs;
d46c5b12 8542
df7492f9
KH
8543 if (NILP (coding_system))
8544 return (Qno_conversion);
8545 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8546 attrs = AREF (spec, 0);
8547 return CODING_ATTR_BASE_NAME (attrs);
8548}
1397dc18 8549
df7492f9
KH
8550DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8551 1, 1, 0,
8552 doc: "Return the property list of CODING-SYSTEM.")
8553 (coding_system)
8554 Lisp_Object coding_system;
8555{
8556 Lisp_Object spec, attrs;
1397dc18 8557
df7492f9
KH
8558 if (NILP (coding_system))
8559 coding_system = Qno_conversion;
8560 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8561 attrs = AREF (spec, 0);
8562 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
8563}
8564
df7492f9
KH
8565
8566DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8567 1, 1, 0,
da7db224 8568 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
8569 (coding_system)
8570 Lisp_Object coding_system;
66cfb530 8571{
df7492f9 8572 Lisp_Object spec;
84d60297 8573
df7492f9
KH
8574 if (NILP (coding_system))
8575 coding_system = Qno_conversion;
8576 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 8577 return AREF (spec, 1);
df7492f9 8578}
66cfb530 8579
df7492f9
KH
8580DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8581 Scoding_system_eol_type, 1, 1, 0,
8582 doc: /* Return eol-type of CODING-SYSTEM.
8583An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 8584
df7492f9
KH
8585Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8586and CR respectively.
66cfb530 8587
df7492f9
KH
8588A vector value indicates that a format of end-of-line should be
8589detected automatically. Nth element of the vector is the subsidiary
8590coding system whose eol-type is N. */)
6b89e3aa
KH
8591 (coding_system)
8592 Lisp_Object coding_system;
8593{
df7492f9
KH
8594 Lisp_Object spec, eol_type;
8595 int n;
6b89e3aa 8596
df7492f9
KH
8597 if (NILP (coding_system))
8598 coding_system = Qno_conversion;
8599 if (! CODING_SYSTEM_P (coding_system))
8600 return Qnil;
8601 spec = CODING_SYSTEM_SPEC (coding_system);
8602 eol_type = AREF (spec, 2);
8603 if (VECTORP (eol_type))
8604 return Fcopy_sequence (eol_type);
8605 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8606 return make_number (n);
6b89e3aa
KH
8607}
8608
4ed46869
KH
8609#endif /* emacs */
8610
8611\f
1397dc18 8612/*** 12. Postamble ***/
4ed46869 8613
dfcf069d 8614void
4ed46869
KH
8615init_coding_once ()
8616{
8617 int i;
8618
df7492f9
KH
8619 for (i = 0; i < coding_category_max; i++)
8620 {
8621 coding_categories[i].id = -1;
8622 coding_priorities[i] = i;
8623 }
4ed46869
KH
8624
8625 /* ISO2022 specific initialize routine. */
8626 for (i = 0; i < 0x20; i++)
b73bfc1c 8627 iso_code_class[i] = ISO_control_0;
4ed46869
KH
8628 for (i = 0x21; i < 0x7F; i++)
8629 iso_code_class[i] = ISO_graphic_plane_0;
8630 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 8631 iso_code_class[i] = ISO_control_1;
4ed46869
KH
8632 for (i = 0xA1; i < 0xFF; i++)
8633 iso_code_class[i] = ISO_graphic_plane_1;
8634 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8635 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8636 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8637 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8638 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8639 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8640 iso_code_class[ISO_CODE_ESC] = ISO_escape;
8641 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8642 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8643 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8644
df7492f9
KH
8645 for (i = 0; i < 256; i++)
8646 {
8647 emacs_mule_bytes[i] = 1;
8648 }
7c78e542
KH
8649 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8650 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8651 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8652 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
8653}
8654
8655#ifdef emacs
8656
dfcf069d 8657void
e0e989f6
KH
8658syms_of_coding ()
8659{
df7492f9 8660 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
8661 {
8662 Lisp_Object args[2];
8663 args[0] = QCtest;
8664 args[1] = Qeq;
8665 Vcoding_system_hash_table = Fmake_hash_table (2, args);
8666 }
df7492f9
KH
8667
8668 staticpro (&Vsjis_coding_system);
8669 Vsjis_coding_system = Qnil;
e0e989f6 8670
df7492f9
KH
8671 staticpro (&Vbig5_coding_system);
8672 Vbig5_coding_system = Qnil;
8673
8674 staticpro (&Vcode_conversion_work_buf_list);
8675 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 8676
df7492f9
KH
8677 staticpro (&Vcode_conversion_reused_work_buf);
8678 Vcode_conversion_reused_work_buf = Qnil;
8679
8680 DEFSYM (Qcharset, "charset");
8681 DEFSYM (Qtarget_idx, "target-idx");
8682 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
8683 Fset (Qcoding_system_history, Qnil);
8684
9ce27fde 8685 /* Target FILENAME is the first argument. */
e0e989f6 8686 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 8687 /* Target FILENAME is the third argument. */
e0e989f6
KH
8688 Fput (Qwrite_region, Qtarget_idx, make_number (2));
8689
df7492f9 8690 DEFSYM (Qcall_process, "call-process");
9ce27fde 8691 /* Target PROGRAM is the first argument. */
e0e989f6
KH
8692 Fput (Qcall_process, Qtarget_idx, make_number (0));
8693
df7492f9 8694 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 8695 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8696 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8697
df7492f9 8698 DEFSYM (Qstart_process, "start-process");
9ce27fde 8699 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8700 Fput (Qstart_process, Qtarget_idx, make_number (2));
8701
df7492f9 8702 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 8703 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
8704 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8705
df7492f9
KH
8706 DEFSYM (Qcoding_system, "coding-system");
8707 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 8708
df7492f9
KH
8709 DEFSYM (Qeol_type, "eol-type");
8710 DEFSYM (Qunix, "unix");
8711 DEFSYM (Qdos, "dos");
4ed46869 8712
df7492f9
KH
8713 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8714 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8715 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8716 DEFSYM (Qdefault_char, "default-char");
8717 DEFSYM (Qundecided, "undecided");
8718 DEFSYM (Qno_conversion, "no-conversion");
8719 DEFSYM (Qraw_text, "raw-text");
4ed46869 8720
df7492f9 8721 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 8722
df7492f9 8723 DEFSYM (Qutf_8, "utf-8");
8f924df7 8724 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 8725
df7492f9 8726 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
8727 DEFSYM (Qbig, "big");
8728 DEFSYM (Qlittle, "little");
27901516 8729
df7492f9
KH
8730 DEFSYM (Qshift_jis, "shift-jis");
8731 DEFSYM (Qbig5, "big5");
4ed46869 8732
df7492f9 8733 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 8734
df7492f9 8735 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
8736 Fput (Qcoding_system_error, Qerror_conditions,
8737 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8738 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 8739 build_string ("Invalid coding system"));
4ed46869 8740
05e6f5dc
KH
8741 /* Intern this now in case it isn't already done.
8742 Setting this variable twice is harmless.
8743 But don't staticpro it here--that is done in alloc.c. */
8744 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 8745
df7492f9 8746 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8747 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8748 DEFSYM (Qtranslation_table_id, "translation-table-id");
8749 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8750 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 8751
df7492f9 8752 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 8753
df7492f9 8754 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 8755
01378f49
KH
8756 DEFSYM (QCcategory, ":category");
8757
df7492f9
KH
8758 Vcoding_category_table
8759 = Fmake_vector (make_number (coding_category_max), Qnil);
8760 staticpro (&Vcoding_category_table);
8761 /* Followings are target of code detection. */
8762 ASET (Vcoding_category_table, coding_category_iso_7,
8763 intern ("coding-category-iso-7"));
8764 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8765 intern ("coding-category-iso-7-tight"));
8766 ASET (Vcoding_category_table, coding_category_iso_8_1,
8767 intern ("coding-category-iso-8-1"));
8768 ASET (Vcoding_category_table, coding_category_iso_8_2,
8769 intern ("coding-category-iso-8-2"));
8770 ASET (Vcoding_category_table, coding_category_iso_7_else,
8771 intern ("coding-category-iso-7-else"));
8772 ASET (Vcoding_category_table, coding_category_iso_8_else,
8773 intern ("coding-category-iso-8-else"));
8774 ASET (Vcoding_category_table, coding_category_utf_8,
8775 intern ("coding-category-utf-8"));
8776 ASET (Vcoding_category_table, coding_category_utf_16_be,
8777 intern ("coding-category-utf-16-be"));
ff563fce
KH
8778 ASET (Vcoding_category_table, coding_category_utf_16_auto,
8779 intern ("coding-category-utf-16-auto"));
df7492f9
KH
8780 ASET (Vcoding_category_table, coding_category_utf_16_le,
8781 intern ("coding-category-utf-16-le"));
8782 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8783 intern ("coding-category-utf-16-be-nosig"));
8784 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8785 intern ("coding-category-utf-16-le-nosig"));
8786 ASET (Vcoding_category_table, coding_category_charset,
8787 intern ("coding-category-charset"));
8788 ASET (Vcoding_category_table, coding_category_sjis,
8789 intern ("coding-category-sjis"));
8790 ASET (Vcoding_category_table, coding_category_big5,
8791 intern ("coding-category-big5"));
8792 ASET (Vcoding_category_table, coding_category_ccl,
8793 intern ("coding-category-ccl"));
8794 ASET (Vcoding_category_table, coding_category_emacs_mule,
8795 intern ("coding-category-emacs-mule"));
8796 /* Followings are NOT target of code detection. */
8797 ASET (Vcoding_category_table, coding_category_raw_text,
8798 intern ("coding-category-raw-text"));
8799 ASET (Vcoding_category_table, coding_category_undecided,
8800 intern ("coding-category-undecided"));
ecf488bc 8801
4ed46869
KH
8802 defsubr (&Scoding_system_p);
8803 defsubr (&Sread_coding_system);
8804 defsubr (&Sread_non_nil_coding_system);
8805 defsubr (&Scheck_coding_system);
8806 defsubr (&Sdetect_coding_region);
d46c5b12 8807 defsubr (&Sdetect_coding_string);
05e6f5dc 8808 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 8809 defsubr (&Sunencodable_char_position);
df7492f9 8810 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8811 defsubr (&Sdecode_coding_region);
8812 defsubr (&Sencode_coding_region);
8813 defsubr (&Sdecode_coding_string);
8814 defsubr (&Sencode_coding_string);
8815 defsubr (&Sdecode_sjis_char);
8816 defsubr (&Sencode_sjis_char);
8817 defsubr (&Sdecode_big5_char);
8818 defsubr (&Sencode_big5_char);
1ba9e4ab 8819 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8820 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8821 defsubr (&Sterminal_coding_system);
1ba9e4ab 8822 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8823 defsubr (&Skeyboard_coding_system);
a5d301df 8824 defsubr (&Sfind_operation_coding_system);
df7492f9 8825 defsubr (&Sset_coding_system_priority);
6b89e3aa 8826 defsubr (&Sdefine_coding_system_internal);
df7492f9
KH
8827 defsubr (&Sdefine_coding_system_alias);
8828 defsubr (&Scoding_system_base);
8829 defsubr (&Scoding_system_plist);
8830 defsubr (&Scoding_system_aliases);
8831 defsubr (&Scoding_system_eol_type);
8832 defsubr (&Scoding_system_priority_list);
4ed46869 8833
4608c386 8834 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8835 doc: /* List of coding systems.
8836
8837Do not alter the value of this variable manually. This variable should be
df7492f9 8838updated by the functions `define-coding-system' and
48b0f3ae 8839`define-coding-system-alias'. */);
4608c386
KH
8840 Vcoding_system_list = Qnil;
8841
8842 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8843 doc: /* Alist of coding system names.
8844Each element is one element list of coding system name.
8845This variable is given to `completing-read' as TABLE argument.
8846
8847Do not alter the value of this variable manually. This variable should be
8848updated by the functions `make-coding-system' and
8849`define-coding-system-alias'. */);
4608c386
KH
8850 Vcoding_system_alist = Qnil;
8851
4ed46869 8852 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8853 doc: /* List of coding-categories (symbols) ordered by priority.
8854
8855On detecting a coding system, Emacs tries code detection algorithms
8856associated with each coding-category one by one in this order. When
8857one algorithm agrees with a byte sequence of source text, the coding
8858system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8859 {
8860 int i;
8861
8862 Vcoding_category_list = Qnil;
df7492f9 8863 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8864 Vcoding_category_list
d46c5b12
KH
8865 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8866 Vcoding_category_list);
4ed46869
KH
8867 }
8868
8869 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8870 doc: /* Specify the coding system for read operations.
8871It is useful to bind this variable with `let', but do not set it globally.
8872If the value is a coding system, it is used for decoding on read operation.
8873If not, an appropriate element is used from one of the coding system alists:
8874There are three such tables, `file-coding-system-alist',
8875`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8876 Vcoding_system_for_read = Qnil;
8877
8878 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8879 doc: /* Specify the coding system for write operations.
8880Programs bind this variable with `let', but you should not set it globally.
8881If the value is a coding system, it is used for encoding of output,
8882when writing it to a file and when sending it to a file or subprocess.
8883
8884If this does not specify a coding system, an appropriate element
8885is used from one of the coding system alists:
8886There are three such tables, `file-coding-system-alist',
8887`process-coding-system-alist', and `network-coding-system-alist'.
8888For output to files, if the above procedure does not specify a coding system,
8889the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8890 Vcoding_system_for_write = Qnil;
8891
8892 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8893 doc: /*
8894Coding system used in the latest file or process I/O. */);
4ed46869
KH
8895 Vlast_coding_system_used = Qnil;
8896
9ce27fde 8897 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8898 doc: /*
8899*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8900See info node `Coding Systems' and info node `Text and Binary' concerning
8901such conversion. */);
9ce27fde
KH
8902 inhibit_eol_conversion = 0;
8903
ed29121d 8904 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8905 doc: /*
8906Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8907Bind it to t if the process output is to be treated as if it were a file
8908read from some filesystem. */);
ed29121d
EZ
8909 inherit_process_coding_system = 0;
8910
02ba4723 8911 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8912 doc: /*
8913Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8914The format is ((PATTERN . VAL) ...),
8915where PATTERN is a regular expression matching a file name,
8916VAL is a coding system, a cons of coding systems, or a function symbol.
8917If VAL is a coding system, it is used for both decoding and encoding
8918the file contents.
8919If VAL is a cons of coding systems, the car part is used for decoding,
8920and the cdr part is used for encoding.
8921If VAL is a function symbol, the function must return a coding system
0192762c
DL
8922or a cons of coding systems which are used as above. The function gets
8923the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8924
8925See also the function `find-operation-coding-system'
8926and the variable `auto-coding-alist'. */);
02ba4723
KH
8927 Vfile_coding_system_alist = Qnil;
8928
8929 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8930 doc: /*
8931Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8932The format is ((PATTERN . VAL) ...),
8933where PATTERN is a regular expression matching a program name,
8934VAL is a coding system, a cons of coding systems, or a function symbol.
8935If VAL is a coding system, it is used for both decoding what received
8936from the program and encoding what sent to the program.
8937If VAL is a cons of coding systems, the car part is used for decoding,
8938and the cdr part is used for encoding.
8939If VAL is a function symbol, the function must return a coding system
8940or a cons of coding systems which are used as above.
8941
8942See also the function `find-operation-coding-system'. */);
02ba4723
KH
8943 Vprocess_coding_system_alist = Qnil;
8944
8945 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8946 doc: /*
8947Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8948The format is ((PATTERN . VAL) ...),
8949where PATTERN is a regular expression matching a network service name
8950or is a port number to connect to,
8951VAL is a coding system, a cons of coding systems, or a function symbol.
8952If VAL is a coding system, it is used for both decoding what received
8953from the network stream and encoding what sent to the network stream.
8954If VAL is a cons of coding systems, the car part is used for decoding,
8955and the cdr part is used for encoding.
8956If VAL is a function symbol, the function must return a coding system
8957or a cons of coding systems which are used as above.
8958
8959See also the function `find-operation-coding-system'. */);
02ba4723 8960 Vnetwork_coding_system_alist = Qnil;
4ed46869 8961
68c45bf0 8962 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8963 doc: /* Coding system to use with system messages.
8964Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8965 Vlocale_coding_system = Qnil;
8966
005f0d35 8967 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8968 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8969 doc: /*
8970*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8971 eol_mnemonic_unix = build_string (":");
4ed46869 8972
7722baf9 8973 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8974 doc: /*
8975*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8976 eol_mnemonic_dos = build_string ("\\");
4ed46869 8977
7722baf9 8978 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8979 doc: /*
8980*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8981 eol_mnemonic_mac = build_string ("/");
4ed46869 8982
7722baf9 8983 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8984 doc: /*
8985*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8986 eol_mnemonic_undecided = build_string (":");
4ed46869 8987
84fbb8a0 8988 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8989 doc: /*
8990*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8991 Venable_character_translation = Qt;
bdd9fb48 8992
f967223b 8993 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8994 &Vstandard_translation_table_for_decode,
8995 doc: /* Table for translating characters while decoding. */);
f967223b 8996 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8997
f967223b 8998 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8999 &Vstandard_translation_table_for_encode,
9000 doc: /* Table for translating characters while encoding. */);
f967223b 9001 Vstandard_translation_table_for_encode = Qnil;
4ed46869 9002
df7492f9 9003 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
9004 doc: /* Alist of charsets vs revision numbers.
9005While encoding, if a charset (car part of an element) is found,
df7492f9
KH
9006designate it with the escape sequence identifying revision (cdr part
9007of the element). */);
9008 Vcharset_revision_table = Qnil;
02ba4723
KH
9009
9010 DEFVAR_LISP ("default-process-coding-system",
9011 &Vdefault_process_coding_system,
48b0f3ae
PJ
9012 doc: /* Cons of coding systems used for process I/O by default.
9013The car part is used for decoding a process output,
9014the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 9015 Vdefault_process_coding_system = Qnil;
c4825358 9016
3f003981 9017 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
9018 doc: /*
9019Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
9020This is a vector of length 256.
9021If Nth element is non-nil, the existence of code N in a file
9022\(or output of subprocess) doesn't prevent it to be detected as
9023a coding system of ISO 2022 variant which has a flag
9024`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9025or reading output of a subprocess.
9026Only 128th through 159th elements has a meaning. */);
3f003981 9027 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
9028
9029 DEFVAR_LISP ("select-safe-coding-system-function",
9030 &Vselect_safe_coding_system_function,
df7492f9
KH
9031 doc: /*
9032Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
9033
9034If set, this function is called to force a user to select a proper
9035coding system which can encode the text in the case that a default
9036coding system used in each operation can't encode the text.
9037
9038The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
9039 Vselect_safe_coding_system_function = Qnil;
9040
5d5bf4d8
KH
9041 DEFVAR_BOOL ("coding-system-require-warning",
9042 &coding_system_require_warning,
9043 doc: /* Internal use only.
6b89e3aa
KH
9044If non-nil, on writing a file, `select-safe-coding-system-function' is
9045called even if `coding-system-for-write' is non-nil. The command
9046`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
9047 coding_system_require_warning = 0;
9048
9049
22ab2303 9050 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 9051 &inhibit_iso_escape_detection,
df7492f9
KH
9052 doc: /*
9053If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
9054
9055By default, on reading a file, Emacs tries to detect how the text is
9056encoded. This code detection is sensitive to escape sequences. If
9057the sequence is valid as ISO2022, the code is determined as one of
9058the ISO2022 encodings, and the file is decoded by the corresponding
9059coding system (e.g. `iso-2022-7bit').
9060
9061However, there may be a case that you want to read escape sequences in
9062a file as is. In such a case, you can set this variable to non-nil.
9063Then, as the code detection ignores any escape sequences, no file is
9064detected as encoded in some ISO2022 encoding. The result is that all
9065escape sequences become visible in a buffer.
9066
9067The default value is nil, and it is strongly recommended not to change
9068it. That is because many Emacs Lisp source files that contain
9069non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9070in Emacs's distribution, and they won't be decoded correctly on
9071reading if you suppress escape sequence detection.
9072
9073The other way to read escape sequences in a file without decoding is
9074to explicitly specify some coding system that doesn't use ISO2022's
9075escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 9076 inhibit_iso_escape_detection = 0;
002fdb44
DL
9077
9078 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
9079 doc: /* Char table for translating self-inserting characters.
9080This is applied to the result of input methods, not their input. See also
9081`keyboard-translate-table'. */);
002fdb44 9082 Vtranslation_table_for_input = Qnil;
8f924df7 9083
2c78b7e1
KH
9084 {
9085 Lisp_Object args[coding_arg_max];
8f924df7 9086 Lisp_Object plist[16];
2c78b7e1
KH
9087 int i;
9088
9089 for (i = 0; i < coding_arg_max; i++)
9090 args[i] = Qnil;
9091
9092 plist[0] = intern (":name");
9093 plist[1] = args[coding_arg_name] = Qno_conversion;
9094 plist[2] = intern (":mnemonic");
9095 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9096 plist[4] = intern (":coding-type");
9097 plist[5] = args[coding_arg_coding_type] = Qraw_text;
9098 plist[6] = intern (":ascii-compatible-p");
9099 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9100 plist[8] = intern (":default-char");
9101 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
9102 plist[10] = intern (":for-unibyte");
9103 plist[11] = args[coding_arg_for_unibyte] = Qt;
9104 plist[12] = intern (":docstring");
9105 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
9106\n\
9107When you visit a file with this coding, the file is read into a\n\
9108unibyte buffer as is, thus each byte of a file is treated as a\n\
9109character.");
8f924df7
KH
9110 plist[14] = intern (":eol-type");
9111 plist[15] = args[coding_arg_eol_type] = Qunix;
9112 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1
KH
9113 Fdefine_coding_system_internal (coding_arg_max, args);
9114 }
9115
9116 setup_coding_system (Qno_conversion, &keyboard_coding);
9117 setup_coding_system (Qno_conversion, &terminal_coding);
9118 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
9119
9120 {
9121 int i;
9122
9123 for (i = 0; i < coding_category_max; i++)
9124 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9125 }
4ed46869
KH
9126}
9127
68c45bf0
PE
9128char *
9129emacs_strerror (error_number)
9130 int error_number;
9131{
9132 char *str;
9133
ca9c0567 9134 synchronize_system_messages_locale ();
68c45bf0
PE
9135 str = strerror (error_number);
9136
9137 if (! NILP (Vlocale_coding_system))
9138 {
9139 Lisp_Object dec = code_convert_string_norecord (build_string (str),
9140 Vlocale_coding_system,
9141 0);
d5db4077 9142 str = (char *) SDATA (dec);
68c45bf0
PE
9143 }
9144
9145 return str;
9146}
9147
4ed46869 9148#endif /* emacs */