(inhibit_pre_post_conversion): Removed (unused).
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
8f924df7 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
8f924df7 5 Copyright (C) 2003
df7492f9
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 56 coding system.
4ed46869 57
e19c3639
KH
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
4ed46869 63
e19c3639 64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
4ed46869 66
df7492f9
KH
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
4ed46869
KH
82
83 The most famous coding system for multiple character sets. X's
df7492f9
KH
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
4ed46869 87
df7492f9 88 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on the operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
cfb43547 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
4ed46869
KH
138
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
#if 0
/* Template of the detect_coding_XXX functions.

   Scan the source bytes of CODING and record the outcome in
   DETECT_INFO: CATEGORY_MASK_XXX is added to DETECT_INFO->rejected
   when a byte that does not conform to XXX is met, and the categories
   collected in FOUND are added to DETECT_INFO->found when the whole
   source has been read.

   Return 1 if the byte sequence conforms to XXX, otherwise return 0.  */
static int
detect_coding_XXX (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  unsigned char *src = coding->source;
  unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int found = 0;
  ...;

  while (1)
    {
      /* Get one byte from the source.  If the source is exhausted,
         jump to no_more_source:.  */
      ONE_MORE_BYTE (c);

      if (! __C_conforms_to_XXX___ (c))
        break;
      /* Only a byte that is positive evidence for XXX marks the
         category as found; a merely conforming byte (e.g. ASCII)
         leaves FOUND untouched.  */
      if (__C_strongly_suggests_XXX__ (c))
        found = CATEGORY_MASK_XXX;
    }
  /* The byte sequence is invalid for XXX.  */
  detect_info->rejected |= CATEGORY_MASK_XXX;
  return 0;

 no_more_source:
  /* The source was exhausted successfully.  */
  detect_info->found |= found;
  return 1;
}
#endif
188
189/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
df7492f9
KH
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
d46c5b12 195
df7492f9
KH
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
d46c5b12 200
df7492f9 201 Below is the template of these functions. */
d46c5b12 202
#if 0
/* Template of the decode_coding_XXXX functions.

   Decode the not-yet-consumed part of CODING's source into
   CODING->charbuf, then record the consumed amount in
   CODING->consumed / CODING->consumed_char and the number of produced
   characters in CODING->charbuf_used.  */
static void
decode_coding_XXXX (coding)
     struct coding_system *coding;
{
  unsigned char *src = coding->source + coding->consumed;
  unsigned char *src_end = coding->source + coding->src_bytes;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source text, or
     when there's no room in CHARBUF for a decoded character.  */
  unsigned char *src_base;
  /* A buffer to produce decoded characters.  */
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_size;
  int multibytep = coding->src_multibyte;
  /* ONE_MORE_BYTE stores the fetched byte in C and increments
     CONSUMED_CHARS, so both must be declared here.  */
  int consumed_chars = 0;
  int c;

  while (1)
    {
      src_base = src;
      if (charbuf >= charbuf_end)
        /* No more room to produce a decoded character.  */
        break;
      ONE_MORE_BYTE (c);
      /* Decode it.  */
    }

 no_more_source:
  if (src_base < src_end
      && coding->mode & CODING_MODE_LAST_BLOCK)
    /* If the source ends by partial bytes to construct a character,
       treat them as eight-bit raw data.  */
    while (src_base < src_end && charbuf < charbuf_end)
      *charbuf++ = *src_base++;
  /* Remember how many bytes and characters we consumed.  If the
     source is multibyte, the bytes and chars are not identical.  */
  coding->consumed = coding->consumed_char = src_base - coding->source;
  /* Remember how many characters we produced.  */
  coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
243
244/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
df7492f9
KH
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
d46c5b12 250
df7492f9
KH
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 255
df7492f9
KH
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of the not-yet-encoded source text.
d46c5b12 259
df7492f9 260 Below is a template of these functions. */
#if 0
/* Template of the encode_coding_XXX functions.

   Encode the characters held in CODING->charbuf (CODING->charbuf_used
   of them) into CODING->destination, starting after the bytes already
   produced, then update CODING->produced_char and CODING->produced.

   NOTE(review): the encode_coding_* prototypes in this file are
   declared `static int' while this template is `static void';
   confirm which one the real encoders should follow.  */
static void
encode_coding_XXX (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  /* The end of the valid characters is relative to CODING's buffer;
     CHARBUF itself is a plain `int *' and has no members.  */
  int *charbuf_end = coding->charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Stop early enough that one more iteration cannot overrun DST_END.  */
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  int produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* Encode C into DST, and increment DST.  */
    }
 label_no_more_destination:
  /* How many chars and bytes we produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
}
#endif
285
4ed46869
KH
286\f
287/*** 1. Preamble ***/
288
68c45bf0 289#include <config.h>
4ed46869
KH
290#include <stdio.h>
291
4ed46869
KH
292#include "lisp.h"
293#include "buffer.h"
df7492f9 294#include "character.h"
4ed46869
KH
295#include "charset.h"
296#include "ccl.h"
df7492f9 297#include "composite.h"
4ed46869
KH
298#include "coding.h"
299#include "window.h"
4ed46869 300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
304Lisp_Object Qunix, Qdos;
305extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
306Lisp_Object Qbuffer_file_coding_system;
307Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 308Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
df7492f9 310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 311Lisp_Object Qbig, Qlittle;
bb0115a2 312Lisp_Object Qcoding_system_history;
1397dc18 313Lisp_Object Qvalid_codes;
01378f49 314Lisp_Object QCcategory;
4ed46869
KH
315
316extern Lisp_Object Qinsert_file_contents, Qwrite_region;
317Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
318Lisp_Object Qstart_process, Qopen_network_stream;
319Lisp_Object Qtarget_idx;
320
5d5bf4d8
KH
321int coding_system_require_warning;
322
d46c5b12
KH
323Lisp_Object Vselect_safe_coding_system_function;
324
7722baf9
EZ
325/* Mnemonic string for each format of end-of-line. */
326Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
327/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 328 decided. */
7722baf9 329Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
330
331#ifdef emacs
332
4608c386
KH
333Lisp_Object Vcoding_system_list, Vcoding_system_alist;
334
335Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 336
d46c5b12
KH
337/* Coding system emacs-mule and raw-text are for converting only
338 end-of-line format. */
339Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 340Lisp_Object Qutf_8_emacs;
ecf488bc 341
4ed46869
KH
342/* Coding-systems are handed between Emacs Lisp programs and C internal
343 routines by the following three variables. */
344/* Coding-system for reading files and receiving data from process. */
345Lisp_Object Vcoding_system_for_read;
346/* Coding-system for writing files and sending data to process. */
347Lisp_Object Vcoding_system_for_write;
348/* Coding-system actually used in the latest I/O. */
349Lisp_Object Vlast_coding_system_used;
350
c4825358 351/* A vector of length 256 which contains information about special
94487c4e 352 Latin codes (especially for dealing with Microsoft codes). */
3f003981 353Lisp_Object Vlatin_extra_code_table;
c4825358 354
9ce27fde
KH
355/* Flag to inhibit code conversion of end-of-line format. */
356int inhibit_eol_conversion;
357
74383408
KH
358/* Flag to inhibit ISO2022 escape sequence detection. */
359int inhibit_iso_escape_detection;
360
ed29121d
EZ
361/* Flag to make buffer-file-coding-system inherit from process-coding. */
362int inherit_process_coding_system;
363
c4825358 364/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
365struct coding_system terminal_coding;
366
c4825358
KH
367/* Coding system to be used to encode text for terminal display when
368 terminal coding system is nil. */
369struct coding_system safe_terminal_coding;
370
371/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
372struct coding_system keyboard_coding;
373
02ba4723
KH
374Lisp_Object Vfile_coding_system_alist;
375Lisp_Object Vprocess_coding_system_alist;
376Lisp_Object Vnetwork_coding_system_alist;
4ed46869 377
68c45bf0
PE
378Lisp_Object Vlocale_coding_system;
379
4ed46869
KH
380#endif /* emacs */
381
f967223b
KH
382/* Flag to tell if we look up translation table on character code
383 conversion. */
84fbb8a0 384Lisp_Object Venable_character_translation;
f967223b
KH
385/* Standard translation table to look up on decoding (reading). */
386Lisp_Object Vstandard_translation_table_for_decode;
387/* Standard translation table to look up on encoding (writing). */
388Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 389
f967223b
KH
390Lisp_Object Qtranslation_table;
391Lisp_Object Qtranslation_table_id;
392Lisp_Object Qtranslation_table_for_decode;
393Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
394
395/* Alist of charsets vs revision number. */
df7492f9 396static Lisp_Object Vcharset_revision_table;
4ed46869 397
02ba4723
KH
398/* Default coding systems used for process I/O. */
399Lisp_Object Vdefault_process_coding_system;
400
002fdb44
DL
401/* Char table for translating Quail and self-inserting input. */
402Lisp_Object Vtranslation_table_for_input;
403
df7492f9
KH
404/* Two special coding systems. */
405Lisp_Object Vsjis_coding_system;
406Lisp_Object Vbig5_coding_system;
407
408
ff0dacd7
KH
409static int detect_coding_utf_8 P_ ((struct coding_system *,
410 struct coding_detection_info *info));
df7492f9
KH
411static void decode_coding_utf_8 P_ ((struct coding_system *));
412static int encode_coding_utf_8 P_ ((struct coding_system *));
413
ff0dacd7
KH
414static int detect_coding_utf_16 P_ ((struct coding_system *,
415 struct coding_detection_info *info));
df7492f9
KH
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
ff0dacd7
KH
419static int detect_coding_iso_2022 P_ ((struct coding_system *,
420 struct coding_detection_info *info));
df7492f9
KH
421static void decode_coding_iso_2022 P_ ((struct coding_system *));
422static int encode_coding_iso_2022 P_ ((struct coding_system *));
423
ff0dacd7
KH
424static int detect_coding_emacs_mule P_ ((struct coding_system *,
425 struct coding_detection_info *info));
df7492f9
KH
426static void decode_coding_emacs_mule P_ ((struct coding_system *));
427static int encode_coding_emacs_mule P_ ((struct coding_system *));
428
ff0dacd7
KH
429static int detect_coding_sjis P_ ((struct coding_system *,
430 struct coding_detection_info *info));
df7492f9
KH
431static void decode_coding_sjis P_ ((struct coding_system *));
432static int encode_coding_sjis P_ ((struct coding_system *));
433
ff0dacd7
KH
434static int detect_coding_big5 P_ ((struct coding_system *,
435 struct coding_detection_info *info));
df7492f9
KH
436static void decode_coding_big5 P_ ((struct coding_system *));
437static int encode_coding_big5 P_ ((struct coding_system *));
438
ff0dacd7
KH
439static int detect_coding_ccl P_ ((struct coding_system *,
440 struct coding_detection_info *info));
df7492f9
KH
441static void decode_coding_ccl P_ ((struct coding_system *));
442static int encode_coding_ccl P_ ((struct coding_system *));
443
444static void decode_coding_raw_text P_ ((struct coding_system *));
445static int encode_coding_raw_text P_ ((struct coding_system *));
446
447
448/* ISO2022 section */
449
450#define CODING_ISO_INITIAL(coding, reg) \
451 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
452 coding_attr_iso_initial), \
453 reg)))
454
455
/* Return the `safe_charsets' entry of CODING for CHARSET_ID, or -1
   when CHARSET_ID is greater than CODING's `max_charset_id'.

   Both arguments are parenthesized so that an expression argument
   (e.g. a conditional expression) expands with the intended
   precedence.  CHARSET_ID is still evaluated twice, so it must not
   have side effects.  */
#define CODING_ISO_REQUEST(coding, charset_id)          \
  (((charset_id) <= (coding)->max_charset_id            \
    ? (coding)->safe_charsets[(charset_id)]             \
    : -1))
460
461
462#define CODING_ISO_FLAGS(coding) \
463 ((coding)->spec.iso_2022.flags)
464#define CODING_ISO_DESIGNATION(coding, reg) \
465 ((coding)->spec.iso_2022.current_designation[reg])
466#define CODING_ISO_INVOCATION(coding, plane) \
467 ((coding)->spec.iso_2022.current_invocation[plane])
468#define CODING_ISO_SINGLE_SHIFTING(coding) \
469 ((coding)->spec.iso_2022.single_shifting)
470#define CODING_ISO_BOL(coding) \
471 ((coding)->spec.iso_2022.bol)
472#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
473 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
474
475/* Control characters of ISO2022. */
476 /* code */ /* function */
477#define ISO_CODE_LF 0x0A /* line-feed */
478#define ISO_CODE_CR 0x0D /* carriage-return */
479#define ISO_CODE_SO 0x0E /* shift-out */
480#define ISO_CODE_SI 0x0F /* shift-in */
481#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
482#define ISO_CODE_ESC 0x1B /* escape */
483#define ISO_CODE_SS2 0x8E /* single-shift-2 */
484#define ISO_CODE_SS3 0x8F /* single-shift-3 */
485#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
486
487/* Every 1-byte code of ISO2022 is classified into one of the
488 following classes. */
489enum iso_code_class_type
490 {
491 ISO_control_0, /* Control codes in the range
492 0x00..0x1F and 0x7F, except for the
493 following 5 codes. */
494 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
495 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
496 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
497 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
498 ISO_escape, /* ISO_CODE_ESC (0x1B) */
499 ISO_control_1, /* Control codes in the range
500 0x80..0x9F, except for the
501 following 3 codes. */
502 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
503 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
504 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
505 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
506 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
507 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
508 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
509 };
05e6f5dc 510
df7492f9
KH
511/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
512 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 513
df7492f9
KH
514/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
515 instead of the correct short-form sequence (e.g. ESC $ A). */
516#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 517
df7492f9
KH
518/* If set, reset graphic planes and registers at end-of-line to the
519 initial state. */
520#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 521
df7492f9
KH
522/* If set, reset graphic planes and registers before any control
523 characters to the initial state. */
524#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 525
df7492f9
KH
526/* If set, encode by 7-bit environment. */
527#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 528
df7492f9
KH
529/* If set, use locking-shift function. */
530#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 531
df7492f9
KH
532/* If set, use single-shift function. Overwrite
533 CODING_ISO_FLAG_LOCKING_SHIFT. */
534#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 535
df7492f9
KH
536/* If set, use designation escape sequence. */
537#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 538
df7492f9
KH
539/* If set, produce revision number sequence. */
540#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 541
df7492f9
KH
542/* If set, produce ISO6429's direction specifying sequence. */
543#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 544
df7492f9
KH
545/* If set, assume designation states are reset at beginning of line on
546 output. */
547#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 548
df7492f9
KH
549/* If set, designation sequence should be placed at beginning of line
550 on output. */
551#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 552
df7492f9
KH
553/* If set, do not encode unsafe characters on output. */
554#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 555
df7492f9
KH
556/* If set, extra latin codes (128..159) are accepted as a valid code
557 on input. */
558#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 559
df7492f9 560#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 561
df7492f9 562#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 563
bf16eb23 564#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 565
bf16eb23 566#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 567
bf16eb23 568#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 569
df7492f9
KH
570/* A character to be produced on output if encoding of the original
571 character is prohibited by CODING_ISO_FLAG_SAFE. */
572#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 573
4ed46869 574
df7492f9
KH
575/* UTF-16 section */
576#define CODING_UTF_16_BOM(coding) \
577 ((coding)->spec.utf_16.bom)
4ed46869 578
df7492f9
KH
579#define CODING_UTF_16_ENDIAN(coding) \
580 ((coding)->spec.utf_16.endian)
4ed46869 581
df7492f9
KH
582#define CODING_UTF_16_SURROGATE(coding) \
583 ((coding)->spec.utf_16.surrogate)
4ed46869 584
4ed46869 585
df7492f9
KH
586/* CCL section */
587#define CODING_CCL_DECODER(coding) \
588 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
589#define CODING_CCL_ENCODER(coding) \
590 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
591#define CODING_CCL_VALIDS(coding) \
8f924df7 592 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 593
5a936b46 594/* Index for each coding category in `coding_categories' */
4ed46869 595
df7492f9
KH
596enum coding_category
597 {
598 coding_category_iso_7,
599 coding_category_iso_7_tight,
600 coding_category_iso_8_1,
601 coding_category_iso_8_2,
602 coding_category_iso_7_else,
603 coding_category_iso_8_else,
604 coding_category_utf_8,
605 coding_category_utf_16_auto,
606 coding_category_utf_16_be,
607 coding_category_utf_16_le,
608 coding_category_utf_16_be_nosig,
609 coding_category_utf_16_le_nosig,
610 coding_category_charset,
611 coding_category_sjis,
612 coding_category_big5,
613 coding_category_ccl,
614 coding_category_emacs_mule,
615 /* All above are targets of code detection. */
616 coding_category_raw_text,
617 coding_category_undecided,
618 coding_category_max
619 };
620
621/* Definitions of flag bits used in detect_coding_XXXX. */
622#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
623#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
624#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
625#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
626#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
627#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
628#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 629#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
630#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
631#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
632#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
633#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
634#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
635#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
636#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
637#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
638#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 639#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
640
641/* This value is returned if detect_coding_mask () finds nothing other
642 than ASCII characters. */
643#define CATEGORY_MASK_ANY \
644 (CATEGORY_MASK_ISO_7 \
645 | CATEGORY_MASK_ISO_7_TIGHT \
646 | CATEGORY_MASK_ISO_8_1 \
647 | CATEGORY_MASK_ISO_8_2 \
648 | CATEGORY_MASK_ISO_7_ELSE \
649 | CATEGORY_MASK_ISO_8_ELSE \
650 | CATEGORY_MASK_UTF_8 \
651 | CATEGORY_MASK_UTF_16_BE \
652 | CATEGORY_MASK_UTF_16_LE \
653 | CATEGORY_MASK_UTF_16_BE_NOSIG \
654 | CATEGORY_MASK_UTF_16_LE_NOSIG \
655 | CATEGORY_MASK_CHARSET \
656 | CATEGORY_MASK_SJIS \
657 | CATEGORY_MASK_BIG5 \
658 | CATEGORY_MASK_CCL \
659 | CATEGORY_MASK_EMACS_MULE)
660
661
662#define CATEGORY_MASK_ISO_7BIT \
663 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
664
665#define CATEGORY_MASK_ISO_8BIT \
666 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
667
668#define CATEGORY_MASK_ISO_ELSE \
669 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
670
671#define CATEGORY_MASK_ISO_ESCAPE \
672 (CATEGORY_MASK_ISO_7 \
673 | CATEGORY_MASK_ISO_7_TIGHT \
674 | CATEGORY_MASK_ISO_7_ELSE \
675 | CATEGORY_MASK_ISO_8_ELSE)
676
677#define CATEGORY_MASK_ISO \
678 ( CATEGORY_MASK_ISO_7BIT \
679 | CATEGORY_MASK_ISO_8BIT \
680 | CATEGORY_MASK_ISO_ELSE)
681
682#define CATEGORY_MASK_UTF_16 \
683 (CATEGORY_MASK_UTF_16_BE \
684 | CATEGORY_MASK_UTF_16_LE \
685 | CATEGORY_MASK_UTF_16_BE_NOSIG \
686 | CATEGORY_MASK_UTF_16_LE_NOSIG)
687
688
689/* List of symbols `coding-category-xxx' ordered by priority. This
690 variable is exposed to Emacs Lisp. */
691static Lisp_Object Vcoding_category_list;
692
693/* Table of coding categories (Lisp symbols). This variable is for
694 internal use only. */
695static Lisp_Object Vcoding_category_table;
696
697/* Table of coding-categories ordered by priority. */
698static enum coding_category coding_priorities[coding_category_max];
699
700/* Nth element is a coding context for the coding system bound to the
701 Nth coding category. */
702static struct coding_system coding_categories[coding_category_max];
703
df7492f9
KH
704/*** Commonly used macros and functions ***/
705
706#ifndef min
707#define min(a, b) ((a) < (b) ? (a) : (b))
708#endif
709#ifndef max
710#define max(a, b) ((a) > (b) ? (a) : (b))
711#endif
4ed46869 712
df7492f9
KH
/* Fetch information about CODING into the lvalues ATTRS, EOL_TYPE and
   CHARSET_LIST:
     ATTRS        <- CODING_ID_ATTRS of CODING's id,
     EOL_TYPE     <- CODING_ID_EOL_TYPE of CODING's id, replaced by
                     Qunix when that value is a vector,
     CHARSET_LIST <- CODING_ATTR_CHARSET_LIST of ATTRS.

   CODING is parenthesized so that any pointer expression, not only a
   plain identifier, can be passed.  CODING is evaluated twice.  */
#define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
  do {                                                          \
    (attrs) = CODING_ID_ATTRS ((coding)->id);                   \
    (eol_type) = CODING_ID_EOL_TYPE ((coding)->id);             \
    if (VECTORP (eol_type))                                     \
      (eol_type) = Qunix;                                       \
    (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);          \
  } while (0)
4ed46869 721
4ed46869 722
df7492f9
KH
723/* Safely get one byte from the source text pointed by SRC which ends
724 at SRC_END, and set C to that byte. If there are not enough bytes
725 in the source, it jumps to `no_more_source'. The caller
726 should declare and set these variables appropriately in advance:
727 src, src_end, multibytep
728*/
aa72b389 729
df7492f9 730#define ONE_MORE_BYTE(c) \
aa72b389 731 do { \
df7492f9
KH
732 if (src == src_end) \
733 { \
734 if (src_base < src) \
735 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
736 goto no_more_source; \
737 } \
738 c = *src++; \
739 if (multibytep && (c & 0x80)) \
740 { \
741 if ((c & 0xFE) != 0xC0) \
742 error ("Undecodable char found"); \
743 c = ((c & 1) << 6) | *src++; \
744 } \
745 consumed_chars++; \
aa72b389
KH
746 } while (0)
747
aa72b389 748
df7492f9
KH
749#define ONE_MORE_BYTE_NO_CHECK(c) \
750 do { \
751 c = *src++; \
752 if (multibytep && (c & 0x80)) \
753 { \
754 if ((c & 0xFE) != 0xC0) \
755 error ("Undecodable char found"); \
756 c = ((c & 1) << 6) | *src++; \
757 } \
781d7a48 758 consumed_chars++; \
aa72b389
KH
759 } while (0)
760
aa72b389 761
df7492f9
KH
762/* Store a byte C in the place pointed by DST and increment DST to the
763 next free point, and increment PRODUCED_CHARS. The caller should
764 assure that C is 0..127, and declare and set the variable `dst'
765 appropriately in advance.
766*/
aa72b389
KH
767
768
df7492f9
KH
769#define EMIT_ONE_ASCII_BYTE(c) \
770 do { \
771 produced_chars++; \
772 *dst++ = (c); \
b6871cc7 773 } while (0)
aa72b389
KH
774
775
df7492f9 776/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
aa72b389 777
df7492f9
KH
778#define EMIT_TWO_ASCII_BYTES(c1, c2) \
779 do { \
780 produced_chars += 2; \
781 *dst++ = (c1), *dst++ = (c2); \
782 } while (0)
aa72b389
KH
783
784
df7492f9
KH
785/* Store a byte C in the place pointed by DST and increment DST to the
786 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
787 nonzero, store in an appropriate multibyte form. The caller should
788 declare and set the variables `dst' and `multibytep' appropriately
789 in advance. */
790
791#define EMIT_ONE_BYTE(c) \
792 do { \
793 produced_chars++; \
794 if (multibytep) \
795 { \
796 int ch = (c); \
797 if (ch >= 0x80) \
798 ch = BYTE8_TO_CHAR (ch); \
799 CHAR_STRING_ADVANCE (ch, dst); \
800 } \
801 else \
802 *dst++ = (c); \
aa72b389 803 } while (0)
aa72b389 804
aa72b389 805
df7492f9 806/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 807
e19c3639
KH
808#define EMIT_TWO_BYTES(c1, c2) \
809 do { \
810 produced_chars += 2; \
811 if (multibytep) \
812 { \
813 int ch; \
814 \
815 ch = (c1); \
816 if (ch >= 0x80) \
817 ch = BYTE8_TO_CHAR (ch); \
818 CHAR_STRING_ADVANCE (ch, dst); \
819 ch = (c2); \
820 if (ch >= 0x80) \
821 ch = BYTE8_TO_CHAR (ch); \
822 CHAR_STRING_ADVANCE (ch, dst); \
823 } \
824 else \
825 { \
826 *dst++ = (c1); \
827 *dst++ = (c2); \
828 } \
aa72b389
KH
829 } while (0)
830
831
df7492f9
KH
/* Emit C1, C2 and C3 in that order, with the same per-byte treatment
   as EMIT_ONE_BYTE.  */
#define EMIT_THREE_BYTES(c1, c2, c3)	\
  do {					\
    EMIT_TWO_BYTES (c1, c2);		\
    EMIT_ONE_BYTE (c3);			\
  } while (0)


/* Emit C1, C2, C3 and C4 in that order, with the same per-byte
   treatment as EMIT_ONE_BYTE.  */
#define EMIT_FOUR_BYTES(c1, c2, c3, c4)		\
  do {						\
    EMIT_TWO_BYTES (c1, c2);			\
    EMIT_ONE_BYTE (c3);				\
    EMIT_ONE_BYTE (c4);				\
  } while (0)
aa72b389 844
aa72b389 845
df7492f9
KH
/* Decode CODE of CHARSET with DECODE_CHAR and store the result in C.
   The flag `charset_map_loaded' is cleared first; if it is nonzero
   after the call, the source address of CODING is recomputed with
   coding_set_source and SRC, SRC_BASE and SRC_END are shifted by the
   distance CODING->source moved (presumably because loading a map can
   relocate the source text -- confirm against DECODE_CHAR).
   SRC, SRC_BASE, SRC_END and C must be lvalues.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {									\
    charset_map_loaded = 0;						\
    (c) = DECODE_CHAR (charset, code);					\
    if (charset_map_loaded)						\
      {									\
	const unsigned char *orig = (coding)->source;			\
	EMACS_INT offset;						\
									\
	coding_set_source (coding);					\
	offset = (coding)->source - orig;				\
	(src) += offset;						\
	(src_base) += offset;						\
	(src_end) += offset;						\
      }									\
  } while (0)
862
863
df7492f9
KH
/* Make sure BYTES more bytes can be stored at DST.  If DST + BYTES
   reaches DST_END, grow the destination through alloc_destination by
   BYTES plus the number of elements remaining in the charbuf
   (CHARBUF_END - CHARBUF), then refresh DST and DST_END.  */
#define ASSURE_DESTINATION(bytes)				\
  do {								\
    if (! (dst + (bytes) < dst_end))				\
      {								\
	int more_bytes = charbuf_end - charbuf + (bytes);	\
								\
	dst = alloc_destination (coding, more_bytes, dst);	\
	dst_end = coding->destination + coding->dst_bytes;	\
      }								\
  } while (0)
aa72b389 874
aa72b389 875
aa72b389 876
df7492f9
KH
877static void
878coding_set_source (coding)
aa72b389 879 struct coding_system *coding;
aa72b389 880{
df7492f9
KH
881 if (BUFFERP (coding->src_object))
882 {
2cb26057 883 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 884
df7492f9 885 if (coding->src_pos < 0)
2cb26057 886 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 887 else
2cb26057 888 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 889 }
df7492f9 890 else if (STRINGP (coding->src_object))
aa72b389 891 {
8f924df7 892 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 893 }
df7492f9
KH
894 else
895 /* Otherwise, the source is C string and is never relocated
896 automatically. Thus we don't have to update anything. */
897 ;
898}
aa72b389 899
df7492f9
KH
900static void
901coding_set_destination (coding)
902 struct coding_system *coding;
903{
904 if (BUFFERP (coding->dst_object))
aa72b389 905 {
df7492f9 906 if (coding->src_pos < 0)
aa72b389 907 {
28f67a95
KH
908 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
909 coding->dst_bytes = (GAP_END_ADDR
910 - (coding->src_bytes - coding->consumed)
911 - coding->destination);
aa72b389 912 }
df7492f9 913 else
28f67a95
KH
914 {
915 /* We are sure that coding->dst_pos_byte is before the gap
916 of the buffer. */
917 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
918 + coding->dst_pos_byte - 1);
919 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
920 - coding->destination);
921 }
df7492f9
KH
922 }
923 else
924 /* Otherwise, the destination is C string and is never relocated
925 automatically. Thus we don't have to update anything. */
926 ;
927}
928
929
930static void
931coding_alloc_by_realloc (coding, bytes)
932 struct coding_system *coding;
933 EMACS_INT bytes;
934{
935 coding->destination = (unsigned char *) xrealloc (coding->destination,
936 coding->dst_bytes + bytes);
937 coding->dst_bytes += bytes;
938}
939
940static void
941coding_alloc_by_making_gap (coding, bytes)
942 struct coding_system *coding;
943 EMACS_INT bytes;
944{
2c78b7e1
KH
945 if (BUFFERP (coding->dst_object)
946 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
947 {
948 EMACS_INT add = coding->src_bytes - coding->consumed;
949
950 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
951 make_gap (bytes);
952 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
953 }
954 else
955 {
2c78b7e1
KH
956 Lisp_Object this_buffer;
957
958 this_buffer = Fcurrent_buffer ();
df7492f9
KH
959 set_buffer_internal (XBUFFER (coding->dst_object));
960 make_gap (bytes);
961 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 962 }
df7492f9 963}
8f924df7 964
df7492f9
KH
965
966static unsigned char *
967alloc_destination (coding, nbytes, dst)
968 struct coding_system *coding;
3e139625 969 EMACS_INT nbytes;
df7492f9
KH
970 unsigned char *dst;
971{
972 EMACS_INT offset = dst - coding->destination;
973
974 if (BUFFERP (coding->dst_object))
975 coding_alloc_by_making_gap (coding, nbytes);
aa72b389 976 else
df7492f9
KH
977 coding_alloc_by_realloc (coding, nbytes);
978 coding->result = CODING_RESULT_SUCCESS;
979 coding_set_destination (coding);
980 dst = coding->destination + offset;
981 return dst;
982}
aa72b389 983
ff0dacd7
KH
984/** Macros for annotations. */
985
/* Maximum length of annotation data (sum of annotations for
   composition and charset).  */
#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)

/* An annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK FROM TO ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   FROM and TO specify the range of text annotated.  They are relative
   to coding->src_pos (on encoding) or coding->dst_pos (on decoding).

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.  */

/* Store the four-element annotation header [-LEN MASK FROM TO] at
   BUF, advance BUF past it, and mark `coding' as annotated.  BUF must
   be a modifiable pointer lvalue.  The expansion deliberately has no
   trailing semicolon so that the macro can be used as the body of an
   `if' that has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, from, to)	\
  do {							\
    *(buf)++ = -(len);					\
    *(buf)++ = (mask);					\
    *(buf)++ = (from);					\
    *(buf)++ = (to);					\
    coding->annotated = 1;				\
  } while (0)

/* Store a composition annotation of five elements at BUF: the header
   followed by METHOD.  */
#define ADD_COMPOSITION_DATA(buf, from, to, method)			      \
  do {									      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
    *(buf)++ = (method);						      \
  } while (0)


/* Store a charset annotation of five elements at BUF: the header
   followed by the charset ID.  */
#define ADD_CHARSET_DATA(buf, from, to, id)				      \
  do {									      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to);     \
    *(buf)++ = (id);							      \
  } while (0)
1031
df7492f9
KH
1032\f
1033/*** 2. Emacs' internal format (emacs-utf-8) ***/
1034
1035
1036
1037\f
1038/*** 3. UTF-8 ***/
1039
1040/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1041 Check if a text is encoded in UTF-8. If it is, return 1, else
1042 return 0. */
df7492f9
KH
1043
/* Predicates classifying a single octet C by its high-order bits.  */
/* C is below 0x80: a complete one-octet code.  */
1044#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
/* C has the bit pattern 10xxxxxx: a trailing (extra) octet.  */
1045#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
/* C has the bit pattern 110xxxxx: leading octet of a 2-octet code.  */
1046#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* C has the bit pattern 1110xxxx: leading octet of a 3-octet code.  */
1047#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* C has the bit pattern 11110xxx: leading octet of a 4-octet code.  */
1048#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* C has the bit pattern 111110xx: leading octet of a 5-octet code.  */
1049#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1050
1051static int
ff0dacd7 1052detect_coding_utf_8 (coding, detect_info)
df7492f9 1053 struct coding_system *coding;
ff0dacd7 1054 struct coding_detection_info *detect_info;
df7492f9 1055{
8f924df7
KH
1056 const unsigned char *src = coding->source, *src_base = src;
1057 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1058 int multibytep = coding->src_multibyte;
1059 int consumed_chars = 0;
1060 int found = 0;
89528eb3 1061 int incomplete;
df7492f9 1062
ff0dacd7 1063 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1064 /* A coding system of this category is always ASCII compatible. */
1065 src += coding->head_ascii;
1066
1067 while (1)
aa72b389 1068 {
df7492f9 1069 int c, c1, c2, c3, c4;
aa72b389 1070
89528eb3 1071 incomplete = 0;
df7492f9
KH
1072 ONE_MORE_BYTE (c);
1073 if (UTF_8_1_OCTET_P (c))
1074 continue;
89528eb3 1075 incomplete = 1;
df7492f9
KH
1076 ONE_MORE_BYTE (c1);
1077 if (! UTF_8_EXTRA_OCTET_P (c1))
1078 break;
1079 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1080 {
ff0dacd7 1081 found = CATEGORY_MASK_UTF_8;
df7492f9 1082 continue;
aa72b389 1083 }
df7492f9
KH
1084 ONE_MORE_BYTE (c2);
1085 if (! UTF_8_EXTRA_OCTET_P (c2))
1086 break;
1087 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1088 {
ff0dacd7 1089 found = CATEGORY_MASK_UTF_8;
df7492f9 1090 continue;
aa72b389 1091 }
df7492f9
KH
1092 ONE_MORE_BYTE (c3);
1093 if (! UTF_8_EXTRA_OCTET_P (c3))
1094 break;
1095 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1096 {
ff0dacd7 1097 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1098 continue;
1099 }
1100 ONE_MORE_BYTE (c4);
1101 if (! UTF_8_EXTRA_OCTET_P (c4))
1102 break;
1103 if (UTF_8_5_OCTET_LEADING_P (c))
1104 {
ff0dacd7 1105 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1106 continue;
1107 }
1108 break;
aa72b389 1109 }
ff0dacd7 1110 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1111 return 0;
aa72b389 1112
df7492f9 1113 no_more_source:
89528eb3 1114 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1115 {
ff0dacd7 1116 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1117 return 0;
aa72b389 1118 }
ff0dacd7
KH
1119 detect_info->found |= found;
1120 return 1;
aa72b389
KH
1121}
1122
4ed46869 1123
b73bfc1c 1124static void
df7492f9 1125decode_coding_utf_8 (coding)
b73bfc1c 1126 struct coding_system *coding;
b73bfc1c 1127{
8f924df7
KH
1128 const unsigned char *src = coding->source + coding->consumed;
1129 const unsigned char *src_end = coding->source + coding->src_bytes;
1130 const unsigned char *src_base;
df7492f9
KH
1131 int *charbuf = coding->charbuf;
1132 int *charbuf_end = charbuf + coding->charbuf_size;
1133 int consumed_chars = 0, consumed_chars_base;
1134 int multibytep = coding->src_multibyte;
1135 Lisp_Object attr, eol_type, charset_list;
4ed46869 1136
df7492f9
KH
1137 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1138
1139 while (1)
b73bfc1c 1140 {
df7492f9 1141 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1142
df7492f9
KH
1143 src_base = src;
1144 consumed_chars_base = consumed_chars;
4af310db 1145
df7492f9
KH
1146 if (charbuf >= charbuf_end)
1147 break;
1148
1149 ONE_MORE_BYTE (c1);
1150 if (UTF_8_1_OCTET_P(c1))
1151 {
1152 c = c1;
1153 if (c == '\r')
4af310db 1154 {
df7492f9 1155 if (EQ (eol_type, Qdos))
4af310db 1156 {
df7492f9 1157 if (src == src_end)
98725083
KH
1158 {
1159 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1160 goto no_more_source;
1161 }
df7492f9
KH
1162 if (*src == '\n')
1163 ONE_MORE_BYTE (c);
4af310db 1164 }
df7492f9
KH
1165 else if (EQ (eol_type, Qmac))
1166 c = '\n';
4af310db 1167 }
4af310db 1168 }
df7492f9 1169 else
4af310db 1170 {
df7492f9
KH
1171 ONE_MORE_BYTE (c2);
1172 if (! UTF_8_EXTRA_OCTET_P (c2))
1173 goto invalid_code;
1174 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1175 {
b0edb2c5
DL
1176 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1177 /* Reject overlong sequences here and below. Encoders
1178 producing them are incorrect, they can be misleading,
1179 and they mess up read/write invariance. */
1180 if (c < 128)
1181 goto invalid_code;
4af310db 1182 }
df7492f9 1183 else
aa72b389 1184 {
df7492f9
KH
1185 ONE_MORE_BYTE (c3);
1186 if (! UTF_8_EXTRA_OCTET_P (c3))
1187 goto invalid_code;
1188 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1189 {
1190 c = (((c1 & 0xF) << 12)
1191 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1192 if (c < 0x800
1193 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1194 goto invalid_code;
1195 }
df7492f9
KH
1196 else
1197 {
1198 ONE_MORE_BYTE (c4);
1199 if (! UTF_8_EXTRA_OCTET_P (c4))
1200 goto invalid_code;
1201 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1202 {
df7492f9
KH
1203 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1204 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1205 if (c < 0x10000)
1206 goto invalid_code;
1207 }
df7492f9
KH
1208 else
1209 {
1210 ONE_MORE_BYTE (c5);
1211 if (! UTF_8_EXTRA_OCTET_P (c5))
1212 goto invalid_code;
1213 if (UTF_8_5_OCTET_LEADING_P (c1))
1214 {
1215 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1216 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1217 | (c5 & 0x3F));
b0edb2c5 1218 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1219 goto invalid_code;
1220 }
1221 else
1222 goto invalid_code;
1223 }
1224 }
aa72b389 1225 }
b73bfc1c 1226 }
df7492f9
KH
1227
1228 *charbuf++ = c;
1229 continue;
1230
1231 invalid_code:
1232 src = src_base;
1233 consumed_chars = consumed_chars_base;
1234 ONE_MORE_BYTE (c);
1235 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1236 coding->errors++;
aa72b389
KH
1237 }
1238
df7492f9
KH
1239 no_more_source:
1240 coding->consumed_char += consumed_chars_base;
1241 coding->consumed = src_base - coding->source;
1242 coding->charbuf_used = charbuf - coding->charbuf;
1243}
1244
1245
1246static int
1247encode_coding_utf_8 (coding)
1248 struct coding_system *coding;
1249{
1250 int multibytep = coding->dst_multibyte;
1251 int *charbuf = coding->charbuf;
1252 int *charbuf_end = charbuf + coding->charbuf_used;
1253 unsigned char *dst = coding->destination + coding->produced;
1254 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1255 int produced_chars = 0;
df7492f9
KH
1256 int c;
1257
1258 if (multibytep)
aa72b389 1259 {
df7492f9
KH
1260 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1261
1262 while (charbuf < charbuf_end)
b73bfc1c 1263 {
df7492f9 1264 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1265
df7492f9
KH
1266 ASSURE_DESTINATION (safe_room);
1267 c = *charbuf++;
28f67a95
KH
1268 if (CHAR_BYTE8_P (c))
1269 {
1270 c = CHAR_TO_BYTE8 (c);
1271 EMIT_ONE_BYTE (c);
1272 }
1273 else
1274 {
1275 CHAR_STRING_ADVANCE (c, pend);
1276 for (p = str; p < pend; p++)
1277 EMIT_ONE_BYTE (*p);
1278 }
b73bfc1c 1279 }
aa72b389 1280 }
df7492f9
KH
1281 else
1282 {
1283 int safe_room = MAX_MULTIBYTE_LENGTH;
1284
1285 while (charbuf < charbuf_end)
b73bfc1c 1286 {
df7492f9
KH
1287 ASSURE_DESTINATION (safe_room);
1288 c = *charbuf++;
1289 dst += CHAR_STRING (c, dst);
1290 produced_chars++;
4ed46869
KH
1291 }
1292 }
df7492f9
KH
1293 coding->result = CODING_RESULT_SUCCESS;
1294 coding->produced_char += produced_chars;
1295 coding->produced = dst - coding->destination;
1296 return 0;
4ed46869
KH
1297}
1298
b73bfc1c 1299
df7492f9 1300/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1301 Check if a text is encoded in one of UTF-16 based coding systems.
1302 If it is, return 1, else return 0. */
aa72b389 1303
df7492f9
KH
/* VAL, masked with 0xFC00, equals 0xD800, i.e. VAL is in
   0xD800..0xDBFF (the first unit of a surrogate pair).  */
1304#define UTF_16_HIGH_SURROGATE_P(val) \
1305 (((val) & 0xFC00) == 0xD800)
1306
/* VAL, masked with 0xFC00, equals 0xDC00, i.e. VAL is in
   0xDC00..0xDFFF (the second unit of a surrogate pair).  */
1307#define UTF_16_LOW_SURROGATE_P(val) \
1308 (((val) & 0xFC00) == 0xDC00)
93dec019 1309
df7492f9
KH
/* VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
1310#define UTF_16_INVALID_P(val) \
1311 (((val) == 0xFFFE) \
1312 || ((val) == 0xFFFF) \
1313 || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1314
aa72b389 1315
df7492f9 1316static int
ff0dacd7 1317detect_coding_utf_16 (coding, detect_info)
aa72b389 1318 struct coding_system *coding;
ff0dacd7 1319 struct coding_detection_info *detect_info;
aa72b389 1320{
8f924df7
KH
1321 const unsigned char *src = coding->source, *src_base = src;
1322 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1323 int multibytep = coding->src_multibyte;
1324 int consumed_chars = 0;
1325 int c1, c2;
aa72b389 1326
ff0dacd7 1327 detect_info->checked |= CATEGORY_MASK_UTF_16;
aa72b389 1328
ff0dacd7
KH
1329 if (coding->mode & CODING_MODE_LAST_BLOCK
1330 && (coding->src_bytes & 1))
1331 {
1332 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1333 return 0;
1334 }
df7492f9
KH
1335 ONE_MORE_BYTE (c1);
1336 ONE_MORE_BYTE (c2);
aa72b389 1337
df7492f9 1338 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1339 {
b49a1807
KH
1340 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1341 | CATEGORY_MASK_UTF_16_AUTO);
ff0dacd7 1342 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
aa72b389 1343 }
df7492f9 1344 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1345 {
b49a1807
KH
1346 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1347 | CATEGORY_MASK_UTF_16_AUTO);
ff0dacd7
KH
1348 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1349 }
df7492f9 1350 no_more_source:
ff0dacd7 1351 return 1;
df7492f9 1352}
aa72b389 1353
df7492f9
KH
1354static void
1355decode_coding_utf_16 (coding)
1356 struct coding_system *coding;
1357{
8f924df7
KH
1358 const unsigned char *src = coding->source + coding->consumed;
1359 const unsigned char *src_end = coding->source + coding->src_bytes;
1360 const unsigned char *src_base;
df7492f9
KH
1361 int *charbuf = coding->charbuf;
1362 int *charbuf_end = charbuf + coding->charbuf_size;
1363 int consumed_chars = 0, consumed_chars_base;
1364 int multibytep = coding->src_multibyte;
1365 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1366 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1367 int surrogate = CODING_UTF_16_SURROGATE (coding);
1368 Lisp_Object attr, eol_type, charset_list;
1369
1370 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1371
b49a1807 1372 if (bom == utf_16_with_bom)
aa72b389 1373 {
df7492f9 1374 int c, c1, c2;
4af310db 1375
aa72b389 1376 src_base = src;
df7492f9
KH
1377 ONE_MORE_BYTE (c1);
1378 ONE_MORE_BYTE (c2);
e19c3639 1379 c = (c1 << 8) | c2;
aa72b389 1380
b49a1807
KH
1381 if (endian == utf_16_big_endian
1382 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1383 {
b49a1807
KH
1384 /* The first two bytes are not BOM. Treat them as bytes
1385 for a normal character. */
1386 src = src_base;
1387 coding->errors++;
aa72b389 1388 }
b49a1807
KH
1389 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1390 }
1391 else if (bom == utf_16_detect_bom)
1392 {
1393 /* We have already tried to detect BOM and failed in
1394 detect_coding. */
1395 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1396 }
aa72b389 1397
df7492f9
KH
1398 while (1)
1399 {
1400 int c, c1, c2;
1401
1402 src_base = src;
1403 consumed_chars_base = consumed_chars;
1404
1405 if (charbuf + 2 >= charbuf_end)
1406 break;
1407
1408 ONE_MORE_BYTE (c1);
1409 ONE_MORE_BYTE (c2);
1410 c = (endian == utf_16_big_endian
e19c3639 1411 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1412 if (surrogate)
fd3ae0b9 1413 {
df7492f9 1414 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1415 {
df7492f9
KH
1416 if (endian == utf_16_big_endian)
1417 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1418 else
1419 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1420 *charbuf++ = c1;
1421 *charbuf++ = c2;
1422 coding->errors++;
1423 if (UTF_16_HIGH_SURROGATE_P (c))
1424 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1425 else
df7492f9 1426 *charbuf++ = c;
fd3ae0b9
KH
1427 }
1428 else
df7492f9
KH
1429 {
1430 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1431 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1432 *charbuf++ = c;
1433 }
fd3ae0b9 1434 }
aa72b389 1435 else
df7492f9
KH
1436 {
1437 if (UTF_16_HIGH_SURROGATE_P (c))
1438 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1439 else
1440 *charbuf++ = c;
8f924df7 1441 }
aa72b389 1442 }
df7492f9
KH
1443
1444 no_more_source:
1445 coding->consumed_char += consumed_chars_base;
1446 coding->consumed = src_base - coding->source;
1447 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1448}
b73bfc1c 1449
df7492f9
KH
1450static int
1451encode_coding_utf_16 (coding)
1452 struct coding_system *coding;
1453{
1454 int multibytep = coding->dst_multibyte;
1455 int *charbuf = coding->charbuf;
1456 int *charbuf_end = charbuf + coding->charbuf_used;
1457 unsigned char *dst = coding->destination + coding->produced;
1458 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1459 int safe_room = 8;
1460 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1461 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1462 int produced_chars = 0;
1463 Lisp_Object attrs, eol_type, charset_list;
1464 int c;
4ed46869 1465
df7492f9
KH
1466 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1467
b49a1807 1468 if (bom != utf_16_without_bom)
df7492f9
KH
1469 {
1470 ASSURE_DESTINATION (safe_room);
1471 if (big_endian)
df7492f9 1472 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1473 else
1474 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1475 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1476 }
1477
1478 while (charbuf < charbuf_end)
1479 {
1480 ASSURE_DESTINATION (safe_room);
1481 c = *charbuf++;
e19c3639
KH
1482 if (c >= MAX_UNICODE_CHAR)
1483 c = coding->default_char;
df7492f9
KH
1484
1485 if (c < 0x10000)
1486 {
1487 if (big_endian)
1488 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1489 else
1490 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1491 }
1492 else
1493 {
1494 int c1, c2;
1495
1496 c -= 0x10000;
1497 c1 = (c >> 10) + 0xD800;
1498 c2 = (c & 0x3FF) + 0xDC00;
1499 if (big_endian)
1500 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1501 else
1502 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1503 }
1504 }
1505 coding->result = CODING_RESULT_SUCCESS;
1506 coding->produced = dst - coding->destination;
1507 coding->produced_char += produced_chars;
1508 return 0;
1509}
1510
1511\f
1512/*** 6. Old Emacs' internal format (emacs-mule) ***/
1513
1514/* Emacs' internal format for representation of multiple character
1515 sets is a kind of multi-byte encoding, i.e. characters are
1516 represented by variable-length sequences of one-byte codes.
1517
1518 ASCII characters and control characters (e.g. `tab', `newline') are
1519 represented by one-byte sequences which are their ASCII codes, in
1520 the range 0x00 through 0x7F.
1521
1522 8-bit characters of the range 0x80..0x9F are represented by
1523 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1524 code + 0x20).
1525
1526 8-bit characters of the range 0xA0..0xFF are represented by
1527 one-byte sequences which are their 8-bit code.
1528
1529 The other characters are represented by a sequence of `base
1530 leading-code', optional `extended leading-code', and one or two
1531 `position-code's. The length of the sequence is determined by the
1532 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1533 whereas extended leading-code and position-code take the range 0xA0
1534 through 0xFF. See `charset.h' for more details about leading-code
1535 and position-code.
1536
1537 --- CODE RANGE of Emacs' internal format ---
1538 character set range
1539 ------------- -----
1540 ascii 0x00..0x7F
1541 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1542 eight-bit-graphic 0xA0..0xBF
1543 ELSE 0x81..0x9D + [0xA0..0xFF]+
1544 ---------------------------------------------
1545
1546 As this is the internal character representation, the format is
1547 usually not used externally (i.e. in a file or in a data sent to a
1548 process). But, it is possible to have a text externally in this
1549 format (i.e. by encoding by the coding system `emacs-mule').
1550
1551 In that case, a sequence of one-byte codes has a slightly different
1552 form.
1553
1554 At first, all characters in eight-bit-control are represented by
1555 one-byte sequences which are their 8-bit code.
1556
1557 Next, character composition data are represented by the byte
1558 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1559 where,
1560 METHOD is 0xF0 plus one of composition method (enum
1561 composition_method),
1562
1563 BYTES is 0xA0 plus a byte length of this composition data,
1564
1565 CHARS is 0x20 plus a number of characters composed by this
1566 data,
1567
1568 COMPONENTs are characters of multibye form or composition
1569 rules encoded by two-byte of ASCII codes.
1570
1571 In addition, for backward compatibility, the following formats are
1572 also recognized as composition data on decoding.
1573
1574 0x80 MSEQ ...
1575 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1576
1577 Here,
1578 MSEQ is a multibyte form but in these special format:
1579 ASCII: 0xA0 ASCII_CODE+0x80,
1580 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1581 RULE is a one byte code of the range 0xA0..0xF0 that
1582 represents a composition rule.
1583 */
1584
1585char emacs_mule_bytes[256];
1586
df7492f9 1587int
ff0dacd7 1588emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1589 struct coding_system *coding;
781d7a48 1590 unsigned char *src;
ff0dacd7 1591 int *nbytes, *nchars, *id;
df7492f9 1592{
8f924df7
KH
1593 const unsigned char *src_end = coding->source + coding->src_bytes;
1594 const unsigned char *src_base = src;
df7492f9 1595 int multibytep = coding->src_multibyte;
df7492f9
KH
1596 struct charset *charset;
1597 unsigned code;
1598 int c;
1599 int consumed_chars = 0;
1600
1601 ONE_MORE_BYTE (c);
df7492f9
KH
1602 switch (emacs_mule_bytes[c])
1603 {
1604 case 2:
1605 if (! (charset = emacs_mule_charset[c]))
1606 goto invalid_code;
1607 ONE_MORE_BYTE (c);
1608 code = c & 0x7F;
1609 break;
1610
1611 case 3:
7c78e542
KH
1612 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1613 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
b73bfc1c 1614 {
df7492f9
KH
1615 ONE_MORE_BYTE (c);
1616 if (! (charset = emacs_mule_charset[c]))
1617 goto invalid_code;
1618 ONE_MORE_BYTE (c);
1619 code = c & 0x7F;
b73bfc1c
KH
1620 }
1621 else
1622 {
df7492f9
KH
1623 if (! (charset = emacs_mule_charset[c]))
1624 goto invalid_code;
1625 ONE_MORE_BYTE (c);
781d7a48 1626 code = (c & 0x7F) << 8;
df7492f9
KH
1627 ONE_MORE_BYTE (c);
1628 code |= c & 0x7F;
1629 }
1630 break;
1631
1632 case 4:
781d7a48 1633 ONE_MORE_BYTE (c);
df7492f9
KH
1634 if (! (charset = emacs_mule_charset[c]))
1635 goto invalid_code;
1636 ONE_MORE_BYTE (c);
781d7a48 1637 code = (c & 0x7F) << 8;
df7492f9
KH
1638 ONE_MORE_BYTE (c);
1639 code |= c & 0x7F;
1640 break;
1641
1642 case 1:
1643 code = c;
9d123124
KH
1644 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1645 ? charset_ascii : charset_eight_bit);
df7492f9
KH
1646 break;
1647
1648 default:
1649 abort ();
1650 }
1651 c = DECODE_CHAR (charset, code);
1652 if (c < 0)
1653 goto invalid_code;
1654 *nbytes = src - src_base;
1655 *nchars = consumed_chars;
ff0dacd7
KH
1656 if (id)
1657 *id = charset->id;
df7492f9
KH
1658 return c;
1659
1660 no_more_source:
1661 return -2;
1662
1663 invalid_code:
1664 return -1;
1665}
1666
1667
1668/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1669 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1670 else return 0. */
df7492f9
KH
1671
1672static int
ff0dacd7 1673detect_coding_emacs_mule (coding, detect_info)
df7492f9 1674 struct coding_system *coding;
ff0dacd7 1675 struct coding_detection_info *detect_info;
df7492f9 1676{
8f924df7
KH
1677 const unsigned char *src = coding->source, *src_base = src;
1678 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1679 int multibytep = coding->src_multibyte;
1680 int consumed_chars = 0;
1681 int c;
1682 int found = 0;
89528eb3 1683 int incomplete;
df7492f9 1684
ff0dacd7 1685 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1686 /* A coding system of this category is always ASCII compatible. */
1687 src += coding->head_ascii;
1688
1689 while (1)
1690 {
89528eb3 1691 incomplete = 0;
df7492f9 1692 ONE_MORE_BYTE (c);
89528eb3 1693 incomplete = 1;
df7492f9
KH
1694
1695 if (c == 0x80)
1696 {
1697 /* Perhaps the start of composite character. We simple skip
1698 it because analyzing it is too heavy for detecting. But,
1699 at least, we check that the composite character
1700 constitues of more than 4 bytes. */
8f924df7 1701 const unsigned char *src_base;
df7492f9
KH
1702
1703 repeat:
1704 src_base = src;
1705 do
1706 {
1707 ONE_MORE_BYTE (c);
1708 }
1709 while (c >= 0xA0);
1710
1711 if (src - src_base <= 4)
1712 break;
ff0dacd7 1713 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1714 if (c == 0x80)
1715 goto repeat;
b73bfc1c 1716 }
df7492f9
KH
1717
1718 if (c < 0x80)
b73bfc1c 1719 {
df7492f9
KH
1720 if (c < 0x20
1721 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1722 break;
1723 }
1724 else
1725 {
8f924df7 1726 const unsigned char *src_base = src - 1;
df7492f9
KH
1727
1728 do
1729 {
1730 ONE_MORE_BYTE (c);
1731 }
1732 while (c >= 0xA0);
1733 if (src - src_base != emacs_mule_bytes[*src_base])
1734 break;
ff0dacd7 1735 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1736 }
1737 }
ff0dacd7 1738 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1739 return 0;
1740
1741 no_more_source:
89528eb3
KH
1742 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1743 {
ff0dacd7 1744 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1745 return 0;
1746 }
ff0dacd7
KH
1747 detect_info->found |= found;
1748 return 1;
4ed46869
KH
1749}
1750
b73bfc1c 1751
df7492f9
KH
1752/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1753
1754/* Decode a character represented as a component of composition
1755 sequence of Emacs 20/21 style at SRC. Set C to that character and
1756 update SRC to the head of next character (or an encoded composition
1757 rule). If SRC doesn't points a composition component, set C to -1.
1758 If SRC points an invalid byte sequence, global exit by a return
1759 value 0. */
1760
/* Implementation notes: the body is wrapped in `if (1) { ... } else'
   instead of `do { ... } while (0)' so that the `break' statements
   inside it act on the construct enclosing the macro call, not on the
   macro itself; the trailing `else' absorbs the caller's semicolon.
   `break' is taken when SRC has reached SRC_END or emacs_mule_char
   returns -2; any other negative result jumps to `invalid_code'.  On
   success the character is stored at *BUF (BUF is advanced), SRC is
   advanced by the bytes used and CONSUMED_CHARS by the chars used.  */
1761#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1762 if (1) \
1763 { \
1764 int c; \
1765 int nbytes, nchars; \
1766 \
1767 if (src == src_end) \
1768 break; \
ff0dacd7 1769 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
df7492f9
KH
1770 if (c < 0) \
1771 { \
1772 if (c == -2) \
1773 break; \
1774 goto invalid_code; \
1775 } \
1776 *buf++ = c; \
1777 src += nbytes; \
1778 consumed_chars += nchars; \
1779 } \
1780 else
1781
1782
/* Decode a composition rule represented as a component of composition
   sequence of Emacs 20 style at SRC.  The rule is one byte whose
   value minus 0x20 must be in 0..80; it is split into GREF (quotient
   by 9) and NREF (remainder).  Store the decoded rule in *BUF, and
   increment BUF.  If SRC is at the end or the byte is out of range,
   jump to `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)		\
  do {								\
    int c, gref, nref;						\
								\
    if (src >= src_end)						\
      goto invalid_code;					\
    ONE_MORE_BYTE_NO_CHECK (c);					\
    c -= 0x20;							\
    if (! (c >= 0 && c < 81))					\
      goto invalid_code;					\
								\
    gref = c / 9;						\
    nref = c % 9;						\
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);		\
  } while (0)
1802
1803
781d7a48
KH
/* Decode a composition rule represented as a component of composition
   sequence of Emacs 21 style at SRC.  The rule is two bytes, GREF and
   NREF, each stored with 0x20 added.  Store the decoded rule in *BUF,
   and increment BUF.  If fewer than two bytes remain before SRC_END
   or either value is outside 0..80, jump to `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)		\
  do {								\
    int gref, nref;						\
								\
    if (src + 1 >= src_end)					\
      goto invalid_code;					\
    ONE_MORE_BYTE_NO_CHECK (gref);				\
    gref -= 0x20;						\
    ONE_MORE_BYTE_NO_CHECK (nref);				\
    nref -= 0x20;						\
    if (! (gref >= 0 && gref < 81				\
	   && nref >= 0 && nref < 81))				\
      goto invalid_code;					\
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);		\
  } while (0)
1824
1825
/* Decode the header of an Emacs 21 style composition whose METHOD
   byte is already in C, and store a composition annotation in
   CHARBUF.  For every method other than COMPOSITION_RELATIVE the
   components that follow are decoded too and the annotation length is
   adjusted by their number.  FROM is computed from
   coding->produced_char, a character count like the other
   composition decoders use, not from the byte count
   coding->produced.  Jumps to `invalid_code' on malformed input.  */
#define DECODE_EMACS_MULE_21_COMPOSITION(c)				\
  do {									\
    /* Emacs 21 style format.  The first three bytes at SRC are	\
       (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is	\
       the byte length of this composition information, CHARS is the	\
       number of characters composed by this composition.  */		\
    enum composition_method method = c - 0xF2;				\
    int *charbuf_base = charbuf;					\
    int from, to;							\
    int consumed_chars_limit;						\
    int nbytes, nchars;							\
									\
    ONE_MORE_BYTE (c);							\
    nbytes = c - 0xA0;							\
    if (nbytes < 3)							\
      goto invalid_code;						\
    ONE_MORE_BYTE (c);							\
    nchars = c - 0xA0;							\
    from = coding->produced_char + char_offset;				\
    to = from + nchars;							\
    ADD_COMPOSITION_DATA (charbuf, from, to, method);			\
    consumed_chars_limit = consumed_chars_base + nbytes;		\
    if (method != COMPOSITION_RELATIVE)					\
      {									\
	int i = 0;							\
	while (consumed_chars < consumed_chars_limit)			\
	  {								\
	    /* Odd components are rules unless the method is		\
	       COMPOSITION_WITH_ALTCHARS.  */				\
	    if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)		\
	      DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);		\
	    else							\
	      DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);		\
	    i++;							\
	  }								\
	/* Reached only when the loop was left by `break'.  */		\
	if (consumed_chars < consumed_chars_limit)			\
	  goto invalid_code;						\
	/* The length element is stored negated, so subtracting	\
	   extends it by the I components.  */				\
	charbuf_base[0] -= i;						\
      }									\
  } while (0)
93dec019 1864
aa72b389 1865
df7492f9
KH
/* Decode an Emacs 20 style relative composition.  SRC is rewound to
   SRC_BASE, one byte (the 0x80 marker) is skipped, and up to
   MAX_COMPOSITION_COMPONENTS characters are decoded into a local
   array; the loop is left early by the `break' inside
   DECODE_EMACS_MULE_COMPOSITION_CHAR.  Fewer than two characters is
   treated as invalid.  A composition annotation and then the
   characters are appended to CHARBUF.
   NOTE(review): the annotation length stays at the fixed 5 written by
   ADD_COMPOSITION_DATA even though I characters follow, and no check
   against CHARBUF_END is visible here -- confirm both are intended.  */
1866#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1867 do { \
1868 /* Emacs 20 style format for relative composition. */ \
1869 /* Store multibyte form of characters to be composed. */ \
ff0dacd7 1870 enum composition_method method = COMPOSITION_RELATIVE; \
df7492f9
KH
1871 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1872 int *buf = components; \
1873 int i, j; \
ff0dacd7 1874 int from, to; \
df7492f9
KH
1875 \
1876 src = src_base; \
1877 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1878 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1879 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1880 if (i < 2) \
1881 goto invalid_code; \
ff0dacd7
KH
1882 from = coding->produced_char + char_offset; \
1883 to = from + i; \
1884 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
df7492f9
KH
1885 for (j = 0; j < i; j++) \
1886 *charbuf++ = components[j]; \
1887 } while (0)
1888
1889
/* Decode a rule-base composition written in the Emacs 20 emacs-mule
   format.

   One component character is decoded, then
   MAX_COMPOSITION_COMPONENTS rounds of (rule, character) are decoded
   into a local array.  A COMPOSITION_WITH_RULE annotation is appended
   to CHARBUF, followed by the first I collected elements and then by
   every second element of them.

   Jumps to `invalid_code' if the collected element count is even (or
   no round was counted), and to `no_more_source' if CHARBUF does not
   have room for the data about to be stored.  This macro relies on
   the locals of decode_coding_emacs_mule.

   The room check bails out only when CHARBUF is too full (the test
   used to be inverted), and the annotation is written to CHARBUF, as
   in the relative-composition macro, not to the local scratch
   pointer BUF.

   NOTE(review): 1 + 2 * MAX_COMPOSITION_COMPONENTS elements may be
   stored while COMPONENTS holds MAX_COMPOSITION_COMPONENTS * 2 - 1;
   whether the helper macros stop early is not visible here --
   confirm the array cannot overflow.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)                    \
  do {                                                                  \
    enum composition_method method = COMPOSITION_WITH_RULE;             \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
    int *buf = components;                                              \
    int i, j;                                                           \
    int from, to;                                                       \
                                                                        \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                           \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)                    \
      {                                                                 \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);                    \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                       \
      }                                                                 \
    if (i < 1 || (buf - components) % 2 == 0)                           \
      goto invalid_code;                                                \
    /* Stop when there is NOT enough room left in CHARBUF.  */          \
    if (charbuf + i + (i / 2) + 1 >= charbuf_end)                       \
      goto no_more_source;                                              \
    from = coding->produced_char + char_offset;                         \
    to = from + i;                                                      \
    ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
    for (j = 0; j < i; j++)                                             \
      *charbuf++ = components[j];                                       \
    for (j = 0; j < i; j += 2)                                          \
      *charbuf++ = components[j];                                       \
  } while (0)
1918
aa72b389
KH
1919
1920static void
df7492f9 1921decode_coding_emacs_mule (coding)
aa72b389 1922 struct coding_system *coding;
aa72b389 1923{
8f924df7
KH
1924 const unsigned char *src = coding->source + coding->consumed;
1925 const unsigned char *src_end = coding->source + coding->src_bytes;
1926 const unsigned char *src_base;
df7492f9 1927 int *charbuf = coding->charbuf;
ff0dacd7 1928 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 1929 int consumed_chars = 0, consumed_chars_base;
df7492f9
KH
1930 int multibytep = coding->src_multibyte;
1931 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
1932 int char_offset = coding->produced_char;
1933 int last_offset = char_offset;
1934 int last_id = charset_ascii;
aa72b389 1935
df7492f9 1936 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1937
aa72b389
KH
1938 while (1)
1939 {
df7492f9
KH
1940 int c;
1941
aa72b389 1942 src_base = src;
df7492f9
KH
1943 consumed_chars_base = consumed_chars;
1944
1945 if (charbuf >= charbuf_end)
1946 break;
aa72b389 1947
df7492f9
KH
1948 ONE_MORE_BYTE (c);
1949
1950 if (c < 0x80)
aa72b389 1951 {
df7492f9
KH
1952 if (c == '\r')
1953 {
1954 if (EQ (eol_type, Qdos))
1955 {
1956 if (src == src_end)
98725083
KH
1957 {
1958 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1959 goto no_more_source;
1960 }
df7492f9
KH
1961 if (*src == '\n')
1962 ONE_MORE_BYTE (c);
1963 }
1964 else if (EQ (eol_type, Qmac))
1965 c = '\n';
1966 }
1967 *charbuf++ = c;
1968 char_offset++;
aa72b389 1969 }
df7492f9
KH
1970 else if (c == 0x80)
1971 {
df7492f9 1972 ONE_MORE_BYTE (c);
781d7a48
KH
1973 if (c - 0xF2 >= COMPOSITION_RELATIVE
1974 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1975 DECODE_EMACS_MULE_21_COMPOSITION (c);
1976 else if (c < 0xC0)
1977 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1978 else if (c == 0xFF)
1979 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1980 else
1981 goto invalid_code;
1982 }
1983 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1984 {
1985 int nbytes, nchars;
ff0dacd7
KH
1986 int id;
1987
781d7a48
KH
1988 src = src_base;
1989 consumed_chars = consumed_chars_base;
ff0dacd7 1990 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
1991 if (c < 0)
1992 {
1993 if (c == -2)
1994 break;
1995 goto invalid_code;
1996 }
ff0dacd7
KH
1997 if (last_id != id)
1998 {
1999 if (last_id != charset_ascii)
2000 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2001 last_id = id;
2002 last_offset = char_offset;
2003 }
df7492f9 2004 *charbuf++ = c;
781d7a48
KH
2005 src += nbytes;
2006 consumed_chars += nchars;
df7492f9
KH
2007 char_offset++;
2008 }
2009 continue;
2010
2011 invalid_code:
2012 src = src_base;
2013 consumed_chars = consumed_chars_base;
2014 ONE_MORE_BYTE (c);
2015 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2016 char_offset++;
df7492f9
KH
2017 coding->errors++;
2018 }
2019
2020 no_more_source:
ff0dacd7
KH
2021 if (last_id != charset_ascii)
2022 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
2023 coding->consumed_char += consumed_chars_base;
2024 coding->consumed = src_base - coding->source;
2025 coding->charbuf_used = charbuf - coding->charbuf;
2026}
2027
2028
/* Store in CODES[0] and CODES[1] the leading code(s) for the
   emacs-mule charset ID.

   ID below 0xA0 is its own single leading code (CODES[1] is 0).
   Otherwise CODES[1] is ID and CODES[0] is 0x9A for ID below 0xE0,
   0x9B for ID below 0xF0, 0x9C for ID below 0xF5, and 0x9D for
   anything larger.

   The expansion has no trailing semicolon, so the macro can be used
   as a single statement, including before `else'.  ID and CODES are
   evaluated more than once.  */

#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2042
aa72b389 2043
df7492f9
KH
2044static int
2045encode_coding_emacs_mule (coding)
2046 struct coding_system *coding;
2047{
2048 int multibytep = coding->dst_multibyte;
2049 int *charbuf = coding->charbuf;
2050 int *charbuf_end = charbuf + coding->charbuf_used;
2051 unsigned char *dst = coding->destination + coding->produced;
2052 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2053 int safe_room = 8;
df7492f9
KH
2054 int produced_chars = 0;
2055 Lisp_Object attrs, eol_type, charset_list;
2056 int c;
ff0dacd7 2057 int preferred_charset_id = -1;
df7492f9
KH
2058
2059 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2060
2061 while (charbuf < charbuf_end)
2062 {
2063 ASSURE_DESTINATION (safe_room);
2064 c = *charbuf++;
ff0dacd7
KH
2065
2066 if (c < 0)
2067 {
2068 /* Handle an annotation. */
2069 switch (*charbuf)
2070 {
2071 case CODING_ANNOTATE_COMPOSITION_MASK:
2072 /* Not yet implemented. */
2073 break;
2074 case CODING_ANNOTATE_CHARSET_MASK:
2075 preferred_charset_id = charbuf[3];
2076 if (preferred_charset_id >= 0
2077 && NILP (Fmemq (make_number (preferred_charset_id),
2078 charset_list)))
2079 preferred_charset_id = -1;
2080 break;
2081 default:
2082 abort ();
2083 }
2084 charbuf += -c - 1;
2085 continue;
2086 }
2087
df7492f9
KH
2088 if (ASCII_CHAR_P (c))
2089 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2090 else if (CHAR_BYTE8_P (c))
2091 {
2092 c = CHAR_TO_BYTE8 (c);
2093 EMIT_ONE_BYTE (c);
2094 }
df7492f9 2095 else
aa72b389 2096 {
df7492f9
KH
2097 struct charset *charset;
2098 unsigned code;
2099 int dimension;
2100 int emacs_mule_id;
2101 unsigned char leading_codes[2];
2102
ff0dacd7
KH
2103 if (preferred_charset_id >= 0)
2104 {
2105 charset = CHARSET_FROM_ID (preferred_charset_id);
2106 if (! CHAR_CHARSET_P (c, charset))
2107 charset = char_charset (c, charset_list, NULL);
2108 }
2109 else
2110 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2111 if (! charset)
2112 {
2113 c = coding->default_char;
2114 if (ASCII_CHAR_P (c))
2115 {
2116 EMIT_ONE_ASCII_BYTE (c);
2117 continue;
2118 }
2119 charset = char_charset (c, charset_list, &code);
2120 }
2121 dimension = CHARSET_DIMENSION (charset);
2122 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2123 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2124 EMIT_ONE_BYTE (leading_codes[0]);
2125 if (leading_codes[1])
2126 EMIT_ONE_BYTE (leading_codes[1]);
2127 if (dimension == 1)
2128 EMIT_ONE_BYTE (code);
aa72b389 2129 else
df7492f9
KH
2130 {
2131 EMIT_ONE_BYTE (code >> 8);
2132 EMIT_ONE_BYTE (code & 0xFF);
2133 }
aa72b389 2134 }
aa72b389 2135 }
df7492f9
KH
2136 coding->result = CODING_RESULT_SUCCESS;
2137 coding->produced_char += produced_chars;
2138 coding->produced = dst - coding->destination;
2139 return 0;
aa72b389 2140}
b73bfc1c 2141
4ed46869 2142\f
df7492f9 2143/*** 7. ISO2022 handlers ***/
4ed46869
KH
2144
2145/* The following note describes the coding system ISO2022 briefly.
39787efd 2146 Since the intention of this note is to help understand the
5a936b46 2147 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2148 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2149 original document of ISO2022. This is equivalent to the standard
cfb43547 2150 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2151
2152 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2153 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2154 is encoded using bytes less than 128. This may make the encoded
2155 text a little bit longer, but the text passes more easily through
cfb43547 2156 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2157 Significant Bit).
b73bfc1c 2158
cfb43547
DL
2159 There are two kinds of character sets: control character sets and
2160 graphic character sets. The former contain control characters such
4ed46869 2161 as `newline' and `escape' to provide control functions (control
39787efd 2162 functions are also provided by escape sequences). The latter
cfb43547 2163 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2164 two control character sets and many graphic character sets.
2165
2166 Graphic character sets are classified into one of the following
39787efd
KH
2167 four classes, according to the number of bytes (DIMENSION) and
2168 number of characters in one dimension (CHARS) of the set:
2169 - DIMENSION1_CHARS94
2170 - DIMENSION1_CHARS96
2171 - DIMENSION2_CHARS94
2172 - DIMENSION2_CHARS96
2173
2174 In addition, each character set is assigned an identification tag,
cfb43547 2175 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2176 hereafter). The <F> of each character set is decided by ECMA(*)
2177 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2178 (0x30..0x3F are for private use only).
4ed46869
KH
2179
2180 Note (*): ECMA = European Computer Manufacturers Association
2181
cfb43547 2182 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2183 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2184 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2185 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2186 o DIMENSION2_CHARS96 -- none for the moment
2187
39787efd 2188 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2189 C0 [0x00..0x1F] -- control character plane 0
2190 GL [0x20..0x7F] -- graphic character plane 0
2191 C1 [0x80..0x9F] -- control character plane 1
2192 GR [0xA0..0xFF] -- graphic character plane 1
2193
2194 A control character set is directly designated and invoked to C0 or
39787efd
KH
2195 C1 by an escape sequence. The most common case is that:
2196 - ISO646's control character set is designated/invoked to C0, and
2197 - ISO6429's control character set is designated/invoked to C1,
2198 and usually these designations/invocations are omitted in encoded
2199 text. In a 7-bit environment, only C0 can be used, and a control
2200 character for C1 is encoded by an appropriate escape sequence to
2201 fit into the environment. All control characters for C1 are
2202 defined to have corresponding escape sequences.
4ed46869
KH
2203
2204 A graphic character set is at first designated to one of four
2205 graphic registers (G0 through G3), then these graphic registers are
2206 invoked to GL or GR. These designations and invocations can be
2207 done independently. The most common case is that G0 is invoked to
39787efd
KH
2208 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2209 these invocations and designations are omitted in encoded text.
2210 In a 7-bit environment, only GL can be used.
4ed46869 2211
39787efd
KH
2212 When a graphic character set of CHARS94 is invoked to GL, codes
2213 0x20 and 0x7F of the GL area work as control characters SPACE and
2214 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2215 be used.
4ed46869
KH
2216
2217 There are two ways of invocation: locking-shift and single-shift.
2218 With locking-shift, the invocation lasts until the next different
39787efd
KH
2219 invocation, whereas with single-shift, the invocation affects the
2220 following character only and doesn't affect the locking-shift
2221 state. Invocations are done by the following control characters or
2222 escape sequences:
4ed46869
KH
2223
2224 ----------------------------------------------------------------------
39787efd 2225 abbrev function cntrl escape seq description
4ed46869 2226 ----------------------------------------------------------------------
39787efd
KH
2227 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2228 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2229 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2230 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2231 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2232 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2233 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2234 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2235 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2236 ----------------------------------------------------------------------
39787efd
KH
2237 (*) These are not used by any known coding system.
2238
2239 Control characters for these functions are defined by macros
2240 ISO_CODE_XXX in `coding.h'.
4ed46869 2241
39787efd 2242 Designations are done by the following escape sequences:
4ed46869
KH
2243 ----------------------------------------------------------------------
2244 escape sequence description
2245 ----------------------------------------------------------------------
2246 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2247 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2248 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2249 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2250 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2251 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2252 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2253 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2254 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2255 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2256 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2257 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2258 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2259 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2260 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2261 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2262 ----------------------------------------------------------------------
2263
2264 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2265 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2266
2267 Note (*): Although these designations are not allowed in ISO2022,
2268 Emacs accepts them on decoding, and produces them on encoding
39787efd 2269 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2270 7-bit environment, non-locking-shift, and non-single-shift.
2271
2272 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2273 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2274
cfb43547 2275 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2276 same multilingual text in ISO2022. Actually, there exist many
2277 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2278 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2279 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2280 localized platforms), and all of these are variants of ISO2022.
2281
2282 In addition to the above, Emacs handles two more kinds of escape
2283 sequences: ISO6429's direction specification and Emacs' private
2284 sequence for specifying character composition.
2285
39787efd 2286 ISO6429's direction specification takes the following form:
4ed46869
KH
2287 o CSI ']' -- end of the current direction
2288 o CSI '0' ']' -- end of the current direction
2289 o CSI '1' ']' -- start of left-to-right text
2290 o CSI '2' ']' -- start of right-to-left text
2291 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2292 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2293
2294 Character composition specification takes the following form:
ec6d2bb8
KH
2295 o ESC '0' -- start relative composition
2296 o ESC '1' -- end composition
2297 o ESC '2' -- start rule-base composition (*)
2298 o ESC '3' -- start relative composition with alternate chars (**)
2299 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2300 Since these are not standard escape sequences of any ISO standard,
cfb43547 2301 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2302
5a936b46
DL
2303 (*) This form is used only in Emacs 20.7 and older versions,
2304 but newer versions can safely decode it.
cfb43547 2305 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2306 and older versions can't decode it.
ec6d2bb8 2307
cfb43547 2308 Here's a list of example usages of these composition escape
b73bfc1c 2309 sequences (categorized by `enum composition_method').
ec6d2bb8 2310
b73bfc1c 2311 COMPOSITION_RELATIVE:
ec6d2bb8 2312 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2313 COMPOSITION_WITH_RULE:
ec6d2bb8 2314 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2315 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2316 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2317 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2318 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2319
/* Table of 256 entries, one per byte value, each holding an
   `enum iso_code_class_type'.  */
2320enum iso_code_class_type iso_code_class[256];
2321
df7492f9
KH
/* Non-zero if the charset ID is safe for CODING, i.e. ID lies within
   0..CODING->max_charset_id and its byte in CODING->safe_charsets is
   not the "unsupported" marker 255.

   The byte is compared as an unsigned char so that the result does
   not depend on whether plain `char' is signed, and a negative ID is
   rejected before it is used as an index.  ID is evaluated more than
   once.  */

#define SAFE_CHARSET_P(coding, id)                              \
  ((id) >= 0                                                    \
   && (id) <= (coding)->max_charset_id                          \
   && (unsigned char) (coding)->safe_charsets[id] != 255)
2325
2326
/* Non-zero if CODING_ISO_INITIAL for register 1 of the coding system
   registered for CATEGORY in `coding_categories' is not negative.  */

#define SHIFT_OUT_OK(category)                                          \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2329
2330static void
f0064e1f
DL
2331setup_iso_safe_charsets (attrs)
2332 Lisp_Object attrs;
df7492f9
KH
2333{
2334 Lisp_Object charset_list, safe_charsets;
2335 Lisp_Object request;
2336 Lisp_Object reg_usage;
2337 Lisp_Object tail;
2338 int reg94, reg96;
2339 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2340 int max_charset_id;
2341
2342 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2343 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2344 && ! EQ (charset_list, Viso_2022_charset_list))
2345 {
2346 CODING_ATTR_CHARSET_LIST (attrs)
2347 = charset_list = Viso_2022_charset_list;
2348 ASET (attrs, coding_attr_safe_charsets, Qnil);
2349 }
2350
2351 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2352 return;
2353
2354 max_charset_id = 0;
2355 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2356 {
2357 int id = XINT (XCAR (tail));
2358 if (max_charset_id < id)
2359 max_charset_id = id;
2360 }
d46c5b12 2361
df7492f9
KH
2362 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2363 make_number (255));
2364 request = AREF (attrs, coding_attr_iso_request);
2365 reg_usage = AREF (attrs, coding_attr_iso_usage);
2366 reg94 = XINT (XCAR (reg_usage));
2367 reg96 = XINT (XCDR (reg_usage));
2368
2369 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2370 {
2371 Lisp_Object id;
2372 Lisp_Object reg;
2373 struct charset *charset;
2374
2375 id = XCAR (tail);
2376 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2377 reg = Fcdr (Fassq (id, request));
df7492f9 2378 if (! NILP (reg))
8f924df7 2379 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2380 else if (charset->iso_chars_96)
2381 {
2382 if (reg96 < 4)
8f924df7 2383 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2384 }
2385 else
2386 {
2387 if (reg94 < 4)
8f924df7 2388 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2389 }
2390 }
2391 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2392}
d46c5b12 2393
b6871cc7 2394
4ed46869 2395/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2396 Check if a text is encoded in one of ISO-2022 based codig systems.
2397 If it is, return 1, else return 0. */
4ed46869 2398
0a28aafb 2399static int
ff0dacd7 2400detect_coding_iso_2022 (coding, detect_info)
df7492f9 2401 struct coding_system *coding;
ff0dacd7 2402 struct coding_detection_info *detect_info;
4ed46869 2403{
8f924df7
KH
2404 const unsigned char *src = coding->source, *src_base = src;
2405 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2406 int multibytep = coding->src_multibyte;
ff0dacd7 2407 int single_shifting = 0;
df7492f9
KH
2408 int id;
2409 int c, c1;
2410 int consumed_chars = 0;
2411 int i;
ff0dacd7
KH
2412 int rejected = 0;
2413 int found = 0;
2414
2415 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2416
2417 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2418 {
2419 struct coding_system *this = &(coding_categories[i]);
2420 Lisp_Object attrs, val;
2421
2422 attrs = CODING_ID_ATTRS (this->id);
2423 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2424 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2425 setup_iso_safe_charsets (attrs);
2426 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2427 this->max_charset_id = SCHARS (val) - 1;
2428 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2429 }
2430
2431 /* A coding system of this category is always ASCII compatible. */
2432 src += coding->head_ascii;
3f003981 2433
ff0dacd7 2434 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2435 {
df7492f9 2436 ONE_MORE_BYTE (c);
4ed46869
KH
2437 switch (c)
2438 {
2439 case ISO_CODE_ESC:
74383408
KH
2440 if (inhibit_iso_escape_detection)
2441 break;
f46869e4 2442 single_shifting = 0;
df7492f9 2443 ONE_MORE_BYTE (c);
d46c5b12 2444 if (c >= '(' && c <= '/')
4ed46869 2445 {
bf9cdd4e 2446 /* Designation sequence for a charset of dimension 1. */
df7492f9 2447 ONE_MORE_BYTE (c1);
d46c5b12 2448 if (c1 < ' ' || c1 >= 0x80
df7492f9 2449 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2450 /* Invalid designation sequence. Just ignore. */
2451 break;
bf9cdd4e
KH
2452 }
2453 else if (c == '$')
2454 {
2455 /* Designation sequence for a charset of dimension 2. */
df7492f9 2456 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2457 if (c >= '@' && c <= 'B')
2458 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2459 id = iso_charset_table[1][0][c];
bf9cdd4e 2460 else if (c >= '(' && c <= '/')
bcf26d6a 2461 {
df7492f9 2462 ONE_MORE_BYTE (c1);
d46c5b12 2463 if (c1 < ' ' || c1 >= 0x80
df7492f9 2464 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2465 /* Invalid designation sequence. Just ignore. */
2466 break;
bcf26d6a 2467 }
bf9cdd4e 2468 else
ff0dacd7 2469 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2470 break;
2471 }
ae9ff118 2472 else if (c == 'N' || c == 'O')
d46c5b12 2473 {
ae9ff118 2474 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2475 single_shifting = 1;
2476 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2477 break;
4ed46869 2478 }
ec6d2bb8
KH
2479 else if (c >= '0' && c <= '4')
2480 {
2481 /* ESC <Fp> for start/end composition. */
ff0dacd7 2482 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2483 break;
2484 }
bf9cdd4e 2485 else
df7492f9 2486 {
ff0dacd7 2487 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2488 break;
2489 }
d46c5b12
KH
2490
2491 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2492 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2493 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2494 id))
ff0dacd7 2495 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2496 else
ff0dacd7 2497 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2498 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2499 id))
ff0dacd7 2500 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2501 else
ff0dacd7 2502 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2503 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2504 id))
ff0dacd7 2505 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2506 else
ff0dacd7 2507 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2508 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2509 id))
ff0dacd7 2510 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2511 else
ff0dacd7 2512 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2513 break;
2514
4ed46869 2515 case ISO_CODE_SO:
d46c5b12 2516 case ISO_CODE_SI:
ff0dacd7 2517 /* Locking shift out/in. */
74383408
KH
2518 if (inhibit_iso_escape_detection)
2519 break;
f46869e4 2520 single_shifting = 0;
ff0dacd7
KH
2521 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2522 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2523 break;
2524
4ed46869 2525 case ISO_CODE_CSI:
ff0dacd7 2526 /* Control sequence introducer. */
f46869e4 2527 single_shifting = 0;
ff0dacd7
KH
2528 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2529 found |= CATEGORY_MASK_ISO_8_ELSE;
2530 goto check_extra_latin;
2531
2532
4ed46869
KH
2533 case ISO_CODE_SS2:
2534 case ISO_CODE_SS3:
ff0dacd7
KH
2535 /* Single shift. */
2536 if (inhibit_iso_escape_detection)
2537 break;
2538 single_shifting = 1;
2539 rejected |= CATEGORY_MASK_ISO_7BIT;
2540 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2541 & CODING_ISO_FLAG_SINGLE_SHIFT)
2542 found |= CATEGORY_MASK_ISO_8_1;
2543 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2544 & CODING_ISO_FLAG_SINGLE_SHIFT)
2545 found |= CATEGORY_MASK_ISO_8_2;
2546 goto check_extra_latin;
4ed46869
KH
2547
2548 default:
2549 if (c < 0x80)
f46869e4
KH
2550 {
2551 single_shifting = 0;
2552 break;
2553 }
ff0dacd7 2554 if (c >= 0xA0)
c4825358 2555 {
ff0dacd7
KH
2556 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2557 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2558 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2559 0xA0..0FF. If the byte length is even, we include
2560 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2561 only when we are not single shifting. */
2562 if (! single_shifting
2563 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2564 {
e17de821 2565 int i = 1;
b73bfc1c
KH
2566 while (src < src_end)
2567 {
df7492f9 2568 ONE_MORE_BYTE (c);
b73bfc1c
KH
2569 if (c < 0xA0)
2570 break;
2571 i++;
2572 }
2573
2574 if (i & 1 && src < src_end)
ff0dacd7 2575 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2576 else
ff0dacd7 2577 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2578 }
ff0dacd7 2579 break;
4ed46869 2580 }
ff0dacd7
KH
2581 check_extra_latin:
2582 single_shifting = 0;
2583 if (! VECTORP (Vlatin_extra_code_table)
2584 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2585 {
2586 rejected = CATEGORY_MASK_ISO;
2587 break;
2588 }
2589 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2590 & CODING_ISO_FLAG_LATIN_EXTRA)
2591 found |= CATEGORY_MASK_ISO_8_1;
2592 else
2593 rejected |= CATEGORY_MASK_ISO_8_1;
2594 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2595 & CODING_ISO_FLAG_LATIN_EXTRA)
2596 found |= CATEGORY_MASK_ISO_8_2;
2597 else
2598 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2599 }
2600 }
ff0dacd7
KH
2601 detect_info->rejected |= CATEGORY_MASK_ISO;
2602 return 0;
4ed46869 2603
df7492f9 2604 no_more_source:
ff0dacd7
KH
2605 detect_info->rejected |= rejected;
2606 detect_info->found |= (found & ~rejected);
df7492f9 2607 return 1;
4ed46869 2608}
ec6d2bb8 2609
4ed46869
KH
2610
/* Record in CODING that the charset identified by DIM, CHARS_96 and
   the final byte FINAL is designated to graphic register REG.

   If FINAL is outside '0'..127, if ISO_CHARSET_TABLE has no charset
   for the triple, or if the charset is not safe for CODING, the
   designation of REG is set to -2 and control jumps to
   `invalid_code'.

   charset_jisx0201_roman is replaced by charset_ascii when CODING has
   CODING_ISO_FLAG_USE_ROMAN, and charset_jisx0208_1978 by
   charset_jisx0208 when it has CODING_ISO_FLAG_USE_OLDJIS.

   The arguments are evaluated more than once.  */

#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int desig_id, old_desig;                                            \
                                                                        \
    if (final < '0' || final >= 128                                     \
        || ((desig_id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)  \
        || !SAFE_CHARSET_P (coding, desig_id))                          \
      {                                                                 \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        goto invalid_code;                                              \
      }                                                                 \
    old_desig = CODING_ISO_DESIGNATION (coding, reg);                   \
    if (desig_id == charset_jisx0201_roman)                             \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
          desig_id = charset_ascii;                                     \
      }                                                                 \
    else if (desig_id == charset_jisx0208_1978)                         \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
          desig_id = charset_jisx0208;                                  \
      }                                                                 \
    CODING_ISO_DESIGNATION (coding, reg) = desig_id;                    \
    /* ASCII designated right after an invalid designation to REG: */   \
    /* keep the sequence itself in the decoded text.  */                \
    if (old_desig == -2 && desig_id == charset_ascii)                   \
      goto invalid_code;                                                \
  } while (0)
2641
d46c5b12 2642
df7492f9
KH
/* If a composition is in progress (COMPOSITION_STATE is not
   COMPOSING_NO), give it up: leave the composing state and store the
   characters collected so far in COMPONENTS into CHARBUF as ordinary
   characters, advancing CHAR_OFFSET.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every
   collected element is stored; for the other methods only every
   second element is stored.

   Jumps to `no_more_source' if CHARBUF cannot hold COMPONENT_IDX more
   elements.  Does nothing when no composition is in progress.  */

#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    if (composition_state != COMPOSING_NO)                              \
      {                                                                 \
        int comp_i;                                                     \
                                                                        \
        if (charbuf + component_idx > charbuf_end)                      \
          goto no_more_source;                                          \
        composition_state = COMPOSING_NO;                               \
        if (method == COMPOSITION_RELATIVE                              \
            || method == COMPOSITION_WITH_ALTCHARS)                     \
          {                                                             \
            for (comp_i = 0; comp_i < component_idx; comp_i++)          \
              *charbuf++ = components[comp_i];                          \
            char_offset += component_idx;                               \
          }                                                             \
        else                                                            \
          {                                                             \
            for (comp_i = 0; comp_i < component_idx; comp_i += 2)       \
              *charbuf++ = components[comp_i];                          \
            char_offset += (component_idx / 2) + 1;                     \
          }                                                             \
      }                                                                 \
  } while (0)
2667
d46c5b12 2668
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte following ESC.  When C1 is '0' and the state is
   COMPOSING_COMPONENT_RULE, the components collected so far are
   recorded as the component part (COMPONENT_LEN) and the state
   becomes COMPOSING_CHAR.

   Otherwise any composition in progress is finished, and the rest of
   the source is searched for the terminator ESC 1.  If it is found,
   METHOD and COMPOSITION_STATE are set up for a new composition.  If
   it is not found, control jumps to `invalid_code' when this is the
   last block, and to `no_more_source' otherwise.  Control also jumps
   to `no_more_source' if CHARBUF has no room for
   MAX_COMPOSITION_COMPONENTS elements.

   The "not found" test also covers a source with no byte left after
   the start sequence, in which case the scan pointer ends beyond
   SRC_END - 1 rather than on it.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if (c1 == '0'                                                       \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        if (p >= src_end - 1)                                           \
          {                                                             \
            if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
              goto invalid_code;                                        \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
                  : c1 == '2' ? COMPOSITION_WITH_RULE                   \
                  : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
2711
ec6d2bb8 2712
df7492f9
KH
/* Handle composition end sequence ESC 1.

   A composition annotation for METHOD, starting at CHAR_OFFSET, is
   appended to CHARBUF.  Unless METHOD is COMPOSITION_RELATIVE, the
   component part of COMPONENTS (all of it when COMPONENT_LEN is 0) is
   stored after the annotation and the first annotation element is
   rewritten to the negated total length.  The composed characters
   are then stored, advancing CHAR_OFFSET for each: every second
   element of COMPONENTS for COMPOSITION_WITH_RULE, the elements from
   COMPONENT_LEN onward otherwise.

   Finally CODING is marked as annotated and the state returns to
   COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int nchars;                                                         \
    int k;                                                              \
    int *annotation_head = charbuf;                                     \
    int from = char_offset;                                             \
    int to;                                                             \
                                                                        \
    if (component_len > 0)                                              \
      nchars = component_idx - component_len;                           \
    else if (method == COMPOSITION_RELATIVE)                            \
      nchars = component_idx;                                           \
    else                                                                \
      nchars = (component_idx + 1) / 2;                                 \
    to = from + nchars;                                                 \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int ncomponents                                                 \
          = (component_len == 0 ? component_idx : component_len);       \
                                                                        \
        for (k = 0; k < ncomponents; k++)                               \
          *charbuf++ = components[k];                                   \
        *annotation_head = annotation_head - charbuf;                   \
      }                                                                 \
    if (method == COMPOSITION_WITH_RULE)                                \
      {                                                                 \
        for (k = 0; k < component_idx; k += 2, char_offset++)           \
          *charbuf++ = components[k];                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (k = component_len; k < component_idx; k++, char_offset++)  \
          *charbuf++ = components[k];                                   \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
2745
df7492f9 2746
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from SRC into C2) and store the encoded composition rule back
   in C1.  C1 is set to 0 when the byte does not denote a rule.  The
   expansion site must provide C2 and whatever ONE_MORE_BYTE needs.  */

#define DECODE_COMPOSITION_RULE(c1) \
  do { \
    (c1) -= 32; \
    if ((c1) >= 93) \
      (c1) = 0; \
    else if ((c1) >= 81)	/* new format (after ver.21) */ \
      { \
        ONE_MORE_BYTE (c2); \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32); \
      } \
    else			/* old format (before ver.21) */ \
      { \
        int gref = (c1) / 9; \
        int nref = (c1) % 9; \
 \
        /* Reference point 4 of the old format maps to 10.  */ \
        gref = gref == 4 ? 10 : gref; \
        nref = nref == 4 ? 10 : nref; \
        (c1) = COMPOSITION_ENCODE_RULE (gref, nref); \
      } \
  } while (0)
88993dfd 2770
d46c5b12 2771
4ed46869
KH
2772/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2773
b73bfc1c 2774static void
df7492f9 2775decode_coding_iso_2022 (coding)
4ed46869 2776 struct coding_system *coding;
4ed46869 2777{
8f924df7
KH
2778 const unsigned char *src = coding->source + coding->consumed;
2779 const unsigned char *src_end = coding->source + coding->src_bytes;
2780 const unsigned char *src_base;
df7492f9 2781 int *charbuf = coding->charbuf;
ff0dacd7
KH
2782 int *charbuf_end
2783 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2784 int consumed_chars = 0, consumed_chars_base;
df7492f9 2785 int multibytep = coding->src_multibyte;
4ed46869 2786 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
2787 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2788 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2789 struct charset *charset;
2790 int c;
2791 /* For handling composition sequence. */
2792#define COMPOSING_NO 0
2793#define COMPOSING_CHAR 1
2794#define COMPOSING_RULE 2
2795#define COMPOSING_COMPONENT_CHAR 3
2796#define COMPOSING_COMPONENT_RULE 4
2797
2798 int composition_state = COMPOSING_NO;
2799 enum composition_method method;
2800 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2801 int component_idx;
2802 int component_len;
2803 Lisp_Object attrs, eol_type, charset_list;
ff0dacd7
KH
2804 int char_offset = coding->produced_char;
2805 int last_offset = char_offset;
2806 int last_id = charset_ascii;
df7492f9
KH
2807
2808 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2809 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2810
2811 while (1)
4ed46869 2812 {
463f5630 2813 int c1, c2;
b73bfc1c
KH
2814
2815 src_base = src;
df7492f9
KH
2816 consumed_chars_base = consumed_chars;
2817
2818 if (charbuf >= charbuf_end)
2819 break;
2820
b73bfc1c 2821 ONE_MORE_BYTE (c1);
4ed46869 2822
98725083 2823 /* We produce at most one character. */
4ed46869
KH
2824 switch (iso_code_class [c1])
2825 {
2826 case ISO_0x20_or_0x7F:
df7492f9 2827 if (composition_state != COMPOSING_NO)
ec6d2bb8 2828 {
df7492f9
KH
2829 if (composition_state == COMPOSING_RULE
2830 || composition_state == COMPOSING_COMPONENT_RULE)
2831 {
2832 DECODE_COMPOSITION_RULE (c1);
2833 components[component_idx++] = c1;
2834 composition_state--;
2835 continue;
2836 }
4ed46869 2837 }
df7492f9
KH
2838 if (charset_id_0 < 0
2839 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2840 /* This is SPACE or DEL. */
2841 charset = CHARSET_FROM_ID (charset_ascii);
2842 else
2843 charset = CHARSET_FROM_ID (charset_id_0);
2844 break;
4ed46869
KH
2845
2846 case ISO_graphic_plane_0:
781d7a48 2847 if (composition_state != COMPOSING_NO)
b73bfc1c 2848 {
781d7a48
KH
2849 if (composition_state == COMPOSING_RULE
2850 || composition_state == COMPOSING_COMPONENT_RULE)
2851 {
2852 DECODE_COMPOSITION_RULE (c1);
2853 components[component_idx++] = c1;
2854 composition_state--;
2855 continue;
2856 }
b73bfc1c 2857 }
df7492f9 2858 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2859 break;
2860
2861 case ISO_0xA0_or_0xFF:
df7492f9
KH
2862 if (charset_id_1 < 0
2863 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2864 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2865 goto invalid_code;
4ed46869
KH
2866 /* This is a graphic character, we fall down ... */
2867
2868 case ISO_graphic_plane_1:
df7492f9
KH
2869 if (charset_id_1 < 0)
2870 goto invalid_code;
2871 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2872 break;
2873
2874 case ISO_carriage_return:
df7492f9 2875 if (c1 == '\r')
4ed46869 2876 {
df7492f9 2877 if (EQ (eol_type, Qdos))
4ed46869 2878 {
df7492f9 2879 if (src == src_end)
98725083
KH
2880 {
2881 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2882 goto no_more_source;
8f924df7 2883 }
df7492f9
KH
2884 if (*src == '\n')
2885 ONE_MORE_BYTE (c1);
4ed46869 2886 }
df7492f9
KH
2887 else if (EQ (eol_type, Qmac))
2888 c1 = '\n';
4ed46869 2889 }
df7492f9
KH
2890 /* fall through */
2891
2892 case ISO_control_0:
2893 MAYBE_FINISH_COMPOSITION ();
2894 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2895 break;
2896
df7492f9
KH
2897 case ISO_control_1:
2898 MAYBE_FINISH_COMPOSITION ();
2899 goto invalid_code;
2900
4ed46869 2901 case ISO_shift_out:
df7492f9
KH
2902 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2903 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2904 goto invalid_code;
2905 CODING_ISO_INVOCATION (coding, 0) = 1;
2906 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2907 continue;
4ed46869
KH
2908
2909 case ISO_shift_in:
df7492f9
KH
2910 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2911 goto invalid_code;
2912 CODING_ISO_INVOCATION (coding, 0) = 0;
2913 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2914 continue;
4ed46869
KH
2915
2916 case ISO_single_shift_2_7:
2917 case ISO_single_shift_2:
df7492f9
KH
2918 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2919 goto invalid_code;
4ed46869
KH
2920 /* SS2 is handled as an escape sequence of ESC 'N' */
2921 c1 = 'N';
2922 goto label_escape_sequence;
2923
2924 case ISO_single_shift_3:
df7492f9
KH
2925 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2926 goto invalid_code;
4ed46869
KH
2927 /* SS2 is handled as an escape sequence of ESC 'O' */
2928 c1 = 'O';
2929 goto label_escape_sequence;
2930
2931 case ISO_control_sequence_introducer:
2932 /* CSI is handled as an escape sequence of ESC '[' ... */
2933 c1 = '[';
2934 goto label_escape_sequence;
2935
2936 case ISO_escape:
2937 ONE_MORE_BYTE (c1);
2938 label_escape_sequence:
df7492f9 2939 /* Escape sequences handled here are invocation,
4ed46869
KH
2940 designation, direction specification, and character
2941 composition specification. */
2942 switch (c1)
2943 {
2944 case '&': /* revision of following character set */
2945 ONE_MORE_BYTE (c1);
2946 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2947 goto invalid_code;
4ed46869
KH
2948 ONE_MORE_BYTE (c1);
2949 if (c1 != ISO_CODE_ESC)
df7492f9 2950 goto invalid_code;
4ed46869
KH
2951 ONE_MORE_BYTE (c1);
2952 goto label_escape_sequence;
2953
2954 case '$': /* designation of 2-byte character set */
df7492f9
KH
2955 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2956 goto invalid_code;
4ed46869
KH
2957 ONE_MORE_BYTE (c1);
2958 if (c1 >= '@' && c1 <= 'B')
2959 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2960 or JISX0208.1980 */
df7492f9 2961 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2962 }
2963 else if (c1 >= 0x28 && c1 <= 0x2B)
2964 { /* designation of DIMENSION2_CHARS94 character set */
2965 ONE_MORE_BYTE (c2);
df7492f9 2966 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2967 }
2968 else if (c1 >= 0x2C && c1 <= 0x2F)
2969 { /* designation of DIMENSION2_CHARS96 character set */
2970 ONE_MORE_BYTE (c2);
df7492f9 2971 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2972 }
2973 else
df7492f9 2974 goto invalid_code;
b73bfc1c 2975 /* We must update these variables now. */
df7492f9
KH
2976 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2977 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2978 continue;
4ed46869
KH
2979
2980 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2981 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2982 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2983 goto invalid_code;
2984 CODING_ISO_INVOCATION (coding, 0) = 2;
2985 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2986 continue;
4ed46869
KH
2987
2988 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2989 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2990 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2991 goto invalid_code;
2992 CODING_ISO_INVOCATION (coding, 0) = 3;
2993 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2994 continue;
4ed46869
KH
2995
2996 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2997 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2998 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2999 goto invalid_code;
3000 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 3001 ONE_MORE_BYTE (c1);
e7046a18 3002 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3003 goto invalid_code;
4ed46869
KH
3004 break;
3005
3006 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3007 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3008 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3009 goto invalid_code;
3010 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 3011 ONE_MORE_BYTE (c1);
e7046a18 3012 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3013 goto invalid_code;
4ed46869
KH
3014 break;
3015
ec6d2bb8 3016 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3017 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3018 goto invalid_code;
ec6d2bb8 3019 DECODE_COMPOSITION_START (c1);
b73bfc1c 3020 continue;
4ed46869 3021
ec6d2bb8 3022 case '1': /* end composition */
df7492f9
KH
3023 if (composition_state == COMPOSING_NO)
3024 goto invalid_code;
3025 DECODE_COMPOSITION_END ();
b73bfc1c 3026 continue;
4ed46869
KH
3027
3028 case '[': /* specification of direction */
df7492f9
KH
3029 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3030 goto invalid_code;
4ed46869 3031 /* For the moment, nested direction is not supported.
d46c5b12 3032 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3033 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3034 ONE_MORE_BYTE (c1);
3035 switch (c1)
3036 {
3037 case ']': /* end of the current direction */
d46c5b12 3038 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3039
3040 case '0': /* end of the current direction */
3041 case '1': /* start of left-to-right direction */
3042 ONE_MORE_BYTE (c1);
3043 if (c1 == ']')
d46c5b12 3044 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3045 else
df7492f9 3046 goto invalid_code;
4ed46869
KH
3047 break;
3048
3049 case '2': /* start of right-to-left direction */
3050 ONE_MORE_BYTE (c1);
3051 if (c1 == ']')
d46c5b12 3052 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3053 else
df7492f9 3054 goto invalid_code;
4ed46869
KH
3055 break;
3056
3057 default:
df7492f9 3058 goto invalid_code;
4ed46869 3059 }
b73bfc1c 3060 continue;
4ed46869 3061
103e0180 3062 case '%':
103e0180
KH
3063 ONE_MORE_BYTE (c1);
3064 if (c1 == '/')
3065 {
3066 /* CTEXT extended segment:
3067 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3068 We keep these bytes as is for the moment.
3069 They may be decoded by post-read-conversion. */
3070 int dim, M, L;
4776e638 3071 int size;
8f924df7 3072
103e0180
KH
3073 ONE_MORE_BYTE (dim);
3074 ONE_MORE_BYTE (M);
3075 ONE_MORE_BYTE (L);
3076 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3077 if (charbuf + 8 + size > charbuf_end)
3078 goto break_loop;
3079 *charbuf++ = ISO_CODE_ESC;
3080 *charbuf++ = '%';
3081 *charbuf++ = '/';
3082 *charbuf++ = dim;
3083 *charbuf++ = BYTE8_TO_CHAR (M);
3084 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3085 while (size-- > 0)
3086 {
3087 ONE_MORE_BYTE (c1);
4776e638 3088 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3089 }
103e0180
KH
3090 }
3091 else if (c1 == 'G')
3092 {
103e0180
KH
3093 /* XFree86 extension for embedding UTF-8 in CTEXT:
3094 ESC % G --UTF-8-BYTES-- ESC % @
3095 We keep these bytes as is for the moment.
3096 They may be decoded by post-read-conversion. */
4776e638
KH
3097 int *p = charbuf;
3098
3099 if (p + 6 > charbuf_end)
3100 goto break_loop;
3101 *p++ = ISO_CODE_ESC;
3102 *p++ = '%';
3103 *p++ = 'G';
3104 while (p < charbuf_end)
103e0180
KH
3105 {
3106 ONE_MORE_BYTE (c1);
3107 if (c1 == ISO_CODE_ESC
3108 && src + 1 < src_end
3109 && src[0] == '%'
3110 && src[1] == '@')
3111 break;
4776e638 3112 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3113 }
4776e638
KH
3114 if (p + 3 > charbuf_end)
3115 goto break_loop;
3116 *p++ = ISO_CODE_ESC;
3117 *p++ = '%';
3118 *p++ = '@';
3119 charbuf = p;
103e0180
KH
3120 }
3121 else
4776e638 3122 goto invalid_code;
103e0180 3123 continue;
4776e638 3124 break;
103e0180 3125
4ed46869 3126 default:
df7492f9
KH
3127 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3128 goto invalid_code;
4ed46869
KH
3129 if (c1 >= 0x28 && c1 <= 0x2B)
3130 { /* designation of DIMENSION1_CHARS94 character set */
3131 ONE_MORE_BYTE (c2);
df7492f9 3132 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3133 }
3134 else if (c1 >= 0x2C && c1 <= 0x2F)
3135 { /* designation of DIMENSION1_CHARS96 character set */
3136 ONE_MORE_BYTE (c2);
df7492f9 3137 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3138 }
3139 else
df7492f9 3140 goto invalid_code;
b73bfc1c 3141 /* We must update these variables now. */
df7492f9
KH
3142 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3143 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3144 continue;
4ed46869 3145 }
b73bfc1c 3146 }
4ed46869 3147
ff0dacd7
KH
3148 if (charset->id != charset_ascii
3149 && last_id != charset->id)
3150 {
3151 if (last_id != charset_ascii)
3152 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3153 last_id = charset->id;
3154 last_offset = char_offset;
3155 }
3156
b73bfc1c 3157 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3158 Produce a decoded character while getting 2nd position code
3159 C2 if necessary. */
3160 c1 &= 0x7F;
3161 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3162 {
3163 ONE_MORE_BYTE (c2);
df7492f9 3164 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3165 /* C2 is not in a valid range. */
df7492f9
KH
3166 goto invalid_code;
3167 c1 = (c1 << 8) | (c2 & 0x7F);
3168 if (CHARSET_DIMENSION (charset) > 2)
3169 {
3170 ONE_MORE_BYTE (c2);
3171 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3172 /* C2 is not in a valid range. */
3173 goto invalid_code;
3174 c1 = (c1 << 8) | (c2 & 0x7F);
3175 }
3176 }
3177
3178 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3179 if (c < 0)
3180 {
3181 MAYBE_FINISH_COMPOSITION ();
3182 for (; src_base < src; src_base++, char_offset++)
3183 {
3184 if (ASCII_BYTE_P (*src_base))
3185 *charbuf++ = *src_base;
3186 else
3187 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3188 }
3189 }
3190 else if (composition_state == COMPOSING_NO)
3191 {
3192 *charbuf++ = c;
3193 char_offset++;
4ed46869 3194 }
df7492f9 3195 else
781d7a48
KH
3196 {
3197 components[component_idx++] = c;
3198 if (method == COMPOSITION_WITH_RULE
3199 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3200 && composition_state == COMPOSING_COMPONENT_CHAR))
3201 composition_state++;
4ed46869
KH
3202 }
3203 continue;
3204
df7492f9
KH
3205 invalid_code:
3206 MAYBE_FINISH_COMPOSITION ();
4ed46869 3207 src = src_base;
df7492f9
KH
3208 consumed_chars = consumed_chars_base;
3209 ONE_MORE_BYTE (c);
3210 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3211 char_offset++;
df7492f9 3212 coding->errors++;
4776e638
KH
3213 continue;
3214
3215 break_loop:
3216 break;
4ed46869 3217 }
fb88bf2d 3218
df7492f9 3219 no_more_source:
ff0dacd7
KH
3220 if (last_id != charset_ascii)
3221 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
3222 coding->consumed_char += consumed_chars_base;
3223 coding->consumed = src_base - coding->source;
3224 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3225}
3226
b73bfc1c 3227
f4dee582 3228/* ISO2022 encoding stuff. */
4ed46869
KH
3229
3230/*
f4dee582 3231 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3232 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3233 variant has the following specifications:
df7492f9 3234 1. Initial designation to G0 thru G3.
4ed46869
KH
3235 2. Allows short-form designation?
3236 3. ASCII should be designated to G0 before control characters?
3237 4. ASCII should be designated to G0 at end of line?
3238 5. 7-bit environment or 8-bit environment?
3239 6. Use locking-shift?
3240 7. Use Single-shift?
3241 And the following two are only for Japanese:
3242 8. Use ASCII in place of JIS0201-1976-Roman?
3243 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3244 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3245 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3246 details.
4ed46869
KH
3247*/
3248
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG at DST, and increment DST.  If <final-char> of CHARSET is
   '@', 'A', or 'B' and the coding system CODING allows, produce
   designation sequence of short-form.

   When CODING has CODING_ISO_FLAG_REVISION set and CHARSET has a
   non-negative revision number, the designation is preceded by
   ESC & <'@' + revision>.  Finally the designation of REG recorded in
   CODING is updated to CHARSET.  */

#define ENCODE_DESIGNATION(charset, reg, coding) \
  do { \
    /* Intermediate characters indexed by graphic register, for \
       96-character sets and 94-character sets respectively.  */ \
    const char *designation_marks \
      = CHARSET_ISO_CHARS_96 (charset) ? ",-./" : "()*+"; \
    unsigned char designation_final = CHARSET_ISO_FINAL (charset); \
    int designation_revision = -1; \
 \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
      designation_revision = CHARSET_ISO_REVISION (charset); \
    if (designation_revision >= 0) \
      { \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
        EMIT_ONE_BYTE ('@' + designation_revision); \
      } \
 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
    if (CHARSET_DIMENSION (charset) != 1) \
      EMIT_ONE_ASCII_BYTE ('$'); \
    /* The intermediate character may be omitted (short form) only \
       for a multi-byte 94-character set designated to register 0 \
       whose final character is '@', 'A' or 'B', and only when \
       CODING does not demand the long form.  */ \
    if (CHARSET_DIMENSION (charset) == 1 \
        || CHARSET_ISO_CHARS_96 (charset) \
        || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
        || (reg) != 0 \
        || designation_final < '@' || designation_final > 'B') \
      EMIT_ONE_ASCII_BYTE (designation_marks[reg]); \
    EMIT_ONE_ASCII_BYTE (designation_final); \
 \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
  } while (0)
3296
df7492f9 3297
4ed46869
KH
/* The following two macros produce codes (control character or escape
   sequence) for ISO2022 single-shift functions (single-shift-2 and
   single-shift-3).  A coding system with CODING_ISO_FLAG_SEVEN_BITS
   gets the two-byte escape sequence, any other one the single
   control byte.  Both macros mark CODING as being in the
   single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2 \
  do { \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
      EMIT_ONE_BYTE (ISO_CODE_SS2); \
    else \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3 \
  do { \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
      EMIT_ONE_BYTE (ISO_CODE_SS3); \
    else \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
  } while (0)
3320
df7492f9 3321
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING which graphic register is now invoked to graphic
   plane 0.  */

#define ENCODE_SHIFT_IN \
  do { \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
    CODING_ISO_INVOCATION (coding, 0) = 0; \
  } while (0)


#define ENCODE_SHIFT_OUT \
  do { \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
    CODING_ISO_INVOCATION (coding, 0) = 1; \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2 \
  do { \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
    CODING_ISO_INVOCATION (coding, 0) = 2; \
  } while (0)


/* Locking-shift-3 is ESC 'o' (ESC 'n' is locking-shift-2); this is
   also what the decoder accepts for invoking graphic register 3.  */
#define ENCODE_LOCKING_SHIFT_3 \
  do { \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o'); \
    CODING_ISO_INVOCATION (coding, 0) = 3; \
  } while (0)
3352
df7492f9 3353
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when the coding system has
   CODING_ISO_FLAG_USE_ROMAN set and CHARSET is ASCII, it is replaced
   by the JISX0201-Roman charset.  The body loops until the character
   has been emitted: as long as CHARSET is invoked to neither graphic
   plane, encode_invocation_designation is called and the checks are
   repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
  do { \
    int id = CHARSET_ID (charset); \
 \
    if (id == charset_ascii \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)) \
      { \
        id = charset_jisx0201_roman; \
        charset = CHARSET_FROM_ID (id); \
      } \
 \
    if (CODING_ISO_SINGLE_SHIFTING (coding)) \
      { \
        /* A single-shift was just emitted; it covers this one \
           character only.  */ \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_ONE_BYTE ((c1) | 0x80); \
        else \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F); \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
        break; \
      } \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
      { \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F); \
        break; \
      } \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
      { \
        EMIT_ONE_BYTE ((c1) | 0x80); \
        break; \
      } \
    /* Since CHARSET is not yet invoked to any graphic planes, we \
       must invoke it, or, at first, designate it to some graphic \
       register.  Then repeat the loop to actually produce the \
       character.  */ \
    dst = encode_invocation_designation (charset, coding, dst, \
                                         &produced_chars); \
  } while (1)
3396
df7492f9 3397
f4dee582
RS
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be an lvalue: when the coding system has
   CODING_ISO_FLAG_USE_OLDJIS set and CHARSET is JISX0208, it is
   replaced by the JISX0208-1978 charset.  The body loops until the
   character has been emitted: as long as CHARSET is invoked to
   neither graphic plane, encode_invocation_designation is called and
   the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
  do { \
    int id = CHARSET_ID (charset); \
 \
    if (id == charset_jisx0208 \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)) \
      { \
        id = charset_jisx0208_1978; \
        charset = CHARSET_FROM_ID (id); \
      } \
 \
    if (CODING_ISO_SINGLE_SHIFTING (coding)) \
      { \
        /* A single-shift was just emitted; it covers this one \
           character only.  */ \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
        else \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
        break; \
      } \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
      { \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
        break; \
      } \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
      { \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
        break; \
      } \
    /* Since CHARSET is not yet invoked to any graphic planes, we \
       must invoke it, or, at first, designate it to some graphic \
       register.  Then repeat the loop to actually produce the \
       character.  */ \
    dst = encode_invocation_designation (charset, coding, dst, \
                                         &produced_chars); \
  } while (1)
3440
05e6f5dc 3441
df7492f9
KH
/* Produce codes for the character C of CHARSET.  C is converted to
   its code point in CHARSET with ENCODE_CHAR; a charset of dimension
   1 is emitted through ENCODE_ISO_CHARACTER_DIMENSION1, any other
   through ENCODE_ISO_CHARACTER_DIMENSION2 with the code point split
   into its upper part and its low byte.  */
#define ENCODE_ISO_CHARACTER(charset, c) \
  do { \
    int code = ENCODE_CHAR ((charset),(c)); \
 \
    if (CHARSET_DIMENSION (charset) != 1) \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
  } while (0)
bdd9fb48 3451
05e6f5dc 3452
4ed46869 3453/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3454 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3455 Return new DST. */
3456
3457unsigned char *
df7492f9
KH
3458encode_invocation_designation (charset, coding, dst, p_nchars)
3459 struct charset *charset;
4ed46869
KH
3460 struct coding_system *coding;
3461 unsigned char *dst;
df7492f9 3462 int *p_nchars;
4ed46869 3463{
df7492f9
KH
3464 int multibytep = coding->dst_multibyte;
3465 int produced_chars = *p_nchars;
4ed46869 3466 int reg; /* graphic register number */
df7492f9 3467 int id = CHARSET_ID (charset);
4ed46869
KH
3468
3469 /* At first, check designations. */
3470 for (reg = 0; reg < 4; reg++)
df7492f9 3471 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3472 break;
3473
3474 if (reg >= 4)
3475 {
3476 /* CHARSET is not yet designated to any graphic registers. */
3477 /* At first check the requested designation. */
df7492f9
KH
3478 reg = CODING_ISO_REQUEST (coding, id);
3479 if (reg < 0)
1ba9e4ab
KH
3480 /* Since CHARSET requests no special designation, designate it
3481 to graphic register 0. */
4ed46869
KH
3482 reg = 0;
3483
3484 ENCODE_DESIGNATION (charset, reg, coding);
3485 }
3486
df7492f9
KH
3487 if (CODING_ISO_INVOCATION (coding, 0) != reg
3488 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3489 {
3490 /* Since the graphic register REG is not invoked to any graphic
3491 planes, invoke it to graphic plane 0. */
3492 switch (reg)
3493 {
3494 case 0: /* graphic register 0 */
3495 ENCODE_SHIFT_IN;
3496 break;
3497
3498 case 1: /* graphic register 1 */
3499 ENCODE_SHIFT_OUT;
3500 break;
3501
3502 case 2: /* graphic register 2 */
df7492f9 3503 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3504 ENCODE_SINGLE_SHIFT_2;
3505 else
3506 ENCODE_LOCKING_SHIFT_2;
3507 break;
3508
3509 case 3: /* graphic register 3 */
df7492f9 3510 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3511 ENCODE_SINGLE_SHIFT_3;
3512 else
3513 ENCODE_LOCKING_SHIFT_3;
3514 break;
3515 }
3516 }
b73bfc1c 3517
df7492f9 3518 *p_nchars = produced_chars;
4ed46869
KH
3519 return dst;
3520}
3521
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.  ENCODE_CONTROL_SEQUENCE_INTRODUCER emits ESC '[' for a
   coding system with CODING_ISO_FLAG_SEVEN_BITS and the single byte
   CSI otherwise.  It is an object-like macro, so it must be used
   without an argument list.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
  do { \
    /* The flags are a bit mask, so test the bit with `&'; `==' \
       would match only when no other flag is set.  */ \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
    else \
      EMIT_ONE_BYTE (ISO_CODE_CSI); \
  } while (0)


/* Produce the sequence that starts right-to-left text: CSI 2 ].  */
#define ENCODE_DIRECTION_R2L() \
  do { \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER; \
    EMIT_TWO_ASCII_BYTES ('2', ']'); \
  } while (0)


/* Produce the sequence that ends the current direction: CSI 0 ].  */
#define ENCODE_DIRECTION_L2R() \
  do { \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER; \
    EMIT_TWO_ASCII_BYTES ('0', ']'); \
  } while (0)
4ed46869 3545
4ed46869
KH
3546
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state: shift-in when graphic plane
   0 does not invoke register 0, then a designation for every graphic
   register that has an initial charset different from its current
   designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER() \
  do { \
    int reset_reg; \
 \
    if (CODING_ISO_INVOCATION (coding, 0) != 0) \
      ENCODE_SHIFT_IN; \
    for (reset_reg = 0; reset_reg < 4; reset_reg++) \
      { \
        int initial_id = CODING_ISO_INITIAL (coding, reset_reg); \
        struct charset *initial_charset; \
 \
        /* Skip registers with no initial charset and registers \
           that already hold their initial charset.  */ \
        if (initial_id < 0 \
            || CODING_ISO_DESIGNATION (coding, reset_reg) == initial_id) \
          continue; \
        initial_charset = CHARSET_FROM_ID (initial_id); \
        ENCODE_DESIGNATION (initial_charset, reset_reg, coding); \
      } \
  } while (0)
3565
df7492f9 3566
bdd9fb48 3567/* Produce designation sequences of charsets in the line started from
b73bfc1c 3568 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3569
3570 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3571 find all the necessary designations. */
3572
b73bfc1c 3573static unsigned char *
df7492f9 3574encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3575 struct coding_system *coding;
df7492f9
KH
3576 int *charbuf, *charbuf_end;
3577 unsigned char *dst;
e0e989f6 3578{
df7492f9 3579 struct charset *charset;
bdd9fb48
KH
3580 /* Table of charsets to be designated to each graphic register. */
3581 int r[4];
df7492f9
KH
3582 int c, found = 0, reg;
3583 int produced_chars = 0;
3584 int multibytep = coding->dst_multibyte;
3585 Lisp_Object attrs;
3586 Lisp_Object charset_list;
3587
3588 attrs = CODING_ID_ATTRS (coding->id);
3589 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3590 if (EQ (charset_list, Qiso_2022))
3591 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3592
3593 for (reg = 0; reg < 4; reg++)
3594 r[reg] = -1;
3595
b73bfc1c 3596 while (found < 4)
e0e989f6 3597 {
df7492f9
KH
3598 int id;
3599
3600 c = *charbuf++;
b73bfc1c
KH
3601 if (c == '\n')
3602 break;
df7492f9
KH
3603 charset = char_charset (c, charset_list, NULL);
3604 id = CHARSET_ID (charset);
3605 reg = CODING_ISO_REQUEST (coding, id);
3606 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3607 {
3608 found++;
df7492f9 3609 r[reg] = id;
bdd9fb48 3610 }
bdd9fb48
KH
3611 }
3612
3613 if (found)
3614 {
3615 for (reg = 0; reg < 4; reg++)
3616 if (r[reg] >= 0
df7492f9
KH
3617 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3618 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3619 }
b73bfc1c
KH
3620
3621 return dst;
e0e989f6
KH
3622}
3623
4ed46869
KH
3624/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3625
df7492f9
KH
3626static int
3627encode_coding_iso_2022 (coding)
4ed46869 3628 struct coding_system *coding;
4ed46869 3629{
df7492f9
KH
3630 int multibytep = coding->dst_multibyte;
3631 int *charbuf = coding->charbuf;
3632 int *charbuf_end = charbuf + coding->charbuf_used;
3633 unsigned char *dst = coding->destination + coding->produced;
3634 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3635 int safe_room = 16;
3636 int bol_designation
3637 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3638 && CODING_ISO_BOL (coding));
3639 int produced_chars = 0;
3640 Lisp_Object attrs, eol_type, charset_list;
3641 int ascii_compatible;
b73bfc1c 3642 int c;
ff0dacd7 3643 int preferred_charset_id = -1;
05e6f5dc 3644
df7492f9 3645 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4 3646 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3647 /* Charset list may have been changed. */
3648 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
8f924df7 3649 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3650
df7492f9 3651 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3652
df7492f9 3653 while (charbuf < charbuf_end)
4ed46869 3654 {
df7492f9 3655 ASSURE_DESTINATION (safe_room);
b73bfc1c 3656
df7492f9 3657 if (bol_designation)
b73bfc1c 3658 {
df7492f9 3659 unsigned char *dst_prev = dst;
4ed46869 3660
bdd9fb48 3661 /* We have to produce designation sequences if any now. */
df7492f9
KH
3662 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3663 bol_designation = 0;
3664 /* We are sure that designation sequences are all ASCII bytes. */
3665 produced_chars += dst - dst_prev;
e0e989f6
KH
3666 }
3667
df7492f9 3668 c = *charbuf++;
ec6d2bb8 3669
ff0dacd7
KH
3670 if (c < 0)
3671 {
3672 /* Handle an annotation. */
3673 switch (*charbuf)
ec6d2bb8 3674 {
ff0dacd7
KH
3675 case CODING_ANNOTATE_COMPOSITION_MASK:
3676 /* Not yet implemented. */
3677 break;
3678 case CODING_ANNOTATE_CHARSET_MASK:
3679 preferred_charset_id = charbuf[3];
3680 if (preferred_charset_id >= 0
3681 && NILP (Fmemq (make_number (preferred_charset_id),
3682 charset_list)))
3683 preferred_charset_id = -1;
3684 break;
3685 default:
3686 abort ();
4ed46869 3687 }
ff0dacd7
KH
3688 charbuf += -c - 1;
3689 continue;
4ed46869 3690 }
ec6d2bb8 3691
b73bfc1c
KH
3692 /* Now encode the character C. */
3693 if (c < 0x20 || c == 0x7F)
3694 {
df7492f9
KH
3695 if (c == '\n'
3696 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3697 {
df7492f9
KH
3698 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3699 ENCODE_RESET_PLANE_AND_REGISTER ();
3700 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3701 {
df7492f9
KH
3702 int i;
3703
3704 for (i = 0; i < 4; i++)
3705 CODING_ISO_DESIGNATION (coding, i)
3706 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3707 }
df7492f9
KH
3708 bol_designation
3709 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3710 }
df7492f9
KH
3711 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3712 ENCODE_RESET_PLANE_AND_REGISTER ();
3713 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3714 }
df7492f9 3715 else if (ASCII_CHAR_P (c))
88993dfd 3716 {
df7492f9
KH
3717 if (ascii_compatible)
3718 EMIT_ONE_ASCII_BYTE (c);
93dec019 3719 else
19a8d9e0 3720 {
bf16eb23
KH
3721 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3722 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 3723 }
4ed46869 3724 }
16eafb5d 3725 else if (CHAR_BYTE8_P (c))
88993dfd 3726 {
16eafb5d
KH
3727 c = CHAR_TO_BYTE8 (c);
3728 EMIT_ONE_BYTE (c);
88993dfd 3729 }
b73bfc1c 3730 else
df7492f9 3731 {
ff0dacd7 3732 struct charset *charset;
b73bfc1c 3733
ff0dacd7
KH
3734 if (preferred_charset_id >= 0)
3735 {
3736 charset = CHARSET_FROM_ID (preferred_charset_id);
3737 if (! CHAR_CHARSET_P (c, charset))
3738 charset = char_charset (c, charset_list, NULL);
3739 }
3740 else
3741 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3742 if (!charset)
3743 {
41cbe562
KH
3744 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3745 {
3746 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3747 charset = CHARSET_FROM_ID (charset_ascii);
3748 }
3749 else
3750 {
3751 c = coding->default_char;
3752 charset = char_charset (c, charset_list, NULL);
3753 }
df7492f9
KH
3754 }
3755 ENCODE_ISO_CHARACTER (charset, c);
3756 }
84fbb8a0 3757 }
b73bfc1c 3758
df7492f9
KH
3759 if (coding->mode & CODING_MODE_LAST_BLOCK
3760 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3761 {
3762 ASSURE_DESTINATION (safe_room);
3763 ENCODE_RESET_PLANE_AND_REGISTER ();
3764 }
3765 coding->result = CODING_RESULT_SUCCESS;
3766 CODING_ISO_BOL (coding) = bol_designation;
3767 coding->produced_char += produced_chars;
3768 coding->produced = dst - coding->destination;
3769 return 0;
4ed46869
KH
3770}
3771
3772\f
/*** 8. SJIS and BIG5 handlers ***/
4ed46869 3774
df7492f9 3775/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3776 quite widely. So, for the moment, Emacs supports them in the bare
3777 C code. But, in the future, they may be supported only by CCL. */
3778
3779/* SJIS is a coding system encoding three character sets: ASCII, right
3780 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3781 as is. A character of charset katakana-jisx0201 is encoded by
3782 "position-code + 0x80". A character of charset japanese-jisx0208
3783 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3784 so that it fit in the range below.
4ed46869
KH
3785
3786 --- CODE RANGE of SJIS ---
3787 (character set) (range)
3788 ASCII 0x00 .. 0x7F
df7492f9 3789 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3790 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3791 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3792 -------------------------------
3793
3794*/
3795
3796/* BIG5 is a coding system encoding two character sets: ASCII and
3797 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3798 character set and is encoded in two-byte.
4ed46869
KH
3799
3800 --- CODE RANGE of BIG5 ---
3801 (character set) (range)
3802 ASCII 0x00 .. 0x7F
3803 Big5 (1st byte) 0xA1 .. 0xFE
3804 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3805 --------------------------
3806
df7492f9 3807 */
4ed46869
KH
3808
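/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in SJIS.  If the source is exhausted
   without finding an invalid byte, add any SJIS evidence found to
   DETECT_INFO->found and return 1.  Otherwise add CATEGORY_MASK_SJIS
   to DETECT_INFO->rejected and return 0.  */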
4ed46869 3812
0a28aafb 3813static int
ff0dacd7 3814detect_coding_sjis (coding, detect_info)
df7492f9 3815 struct coding_system *coding;
ff0dacd7 3816 struct coding_detection_info *detect_info;
4ed46869 3817{
8f924df7
KH
3818 const unsigned char *src = coding->source, *src_base = src;
3819 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
3820 int multibytep = coding->src_multibyte;
3821 int consumed_chars = 0;
3822 int found = 0;
b73bfc1c 3823 int c;
89528eb3 3824 int incomplete;
df7492f9 3825
ff0dacd7 3826 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3827 /* A coding system of this category is always ASCII compatible. */
3828 src += coding->head_ascii;
4ed46869 3829
b73bfc1c 3830 while (1)
4ed46869 3831 {
89528eb3 3832 incomplete = 0;
df7492f9 3833 ONE_MORE_BYTE (c);
89528eb3 3834 incomplete = 1;
682169fe
KH
3835 if (c < 0x80)
3836 continue;
df7492f9 3837 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3838 {
df7492f9 3839 ONE_MORE_BYTE (c);
682169fe 3840 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3841 break;
ff0dacd7 3842 found = CATEGORY_MASK_SJIS;
4ed46869 3843 }
df7492f9 3844 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3845 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3846 else
3847 break;
4ed46869 3848 }
ff0dacd7 3849 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3850 return 0;
3851
3852 no_more_source:
89528eb3
KH
3853 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3854 {
ff0dacd7 3855 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 3856 return 0;
4ed46869 3857 }
ff0dacd7
KH
3858 detect_info->found |= found;
3859 return 1;
4ed46869
KH
3860}
3861
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in BIG5.  If the source is exhausted
   without finding an invalid byte, add any BIG5 evidence found to
   DETECT_INFO->found and return 1.  Otherwise return 0.  */
4ed46869 3865
0a28aafb 3866static int
ff0dacd7 3867detect_coding_big5 (coding, detect_info)
df7492f9 3868 struct coding_system *coding;
ff0dacd7 3869 struct coding_detection_info *detect_info;
4ed46869 3870{
8f924df7
KH
3871 const unsigned char *src = coding->source, *src_base = src;
3872 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
3873 int multibytep = coding->src_multibyte;
3874 int consumed_chars = 0;
3875 int found = 0;
b73bfc1c 3876 int c;
89528eb3 3877 int incomplete;
fa42c37f 3878
ff0dacd7 3879 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
3880 /* A coding system of this category is always ASCII compatible. */
3881 src += coding->head_ascii;
fa42c37f 3882
b73bfc1c 3883 while (1)
fa42c37f 3884 {
89528eb3 3885 incomplete = 0;
df7492f9 3886 ONE_MORE_BYTE (c);
89528eb3 3887 incomplete = 1;
df7492f9 3888 if (c < 0x80)
fa42c37f 3889 continue;
df7492f9 3890 if (c >= 0xA1)
fa42c37f 3891 {
df7492f9
KH
3892 ONE_MORE_BYTE (c);
3893 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3894 return 0;
ff0dacd7 3895 found = CATEGORY_MASK_BIG5;
fa42c37f 3896 }
df7492f9
KH
3897 else
3898 break;
fa42c37f 3899 }
ff0dacd7 3900 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 3901 return 0;
fa42c37f 3902
df7492f9 3903 no_more_source:
89528eb3
KH
3904 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3905 {
ff0dacd7 3906 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
3907 return 0;
3908 }
ff0dacd7
KH
3909 detect_info->found |= found;
3910 return 1;
fa42c37f
KH
3911}
3912
4ed46869
KH
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   decode_coding_sjis decodes SJIS text; decode_coding_big5 decodes
   BIG5 text.  */
fa42c37f 3915
b73bfc1c 3916static void
df7492f9 3917decode_coding_sjis (coding)
4ed46869 3918 struct coding_system *coding;
4ed46869 3919{
8f924df7
KH
3920 const unsigned char *src = coding->source + coding->consumed;
3921 const unsigned char *src_end = coding->source + coding->src_bytes;
3922 const unsigned char *src_base;
df7492f9 3923 int *charbuf = coding->charbuf;
ff0dacd7 3924 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
3925 int consumed_chars = 0, consumed_chars_base;
3926 int multibytep = coding->src_multibyte;
3927 struct charset *charset_roman, *charset_kanji, *charset_kana;
3928 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
3929 int char_offset = coding->produced_char;
3930 int last_offset = char_offset;
3931 int last_id = charset_ascii;
a5d301df 3932
df7492f9
KH
3933 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3934
3935 val = charset_list;
3936 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3
KH
3937 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3938 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 3939
b73bfc1c 3940 while (1)
4ed46869 3941 {
df7492f9 3942 int c, c1;
fa42c37f 3943
b73bfc1c 3944 src_base = src;
df7492f9 3945 consumed_chars_base = consumed_chars;
fa42c37f 3946
df7492f9
KH
3947 if (charbuf >= charbuf_end)
3948 break;
3949
3950 ONE_MORE_BYTE (c);
b73bfc1c 3951
df7492f9 3952 if (c == '\r')
4ed46869 3953 {
df7492f9 3954 if (EQ (eol_type, Qdos))
4ed46869 3955 {
df7492f9 3956 if (src == src_end)
98725083
KH
3957 {
3958 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3959 goto no_more_source;
3960 }
df7492f9
KH
3961 if (*src == '\n')
3962 ONE_MORE_BYTE (c);
4ed46869 3963 }
df7492f9
KH
3964 else if (EQ (eol_type, Qmac))
3965 c = '\n';
4ed46869 3966 }
54f78171 3967 else
df7492f9
KH
3968 {
3969 struct charset *charset;
fa42c37f 3970
df7492f9
KH
3971 if (c < 0x80)
3972 charset = charset_roman;
3973 else
4ed46869 3974 {
df7492f9
KH
3975 if (c >= 0xF0)
3976 goto invalid_code;
3977 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3978 {
54f78171 3979 /* SJIS -> JISX0208 */
df7492f9
KH
3980 ONE_MORE_BYTE (c1);
3981 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3982 goto invalid_code;
3983 c = (c << 8) | c1;
3984 SJIS_TO_JIS (c);
3985 charset = charset_kanji;
5e34de15 3986 }
7487494c 3987 else if (c > 0xA0)
89528eb3
KH
3988 {
3989 /* SJIS -> JISX0201-Kana */
3990 c &= 0x7F;
3991 charset = charset_kana;
3992 }
7487494c
KH
3993 else
3994 goto invalid_code;
df7492f9 3995 }
ff0dacd7
KH
3996 if (charset->id != charset_ascii
3997 && last_id != charset->id)
3998 {
3999 if (last_id != charset_ascii)
4000 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4001 last_id = charset->id;
4002 last_offset = char_offset;
4003 }
df7492f9
KH
4004 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4005 }
4006 *charbuf++ = c;
ff0dacd7 4007 char_offset++;
df7492f9 4008 continue;
b73bfc1c 4009
df7492f9
KH
4010 invalid_code:
4011 src = src_base;
4012 consumed_chars = consumed_chars_base;
4013 ONE_MORE_BYTE (c);
4014 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4015 char_offset++;
df7492f9
KH
4016 coding->errors++;
4017 }
fa42c37f 4018
df7492f9 4019 no_more_source:
ff0dacd7
KH
4020 if (last_id != charset_ascii)
4021 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4022 coding->consumed_char += consumed_chars_base;
4023 coding->consumed = src_base - coding->source;
4024 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4025}
4026
b73bfc1c 4027static void
df7492f9 4028decode_coding_big5 (coding)
4ed46869 4029 struct coding_system *coding;
4ed46869 4030{
8f924df7
KH
4031 const unsigned char *src = coding->source + coding->consumed;
4032 const unsigned char *src_end = coding->source + coding->src_bytes;
4033 const unsigned char *src_base;
df7492f9 4034 int *charbuf = coding->charbuf;
ff0dacd7 4035 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4036 int consumed_chars = 0, consumed_chars_base;
4037 int multibytep = coding->src_multibyte;
4038 struct charset *charset_roman, *charset_big5;
4039 Lisp_Object attrs, eol_type, charset_list, val;
ff0dacd7
KH
4040 int char_offset = coding->produced_char;
4041 int last_offset = char_offset;
4042 int last_id = charset_ascii;
df7492f9
KH
4043
4044 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4045 val = charset_list;
4046 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4047 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4048
b73bfc1c 4049 while (1)
4ed46869 4050 {
df7492f9 4051 int c, c1;
b73bfc1c
KH
4052
4053 src_base = src;
df7492f9
KH
4054 consumed_chars_base = consumed_chars;
4055
4056 if (charbuf >= charbuf_end)
4057 break;
4058
4059 ONE_MORE_BYTE (c);
b73bfc1c 4060
df7492f9 4061 if (c == '\r')
4ed46869 4062 {
df7492f9 4063 if (EQ (eol_type, Qdos))
4ed46869 4064 {
df7492f9 4065 if (src == src_end)
b73bfc1c 4066 {
98725083
KH
4067 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4068 goto no_more_source;
d46c5b12 4069 }
df7492f9
KH
4070 if (*src == '\n')
4071 ONE_MORE_BYTE (c);
4ed46869 4072 }
df7492f9
KH
4073 else if (EQ (eol_type, Qmac))
4074 c = '\n';
4ed46869 4075 }
54f78171 4076 else
df7492f9
KH
4077 {
4078 struct charset *charset;
4079 if (c < 0x80)
4080 charset = charset_roman;
fb88bf2d 4081 else
fb88bf2d 4082 {
54f78171 4083 /* BIG5 -> Big5 */
df7492f9
KH
4084 if (c < 0xA1 || c > 0xFE)
4085 goto invalid_code;
4086 ONE_MORE_BYTE (c1);
4087 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4088 goto invalid_code;
4089 c = c << 8 | c1;
4090 charset = charset_big5;
4ed46869 4091 }
ff0dacd7
KH
4092 if (charset->id != charset_ascii
4093 && last_id != charset->id)
4094 {
4095 if (last_id != charset_ascii)
4096 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4097 last_id = charset->id;
4098 last_offset = char_offset;
4ed46869 4099 }
df7492f9 4100 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 4101 }
4ed46869 4102
df7492f9 4103 *charbuf++ = c;
ff0dacd7 4104 char_offset++;
fb88bf2d
KH
4105 continue;
4106
df7492f9 4107 invalid_code:
4ed46869 4108 src = src_base;
df7492f9
KH
4109 consumed_chars = consumed_chars_base;
4110 ONE_MORE_BYTE (c);
4111 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4112 char_offset++;
df7492f9 4113 coding->errors++;
fb88bf2d 4114 }
d46c5b12 4115
df7492f9 4116 no_more_source:
ff0dacd7
KH
4117 if (last_id != charset_ascii)
4118 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4119 coding->consumed_char += consumed_chars_base;
4120 coding->consumed = src_base - coding->source;
4121 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4122}
4123
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
   This function can encode charsets `ascii', `katakana-jisx0201',
   `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
   are sure that all these charsets are registered as official charset
   (i.e. do not have extended leading-codes).  Characters of other
   charsets are produced without any encoding.  encode_coding_sjis
   encodes SJIS text; encode_coding_big5 encodes BIG5 text.  */
4131
df7492f9
KH
4132static int
4133encode_coding_sjis (coding)
4ed46869 4134 struct coding_system *coding;
4ed46869 4135{
df7492f9
KH
4136 int multibytep = coding->dst_multibyte;
4137 int *charbuf = coding->charbuf;
4138 int *charbuf_end = charbuf + coding->charbuf_used;
4139 unsigned char *dst = coding->destination + coding->produced;
4140 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4141 int safe_room = 4;
4142 int produced_chars = 0;
4143 Lisp_Object attrs, eol_type, charset_list, val;
4144 int ascii_compatible;
4145 struct charset *charset_roman, *charset_kanji, *charset_kana;
4146 int c;
a5d301df 4147
df7492f9
KH
4148 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4149 val = charset_list;
4150 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4151 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4152 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4153
df7492f9 4154 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4155
df7492f9
KH
4156 while (charbuf < charbuf_end)
4157 {
4158 ASSURE_DESTINATION (safe_room);
4159 c = *charbuf++;
b73bfc1c 4160 /* Now encode the character C. */
df7492f9
KH
4161 if (ASCII_CHAR_P (c) && ascii_compatible)
4162 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4163 else if (CHAR_BYTE8_P (c))
4164 {
4165 c = CHAR_TO_BYTE8 (c);
4166 EMIT_ONE_BYTE (c);
4167 }
df7492f9 4168 else
b73bfc1c 4169 {
df7492f9
KH
4170 unsigned code;
4171 struct charset *charset = char_charset (c, charset_list, &code);
4172
4173 if (!charset)
4ed46869 4174 {
41cbe562 4175 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4176 {
41cbe562
KH
4177 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4178 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4179 }
41cbe562 4180 else
b73bfc1c 4181 {
41cbe562
KH
4182 c = coding->default_char;
4183 charset = char_charset (c, charset_list, &code);
b73bfc1c 4184 }
b73bfc1c 4185 }
df7492f9
KH
4186 if (code == CHARSET_INVALID_CODE (charset))
4187 abort ();
4188 if (charset == charset_kanji)
4189 {
4190 int c1, c2;
4191 JIS_TO_SJIS (code);
4192 c1 = code >> 8, c2 = code & 0xFF;
4193 EMIT_TWO_BYTES (c1, c2);
4194 }
4195 else if (charset == charset_kana)
4196 EMIT_ONE_BYTE (code | 0x80);
4197 else
4198 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4199 }
4200 }
4201 coding->result = CODING_RESULT_SUCCESS;
4202 coding->produced_char += produced_chars;
4203 coding->produced = dst - coding->destination;
4204 return 0;
4205}
4206
4207static int
4208encode_coding_big5 (coding)
4209 struct coding_system *coding;
4210{
4211 int multibytep = coding->dst_multibyte;
4212 int *charbuf = coding->charbuf;
4213 int *charbuf_end = charbuf + coding->charbuf_used;
4214 unsigned char *dst = coding->destination + coding->produced;
4215 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4216 int safe_room = 4;
4217 int produced_chars = 0;
4218 Lisp_Object attrs, eol_type, charset_list, val;
4219 int ascii_compatible;
4220 struct charset *charset_roman, *charset_big5;
4221 int c;
4222
4223 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4224 val = charset_list;
4225 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4226 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4227 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4228
4229 while (charbuf < charbuf_end)
4230 {
4231 ASSURE_DESTINATION (safe_room);
4232 c = *charbuf++;
4233 /* Now encode the character C. */
4234 if (ASCII_CHAR_P (c) && ascii_compatible)
4235 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4236 else if (CHAR_BYTE8_P (c))
4237 {
4238 c = CHAR_TO_BYTE8 (c);
4239 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4240 }
4241 else
4242 {
df7492f9
KH
4243 unsigned code;
4244 struct charset *charset = char_charset (c, charset_list, &code);
4245
4246 if (! charset)
b73bfc1c 4247 {
41cbe562 4248 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4249 {
41cbe562
KH
4250 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4251 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4252 }
41cbe562 4253 else
0eecad43 4254 {
41cbe562
KH
4255 c = coding->default_char;
4256 charset = char_charset (c, charset_list, &code);
0eecad43 4257 }
4ed46869 4258 }
df7492f9
KH
4259 if (code == CHARSET_INVALID_CODE (charset))
4260 abort ();
4261 if (charset == charset_big5)
b73bfc1c 4262 {
df7492f9
KH
4263 int c1, c2;
4264
4265 c1 = code >> 8, c2 = code & 0xFF;
4266 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4267 }
df7492f9
KH
4268 else
4269 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4270 }
4ed46869 4271 }
df7492f9
KH
4272 coding->result = CODING_RESULT_SUCCESS;
4273 coding->produced_char += produced_chars;
4274 coding->produced = dst - coding->destination;
4275 return 0;
4ed46869
KH
4276}
4277
4278\f
/*** 9. CCL handlers ***/
1397dc18
KH
4280
4281/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4282 Check if a text is encoded in a coding system of which
4283 encoder/decoder are written in CCL program. If it is, return
df7492f9 4284 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4285
0a28aafb 4286static int
ff0dacd7 4287detect_coding_ccl (coding, detect_info)
df7492f9 4288 struct coding_system *coding;
ff0dacd7 4289 struct coding_detection_info *detect_info;
1397dc18 4290{
8f924df7
KH
4291 const unsigned char *src = coding->source, *src_base = src;
4292 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4293 int multibytep = coding->src_multibyte;
4294 int consumed_chars = 0;
4295 int found = 0;
4296 unsigned char *valids = CODING_CCL_VALIDS (coding);
4297 int head_ascii = coding->head_ascii;
4298 Lisp_Object attrs;
4299
ff0dacd7
KH
4300 detect_info->checked |= CATEGORY_MASK_CCL;
4301
df7492f9
KH
4302 coding = &coding_categories[coding_category_ccl];
4303 attrs = CODING_ID_ATTRS (coding->id);
4304 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4305 src += head_ascii;
1397dc18 4306
b73bfc1c 4307 while (1)
1397dc18 4308 {
df7492f9
KH
4309 int c;
4310 ONE_MORE_BYTE (c);
4311 if (! valids[c])
4312 break;
ff0dacd7
KH
4313 if ((valids[c] > 1))
4314 found = CATEGORY_MASK_CCL;
df7492f9 4315 }
ff0dacd7 4316 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4317 return 0;
4318
4319 no_more_source:
ff0dacd7
KH
4320 detect_info->found |= found;
4321 return 1;
df7492f9
KH
4322}
4323
4324static void
4325decode_coding_ccl (coding)
4326 struct coding_system *coding;
4327{
7c78e542 4328 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4329 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4330 int *charbuf = coding->charbuf;
4331 int *charbuf_end = charbuf + coding->charbuf_size;
4332 int consumed_chars = 0;
4333 int multibytep = coding->src_multibyte;
4334 struct ccl_program ccl;
4335 int source_charbuf[1024];
4336 int source_byteidx[1024];
4776e638 4337 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4338
8dcbea82 4339 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4340 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4341
4342 while (src < src_end)
4343 {
7c78e542 4344 const unsigned char *p = src;
df7492f9
KH
4345 int *source, *source_end;
4346 int i = 0;
4347
4348 if (multibytep)
4349 while (i < 1024 && p < src_end)
4350 {
4351 source_byteidx[i] = p - src;
4352 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4353 }
4354 else
4355 while (i < 1024 && p < src_end)
4356 source_charbuf[i++] = *p++;
8f924df7 4357
df7492f9
KH
4358 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4359 ccl.last_block = 1;
4360
4361 source = source_charbuf;
4362 source_end = source + i;
4363 while (source < source_end)
4364 {
4365 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4366 source_end - source, charbuf_end - charbuf,
4367 charset_list);
df7492f9
KH
4368 source += ccl.consumed;
4369 charbuf += ccl.produced;
4370 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4371 break;
4372 }
4373 if (source < source_end)
4374 src += source_byteidx[source - source_charbuf];
4375 else
4376 src = p;
4377 consumed_chars += source - source_charbuf;
4378
4379 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4380 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4381 break;
4382 }
4383
4384 switch (ccl.status)
4385 {
4386 case CCL_STAT_SUSPEND_BY_SRC:
4387 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4388 break;
4389 case CCL_STAT_SUSPEND_BY_DST:
4390 break;
4391 case CCL_STAT_QUIT:
4392 case CCL_STAT_INVALID_CMD:
4393 coding->result = CODING_RESULT_INTERRUPT;
4394 break;
4395 default:
4396 coding->result = CODING_RESULT_SUCCESS;
4397 break;
4398 }
4399 coding->consumed_char += consumed_chars;
4400 coding->consumed = src - coding->source;
4401 coding->charbuf_used = charbuf - coding->charbuf;
4402}
4403
4404static int
4405encode_coding_ccl (coding)
4406 struct coding_system *coding;
4407{
4408 struct ccl_program ccl;
4409 int multibytep = coding->dst_multibyte;
4410 int *charbuf = coding->charbuf;
4411 int *charbuf_end = charbuf + coding->charbuf_used;
4412 unsigned char *dst = coding->destination + coding->produced;
4413 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4414 unsigned char *adjusted_dst_end = dst_end - 1;
4415 int destination_charbuf[1024];
4416 int i, produced_chars = 0;
8dcbea82 4417 Lisp_Object attrs, eol_type, charset_list;
df7492f9 4418
8dcbea82 4419 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9
KH
4420 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4421
4422 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4423 ccl.dst_multibyte = coding->dst_multibyte;
4424
4425 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4426 {
4427 int dst_bytes = dst_end - dst;
4428 if (dst_bytes > 1024)
4429 dst_bytes = 1024;
4430
4431 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4432 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4433 charbuf += ccl.consumed;
4434 if (multibytep)
4435 for (i = 0; i < ccl.produced; i++)
4436 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4437 else
4438 {
4439 for (i = 0; i < ccl.produced; i++)
4440 *dst++ = destination_charbuf[i] & 0xFF;
4441 produced_chars += ccl.produced;
4442 }
4443 }
4444
4445 switch (ccl.status)
4446 {
4447 case CCL_STAT_SUSPEND_BY_SRC:
4448 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4449 break;
4450 case CCL_STAT_SUSPEND_BY_DST:
4451 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4452 break;
4453 case CCL_STAT_QUIT:
4454 case CCL_STAT_INVALID_CMD:
4455 coding->result = CODING_RESULT_INTERRUPT;
4456 break;
4457 default:
4458 coding->result = CODING_RESULT_SUCCESS;
4459 break;
1397dc18 4460 }
df7492f9
KH
4461
4462 coding->produced_char += produced_chars;
4463 coding->produced = dst - coding->destination;
4464 return 0;
1397dc18
KH
4465}
4466
df7492f9 4467
1397dc18 4468\f
df7492f9 4469/*** 10, 11. no-conversion handlers ***/
4ed46869 4470
b73bfc1c 4471/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4472
b73bfc1c 4473static void
df7492f9 4474decode_coding_raw_text (coding)
4ed46869 4475 struct coding_system *coding;
4ed46869 4476{
df7492f9 4477 coding->chars_at_source = 1;
2c78b7e1
KH
4478 coding->consumed_char = 0;
4479 coding->consumed = 0;
df7492f9
KH
4480 coding->result = CODING_RESULT_SUCCESS;
4481}
4ed46869 4482
df7492f9
KH
4483static int
4484encode_coding_raw_text (coding)
4485 struct coding_system *coding;
4486{
4487 int multibytep = coding->dst_multibyte;
4488 int *charbuf = coding->charbuf;
4489 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4490 unsigned char *dst = coding->destination + coding->produced;
4491 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4492 int produced_chars = 0;
b73bfc1c
KH
4493 int c;
4494
df7492f9 4495 if (multibytep)
b73bfc1c 4496 {
df7492f9 4497 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4498
df7492f9
KH
4499 if (coding->src_multibyte)
4500 while (charbuf < charbuf_end)
4501 {
4502 ASSURE_DESTINATION (safe_room);
4503 c = *charbuf++;
4504 if (ASCII_CHAR_P (c))
4505 EMIT_ONE_ASCII_BYTE (c);
4506 else if (CHAR_BYTE8_P (c))
4507 {
4508 c = CHAR_TO_BYTE8 (c);
4509 EMIT_ONE_BYTE (c);
4510 }
4511 else
4512 {
4513 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4514
df7492f9
KH
4515 CHAR_STRING_ADVANCE (c, p1);
4516 while (p0 < p1)
9d123124
KH
4517 {
4518 EMIT_ONE_BYTE (*p0);
4519 p0++;
4520 }
df7492f9
KH
4521 }
4522 }
b73bfc1c 4523 else
df7492f9
KH
4524 while (charbuf < charbuf_end)
4525 {
4526 ASSURE_DESTINATION (safe_room);
4527 c = *charbuf++;
4528 EMIT_ONE_BYTE (c);
4529 }
4530 }
4531 else
4ed46869 4532 {
df7492f9 4533 if (coding->src_multibyte)
d46c5b12 4534 {
df7492f9
KH
4535 int safe_room = MAX_MULTIBYTE_LENGTH;
4536
4537 while (charbuf < charbuf_end)
d46c5b12 4538 {
df7492f9
KH
4539 ASSURE_DESTINATION (safe_room);
4540 c = *charbuf++;
4541 if (ASCII_CHAR_P (c))
4542 *dst++ = c;
4543 else if (CHAR_BYTE8_P (c))
4544 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4545 else
df7492f9
KH
4546 CHAR_STRING_ADVANCE (c, dst);
4547 produced_chars++;
d46c5b12
KH
4548 }
4549 }
df7492f9
KH
4550 else
4551 {
4552 ASSURE_DESTINATION (charbuf_end - charbuf);
4553 while (charbuf < charbuf_end && dst < dst_end)
4554 *dst++ = *charbuf++;
4555 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4556 }
4ed46869 4557 }
df7492f9
KH
4558 coding->result = CODING_RESULT_SUCCESS;
4559 coding->produced_char += produced_chars;
4560 coding->produced = dst - coding->destination;
4561 return 0;
4ed46869
KH
4562}
4563
ff0dacd7
KH
4564/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4565 Check if a text is encoded in a charset-based coding system. If it
4566 is, return 1, else return 0. */
4567
0a28aafb 4568static int
ff0dacd7 4569detect_coding_charset (coding, detect_info)
df7492f9 4570 struct coding_system *coding;
ff0dacd7 4571 struct coding_detection_info *detect_info;
1397dc18 4572{
8f924df7
KH
4573 const unsigned char *src = coding->source, *src_base = src;
4574 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4575 int multibytep = coding->src_multibyte;
4576 int consumed_chars = 0;
4577 Lisp_Object attrs, valids;
584948ac 4578 int found = 0;
1397dc18 4579
ff0dacd7
KH
4580 detect_info->checked |= CATEGORY_MASK_CHARSET;
4581
df7492f9
KH
4582 coding = &coding_categories[coding_category_charset];
4583 attrs = CODING_ID_ATTRS (coding->id);
4584 valids = AREF (attrs, coding_attr_charset_valids);
4585
4586 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4587 src += coding->head_ascii;
1397dc18 4588
b73bfc1c 4589 while (1)
1397dc18 4590 {
df7492f9 4591 int c;
1397dc18 4592
df7492f9
KH
4593 ONE_MORE_BYTE (c);
4594 if (NILP (AREF (valids, c)))
4595 break;
584948ac 4596 if (c >= 0x80)
ff0dacd7 4597 found = CATEGORY_MASK_CHARSET;
df7492f9 4598 }
ff0dacd7 4599 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4600 return 0;
4ed46869 4601
df7492f9 4602 no_more_source:
ff0dacd7
KH
4603 detect_info->found |= found;
4604 return 1;
df7492f9 4605}
b73bfc1c 4606
/* Decode the bytes of CODING's source, starting at offset
   coding->consumed, into character codes stored in coding->charbuf.

   The first byte of each code is looked up in the coding system's
   coding_attr_charset_valids table, which yields nil (invalid), one
   charset ID, or a list of candidate charset IDs.  A byte sequence
   that cannot be decoded is rolled back and only its first byte is
   stored (ASCII as is, otherwise via BYTE8_TO_CHAR), and
   coding->errors is incremented.

   On exit coding->consumed, coding->consumed_char and
   coding->charbuf_used are updated; consumption is counted only up
   to the start of the element that was being processed.

   NOTE(review): ONE_MORE_BYTE, CODING_DECODE_CHAR and
   ADD_CHARSET_DATA are defined outside this section.  ONE_MORE_BYTE
   presumably reads and updates src, src_end, multibytep and
   consumed_chars and jumps to no_more_source at end of input --
   confirm before renaming any local or the label.  */
b73bfc1c 4607static void
df7492f9 4608decode_coding_charset (coding)
4ed46869 4609 struct coding_system *coding;
4ed46869 4610{
8f924df7
KH
4611 const unsigned char *src = coding->source + coding->consumed;
4612 const unsigned char *src_end = coding->source + coding->src_bytes;
4613 const unsigned char *src_base;
df7492f9 4614 int *charbuf = coding->charbuf;
/* Stop MAX_ANNOTATION_LENGTH slots short of the real end of the
   buffer; presumably this keeps room for the charset annotation
   written by ADD_CHARSET_DATA -- confirm.  */
ff0dacd7 4615 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4616 int consumed_chars = 0, consumed_chars_base;
4617 int multibytep = coding->src_multibyte;
4eb6d3f1 4618 Lisp_Object attrs, eol_type, charset_list, valids;
/* char_offset counts characters produced so far; last_offset and
   last_id describe the current run of characters that belong to one
   non-ASCII charset.  */
ff0dacd7
KH
4619 int char_offset = coding->produced_char;
4620 int last_offset = char_offset;
4621 int last_id = charset_ascii;
df7492f9
KH
4622
4623 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4624 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4625
df7492f9 4626 while (1)
4ed46869 4627 {
4eb6d3f1 4628 int c;
/* Remember where this element starts, so that an invalid sequence
   can be rolled back and so that the consumed counts reported at
   no_more_source exclude a partially read element.  */
df7492f9
KH
4629
4630 src_base = src;
4631 consumed_chars_base = consumed_chars;
b73bfc1c 4632
/* No room left for another character: stop and report progress.  */
df7492f9
KH
4633 if (charbuf >= charbuf_end)
4634 break;
4635
4eb6d3f1 4636 ONE_MORE_BYTE (c);
df7492f9 4637 if (c == '\r')
d46c5b12 4638 {
c7c66a95
KH
4639 /* Here we assume that no charset maps '\r' to something
4640 else. */
df7492f9 4641 if (EQ (eol_type, Qdos))
b73bfc1c 4642 {
/* A CR at the very end of the source cannot be classified until the
   next byte is available.  */
98725083 4643 if (src == src_end)
b73bfc1c 4644 {
98725083
KH
4645 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4646 goto no_more_source;
b73bfc1c 4647 }
/* CR LF: read the LF as well, so that C is what gets stored below
   (presumably '\n' -- depends on ONE_MORE_BYTE).  */
98725083 4648 if (*src == '\n')
df7492f9 4649 ONE_MORE_BYTE (c);
b73bfc1c 4650 }
df7492f9 4651 else if (EQ (eol_type, Qmac))
b73bfc1c 4652 c = '\n';
d46c5b12 4653 }
df7492f9 4654 else
d46c5b12 4655 {
4eb6d3f1
KH
4656 Lisp_Object val;
4657 struct charset *charset;
c7c66a95 4658 int dim;
/* LEN is the number of bytes collected in CODE so far; both persist
   across the candidate charsets tried below.  */
acb2a965
KH
4659 int len = 1;
4660 unsigned code = c;
4eb6d3f1
KH
4661
4662 val = AREF (valids, c);
4663 if (NILP (val))
4664 goto invalid_code;
c7c66a95 4665 if (INTEGERP (val))
4eb6d3f1 4666 {
/* A single candidate charset: collect the remaining bytes of its
   DIM-byte code, most significant byte first.  */
c7c66a95
KH
4667 charset = CHARSET_FROM_ID (XFASTINT (val));
4668 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4669 while (len < dim)
4eb6d3f1 4670 {
acb2a965
KH
4671 ONE_MORE_BYTE (c);
4672 code = (code << 8) | c;
f9d71dcd 4673 len++;
4eb6d3f1 4674 }
c7c66a95
KH
4675 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4676 charset, code, c);
4677 }
4678 else
4679 {
4680 /* VAL is a list of charset IDs. It is assured that the
4681 list is sorted by charset dimensions (smaller one
4682 comes first). */
c7c66a95
KH
4683 while (CONSP (val))
4684 {
4685 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4686 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4687 while (len < dim)
c7c66a95 4688 {
acb2a965
KH
4689 ONE_MORE_BYTE (c);
4690 code = (code << 8) | c;
f9d71dcd 4691 len++;
c7c66a95 4692 }
/* The first candidate that leaves C non-negative wins.  */
c7c66a95
KH
4693 CODING_DECODE_CHAR (coding, src, src_base,
4694 src_end, charset, code, c);
4695 if (c >= 0)
4696 break;
4697 val = XCDR (val);
4698 }
4eb6d3f1 4699 }
/* A negative C here means that no candidate charset could decode
   CODE.  */
df7492f9
KH
4700 if (c < 0)
4701 goto invalid_code;
/* The charset changed: close the run of the previous non-ASCII
   charset (presumably ADD_CHARSET_DATA records it as an annotation
   in charbuf) and start a new run at this character.  */
ff0dacd7
KH
4702 if (charset->id != charset_ascii
4703 && last_id != charset->id)
4704 {
4705 if (last_id != charset_ascii)
4706 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4707 last_id = charset->id;
4708 last_offset = char_offset;
4709 }
d46c5b12 4710 }
df7492f9 4711 *charbuf++ = c;
ff0dacd7 4712 char_offset++;
df7492f9
KH
4713 continue;
4714
/* Rewind to the start of the element and store just its first byte
   as a raw byte; the following bytes are processed again as new
   elements.  */
4715 invalid_code:
4716 src = src_base;
4717 consumed_chars = consumed_chars_base;
4718 ONE_MORE_BYTE (c);
4719 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4720 char_offset++;
df7492f9 4721 coding->errors++;
4ed46869
KH
4722 }
4723
/* Reached by the explicit goto above, by breaking out of the loop
   when charbuf is full, and presumably from inside ONE_MORE_BYTE at
   end of input.  Close the pending charset run and report progress
   up to the start of the last element.  */
df7492f9 4724 no_more_source:
ff0dacd7
KH
4725 if (last_id != charset_ascii)
4726 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
df7492f9
KH
4727 coding->consumed_char += consumed_chars_base;
4728 coding->consumed = src_base - coding->source;
4729 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4730}
4731
df7492f9
KH
/* Encode the character codes in coding->charbuf (charbuf_used of
   them) into bytes written at coding->destination +
   coding->produced, using the charsets in the coding system's
   charset list.

   On return coding->result is CODING_RESULT_SUCCESS,
   coding->produced_char has grown by produced_chars and
   coding->produced is the new byte length of the destination.  The
   return value is always 0.

   NOTE(review): dst_end and multibytep are never referenced by name
   below, and produced_chars is never incremented by name;
   presumably ASSURE_DESTINATION and the EMIT_* macros (defined
   outside this section) use them together with dst and coding --
   confirm before renaming any local.  */
4732static int
4733encode_coding_charset (coding)
4ed46869 4734 struct coding_system *coding;
4ed46869 4735{
df7492f9
KH
4736 int multibytep = coding->dst_multibyte;
4737 int *charbuf = coding->charbuf;
4738 int *charbuf_end = charbuf + coding->charbuf_used;
4739 unsigned char *dst = coding->destination + coding->produced;
4740 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4741 int safe_room = MAX_MULTIBYTE_LENGTH;
4742 int produced_chars = 0;
df7492f9
KH
4743 Lisp_Object attrs, eol_type, charset_list;
4744 int ascii_compatible;
b73bfc1c 4745 int c;
b73bfc1c 4746
df7492f9 4747 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4748 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4749
df7492f9 4750 while (charbuf < charbuf_end)
4ed46869 4751 {
4eb6d3f1 4752 struct charset *charset;
df7492f9 4753 unsigned code;
8f924df7 4754
/* Presumably guarantees at least SAFE_ROOM free bytes at DST before
   anything is emitted for this character -- confirm against the
   macro definition.  */
df7492f9
KH
4755 ASSURE_DESTINATION (safe_room);
4756 c = *charbuf++;
4757 if (ascii_compatible && ASCII_CHAR_P (c))
4758 EMIT_ONE_ASCII_BYTE (c);
/* A raw-byte character is written back as the byte it stands for.  */
16eafb5d 4759 else if (CHAR_BYTE8_P (c))
4ed46869 4760 {
16eafb5d
KH
4761 c = CHAR_TO_BYTE8 (c);
4762 EMIT_ONE_BYTE (c);
d46c5b12 4763 }
d46c5b12 4764 else
b73bfc1c 4765 {
/* Find a charset of CHARSET_LIST that contains C; CODE receives the
   code point of C in that charset.  The code is emitted most
   significant byte first, using as many bytes as the charset's
   dimension.  */
4eb6d3f1
KH
4766 charset = char_charset (c, charset_list, &code);
4767 if (charset)
4768 {
4769 if (CHARSET_DIMENSION (charset) == 1)
4770 EMIT_ONE_BYTE (code);
4771 else if (CHARSET_DIMENSION (charset) == 2)
4772 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4773 else if (CHARSET_DIMENSION (charset) == 3)
4774 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4775 else
4776 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4777 (code >> 8) & 0xFF, code & 0xFF);
4778 }
4779 else
/* No charset of the list contains C: emit one substitute byte,
   either CODING_INHIBIT_CHARACTER_SUBSTITUTION (safe-encoding mode)
   or the coding system's default character.  */
41cbe562
KH
4780 {
4781 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4782 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4783 else
4784 c = coding->default_char;
4785 EMIT_ONE_BYTE (c);
4786 }
4ed46869 4787 }
4ed46869
KH
4788 }
4789
df7492f9
KH
4790 coding->result = CODING_RESULT_SUCCESS;
4791 coding->produced_char += produced_chars;
4792 coding->produced = dst - coding->destination;
4793 return 0;
4ed46869
KH
4794}
4795
4796\f
1397dc18 4797/*** 10. C library functions ***/
4ed46869 4798
df7492f9
KH
4799/* Setup coding context CODING from information about CODING_SYSTEM.
4800 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4801 CODING_SYSTEM is invalid, signal an error. */
4ed46869 4802
ec6d2bb8 4803void
e0e989f6
KH
4804setup_coding_system (coding_system, coding)
4805 Lisp_Object coding_system;
4ed46869
KH
4806 struct coding_system *coding;
4807{
df7492f9
KH
4808 Lisp_Object attrs;
4809 Lisp_Object eol_type;
4810 Lisp_Object coding_type;
4608c386 4811 Lisp_Object val;
4ed46869 4812
df7492f9
KH
4813 if (NILP (coding_system))
4814 coding_system = Qno_conversion;
c07c8e12 4815
df7492f9 4816 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 4817
df7492f9
KH
4818 attrs = CODING_ID_ATTRS (coding->id);
4819 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 4820
df7492f9
KH
4821 coding->mode = 0;
4822 coding->head_ascii = -1;
4823 coding->common_flags
4824 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
5e5c78be
KH
4825 if (! NILP (CODING_ATTR_POST_READ (attrs)))
4826 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4827 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4828 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
4829 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4830 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 4831
df7492f9 4832 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4833 coding->max_charset_id = SCHARS (val) - 1;
4834 coding->safe_charsets = (char *) SDATA (val);
df7492f9 4835 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 4836
df7492f9
KH
4837 coding_type = CODING_ATTR_TYPE (attrs);
4838 if (EQ (coding_type, Qundecided))
d46c5b12 4839 {
df7492f9
KH
4840 coding->detector = NULL;
4841 coding->decoder = decode_coding_raw_text;
4842 coding->encoder = encode_coding_raw_text;
4843 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 4844 }
df7492f9 4845 else if (EQ (coding_type, Qiso_2022))
d46c5b12 4846 {
df7492f9
KH
4847 int i;
4848 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4849
4850 /* Invoke graphic register 0 to plane 0. */
4851 CODING_ISO_INVOCATION (coding, 0) = 0;
4852 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4853 CODING_ISO_INVOCATION (coding, 1)
4854 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4855 /* Setup the initial status of designation. */
4856 for (i = 0; i < 4; i++)
4857 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4858 /* Not single shifting initially. */
4859 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4860 /* Beginning of buffer should also be regarded as bol. */
4861 CODING_ISO_BOL (coding) = 1;
4862 coding->detector = detect_coding_iso_2022;
4863 coding->decoder = decode_coding_iso_2022;
4864 coding->encoder = encode_coding_iso_2022;
4865 if (flags & CODING_ISO_FLAG_SAFE)
4866 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 4867 coding->common_flags
df7492f9
KH
4868 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4869 | CODING_REQUIRE_FLUSHING_MASK);
4870 if (flags & CODING_ISO_FLAG_COMPOSITION)
4871 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4872 if (flags & CODING_ISO_FLAG_DESIGNATION)
4873 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4874 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4875 {
4876 setup_iso_safe_charsets (attrs);
4877 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4878 coding->max_charset_id = SCHARS (val) - 1;
4879 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
4880 }
4881 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 4882 }
df7492f9 4883 else if (EQ (coding_type, Qcharset))
d46c5b12 4884 {
df7492f9
KH
4885 coding->detector = detect_coding_charset;
4886 coding->decoder = decode_coding_charset;
4887 coding->encoder = encode_coding_charset;
d46c5b12 4888 coding->common_flags
df7492f9 4889 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 4890 }
df7492f9 4891 else if (EQ (coding_type, Qutf_8))
d46c5b12 4892 {
df7492f9
KH
4893 coding->detector = detect_coding_utf_8;
4894 coding->decoder = decode_coding_utf_8;
4895 coding->encoder = encode_coding_utf_8;
4896 coding->common_flags
4897 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4898 }
4899 else if (EQ (coding_type, Qutf_16))
4900 {
4901 val = AREF (attrs, coding_attr_utf_16_bom);
4902 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4903 : EQ (val, Qt) ? utf_16_with_bom
4904 : utf_16_without_bom);
4905 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 4906 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 4907 : utf_16_little_endian);
e19c3639 4908 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4909 coding->detector = detect_coding_utf_16;
4910 coding->decoder = decode_coding_utf_16;
4911 coding->encoder = encode_coding_utf_16;
4912 coding->common_flags
4913 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
4914 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4915 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 4916 }
df7492f9 4917 else if (EQ (coding_type, Qccl))
4ed46869 4918 {
df7492f9
KH
4919 coding->detector = detect_coding_ccl;
4920 coding->decoder = decode_coding_ccl;
4921 coding->encoder = encode_coding_ccl;
c952af22 4922 coding->common_flags
df7492f9
KH
4923 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4924 | CODING_REQUIRE_FLUSHING_MASK);
4925 }
4926 else if (EQ (coding_type, Qemacs_mule))
4927 {
4928 coding->detector = detect_coding_emacs_mule;
4929 coding->decoder = decode_coding_emacs_mule;
4930 coding->encoder = encode_coding_emacs_mule;
c952af22 4931 coding->common_flags
df7492f9
KH
4932 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4933 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4934 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4935 {
4936 Lisp_Object tail, safe_charsets;
4937 int max_charset_id = 0;
4938
4939 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4940 tail = XCDR (tail))
4941 if (max_charset_id < XFASTINT (XCAR (tail)))
4942 max_charset_id = XFASTINT (XCAR (tail));
4943 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4944 make_number (255));
4945 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4946 tail = XCDR (tail))
8f924df7 4947 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 4948 coding->max_charset_id = max_charset_id;
8f924df7 4949 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
4950 }
4951 }
4952 else if (EQ (coding_type, Qshift_jis))
4953 {
4954 coding->detector = detect_coding_sjis;
4955 coding->decoder = decode_coding_sjis;
4956 coding->encoder = encode_coding_sjis;
c952af22 4957 coding->common_flags
df7492f9
KH
4958 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4959 }
4960 else if (EQ (coding_type, Qbig5))
4961 {
4962 coding->detector = detect_coding_big5;
4963 coding->decoder = decode_coding_big5;
4964 coding->encoder = encode_coding_big5;
c952af22 4965 coding->common_flags
df7492f9
KH
4966 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4967 }
4968 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4969 {
df7492f9
KH
4970 coding->detector = NULL;
4971 coding->decoder = decode_coding_raw_text;
4972 coding->encoder = encode_coding_raw_text;
4ed46869 4973 }
4ed46869 4974
df7492f9 4975 return;
4ed46869
KH
4976}
4977
df7492f9
KH
4978/* Return raw-text or one of its subsidiaries that has the same
4979 eol_type as CODING-SYSTEM. */
ec6d2bb8 4980
df7492f9
KH
4981Lisp_Object
4982raw_text_coding_system (coding_system)
4983 Lisp_Object coding_system;
ec6d2bb8 4984{
0be8721c 4985 Lisp_Object spec, attrs;
df7492f9 4986 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 4987
d3e4cb56
KH
4988 if (NILP (coding_system))
4989 return Qraw_text;
df7492f9
KH
4990 spec = CODING_SYSTEM_SPEC (coding_system);
4991 attrs = AREF (spec, 0);
ec6d2bb8 4992
df7492f9
KH
4993 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4994 return coding_system;
ec6d2bb8 4995
df7492f9
KH
4996 eol_type = AREF (spec, 2);
4997 if (VECTORP (eol_type))
4998 return Qraw_text;
4999 spec = CODING_SYSTEM_SPEC (Qraw_text);
5000 raw_text_eol_type = AREF (spec, 2);
5001 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5002 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5003 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5004}
5005
54f78171 5006
df7492f9
KH
5007/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5008 does, return one of the subsidiary that has the same eol-spec as
5009 PARENT. Otherwise, return CODING_SYSTEM. */
5010
5011Lisp_Object
5012coding_inherit_eol_type (coding_system, parent)
b74e4686 5013 Lisp_Object coding_system, parent;
54f78171 5014{
3e139625 5015 Lisp_Object spec, eol_type;
54f78171 5016
d3e4cb56
KH
5017 if (NILP (coding_system))
5018 coding_system = Qraw_text;
df7492f9 5019 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5020 eol_type = AREF (spec, 2);
d3e4cb56
KH
5021 if (VECTORP (eol_type)
5022 && ! NILP (parent))
df7492f9
KH
5023 {
5024 Lisp_Object parent_spec;
df7492f9
KH
5025 Lisp_Object parent_eol_type;
5026
5027 parent_spec
5028 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5029 parent_eol_type = AREF (parent_spec, 2);
5030 if (EQ (parent_eol_type, Qunix))
5031 coding_system = AREF (eol_type, 0);
5032 else if (EQ (parent_eol_type, Qdos))
5033 coding_system = AREF (eol_type, 1);
5034 else if (EQ (parent_eol_type, Qmac))
5035 coding_system = AREF (eol_type, 2);
54f78171 5036 }
df7492f9 5037 return coding_system;
54f78171
KH
5038}
5039
4ed46869
KH
5040/* Emacs has a mechanism to automatically detect a coding system if it
5041 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5042 it's impossible to distinguish some coding systems accurately
5043 because they use the same range of codes. So, at first, coding
5044 systems are categorized into the following categories:
5045
0ef69138 5046 o coding-category-emacs-mule
4ed46869
KH
5047
5048 The category for a coding system which has the same code range
5049 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5050 symbol) `emacs-mule' by default.
4ed46869
KH
5051
5052 o coding-category-sjis
5053
5054 The category for a coding system which has the same code range
5055 as SJIS. Assigned the coding-system (Lisp
7717c392 5056 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5057
5058 o coding-category-iso-7
5059
5060 The category for a coding system which has the same code range
7717c392 5061 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5062 shift and single shift functions. This can encode/decode all
5063 charsets. Assigned the coding-system (Lisp symbol)
5064 `iso-2022-7bit' by default.
5065
5066 o coding-category-iso-7-tight
5067
5068 Same as coding-category-iso-7 except that this can
5069 encode/decode only the specified charsets.
4ed46869
KH
5070
5071 o coding-category-iso-8-1
5072
5073 The category for a coding system which has the same code range
5074 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5075 for DIMENSION1 charset. This doesn't use any locking shift
5076 and single shift functions. Assigned the coding-system (Lisp
5077 symbol) `iso-latin-1' by default.
4ed46869
KH
5078
5079 o coding-category-iso-8-2
5080
5081 The category for a coding system which has the same code range
5082 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5083 for DIMENSION2 charset. This doesn't use any locking shift
5084 and single shift functions. Assigned the coding-system (Lisp
5085 symbol) `japanese-iso-8bit' by default.
4ed46869 5086
7717c392 5087 o coding-category-iso-7-else
4ed46869
KH
5088
5089 The category for a coding system which has the same code range
df7492f9 5090 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5091 single shift functions. Assigned the coding-system (Lisp
5092 symbol) `iso-2022-7bit-lock' by default.
5093
5094 o coding-category-iso-8-else
5095
5096 The category for a coding system which has the same code range
df7492f9 5097 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5098 single shift functions. Assigned the coding-system (Lisp
5099 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5100
5101 o coding-category-big5
5102
5103 The category for a coding system which has the same code range
5104 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5105 `cn-big5' by default.
4ed46869 5106
fa42c37f
KH
5107 o coding-category-utf-8
5108
5109 The category for a coding system which has the same code range
5110 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5111 symbol) `utf-8' by default.
5112
5113 o coding-category-utf-16-be
5114
5115 The category for a coding system in which a text has an
5116 Unicode signature (cf. Unicode Standard) in the order of BIG
5117 endian at the head. Assigned the coding-system (Lisp symbol)
5118 `utf-16-be' by default.
5119
5120 o coding-category-utf-16-le
5121
5122 The category for a coding system in which a text has an
5123 Unicode signature (cf. Unicode Standard) in the order of
5124 LITTLE endian at the head. Assigned the coding-system (Lisp
5125 symbol) `utf-16-le' by default.
5126
1397dc18
KH
5127 o coding-category-ccl
5128
5129 The category for a coding system of which encoder/decoder is
5130 written in CCL programs. The default value is nil, i.e., no
5131 coding system is assigned.
5132
4ed46869
KH
5133 o coding-category-binary
5134
5135 The category for a coding system not categorized in any of the
5136 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5137 `no-conversion' by default.
4ed46869
KH
5138
5139 Each of them is a Lisp symbol and the value is an actual
df7492f9 5140 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5141 What Emacs does actually is to detect a category of coding system.
5142 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5143 decide only one possible category, it selects a category of the
4ed46869
KH
5144 highest priority. Priorities of categories are also specified by a
5145 user in a Lisp variable `coding-category-list'.
5146
5147*/
5148
df7492f9
KH
/* Kinds of end-of-line found while scanning a text.  The nonzero
   values are distinct bits: decode_eol ORs them together and
   adjust_coding_eol_type tests them with `&'.  */
5149#define EOL_SEEN_NONE 0
5150#define EOL_SEEN_LF 1
5151#define EOL_SEEN_CR 2
5152#define EOL_SEEN_CRLF 4
66cfb530 5153
ff0dacd7
KH
5154/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5155 SOURCE is encoded. If CATEGORY is one of
5156 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5157 two-byte, else they are encoded by one-byte.
5158
5159 Return one of EOL_SEEN_XXX. */
4ed46869 5160
bc4bc72a 5161#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5162
5163static int
89528eb3 5164detect_eol (source, src_bytes, category)
d46c5b12 5165 unsigned char *source;
df7492f9 5166 EMACS_INT src_bytes;
89528eb3 5167 enum coding_category category;
4ed46869 5168{
d46c5b12 5169 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5170 unsigned char c;
df7492f9
KH
5171 int total = 0;
5172 int eol_seen = EOL_SEEN_NONE;
4ed46869 5173
89528eb3 5174 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5175 {
df7492f9 5176 int msb, lsb;
fa42c37f 5177
89528eb3
KH
5178 msb = category == (coding_category_utf_16_le
5179 | coding_category_utf_16_le_nosig);
df7492f9 5180 lsb = 1 - msb;
fa42c37f 5181
df7492f9 5182 while (src + 1 < src_end)
fa42c37f 5183 {
df7492f9
KH
5184 c = src[lsb];
5185 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5186 {
df7492f9
KH
5187 int this_eol;
5188
5189 if (c == '\n')
5190 this_eol = EOL_SEEN_LF;
5191 else if (src + 3 >= src_end
5192 || src[msb + 2] != 0
5193 || src[lsb + 2] != '\n')
5194 this_eol = EOL_SEEN_CR;
fa42c37f 5195 else
8f924df7 5196 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5197
5198 if (eol_seen == EOL_SEEN_NONE)
5199 /* This is the first end-of-line. */
5200 eol_seen = this_eol;
5201 else if (eol_seen != this_eol)
fa42c37f 5202 {
df7492f9
KH
5203 /* The found type is different from what found before. */
5204 eol_seen = EOL_SEEN_LF;
5205 break;
fa42c37f 5206 }
df7492f9
KH
5207 if (++total == MAX_EOL_CHECK_COUNT)
5208 break;
fa42c37f 5209 }
df7492f9 5210 src += 2;
fa42c37f 5211 }
bcf26d6a 5212 }
d46c5b12 5213 else
c4825358 5214 {
df7492f9 5215 while (src < src_end)
27901516 5216 {
df7492f9
KH
5217 c = *src++;
5218 if (c == '\n' || c == '\r')
5219 {
5220 int this_eol;
d46c5b12 5221
df7492f9
KH
5222 if (c == '\n')
5223 this_eol = EOL_SEEN_LF;
5224 else if (src >= src_end || *src != '\n')
5225 this_eol = EOL_SEEN_CR;
5226 else
5227 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5228
df7492f9
KH
5229 if (eol_seen == EOL_SEEN_NONE)
5230 /* This is the first end-of-line. */
5231 eol_seen = this_eol;
5232 else if (eol_seen != this_eol)
5233 {
5234 /* The found type is different from what found before. */
5235 eol_seen = EOL_SEEN_LF;
5236 break;
5237 }
5238 if (++total == MAX_EOL_CHECK_COUNT)
5239 break;
5240 }
5241 }
73be902c 5242 }
df7492f9 5243 return eol_seen;
73be902c
KH
5244}
5245
df7492f9 5246
73be902c 5247static void
df7492f9
KH
5248adjust_coding_eol_type (coding, eol_seen)
5249 struct coding_system *coding;
5250 int eol_seen;
73be902c 5251{
0be8721c 5252 Lisp_Object eol_type;
8f924df7 5253
df7492f9
KH
5254 eol_type = CODING_ID_EOL_TYPE (coding->id);
5255 if (eol_seen & EOL_SEEN_LF)
5256 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6f197c07 5257 else if (eol_seen & EOL_SEEN_CRLF)
df7492f9 5258 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6f197c07 5259 else if (eol_seen & EOL_SEEN_CR)
df7492f9 5260 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12 5261}
4ed46869 5262
df7492f9
KH
5263/* Detect how a text specified in CODING is encoded. If a coding
5264 system is detected, update fields of CODING by the detected coding
5265 system. */
0a28aafb 5266
df7492f9
KH
5267void
5268detect_coding (coding)
d46c5b12 5269 struct coding_system *coding;
d46c5b12 5270{
8f924df7 5271 const unsigned char *src, *src_end;
df7492f9 5272 Lisp_Object attrs, coding_type;
d46c5b12 5273
df7492f9
KH
5274 coding->consumed = coding->consumed_char = 0;
5275 coding->produced = coding->produced_char = 0;
5276 coding_set_source (coding);
1c3478b0 5277
df7492f9 5278 src_end = coding->source + coding->src_bytes;
1c3478b0 5279
df7492f9
KH
5280 /* If we have not yet decided the text encoding type, detect it
5281 now. */
5282 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5283 {
df7492f9
KH
5284 int c, i;
5285
5286 for (src = coding->source; src < src_end; src++)
d46c5b12 5287 {
df7492f9
KH
5288 c = *src;
5289 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5290 || c == ISO_CODE_SI
5291 || c == ISO_CODE_SO)))
5292 break;
d46c5b12 5293 }
df7492f9
KH
5294 coding->head_ascii = src - (coding->source + coding->consumed);
5295
5296 if (coding->head_ascii < coding->src_bytes)
d46c5b12 5297 {
ff0dacd7
KH
5298 struct coding_detection_info detect_info;
5299 enum coding_category category;
5300 struct coding_system *this;
df7492f9 5301
ff0dacd7 5302 detect_info.checked = detect_info.found = detect_info.rejected = 0;
df7492f9 5303 for (i = 0; i < coding_category_raw_text; i++)
d46c5b12 5304 {
ff0dacd7
KH
5305 category = coding_priorities[i];
5306 this = coding_categories + category;
df7492f9 5307 if (this->id < 0)
fa42c37f 5308 {
df7492f9 5309 /* No coding system of this category is defined. */
ff0dacd7 5310 detect_info.rejected |= (1 << category);
fa42c37f 5311 }
ff0dacd7 5312 else if (category >= coding_category_raw_text)
89528eb3 5313 continue;
ff0dacd7 5314 else if (detect_info.checked & (1 << category))
fa42c37f 5315 {
ff0dacd7
KH
5316 if (detect_info.found & (1 << category))
5317 break;
fa42c37f 5318 }
ff0dacd7
KH
5319 else if ((*(this->detector)) (coding, &detect_info)
5320 && detect_info.found & (1 << category))
5321 break;
d46c5b12 5322 }
ff0dacd7
KH
5323 if (i < coding_category_raw_text)
5324 setup_coding_system (CODING_ID_NAME (this->id), coding);
5325 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5326 setup_coding_system (Qraw_text, coding);
ff0dacd7 5327 else if (detect_info.rejected)
df7492f9 5328 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5329 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5330 {
5331 this = coding_categories + coding_priorities[i];
5332 setup_coding_system (CODING_ID_NAME (this->id), coding);
5333 break;
5334 }
d46c5b12 5335 }
b73bfc1c 5336 }
b49a1807
KH
5337 else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
5338 {
5339 Lisp_Object coding_systems;
5340 struct coding_detection_info detect_info;
5341
5342 coding_systems
5343 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5344 detect_info.found = detect_info.rejected = 0;
5345 if (CONSP (coding_systems)
5346 && detect_coding_utf_16 (coding, &detect_info)
5347 && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5348 | CATEGORY_MASK_UTF_16_BE)))
5349 {
5350 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5351 setup_coding_system (XCAR (coding_systems), coding);
5352 else
5353 setup_coding_system (XCDR (coding_systems), coding);
5354 }
5355 }
4ed46869 5356
df7492f9
KH
5357 attrs = CODING_ID_ATTRS (coding->id);
5358 coding_type = CODING_ATTR_TYPE (attrs);
4ed46869 5359
df7492f9
KH
5360 /* If we have not yet decided the EOL type, detect it now. But, the
5361 detection is impossible for a CCL based coding system, in which
5362 case, we detct the EOL type after decoding. */
5363 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5364 && ! EQ (coding_type, Qccl))
d46c5b12 5365 {
89528eb3 5366 int eol_seen = detect_eol (coding->source, coding->src_bytes,
3e139625 5367 (enum coding_category) XINT (CODING_ATTR_CATEGORY (attrs)));
4ed46869 5368
df7492f9
KH
5369 if (eol_seen != EOL_SEEN_NONE)
5370 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5371 }
4ed46869 5372}
4ed46869 5373
d46c5b12 5374
/* Apply end-of-line conversion to text that has already been
   decoded: the coding->produced bytes that start at character
   position coding->dst_pos (located with CHAR_POS_ADDR).

   1. If the end-of-line format of CODING is still undecided (a
      vector), scan the text, collect the end-of-line forms that
      occur as EOL_SEEN_XXX bits, and let adjust_coding_eol_type
      choose the matching subsidiary.
   2. If the format is then `mac', replace every CR by LF in place.
      If it is `dos', remove CR bytes and shrink coding->produced
      and coding->produced_char accordingly.  */
aaaf0b1e 5375static void
df7492f9 5376decode_eol (coding)
aaaf0b1e 5377 struct coding_system *coding;
aaaf0b1e 5378{
df7492f9 5379 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5380 {
df7492f9
KH
5381 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5382 unsigned char *pend = p + coding->produced;
5383 int eol_seen = EOL_SEEN_NONE;
4ed46869 5384
df7492f9 5385 for (; p < pend; p++)
aaaf0b1e 5386 {
df7492f9
KH
5387 if (*p == '\n')
5388 eol_seen |= EOL_SEEN_LF;
5389 else if (*p == '\r')
aaaf0b1e 5390 {
/* CR LF counts as one CRLF; the LF is skipped so that it is not
   counted as an LF of its own.  */
df7492f9 5391 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5392 {
df7492f9
KH
5393 eol_seen |= EOL_SEEN_CRLF;
5394 p++;
aaaf0b1e 5395 }
aaaf0b1e 5396 else
df7492f9 5397 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5398 }
aaaf0b1e 5399 }
df7492f9
KH
5400 if (eol_seen != EOL_SEEN_NONE)
5401 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5402 }
d46c5b12 5403
/* The end-of-line format is read again here, so a format chosen
   just above takes effect immediately.  */
df7492f9 5404 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
27901516 5405 {
df7492f9
KH
5406 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5407 unsigned char *pend = p + coding->produced;
d46c5b12 5408
df7492f9
KH
5409 for (; p < pend; p++)
5410 if (*p == '\r')
5411 *p = '\n';
4ed46869 5412 }
df7492f9
KH
5413 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5414 {
5415 unsigned char *p, *pbeg, *pend;
5416 Lisp_Object undo_list;
b73bfc1c 5417
/* Move the gap to the end of the produced text, then delete that
   text with the buffer's undo list temporarily set to t
   (presumably so that the deletion is not recorded -- confirm).
   NOTE(review): the code below assumes that the deleted bytes are
   still intact at GPT_ADDR afterwards; this depends on how
   del_range_2 treats the gap.  */
df7492f9
KH
5418 move_gap_both (coding->dst_pos + coding->produced_char,
5419 coding->dst_pos_byte + coding->produced);
5420 undo_list = current_buffer->undo_list;
5421 current_buffer->undo_list = Qt;
c197f191 5422 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5423 current_buffer->undo_list = undo_list;
5424 pbeg = GPT_ADDR;
5425 pend = pbeg + coding->produced;
b73bfc1c 5426
/* Scan backwards and squeeze out each CR by moving the tail of the
   text one byte down.  Note that every CR is removed, whether or
   not an LF follows it.  */
df7492f9
KH
5427 for (p = pend - 1; p >= pbeg; p--)
5428 if (*p == '\r')
5429 {
5430 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5431 pend--;
5432 }
/* Each removed CR was one byte and one character.  Put the
   shortened text back with insert_from_gap.  */
5433 coding->produced_char -= coding->produced - (pend - pbeg);
5434 coding->produced = pend - pbeg;
5435 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e 5436 }
4ed46869
KH
5437}
5438
df7492f9
KH
5439static void
5440translate_chars (coding, table)
4ed46869 5441 struct coding_system *coding;
df7492f9 5442 Lisp_Object table;
4ed46869 5443{
df7492f9
KH
5444 int *charbuf = coding->charbuf;
5445 int *charbuf_end = charbuf + coding->charbuf_used;
5446 int c;
d46c5b12 5447
df7492f9
KH
5448 if (coding->chars_at_source)
5449 return;
4ed46869 5450
df7492f9 5451 while (charbuf < charbuf_end)
8844fa83 5452 {
df7492f9
KH
5453 c = *charbuf;
5454 if (c < 0)
5455 charbuf += c;
5456 else
5457 *charbuf++ = translate_char (table, c);
8844fa83 5458 }
df7492f9 5459}
bc4bc72a 5460
d46c5b12 5461static int
df7492f9
KH
5462produce_chars (coding)
5463 struct coding_system *coding;
4ed46869 5464{
df7492f9
KH
5465 unsigned char *dst = coding->destination + coding->produced;
5466 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5467 int produced;
5468 int produced_chars = 0;
4ed46869 5469
df7492f9 5470 if (! coding->chars_at_source)
4ed46869 5471 {
df7492f9 5472 /* Characters are in coding->charbuf. */
3e139625
DL
5473 EMACS_INT *buf = coding->charbuf;
5474 EMACS_INT *buf_end = buf + coding->charbuf_used;
df7492f9 5475 unsigned char *adjusted_dst_end;
4ed46869 5476
df7492f9
KH
5477 if (BUFFERP (coding->src_object)
5478 && EQ (coding->src_object, coding->dst_object))
8f924df7 5479 dst_end = ((unsigned char *) coding->source) + coding->consumed;
df7492f9 5480 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5481
df7492f9 5482 while (buf < buf_end)
4ed46869 5483 {
df7492f9 5484 int c = *buf++;
bc4bc72a 5485
df7492f9 5486 if (dst >= adjusted_dst_end)
d46c5b12 5487 {
df7492f9
KH
5488 dst = alloc_destination (coding,
5489 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5490 dst);
5491 dst_end = coding->destination + coding->dst_bytes;
5492 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5493 }
5494 if (c >= 0)
5495 {
5496 if (coding->dst_multibyte
5497 || ! CHAR_BYTE8_P (c))
5498 CHAR_STRING_ADVANCE (c, dst);
5499 else
5500 *dst++ = CHAR_TO_BYTE8 (c);
5501 produced_chars++;
d46c5b12 5502 }
df7492f9 5503 else
d3e4cb56
KH
5504 /* This is an annotation datum. (-C) is the length of
5505 it. */
5506 buf += -c - 1;
4ed46869
KH
5507 }
5508 }
fa42c37f 5509 else
fa42c37f 5510 {
8f924df7
KH
5511 const unsigned char *src = coding->source;
5512 const unsigned char *src_end = src + coding->src_bytes;
df7492f9 5513 Lisp_Object eol_type;
fa42c37f 5514
df7492f9 5515 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5516
df7492f9 5517 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 5518 {
df7492f9 5519 if (coding->src_multibyte)
fa42c37f 5520 {
71c81426 5521 int multibytep = 1;
df7492f9 5522 int consumed_chars;
d46c5b12 5523
df7492f9
KH
5524 while (1)
5525 {
8f924df7 5526 const unsigned char *src_base = src;
df7492f9 5527 int c;
b73bfc1c 5528
df7492f9
KH
5529 ONE_MORE_BYTE (c);
5530 if (c == '\r')
5531 {
5532 if (EQ (eol_type, Qdos))
5533 {
98725083
KH
5534 if (src == src_end)
5535 {
5536 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5537 goto no_more_source;
5538 }
5539 if (*src == '\n')
df7492f9
KH
5540 c = *src++;
5541 }
5542 else if (EQ (eol_type, Qmac))
5543 c = '\n';
5544 }
5545 if (dst == dst_end)
5546 {
2c78b7e1 5547 coding->consumed = src - coding->source;
b73bfc1c 5548
2c78b7e1 5549 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5550 dst_end = (unsigned char *) src;
2c78b7e1
KH
5551 if (dst == dst_end)
5552 {
5553 dst = alloc_destination (coding, src_end - src + 1,
5554 dst);
5555 dst_end = coding->destination + coding->dst_bytes;
5556 coding_set_source (coding);
5557 src = coding->source + coding->consumed;
5558 src_end = coding->source + coding->src_bytes;
5559 }
df7492f9
KH
5560 }
5561 *dst++ = c;
5562 produced_chars++;
5563 }
5564 no_more_source:
5565 ;
fa42c37f
KH
5566 }
5567 else
df7492f9
KH
5568 while (src < src_end)
5569 {
71c81426 5570 int multibytep = 1;
df7492f9 5571 int c = *src++;
b73bfc1c 5572
df7492f9
KH
5573 if (c == '\r')
5574 {
5575 if (EQ (eol_type, Qdos))
5576 {
5577 if (src < src_end
5578 && *src == '\n')
5579 c = *src++;
5580 }
5581 else if (EQ (eol_type, Qmac))
5582 c = '\n';
5583 }
5584 if (dst >= dst_end - 1)
5585 {
2c78b7e1 5586 coding->consumed = src - coding->source;
df7492f9 5587
2c78b7e1 5588 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5589 dst_end = (unsigned char *) src;
2c78b7e1
KH
5590 if (dst >= dst_end - 1)
5591 {
5592 dst = alloc_destination (coding, src_end - src + 2,
5593 dst);
5594 dst_end = coding->destination + coding->dst_bytes;
5595 coding_set_source (coding);
5596 src = coding->source + coding->consumed;
5597 src_end = coding->source + coding->src_bytes;
5598 }
df7492f9
KH
5599 }
5600 EMIT_ONE_BYTE (c);
5601 }
d46c5b12 5602 }
df7492f9
KH
5603 else
5604 {
5605 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 5606 {
df7492f9 5607 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5608
df7492f9 5609 if (require > 0)
fa42c37f 5610 {
df7492f9
KH
5611 EMACS_INT offset = src - coding->source;
5612
5613 dst = alloc_destination (coding, require, dst);
5614 coding_set_source (coding);
5615 src = coding->source + offset;
5616 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
5617 }
5618 }
df7492f9
KH
5619 produced_chars = coding->src_chars;
5620 while (src < src_end)
fa42c37f 5621 {
df7492f9
KH
5622 int c = *src++;
5623
5624 if (c == '\r')
5625 {
5626 if (EQ (eol_type, Qdos))
5627 {
5628 if (src < src_end
5629 && *src == '\n')
5630 c = *src++;
5631 produced_chars--;
5632 }
5633 else if (EQ (eol_type, Qmac))
5634 c = '\n';
5635 }
5636 *dst++ = c;
fa42c37f
KH
5637 }
5638 }
2c78b7e1
KH
5639 coding->consumed = coding->src_bytes;
5640 coding->consumed_char = coding->src_chars;
fa42c37f
KH
5641 }
5642
df7492f9
KH
5643 produced = dst - (coding->destination + coding->produced);
5644 if (BUFFERP (coding->dst_object))
5645 insert_from_gap (produced_chars, produced);
5646 coding->produced += produced;
5647 coding->produced_char += produced_chars;
5648 return produced_chars;
fa42c37f
KH
5649}
5650
ff0dacd7
KH
5651/* Compose text in CODING->object according to the annotation data at
5652 CHARBUF. CHARBUF is an array:
5653 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5654 */
4ed46869 5655
df7492f9
KH
5656static INLINE void
5657produce_composition (coding, charbuf)
4ed46869 5658 struct coding_system *coding;
df7492f9 5659 int *charbuf;
4ed46869 5660{
df7492f9 5661 int len;
ff0dacd7 5662 EMACS_INT from, to;
df7492f9 5663 enum composition_method method;
df7492f9 5664 Lisp_Object components;
fa42c37f 5665
df7492f9 5666 len = -charbuf[0];
ff0dacd7
KH
5667 from = coding->dst_pos + charbuf[2];
5668 to = coding->dst_pos + charbuf[3];
5669 method = (enum composition_method) (charbuf[4]);
d46c5b12 5670
df7492f9
KH
5671 if (method == COMPOSITION_RELATIVE)
5672 components = Qnil;
d46c5b12 5673 else
d46c5b12 5674 {
df7492f9
KH
5675 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5676 int i;
b73bfc1c 5677
df7492f9
KH
5678 len -= 5;
5679 charbuf += 5;
5680 for (i = 0; i < len; i++)
5681 args[i] = make_number (charbuf[i]);
5682 components = (method == COMPOSITION_WITH_ALTCHARS
5683 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 5684 }
ff0dacd7 5685 compose_text (from, to, components, Qnil, coding->dst_object);
d46c5b12
KH
5686}
5687
d46c5b12 5688
ff0dacd7
KH
5689/* Put `charset' property on text in CODING->object according to
5690 the annotation data at CHARBUF. CHARBUF is an array:
5691 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5692 */
d46c5b12 5693
ff0dacd7
KH
5694static INLINE void
5695produce_charset (coding, charbuf)
d46c5b12 5696 struct coding_system *coding;
ff0dacd7 5697 int *charbuf;
d46c5b12 5698{
ff0dacd7
KH
5699 EMACS_INT from = coding->dst_pos + charbuf[2];
5700 EMACS_INT to = coding->dst_pos + charbuf[3];
5701 struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
b73bfc1c 5702
ff0dacd7
KH
5703 Fput_text_property (make_number (from), make_number (to),
5704 Qcharset, CHARSET_NAME (charset),
5705 coding->dst_object);
d46c5b12
KH
5706}
5707
d46c5b12 5708
df7492f9
KH
/* Number of `int' elements first requested for the conversion work
   area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the character work area of CODING on the stack of the
   calling function and record its size in CODING->charbuf_size.

   Starting from CHARBUF_SIZE elements, the request is halved while
   alloca yields a null pointer and the size stays above 1024.  If no
   area could be obtained, CODING->result is set to
   CODING_RESULT_INSUFFICIENT_MEM and the *calling function* returns
   that value, so this macro may only be used in functions returning
   int.  Because the area comes from alloca, it is valid only until
   the calling function returns.

   CODING is evaluated several times; it must be free of side
   effects.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int charbuf_alloc_size = CHARBUF_SIZE;                              \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (charbuf_alloc_size > 1024)                                   \
      {                                                                 \
        (coding)->charbuf                                               \
          = (int *) alloca (sizeof (int) * charbuf_alloc_size);         \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        charbuf_alloc_size >>= 1;                                       \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        (coding)->result = CODING_RESULT_INSUFFICIENT_MEM;              \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = charbuf_alloc_size;                        \
  } while (0)
4ed46869 5730
d46c5b12
KH
5731
5732static void
df7492f9 5733produce_annotation (coding)
d46c5b12 5734 struct coding_system *coding;
d46c5b12 5735{
df7492f9
KH
5736 int *charbuf = coding->charbuf;
5737 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5738
ff0dacd7
KH
5739 if (NILP (coding->dst_object))
5740 return;
d46c5b12 5741
df7492f9 5742 while (charbuf < charbuf_end)
a84f1519 5743 {
df7492f9
KH
5744 if (*charbuf >= 0)
5745 charbuf++;
d46c5b12 5746 else
d46c5b12 5747 {
df7492f9 5748 int len = -*charbuf;
ff0dacd7 5749 switch (charbuf[1])
df7492f9
KH
5750 {
5751 case CODING_ANNOTATE_COMPOSITION_MASK:
5752 produce_composition (coding, charbuf);
5753 break;
ff0dacd7
KH
5754 case CODING_ANNOTATE_CHARSET_MASK:
5755 produce_charset (coding, charbuf);
5756 break;
df7492f9
KH
5757 default:
5758 abort ();
5759 }
5760 charbuf += len;
d46c5b12 5761 }
a84f1519 5762 }
d46c5b12
KH
5763}
5764
df7492f9
KH
5765/* Decode the data at CODING->src_object into CODING->dst_object.
5766 CODING->src_object is a buffer, a string, or nil.
5767 CODING->dst_object is a buffer.
d46c5b12 5768
df7492f9
KH
5769 If CODING->src_object is a buffer, it must be the current buffer.
5770 In this case, if CODING->src_pos is positive, it is a position of
5771 the source text in the buffer, otherwise, the source text is in the
5772 gap area of the buffer, and CODING->src_pos specifies the offset of
5773 the text from GPT (which must be the same as PT). If this is the
5774 same buffer as CODING->dst_object, CODING->src_pos must be
5775 negative.
d46c5b12 5776
df7492f9
KH
5777 If CODING->src_object is a string, CODING->src_pos in an index to
5778 that string.
d46c5b12 5779
df7492f9
KH
5780 If CODING->src_object is nil, CODING->source must already point to
5781 the non-relocatable memory area. In this case, CODING->src_pos is
5782 an offset from CODING->source.
73be902c 5783
df7492f9
KH
5784 The decoded data is inserted at the current point of the buffer
5785 CODING->dst_object.
5786*/
d46c5b12 5787
df7492f9
KH
5788static int
5789decode_coding (coding)
d46c5b12 5790 struct coding_system *coding;
d46c5b12 5791{
df7492f9 5792 Lisp_Object attrs;
d46c5b12 5793
df7492f9
KH
5794 if (BUFFERP (coding->src_object)
5795 && coding->src_pos > 0
5796 && coding->src_pos < GPT
5797 && coding->src_pos + coding->src_chars > GPT)
5798 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5799
df7492f9 5800 if (BUFFERP (coding->dst_object))
1c3478b0 5801 {
df7492f9
KH
5802 if (current_buffer != XBUFFER (coding->dst_object))
5803 set_buffer_internal (XBUFFER (coding->dst_object));
5804 if (GPT != PT)
5805 move_gap_both (PT, PT_BYTE);
1c3478b0
KH
5806 }
5807
df7492f9
KH
5808 coding->consumed = coding->consumed_char = 0;
5809 coding->produced = coding->produced_char = 0;
5810 coding->chars_at_source = 0;
5811 coding->result = CODING_RESULT_SUCCESS;
5812 coding->errors = 0;
1c3478b0 5813
df7492f9
KH
5814 ALLOC_CONVERSION_WORK_AREA (coding);
5815
5816 attrs = CODING_ID_ATTRS (coding->id);
5817
5818 do
b73bfc1c 5819 {
df7492f9
KH
5820 coding_set_source (coding);
5821 coding->annotated = 0;
5822 (*(coding->decoder)) (coding);
5823 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
da4109a9
KH
5824 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5825 else if (!NILP (Vstandard_translation_table_for_decode))
5826 translate_chars (coding, Vstandard_translation_table_for_decode);
df7492f9
KH
5827 coding_set_destination (coding);
5828 produce_chars (coding);
5829 if (coding->annotated)
5830 produce_annotation (coding);
d46c5b12 5831 }
df7492f9
KH
5832 while (coding->consumed < coding->src_bytes
5833 && ! coding->result);
d46c5b12 5834
df7492f9
KH
5835 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5836 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5837 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5838 decode_eol (coding);
d46c5b12 5839
df7492f9
KH
5840 coding->carryover_bytes = 0;
5841 if (coding->consumed < coding->src_bytes)
d46c5b12 5842 {
df7492f9 5843 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 5844 const unsigned char *src;
df7492f9
KH
5845
5846 coding_set_source (coding);
5847 coding_set_destination (coding);
5848 src = coding->source + coding->consumed;
5849
5850 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 5851 {
df7492f9
KH
5852 /* Flush out unprocessed data as binary chars. We are sure
5853 that the number of data is less than the size of
5854 coding->charbuf. */
df7492f9 5855 while (nbytes-- > 0)
1c3478b0 5856 {
df7492f9 5857 int c = *src++;
98725083
KH
5858
5859 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
1c3478b0 5860 }
df7492f9 5861 produce_chars (coding);
d46c5b12 5862 }
d46c5b12 5863 else
df7492f9
KH
5864 {
5865 /* Record unprocessed bytes in coding->carryover. We are
5866 sure that the number of data is less than the size of
5867 coding->carryover. */
5868 unsigned char *p = coding->carryover;
5869
5870 coding->carryover_bytes = nbytes;
5871 while (nbytes-- > 0)
5872 *p++ = *src++;
1c3478b0 5873 }
df7492f9 5874 coding->consumed = coding->src_bytes;
b73bfc1c 5875 }
69f76525 5876
73be902c 5877 return coding->result;
4ed46869
KH
5878}
5879
aaaf0b1e 5880
e1c23804 5881/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
5882 ending before LIMIT of CODING->src_object (buffer or string), store
5883 the data in BUF, set *STOP to a starting position of the next
5884 composition (if any) or to LIMIT, and return the address of the
5885 next element of BUF.
5886
5887 If such an annotation is not found, set *STOP to a starting
5888 position of a composition after POS (if any) or to LIMIT, and
5889 return BUF. */
5890
5891static INLINE int *
5892handle_composition_annotation (pos, limit, coding, buf, stop)
5893 EMACS_INT pos, limit;
aaaf0b1e 5894 struct coding_system *coding;
ff0dacd7
KH
5895 int *buf;
5896 EMACS_INT *stop;
aaaf0b1e 5897{
ff0dacd7
KH
5898 EMACS_INT start, end;
5899 Lisp_Object prop;
aaaf0b1e 5900
ff0dacd7
KH
5901 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5902 || end > limit)
5903 *stop = limit;
5904 else if (start > pos)
5905 *stop = start;
5906 else
aaaf0b1e 5907 {
ff0dacd7 5908 if (start == pos)
aaaf0b1e 5909 {
ff0dacd7
KH
5910 /* We found a composition. Store the corresponding
5911 annotation data in BUF. */
5912 int *head = buf;
5913 enum composition_method method = COMPOSITION_METHOD (prop);
5914 int nchars = COMPOSITION_LENGTH (prop);
5915
5916 ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5917 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 5918 {
ff0dacd7
KH
5919 Lisp_Object components;
5920 int len, i, i_byte;
5921
5922 components = COMPOSITION_COMPONENTS (prop);
5923 if (VECTORP (components))
aaaf0b1e 5924 {
ff0dacd7
KH
5925 len = XVECTOR (components)->size;
5926 for (i = 0; i < len; i++)
5927 *buf++ = XINT (AREF (components, i));
aaaf0b1e 5928 }
ff0dacd7 5929 else if (STRINGP (components))
aaaf0b1e 5930 {
8f924df7 5931 len = SCHARS (components);
ff0dacd7
KH
5932 i = i_byte = 0;
5933 while (i < len)
5934 {
5935 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5936 buf++;
5937 }
5938 }
5939 else if (INTEGERP (components))
5940 {
5941 len = 1;
5942 *buf++ = XINT (components);
5943 }
5944 else if (CONSP (components))
5945 {
5946 for (len = 0; CONSP (components);
5947 len++, components = XCDR (components))
5948 *buf++ = XINT (XCAR (components));
aaaf0b1e 5949 }
aaaf0b1e 5950 else
ff0dacd7
KH
5951 abort ();
5952 *head -= len;
aaaf0b1e 5953 }
aaaf0b1e 5954 }
ff0dacd7
KH
5955
5956 if (find_composition (end, limit, &start, &end, &prop,
5957 coding->src_object)
5958 && end <= limit)
5959 *stop = start;
5960 else
5961 *stop = limit;
aaaf0b1e 5962 }
ff0dacd7
KH
5963 return buf;
5964}
5965
5966
e1c23804 5967/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
5968 CODING->src_object (buffer of string), store the data in BUF, set
5969 *STOP to the position where the value of `charset' property changes
5970 (limiting by LIMIT), and return the address of the next element of
5971 BUF.
5972
5973 If the property value is nil, set *STOP to the position where the
5974 property value is non-nil (limiting by LIMIT), and return BUF. */
5975
5976static INLINE int *
5977handle_charset_annotation (pos, limit, coding, buf, stop)
5978 EMACS_INT pos, limit;
5979 struct coding_system *coding;
5980 int *buf;
5981 EMACS_INT *stop;
5982{
5983 Lisp_Object val, next;
5984 int id;
5985
5986 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5987 if (! NILP (val) && CHARSETP (val))
5988 id = XINT (CHARSET_SYMBOL_ID (val));
5989 else
5990 id = -1;
5991 ADD_CHARSET_DATA (buf, 0, 0, id);
5992 next = Fnext_single_property_change (make_number (pos), Qcharset,
5993 coding->src_object,
5994 make_number (limit));
5995 *stop = XINT (next);
5996 return buf;
5997}
5998
5999
df7492f9
KH
6000static void
6001consume_chars (coding)
6002 struct coding_system *coding;
6003{
6004 int *buf = coding->charbuf;
ff0dacd7 6005 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6006 const unsigned char *src = coding->source + coding->consumed;
4776e638 6007 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6008 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6009 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6010 int multibytep = coding->src_multibyte;
6011 Lisp_Object eol_type;
6012 int c;
ff0dacd7 6013 EMACS_INT stop, stop_composition, stop_charset;
88993dfd 6014
df7492f9
KH
6015 eol_type = CODING_ID_EOL_TYPE (coding->id);
6016 if (VECTORP (eol_type))
6017 eol_type = Qunix;
88993dfd 6018
df7492f9
KH
6019 /* Note: composition handling is not yet implemented. */
6020 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6021
ff0dacd7
KH
6022 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6023 stop = stop_composition = pos;
6024 else
6025 stop = stop_composition = end_pos;
6026 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6027 stop = stop_charset = pos;
df7492f9 6028 else
ff0dacd7 6029 stop_charset = end_pos;
ec6d2bb8 6030
ff0dacd7
KH
6031 /* Compensate for CRLF and annotation. */
6032 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6033 while (buf < buf_end)
aaaf0b1e 6034 {
df7492f9 6035 if (pos == stop)
ec6d2bb8 6036 {
df7492f9
KH
6037 if (pos == end_pos)
6038 break;
ff0dacd7
KH
6039 if (pos == stop_composition)
6040 buf = handle_composition_annotation (pos, end_pos, coding,
6041 buf, &stop_composition);
6042 if (pos == stop_charset)
6043 buf = handle_charset_annotation (pos, end_pos, coding,
6044 buf, &stop_charset);
6045 stop = (stop_composition < stop_charset
6046 ? stop_composition : stop_charset);
df7492f9
KH
6047 }
6048
6049 if (! multibytep)
4776e638 6050 {
d3e4cb56 6051 EMACS_INT bytes;
aaaf0b1e 6052
d3e4cb56
KH
6053 if (! CODING_FOR_UNIBYTE (coding)
6054 && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
4776e638
KH
6055 c = STRING_CHAR_ADVANCE (src), pos += bytes;
6056 else
6057 c = *src++, pos++;
6058 }
df7492f9 6059 else
4776e638 6060 c = STRING_CHAR_ADVANCE (src), pos++;
df7492f9
KH
6061 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6062 c = '\n';
6063 if (! EQ (eol_type, Qunix))
aaaf0b1e 6064 {
df7492f9 6065 if (c == '\n')
aaaf0b1e 6066 {
df7492f9
KH
6067 if (EQ (eol_type, Qdos))
6068 *buf++ = '\r';
6069 else
6070 c = '\r';
aaaf0b1e
KH
6071 }
6072 }
df7492f9 6073 *buf++ = c;
aaaf0b1e 6074 }
ec6d2bb8 6075
df7492f9
KH
6076 coding->consumed = src - coding->source;
6077 coding->consumed_char = pos - coding->src_pos;
6078 coding->charbuf_used = buf - coding->charbuf;
6079 coding->chars_at_source = 0;
aaaf0b1e
KH
6080}
6081
4ed46869 6082
df7492f9
KH
6083/* Encode the text at CODING->src_object into CODING->dst_object.
6084 CODING->src_object is a buffer or a string.
6085 CODING->dst_object is a buffer or nil.
6086
6087 If CODING->src_object is a buffer, it must be the current buffer.
6088 In this case, if CODING->src_pos is positive, it is a position of
6089 the source text in the buffer, otherwise. the source text is in the
6090 gap area of the buffer, and coding->src_pos specifies the offset of
6091 the text from GPT (which must be the same as PT). If this is the
6092 same buffer as CODING->dst_object, CODING->src_pos must be
6093 negative and CODING should not have `pre-write-conversion'.
6094
6095 If CODING->src_object is a string, CODING should not have
6096 `pre-write-conversion'.
6097
6098 If CODING->dst_object is a buffer, the encoded data is inserted at
6099 the current point of that buffer.
6100
6101 If CODING->dst_object is nil, the encoded data is placed at the
6102 memory area specified by CODING->destination. */
6103
6104static int
6105encode_coding (coding)
4ed46869 6106 struct coding_system *coding;
4ed46869 6107{
df7492f9 6108 Lisp_Object attrs;
9861e777 6109
df7492f9 6110 attrs = CODING_ID_ATTRS (coding->id);
4ed46869 6111
df7492f9 6112 if (BUFFERP (coding->dst_object))
8844fa83 6113 {
df7492f9
KH
6114 set_buffer_internal (XBUFFER (coding->dst_object));
6115 coding->dst_multibyte
6116 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6117 }
4ed46869 6118
b73bfc1c 6119 coding->consumed = coding->consumed_char = 0;
df7492f9
KH
6120 coding->produced = coding->produced_char = 0;
6121 coding->result = CODING_RESULT_SUCCESS;
b73bfc1c 6122 coding->errors = 0;
b73bfc1c 6123
df7492f9 6124 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6125
df7492f9
KH
6126 do {
6127 coding_set_source (coding);
6128 consume_chars (coding);
4ed46869 6129
df7492f9 6130 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
da4109a9
KH
6131 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6132 else if (!NILP (Vstandard_translation_table_for_encode))
6133 translate_chars (coding, Vstandard_translation_table_for_encode);
b73bfc1c 6134
df7492f9
KH
6135 coding_set_destination (coding);
6136 (*(coding->encoder)) (coding);
6137 } while (coding->consumed_char < coding->src_chars);
6138
6139 if (BUFFERP (coding->dst_object))
6140 insert_from_gap (coding->produced_char, coding->produced);
6141
6142 return (coding->result);
ec6d2bb8
KH
6143}
6144
fb88bf2d 6145
4776e638
KH
/* Stack of working buffers used in code conversion.  A nil element
   means that the code conversion of that level is not using a working
   buffer.  */
df7492f9 6149Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 6150
df7492f9
KH
6151/* A working buffer used by the top level conversion. */
6152Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 6153
4ed46869 6154
df7492f9
KH
6155/* Return a working buffer that can be freely used by the following
6156 code conversion. MULTIBYTEP specifies the multibyteness of the
6157 buffer. */
b73bfc1c 6158
df7492f9 6159Lisp_Object
4776e638
KH
6160make_conversion_work_buffer (multibytep, depth)
6161 int multibytep, depth;
df7492f9
KH
6162{
6163 struct buffer *current = current_buffer;
4776e638 6164 Lisp_Object buf, name;
4ed46869 6165
4776e638 6166 if (depth == 0)
e133c8fa 6167 {
df7492f9
KH
6168 if (NILP (Vcode_conversion_reused_work_buf))
6169 Vcode_conversion_reused_work_buf
857dccb0 6170 = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
4776e638 6171 buf = Vcode_conversion_reused_work_buf;
e133c8fa 6172 }
df7492f9 6173 else
d46c5b12 6174 {
4776e638 6175 if (depth < 0)
aaaf0b1e 6176 {
857dccb0 6177 name = build_string (" *code-converting-work*");
4776e638 6178 name = Fgenerate_new_buffer_name (name, Qnil);
aaaf0b1e 6179 }
4776e638 6180 else
9861e777 6181 {
4776e638 6182 char str[128];
d46c5b12 6183
857dccb0 6184 sprintf (str, " *code-converting-work*<%d>", depth);
4776e638
KH
6185 name = build_string (str);
6186 }
6187 buf = Fget_buffer_create (name);
b73bfc1c 6188 }
df7492f9
KH
6189 set_buffer_internal (XBUFFER (buf));
6190 current_buffer->undo_list = Qt;
6191 Ferase_buffer ();
8f924df7 6192 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
df7492f9
KH
6193 set_buffer_internal (current);
6194 return buf;
6195}
d46c5b12 6196
4776e638
KH
6197static Lisp_Object
6198code_conversion_restore (buffer)
6199 Lisp_Object buffer;
6200{
6201 Lisp_Object workbuf;
6202
6203 workbuf = XCAR (Vcode_conversion_work_buf_list);
6204 if (! NILP (workbuf)
6205 && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
6206 && ! NILP (Fbuffer_live_p (workbuf)))
6207 Fkill_buffer (workbuf);
6208 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6209 set_buffer_internal (XBUFFER (buffer));
6210 return Qnil;
6211}
b73bfc1c 6212
4776e638
KH
6213static Lisp_Object
6214code_conversion_save (buffer, with_work_buf, multibyte)
6215 Lisp_Object buffer;
6216 int with_work_buf, multibyte;
df7492f9 6217{
4776e638 6218 Lisp_Object workbuf;
b73bfc1c 6219
4776e638 6220 if (with_work_buf)
b73bfc1c 6221 {
4776e638 6222 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
b73bfc1c 6223
4776e638 6224 workbuf = make_conversion_work_buffer (multibyte, depth);
4ed46869 6225 }
4776e638
KH
6226 else
6227 workbuf = Qnil;
6228 Vcode_conversion_work_buf_list
6229 = Fcons (workbuf, Vcode_conversion_work_buf_list);
6230 record_unwind_protect (code_conversion_restore, buffer);
6231 return workbuf;
df7492f9 6232}
d46c5b12 6233
df7492f9
KH
6234int
6235decode_coding_gap (coding, chars, bytes)
6236 struct coding_system *coding;
6237 EMACS_INT chars, bytes;
6238{
6239 int count = specpdl_ptr - specpdl;
5e5c78be 6240 Lisp_Object attrs;
4776e638 6241 Lisp_Object buffer;
fb88bf2d 6242
4776e638
KH
6243 buffer = Fcurrent_buffer ();
6244 code_conversion_save (buffer, 0, 0);
ec6d2bb8 6245
4776e638 6246 coding->src_object = buffer;
df7492f9
KH
6247 coding->src_chars = chars;
6248 coding->src_bytes = bytes;
6249 coding->src_pos = -chars;
6250 coding->src_pos_byte = -bytes;
6251 coding->src_multibyte = chars < bytes;
5e5c78be 6252 coding->dst_object = buffer;
df7492f9
KH
6253 coding->dst_pos = PT;
6254 coding->dst_pos_byte = PT_BYTE;
71c81426 6255 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6256 coding->mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6257
df7492f9
KH
6258 if (CODING_REQUIRE_DETECTION (coding))
6259 detect_coding (coding);
8f924df7 6260
df7492f9 6261 decode_coding (coding);
d46c5b12 6262
5e5c78be
KH
6263 attrs = CODING_ID_ATTRS (coding->id);
6264 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6265 {
5e5c78be
KH
6266 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6267 Lisp_Object val;
6268
6269 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6270 val = call1 (CODING_ATTR_POST_READ (attrs),
6271 make_number (coding->produced_char));
5e5c78be
KH
6272 CHECK_NATNUM (val);
6273 coding->produced_char += Z - prev_Z;
6274 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6275 }
4ed46869 6276
df7492f9 6277 unbind_to (count, Qnil);
b73bfc1c
KH
6278 return coding->result;
6279}
52d41803 6280
4ed46869 6281int
df7492f9 6282encode_coding_gap (coding, chars, bytes)
4ed46869 6283 struct coding_system *coding;
df7492f9 6284 EMACS_INT chars, bytes;
4ed46869 6285{
df7492f9
KH
6286 int count = specpdl_ptr - specpdl;
6287 Lisp_Object buffer;
4ed46869 6288
df7492f9 6289 buffer = Fcurrent_buffer ();
4776e638 6290 code_conversion_save (buffer, 0, 0);
4ed46869 6291
df7492f9
KH
6292 coding->src_object = buffer;
6293 coding->src_chars = chars;
6294 coding->src_bytes = bytes;
6295 coding->src_pos = -chars;
6296 coding->src_pos_byte = -bytes;
6297 coding->src_multibyte = chars < bytes;
6298 coding->dst_object = coding->src_object;
6299 coding->dst_pos = PT;
6300 coding->dst_pos_byte = PT_BYTE;
4ed46869 6301
df7492f9 6302 encode_coding (coding);
b73bfc1c 6303
df7492f9
KH
6304 unbind_to (count, Qnil);
6305 return coding->result;
6306}
4ed46869 6307
d46c5b12 6308
df7492f9
KH
6309/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6310 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6311
df7492f9 6312 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 6313
df7492f9
KH
6314 If it is a buffer, the text is at point of the buffer. FROM and TO
6315 are positions in the buffer.
b73bfc1c 6316
df7492f9
KH
6317 If it is a string, the text is at the beginning of the string.
6318 FROM and TO are indices to the string.
4ed46869 6319
df7492f9
KH
6320 If it is nil, the text is at coding->source. FROM and TO are
6321 indices to coding->source.
bb10be8b 6322
df7492f9 6323 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 6324
df7492f9
KH
6325 If it is a buffer, the decoded text is inserted at point of the
6326 buffer. If the buffer is the same as SRC_OBJECT, the source text
6327 is deleted.
4ed46869 6328
df7492f9
KH
6329 If it is Qt, a string is made from the decoded text, and
6330 set in CODING->dst_object.
d46c5b12 6331
df7492f9 6332 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6333 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6334 CODING->destination by xmalloc. If the decoded text is longer than
6335 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6336 */
d46c5b12 6337
df7492f9
KH
6338void
6339decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6340 dst_object)
d46c5b12 6341 struct coding_system *coding;
df7492f9
KH
6342 Lisp_Object src_object;
6343 EMACS_INT from, from_byte, to, to_byte;
6344 Lisp_Object dst_object;
d46c5b12 6345{
df7492f9
KH
6346 int count = specpdl_ptr - specpdl;
6347 unsigned char *destination;
6348 EMACS_INT dst_bytes;
6349 EMACS_INT chars = to - from;
6350 EMACS_INT bytes = to_byte - from_byte;
6351 Lisp_Object attrs;
4776e638
KH
6352 Lisp_Object buffer;
6353 int saved_pt = -1, saved_pt_byte;
d46c5b12 6354
4776e638 6355 buffer = Fcurrent_buffer ();
93dec019 6356
df7492f9 6357 if (NILP (dst_object))
d46c5b12 6358 {
df7492f9
KH
6359 destination = coding->destination;
6360 dst_bytes = coding->dst_bytes;
d46c5b12 6361 }
93dec019 6362
df7492f9
KH
6363 coding->src_object = src_object;
6364 coding->src_chars = chars;
6365 coding->src_bytes = bytes;
6366 coding->src_multibyte = chars < bytes;
70ad9fc4 6367
df7492f9 6368 if (STRINGP (src_object))
d46c5b12 6369 {
df7492f9
KH
6370 coding->src_pos = from;
6371 coding->src_pos_byte = from_byte;
d46c5b12 6372 }
df7492f9 6373 else if (BUFFERP (src_object))
88993dfd 6374 {
df7492f9
KH
6375 set_buffer_internal (XBUFFER (src_object));
6376 if (from != GPT)
6377 move_gap_both (from, from_byte);
6378 if (EQ (src_object, dst_object))
fb88bf2d 6379 {
4776e638 6380 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6381 TEMP_SET_PT_BOTH (from, from_byte);
6382 del_range_both (from, from_byte, to, to_byte, 1);
6383 coding->src_pos = -chars;
6384 coding->src_pos_byte = -bytes;
fb88bf2d 6385 }
df7492f9 6386 else
fb88bf2d 6387 {
df7492f9
KH
6388 coding->src_pos = from;
6389 coding->src_pos_byte = from_byte;
fb88bf2d 6390 }
88993dfd
KH
6391 }
6392
df7492f9
KH
6393 if (CODING_REQUIRE_DETECTION (coding))
6394 detect_coding (coding);
6395 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 6396
2cb26057
KH
6397 if (EQ (dst_object, Qt)
6398 || (! NILP (CODING_ATTR_POST_READ (attrs))
6399 && NILP (dst_object)))
b73bfc1c 6400 {
4776e638 6401 coding->dst_object = code_conversion_save (buffer, 1, 1);
df7492f9
KH
6402 coding->dst_pos = BEG;
6403 coding->dst_pos_byte = BEG_BYTE;
6404 coding->dst_multibyte = 1;
b73bfc1c 6405 }
df7492f9 6406 else if (BUFFERP (dst_object))
d46c5b12 6407 {
4776e638 6408 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6409 coding->dst_object = dst_object;
6410 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6411 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6412 coding->dst_multibyte
6413 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
6414 }
6415 else
6416 {
4776e638 6417 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6418 coding->dst_object = Qnil;
6419 coding->dst_multibyte = 1;
d46c5b12
KH
6420 }
6421
df7492f9 6422 decode_coding (coding);
fa46990e 6423
df7492f9
KH
6424 if (BUFFERP (coding->dst_object))
6425 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 6426
df7492f9 6427 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6428 {
df7492f9
KH
6429 struct gcpro gcpro1, gcpro2;
6430 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6431 Lisp_Object val;
d46c5b12 6432
c0cc7f7f 6433 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6434 GCPRO2 (coding->src_object, coding->dst_object);
6435 val = call1 (CODING_ATTR_POST_READ (attrs),
6436 make_number (coding->produced_char));
6437 UNGCPRO;
6438 CHECK_NATNUM (val);
6439 coding->produced_char += Z - prev_Z;
6440 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6441 }
de79a6a5 6442
df7492f9 6443 if (EQ (dst_object, Qt))
ec6d2bb8 6444 {
df7492f9
KH
6445 coding->dst_object = Fbuffer_string ();
6446 }
6447 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6448 {
6449 set_buffer_internal (XBUFFER (coding->dst_object));
6450 if (dst_bytes < coding->produced)
6451 {
6452 destination
6453 = (unsigned char *) xrealloc (destination, coding->produced);
6454 if (! destination)
6455 {
6456 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6457 unbind_to (count, Qnil);
6458 return;
6459 }
6460 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6461 move_gap_both (BEGV, BEGV_BYTE);
6462 bcopy (BEGV_ADDR, destination, coding->produced);
6463 coding->destination = destination;
d46c5b12 6464 }
ec6d2bb8 6465 }
b73bfc1c 6466
4776e638
KH
6467 if (saved_pt >= 0)
6468 {
6469 /* This is the case of:
6470 (BUFFERP (src_object) && EQ (src_object, dst_object))
6471 As we have moved PT while replacing the original buffer
6472 contents, we must recover it now. */
6473 set_buffer_internal (XBUFFER (src_object));
6474 if (saved_pt < from)
6475 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6476 else if (saved_pt < from + chars)
6477 TEMP_SET_PT_BOTH (from, from_byte);
6478 else if (! NILP (current_buffer->enable_multibyte_characters))
6479 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6480 saved_pt_byte + (coding->produced - bytes));
6481 else
6482 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6483 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6484 }
4776e638 6485
df7492f9 6486 unbind_to (count, Qnil);
d46c5b12
KH
6487}
6488
d46c5b12 6489
df7492f9
KH
6490void
6491encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6492 dst_object)
d46c5b12 6493 struct coding_system *coding;
df7492f9
KH
6494 Lisp_Object src_object;
6495 EMACS_INT from, from_byte, to, to_byte;
6496 Lisp_Object dst_object;
d46c5b12 6497{
b73bfc1c 6498 int count = specpdl_ptr - specpdl;
df7492f9
KH
6499 EMACS_INT chars = to - from;
6500 EMACS_INT bytes = to_byte - from_byte;
6501 Lisp_Object attrs;
4776e638
KH
6502 Lisp_Object buffer;
6503 int saved_pt = -1, saved_pt_byte;
df7492f9 6504
4776e638 6505 buffer = Fcurrent_buffer ();
df7492f9
KH
6506
6507 coding->src_object = src_object;
6508 coding->src_chars = chars;
6509 coding->src_bytes = bytes;
6510 coding->src_multibyte = chars < bytes;
6511
6512 attrs = CODING_ID_ATTRS (coding->id);
6513
6514 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6515 {
4776e638
KH
6516 coding->src_object = code_conversion_save (buffer, 1,
6517 coding->src_multibyte);
df7492f9
KH
6518 set_buffer_internal (XBUFFER (coding->src_object));
6519 if (STRINGP (src_object))
6520 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6521 else if (BUFFERP (src_object))
6522 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6523 else
6524 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 6525
df7492f9
KH
6526 if (EQ (src_object, dst_object))
6527 {
6528 set_buffer_internal (XBUFFER (src_object));
4776e638 6529 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6530 del_range_both (from, from_byte, to, to_byte, 1);
6531 set_buffer_internal (XBUFFER (coding->src_object));
6532 }
6533
ac87bbef
KH
6534 call2 (CODING_ATTR_PRE_WRITE (attrs),
6535 make_number (BEG), make_number (Z));
6536 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6537 if (BEG != GPT)
6538 move_gap_both (BEG, BEG_BYTE);
6539 coding->src_chars = Z - BEG;
6540 coding->src_bytes = Z_BYTE - BEG_BYTE;
6541 coding->src_pos = BEG;
6542 coding->src_pos_byte = BEG_BYTE;
6543 coding->src_multibyte = Z < Z_BYTE;
6544 }
6545 else if (STRINGP (src_object))
d46c5b12 6546 {
4776e638 6547 code_conversion_save (buffer, 0, 0);
df7492f9
KH
6548 coding->src_pos = from;
6549 coding->src_pos_byte = from_byte;
b73bfc1c 6550 }
df7492f9 6551 else if (BUFFERP (src_object))
b73bfc1c 6552 {
4776e638 6553 code_conversion_save (buffer, 0, 0);
df7492f9 6554 set_buffer_internal (XBUFFER (src_object));
df7492f9 6555 if (EQ (src_object, dst_object))
d46c5b12 6556 {
4776e638 6557 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
6558 coding->src_object = del_range_1 (from, to, 1, 1);
6559 coding->src_pos = 0;
6560 coding->src_pos_byte = 0;
d46c5b12 6561 }
df7492f9 6562 else
d46c5b12 6563 {
ff0dacd7
KH
6564 if (from < GPT && to >= GPT)
6565 move_gap_both (from, from_byte);
df7492f9
KH
6566 coding->src_pos = from;
6567 coding->src_pos_byte = from_byte;
d46c5b12 6568 }
d46c5b12 6569 }
4776e638
KH
6570 else
6571 code_conversion_save (buffer, 0, 0);
d46c5b12 6572
df7492f9 6573 if (BUFFERP (dst_object))
88993dfd 6574 {
df7492f9 6575 coding->dst_object = dst_object;
28f67a95
KH
6576 if (EQ (src_object, dst_object))
6577 {
6578 coding->dst_pos = from;
6579 coding->dst_pos_byte = from_byte;
6580 }
6581 else
6582 {
6583 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6584 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6585 }
df7492f9
KH
6586 coding->dst_multibyte
6587 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 6588 }
df7492f9 6589 else if (EQ (dst_object, Qt))
d46c5b12 6590 {
df7492f9 6591 coding->dst_object = Qnil;
df7492f9 6592 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6593 if (coding->dst_bytes == 0)
6594 coding->dst_bytes = 1;
6595 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6596 coding->dst_multibyte = 0;
d46c5b12
KH
6597 }
6598 else
6599 {
df7492f9
KH
6600 coding->dst_object = Qnil;
6601 coding->dst_multibyte = 0;
d46c5b12
KH
6602 }
6603
df7492f9 6604 encode_coding (coding);
d46c5b12 6605
df7492f9 6606 if (EQ (dst_object, Qt))
d46c5b12 6607 {
df7492f9
KH
6608 if (BUFFERP (coding->dst_object))
6609 coding->dst_object = Fbuffer_string ();
6610 else
d46c5b12 6611 {
df7492f9
KH
6612 coding->dst_object
6613 = make_unibyte_string ((char *) coding->destination,
6614 coding->produced);
6615 xfree (coding->destination);
d46c5b12 6616 }
4ed46869 6617 }
d46c5b12 6618
4776e638
KH
6619 if (saved_pt >= 0)
6620 {
6621 /* This is the case of:
6622 (BUFFERP (src_object) && EQ (src_object, dst_object))
6623 As we have moved PT while replacing the original buffer
6624 contents, we must recover it now. */
6625 set_buffer_internal (XBUFFER (src_object));
6626 if (saved_pt < from)
6627 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6628 else if (saved_pt < from + chars)
6629 TEMP_SET_PT_BOTH (from, from_byte);
6630 else if (! NILP (current_buffer->enable_multibyte_characters))
6631 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6632 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6633 else
4776e638
KH
6634 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6635 saved_pt_byte + (coding->produced - bytes));
6636 }
6637
df7492f9 6638 unbind_to (count, Qnil);
b73bfc1c
KH
6639}
6640
df7492f9 6641
b73bfc1c 6642Lisp_Object
df7492f9 6643preferred_coding_system ()
b73bfc1c 6644{
df7492f9 6645 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6646
df7492f9 6647 return CODING_ID_NAME (id);
4ed46869
KH
6648}
6649
6650\f
6651#ifdef emacs
1397dc18 6652/*** 11. Emacs Lisp library functions ***/
4ed46869 6653
4ed46869 6654DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6655 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6656See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6657about coding-system objects. */)
6658 (obj)
4ed46869
KH
6659 Lisp_Object obj;
6660{
df7492f9 6661 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6662}
6663
9d991de8
RS
6664DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6665 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6666 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6667 (prompt)
4ed46869
KH
6668 Lisp_Object prompt;
6669{
e0e989f6 6670 Lisp_Object val;
9d991de8
RS
6671 do
6672 {
4608c386
KH
6673 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6674 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 6675 }
8f924df7 6676 while (SCHARS (val) == 0);
e0e989f6 6677 return (Fintern (val, Qnil));
4ed46869
KH
6678}
6679
9b787f3e 6680DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6681 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6682If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6683 (prompt, default_coding_system)
9b787f3e 6684 Lisp_Object prompt, default_coding_system;
4ed46869 6685{
f44d27ce 6686 Lisp_Object val;
9b787f3e 6687 if (SYMBOLP (default_coding_system))
a3181084 6688 XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
4608c386 6689 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6690 Qt, Qnil, Qcoding_system_history,
6691 default_coding_system, Qnil);
8f924df7 6692 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6693}
6694
6695DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6696 1, 1, 0,
48b0f3ae 6697 doc: /* Check validity of CODING-SYSTEM.
b054002f 6698If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
df7492f9 6699 (coding_system)
4ed46869
KH
6700 Lisp_Object coding_system;
6701{
b7826503 6702 CHECK_SYMBOL (coding_system);
4ed46869
KH
6703 if (!NILP (Fcoding_system_p (coding_system)))
6704 return coding_system;
6705 while (1)
02ba4723 6706 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6707}
df7492f9 6708
3a73fa5d 6709\f
89528eb3
KH
6710/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6711 HIGHEST is nonzero, return the coding system of the highest
6712 priority among the detected coding systems. Otherwize return a
6713 list of detected coding systems sorted by their priorities. If
6714 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6715 multibyte form but contains only ASCII and eight-bit chars.
6716 Otherwise, the bytes are raw bytes.
6717
6718 CODING-SYSTEM controls the detection as below:
6719
6720 If it is nil, detect both text-format and eol-format. If the
6721 text-format part of CODING-SYSTEM is already specified
6722 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6723 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6724 detect only text-format. */
6725
d46c5b12 6726Lisp_Object
df7492f9 6727detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
8f924df7 6728 const unsigned char *src;
d46c5b12 6729 int src_bytes, highest;
0a28aafb 6730 int multibytep;
df7492f9 6731 Lisp_Object coding_system;
4ed46869 6732{
8f924df7 6733 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
6734 Lisp_Object attrs, eol_type;
6735 Lisp_Object val;
6736 struct coding_system coding;
89528eb3 6737 int id;
ff0dacd7 6738 struct coding_detection_info detect_info;
b73bfc1c 6739
df7492f9
KH
6740 if (NILP (coding_system))
6741 coding_system = Qundecided;
6742 setup_coding_system (coding_system, &coding);
6743 attrs = CODING_ID_ATTRS (coding.id);
6744 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 6745 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 6746
df7492f9
KH
6747 coding.source = src;
6748 coding.src_bytes = src_bytes;
6749 coding.src_multibyte = multibytep;
6750 coding.consumed = 0;
89528eb3 6751 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 6752
ff0dacd7 6753 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 6754
89528eb3
KH
6755 /* At first, detect text-format if necessary. */
6756 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
4ed46869 6757 {
ff0dacd7
KH
6758 enum coding_category category;
6759 struct coding_system *this;
6760 int c, i;
88993dfd 6761
df7492f9 6762 for (; src < src_end; src++)
4ed46869 6763 {
df7492f9 6764 c = *src;
89528eb3
KH
6765 if (c & 0x80
6766 || (c < 0x20 && (c == ISO_CODE_ESC
6767 || c == ISO_CODE_SI
584948ac 6768 || c == ISO_CODE_SO)))
d46c5b12 6769 break;
4ed46869 6770 }
df7492f9 6771 coding.head_ascii = src - coding.source;
88993dfd 6772
df7492f9
KH
6773 if (src < src_end)
6774 for (i = 0; i < coding_category_raw_text; i++)
6775 {
ff0dacd7
KH
6776 category = coding_priorities[i];
6777 this = coding_categories + category;
b843d1ae 6778
df7492f9
KH
6779 if (this->id < 0)
6780 {
6781 /* No coding system of this category is defined. */
ff0dacd7 6782 detect_info.rejected |= (1 << category);
df7492f9 6783 }
ff0dacd7 6784 else if (category >= coding_category_raw_text)
89528eb3 6785 continue;
ff0dacd7
KH
6786 else if (detect_info.checked & (1 << category))
6787 {
6788 if (highest
6789 && (detect_info.found & (1 << category)))
6790 break;
6791 }
df7492f9
KH
6792 else
6793 {
ff0dacd7 6794 if ((*(this->detector)) (&coding, &detect_info)
89528eb3 6795 && highest
ff0dacd7
KH
6796 && (detect_info.found & (1 << category)))
6797 break;
df7492f9
KH
6798 }
6799 }
ec6d2bb8 6800
ec6d2bb8 6801
ff0dacd7 6802 if (detect_info.rejected == CATEGORY_MASK_ANY)
ec6d2bb8 6803 {
ff0dacd7 6804 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
6805 id = coding_categories[coding_category_raw_text].id;
6806 val = Fcons (make_number (id), Qnil);
6807 }
ff0dacd7 6808 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 6809 {
ff0dacd7 6810 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
6811 id = coding_categories[coding_category_undecided].id;
6812 val = Fcons (make_number (id), Qnil);
6813 }
6814 else if (highest)
6815 {
ff0dacd7 6816 if (detect_info.found)
ec6d2bb8 6817 {
ff0dacd7
KH
6818 detect_info.found = 1 << category;
6819 val = Fcons (make_number (this->id), Qnil);
6820 }
6821 else
6822 for (i = 0; i < coding_category_raw_text; i++)
6823 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6824 {
6825 detect_info.found = 1 << coding_priorities[i];
6826 id = coding_categories[coding_priorities[i]].id;
6827 val = Fcons (make_number (id), Qnil);
6828 break;
6829 }
6830 }
89528eb3
KH
6831 else
6832 {
ff0dacd7
KH
6833 int mask = detect_info.rejected | detect_info.found;
6834 int found = 0;
89528eb3 6835 val = Qnil;
ec6d2bb8 6836
89528eb3 6837 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
6838 {
6839 category = coding_priorities[i];
6840 if (! (mask & (1 << category)))
ec6d2bb8 6841 {
ff0dacd7
KH
6842 found |= 1 << category;
6843 id = coding_categories[category].id;
6844 val = Fcons (make_number (id), val);
6845 }
6846 }
6847 for (i = coding_category_raw_text - 1; i >= 0; i--)
6848 {
6849 category = coding_priorities[i];
6850 if (detect_info.found & (1 << category))
6851 {
6852 id = coding_categories[category].id;
6853 val = Fcons (make_number (id), val);
ec6d2bb8 6854 }
ec6d2bb8 6855 }
ff0dacd7 6856 detect_info.found |= found;
ec6d2bb8 6857 }
ec6d2bb8 6858 }
df7492f9
KH
6859 else
6860 {
ff0dacd7 6861 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 6862 val = Fcons (make_number (coding.id), Qnil);
4ed46869 6863 }
df7492f9 6864
89528eb3 6865 /* Then, detect eol-format if necessary. */
df7492f9 6866 {
89528eb3 6867 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
6868 Lisp_Object tail;
6869
89528eb3
KH
6870 if (VECTORP (eol_type))
6871 {
ff0dacd7 6872 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
6873 normal_eol = detect_eol (coding.source, src_bytes,
6874 coding_category_raw_text);
ff0dacd7
KH
6875 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6876 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
6877 utf_16_be_eol = detect_eol (coding.source, src_bytes,
6878 coding_category_utf_16_be);
ff0dacd7
KH
6879 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6880 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
6881 utf_16_le_eol = detect_eol (coding.source, src_bytes,
6882 coding_category_utf_16_le);
6883 }
6884 else
6885 {
6886 if (EQ (eol_type, Qunix))
6887 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6888 else if (EQ (eol_type, Qdos))
6889 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6890 else
6891 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6892 }
6893
df7492f9
KH
6894 for (tail = val; CONSP (tail); tail = XCDR (tail))
6895 {
89528eb3 6896 enum coding_category category;
df7492f9 6897 int this_eol;
89528eb3
KH
6898
6899 id = XINT (XCAR (tail));
6900 attrs = CODING_ID_ATTRS (id);
6901 category = XINT (CODING_ATTR_CATEGORY (attrs));
6902 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
6903 if (VECTORP (eol_type))
6904 {
89528eb3
KH
6905 if (category == coding_category_utf_16_be
6906 || category == coding_category_utf_16_be_nosig)
6907 this_eol = utf_16_be_eol;
6908 else if (category == coding_category_utf_16_le
6909 || category == coding_category_utf_16_le_nosig)
6910 this_eol = utf_16_le_eol;
df7492f9 6911 else
89528eb3
KH
6912 this_eol = normal_eol;
6913
df7492f9
KH
6914 if (this_eol == EOL_SEEN_LF)
6915 XSETCAR (tail, AREF (eol_type, 0));
6916 else if (this_eol == EOL_SEEN_CRLF)
6917 XSETCAR (tail, AREF (eol_type, 1));
6918 else if (this_eol == EOL_SEEN_CR)
6919 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
6920 else
6921 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 6922 }
89528eb3
KH
6923 else
6924 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
6925 }
6926 }
ec6d2bb8 6927
03699b14 6928 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
6929}
6930
ec6d2bb8 6931
d46c5b12
KH
6932DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6933 2, 3, 0,
48b0f3ae
PJ
6934 doc: /* Detect coding system of the text in the region between START and END.
6935Return a list of possible coding systems ordered by priority.
ec6d2bb8 6936
48b0f3ae
PJ
6937If only ASCII characters are found, it returns a list of single element
6938`undecided' or its subsidiary coding system according to a detected
6939end-of-line format.
ec6d2bb8 6940
48b0f3ae
PJ
6941If optional argument HIGHEST is non-nil, return the coding system of
6942highest priority. */)
6943 (start, end, highest)
d46c5b12
KH
6944 Lisp_Object start, end, highest;
6945{
6946 int from, to;
6947 int from_byte, to_byte;
ec6d2bb8 6948
b7826503
PJ
6949 CHECK_NUMBER_COERCE_MARKER (start);
6950 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 6951
d46c5b12
KH
6952 validate_region (&start, &end);
6953 from = XINT (start), to = XINT (end);
6954 from_byte = CHAR_TO_BYTE (from);
6955 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 6956
d46c5b12
KH
6957 if (from < GPT && to >= GPT)
6958 move_gap_both (to, to_byte);
c210f766 6959
d46c5b12 6960 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6961 to_byte - from_byte,
0a28aafb
KH
6962 !NILP (highest),
6963 !NILP (current_buffer
df7492f9
KH
6964 ->enable_multibyte_characters),
6965 Qnil);
ec6d2bb8
KH
6966}
6967
d46c5b12
KH
6968DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6969 1, 2, 0,
48b0f3ae
PJ
6970 doc: /* Detect coding system of the text in STRING.
6971Return a list of possible coding systems ordered by priority.
fb88bf2d 6972
48b0f3ae
PJ
6973If only ASCII characters are found, it returns a list of single element
6974`undecided' or its subsidiary coding system according to a detected
6975end-of-line format.
d46c5b12 6976
48b0f3ae
PJ
6977If optional argument HIGHEST is non-nil, return the coding system of
6978highest priority. */)
6979 (string, highest)
d46c5b12
KH
6980 Lisp_Object string, highest;
6981{
b7826503 6982 CHECK_STRING (string);
b73bfc1c 6983
8f924df7
KH
6984 return detect_coding_system (SDATA (string), SBYTES (string),
6985 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 6986 Qnil);
4ed46869 6987}
4ed46869 6988
b73bfc1c 6989
df7492f9
KH
6990static INLINE int
6991char_encodable_p (c, attrs)
6992 int c;
6993 Lisp_Object attrs;
05e6f5dc 6994{
df7492f9 6995 Lisp_Object tail;
df7492f9 6996 struct charset *charset;
d46c5b12 6997
df7492f9
KH
6998 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6999 CONSP (tail); tail = XCDR (tail))
e133c8fa 7000 {
df7492f9
KH
7001 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7002 if (CHAR_CHARSET_P (c, charset))
7003 break;
e133c8fa 7004 }
df7492f9 7005 return (! NILP (tail));
05e6f5dc 7006}
83fa074f 7007
fb88bf2d 7008
df7492f9
KH
7009/* Return a list of coding systems that safely encode the text between
7010 START and END. If EXCLUDE is non-nil, it is a list of coding
7011 systems not to check. The returned list doesn't contain any such
48468dac 7012 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7013 unibyte, return t. */
e077cc80 7014
df7492f9
KH
7015DEFUN ("find-coding-systems-region-internal",
7016 Ffind_coding_systems_region_internal,
7017 Sfind_coding_systems_region_internal, 2, 3, 0,
7018 doc: /* Internal use only. */)
7019 (start, end, exclude)
7020 Lisp_Object start, end, exclude;
7021{
7022 Lisp_Object coding_attrs_list, safe_codings;
7023 EMACS_INT start_byte, end_byte;
7c78e542 7024 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7025 int c;
7026 Lisp_Object tail, elt;
d46c5b12 7027
df7492f9
KH
7028 if (STRINGP (start))
7029 {
7030 if (!STRING_MULTIBYTE (start)
8f924df7 7031 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7032 return Qt;
7033 start_byte = 0;
8f924df7 7034 end_byte = SBYTES (start);
df7492f9
KH
7035 }
7036 else
d46c5b12 7037 {
df7492f9
KH
7038 CHECK_NUMBER_COERCE_MARKER (start);
7039 CHECK_NUMBER_COERCE_MARKER (end);
7040 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7041 args_out_of_range (start, end);
7042 if (NILP (current_buffer->enable_multibyte_characters))
7043 return Qt;
7044 start_byte = CHAR_TO_BYTE (XINT (start));
7045 end_byte = CHAR_TO_BYTE (XINT (end));
7046 if (XINT (end) - XINT (start) == end_byte - start_byte)
7047 return Qt;
d46c5b12 7048
e1c23804 7049 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7050 {
e1c23804
DL
7051 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7052 move_gap_both (XINT (start), start_byte);
df7492f9 7053 else
e1c23804 7054 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7055 }
7056 }
7057
df7492f9
KH
7058 coding_attrs_list = Qnil;
7059 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7060 if (NILP (exclude)
7061 || NILP (Fmemq (XCAR (tail), exclude)))
7062 {
7063 Lisp_Object attrs;
d46c5b12 7064
df7492f9
KH
7065 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7066 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7067 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7068 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7069 }
d46c5b12 7070
df7492f9 7071 if (STRINGP (start))
8f924df7 7072 p = pbeg = SDATA (start);
df7492f9
KH
7073 else
7074 p = pbeg = BYTE_POS_ADDR (start_byte);
7075 pend = p + (end_byte - start_byte);
b843d1ae 7076
df7492f9
KH
7077 while (p < pend && ASCII_BYTE_P (*p)) p++;
7078 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7079
05e6f5dc 7080 while (p < pend)
72d1a715 7081 {
df7492f9
KH
7082 if (ASCII_BYTE_P (*p))
7083 p++;
72d1a715
RS
7084 else
7085 {
df7492f9 7086 c = STRING_CHAR_ADVANCE (p);
12410ef1 7087
df7492f9
KH
7088 charset_map_loaded = 0;
7089 for (tail = coding_attrs_list; CONSP (tail);)
7090 {
7091 elt = XCAR (tail);
7092 if (NILP (elt))
7093 tail = XCDR (tail);
7094 else if (char_encodable_p (c, elt))
7095 tail = XCDR (tail);
7096 else if (CONSP (XCDR (tail)))
7097 {
7098 XSETCAR (tail, XCAR (XCDR (tail)));
7099 XSETCDR (tail, XCDR (XCDR (tail)));
7100 }
7101 else
7102 {
7103 XSETCAR (tail, Qnil);
7104 tail = XCDR (tail);
7105 }
7106 }
7107 if (charset_map_loaded)
7108 {
7109 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7110
df7492f9 7111 if (STRINGP (start))
8f924df7 7112 pbeg = SDATA (start);
df7492f9
KH
7113 else
7114 pbeg = BYTE_POS_ADDR (start_byte);
7115 p = pbeg + p_offset;
7116 pend = pbeg + pend_offset;
7117 }
7118 }
ec6d2bb8 7119 }
fb88bf2d 7120
df7492f9
KH
7121 safe_codings = Qnil;
7122 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7123 if (! NILP (XCAR (tail)))
7124 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 7125
05e6f5dc
KH
7126 return safe_codings;
7127}
4956c225 7128
d46c5b12 7129
8f924df7
KH
7130DEFUN ("unencodable-char-position", Funencodable_char_position,
7131 Sunencodable_char_position, 3, 5, 0,
7132 doc: /*
7133Return position of first un-encodable character in a region.
7134START and END specfiy the region and CODING-SYSTEM specifies the
7135encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 7136
8f924df7
KH
7137If optional 4th argument COUNT is non-nil, it specifies at most how
7138many un-encodable characters to search. In this case, the value is a
7139list of positions.
d46c5b12 7140
8f924df7
KH
7141If optional 5th argument STRING is non-nil, it is a string to search
7142for un-encodable characters. In that case, START and END are indexes
7143to the string. */)
7144 (start, end, coding_system, count, string)
7145 Lisp_Object start, end, coding_system, count, string;
7146{
7147 int n;
7148 struct coding_system coding;
7149 Lisp_Object attrs, charset_list;
7150 Lisp_Object positions;
7151 int from, to;
7152 const unsigned char *p, *stop, *pend;
7153 int ascii_compatible;
fb88bf2d 7154
8f924df7
KH
7155 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7156 attrs = CODING_ID_ATTRS (coding.id);
7157 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7158 return Qnil;
7159 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7160 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
fb88bf2d 7161
8f924df7
KH
7162 if (NILP (string))
7163 {
7164 validate_region (&start, &end);
7165 from = XINT (start);
7166 to = XINT (end);
7167 if (NILP (current_buffer->enable_multibyte_characters)
7168 || (ascii_compatible
7169 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7170 return Qnil;
7171 p = CHAR_POS_ADDR (from);
7172 pend = CHAR_POS_ADDR (to);
7173 if (from < GPT && to >= GPT)
7174 stop = GPT_ADDR;
7175 else
7176 stop = pend;
7177 }
7178 else
7179 {
7180 CHECK_STRING (string);
7181 CHECK_NATNUM (start);
7182 CHECK_NATNUM (end);
7183 from = XINT (start);
7184 to = XINT (end);
7185 if (from > to
7186 || to > SCHARS (string))
7187 args_out_of_range_3 (string, start, end);
7188 if (! STRING_MULTIBYTE (string))
7189 return Qnil;
7190 p = SDATA (string) + string_char_to_byte (string, from);
7191 stop = pend = SDATA (string) + string_char_to_byte (string, to);
7192 if (ascii_compatible && (to - from) == (pend - p))
7193 return Qnil;
7194 }
f2558efd 7195
8f924df7
KH
7196 if (NILP (count))
7197 n = 1;
7198 else
b73bfc1c 7199 {
8f924df7
KH
7200 CHECK_NATNUM (count);
7201 n = XINT (count);
b73bfc1c
KH
7202 }
7203
8f924df7
KH
7204 positions = Qnil;
7205 while (1)
d46c5b12 7206 {
8f924df7 7207 int c;
ec6d2bb8 7208
8f924df7
KH
7209 if (ascii_compatible)
7210 while (p < stop && ASCII_BYTE_P (*p))
7211 p++, from++;
7212 if (p >= stop)
0e79d667 7213 {
8f924df7
KH
7214 if (p >= pend)
7215 break;
7216 stop = pend;
7217 p = GAP_END_ADDR;
0e79d667 7218 }
ec6d2bb8 7219
8f924df7
KH
7220 c = STRING_CHAR_ADVANCE (p);
7221 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7222 && ! char_charset (c, charset_list, NULL))
ec6d2bb8 7223 {
8f924df7
KH
7224 positions = Fcons (make_number (from), positions);
7225 n--;
7226 if (n == 0)
7227 break;
ec6d2bb8
KH
7228 }
7229
8f924df7
KH
7230 from++;
7231 }
d46c5b12 7232
8f924df7
KH
7233 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7234}
d46c5b12 7235
d46c5b12 7236
df7492f9
KH
7237DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7238 Scheck_coding_systems_region, 3, 3, 0,
7239 doc: /* Check if the region is encodable by coding systems.
d46c5b12 7240
df7492f9
KH
7241START and END are buffer positions specifying the region.
7242CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 7243
df7492f9
KH
7244The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7245CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7246whole region, POS0, POS1, ... are buffer positions where non-encodable
7247characters are found.
93dec019 7248
df7492f9
KH
7249If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7250value is nil.
93dec019 7251
df7492f9
KH
7252START may be a string. In that case, check if the string is
7253encodable, and the value contains indices to the string instead of
7254buffer positions. END is ignored. */)
7255 (start, end, coding_system_list)
7256 Lisp_Object start, end, coding_system_list;
05e6f5dc 7257{
df7492f9
KH
7258 Lisp_Object list;
7259 EMACS_INT start_byte, end_byte;
7260 int pos;
7c78e542 7261 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7262 int c;
7263 Lisp_Object tail, elt;
70ad9fc4 7264
05e6f5dc
KH
7265 if (STRINGP (start))
7266 {
df7492f9 7267 if (!STRING_MULTIBYTE (start)
8f924df7 7268 && SCHARS (start) != SBYTES (start))
df7492f9
KH
7269 return Qnil;
7270 start_byte = 0;
8f924df7 7271 end_byte = SBYTES (start);
df7492f9 7272 pos = 0;
d46c5b12 7273 }
05e6f5dc 7274 else
b73bfc1c 7275 {
b7826503
PJ
7276 CHECK_NUMBER_COERCE_MARKER (start);
7277 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7278 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7279 args_out_of_range (start, end);
7280 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7281 return Qnil;
7282 start_byte = CHAR_TO_BYTE (XINT (start));
7283 end_byte = CHAR_TO_BYTE (XINT (end));
7284 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7285 return Qt;
df7492f9 7286
e1c23804 7287 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 7288 {
e1c23804
DL
7289 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7290 move_gap_both (XINT (start), start_byte);
df7492f9 7291 else
e1c23804 7292 move_gap_both (XINT (end), end_byte);
b73bfc1c 7293 }
e1c23804 7294 pos = XINT (start);
b73bfc1c 7295 }
7553d0e1 7296
df7492f9
KH
7297 list = Qnil;
7298 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 7299 {
df7492f9
KH
7300 elt = XCAR (tail);
7301 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7302 Qnil)),
7303 list);
12410ef1
KH
7304 }
7305
df7492f9 7306 if (STRINGP (start))
8f924df7 7307 p = pbeg = SDATA (start);
72d1a715 7308 else
df7492f9
KH
7309 p = pbeg = BYTE_POS_ADDR (start_byte);
7310 pend = p + (end_byte - start_byte);
4ed46869 7311
df7492f9
KH
7312 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7313 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 7314
df7492f9 7315 while (p < pend)
d46c5b12 7316 {
df7492f9
KH
7317 if (ASCII_BYTE_P (*p))
7318 p++;
e133c8fa 7319 else
05e6f5dc 7320 {
df7492f9
KH
7321 c = STRING_CHAR_ADVANCE (p);
7322
7323 charset_map_loaded = 0;
7324 for (tail = list; CONSP (tail); tail = XCDR (tail))
7325 {
7326 elt = XCDR (XCAR (tail));
7327 if (! char_encodable_p (c, XCAR (elt)))
7328 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7329 }
7330 if (charset_map_loaded)
7331 {
7332 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7333
7334 if (STRINGP (start))
8f924df7 7335 pbeg = SDATA (start);
df7492f9
KH
7336 else
7337 pbeg = BYTE_POS_ADDR (start_byte);
7338 p = pbeg + p_offset;
7339 pend = pbeg + pend_offset;
7340 }
05e6f5dc 7341 }
df7492f9 7342 pos++;
d46c5b12 7343 }
4ed46869 7344
df7492f9
KH
7345 tail = list;
7346 list = Qnil;
7347 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 7348 {
df7492f9
KH
7349 elt = XCAR (tail);
7350 if (CONSP (XCDR (XCDR (elt))))
7351 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7352 list);
ec6d2bb8 7353 }
2b4f9037 7354
df7492f9 7355 return list;
d46c5b12
KH
7356}
7357
3fd9494b 7358
b73bfc1c
KH
7359
7360Lisp_Object
df7492f9
KH
7361code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7362 Lisp_Object start, end, coding_system, dst_object;
7363 int encodep, norecord;
4ed46869 7364{
3a73fa5d 7365 struct coding_system coding;
df7492f9
KH
7366 EMACS_INT from, from_byte, to, to_byte;
7367 Lisp_Object src_object;
4ed46869 7368
b7826503
PJ
7369 CHECK_NUMBER_COERCE_MARKER (start);
7370 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7371 if (NILP (coding_system))
7372 coding_system = Qno_conversion;
7373 else
7374 CHECK_CODING_SYSTEM (coding_system);
7375 src_object = Fcurrent_buffer ();
7376 if (NILP (dst_object))
7377 dst_object = src_object;
7378 else if (! EQ (dst_object, Qt))
7379 CHECK_BUFFER (dst_object);
3a73fa5d 7380
d46c5b12
KH
7381 validate_region (&start, &end);
7382 from = XFASTINT (start);
df7492f9 7383 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7384 to = XFASTINT (end);
df7492f9 7385 to_byte = CHAR_TO_BYTE (to);
764ca8da 7386
df7492f9
KH
7387 setup_coding_system (coding_system, &coding);
7388 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 7389
df7492f9
KH
7390 if (encodep)
7391 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7392 dst_object);
7393 else
7394 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7395 dst_object);
7396 if (! norecord)
7397 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7398
df7492f9
KH
7399 if (coding.result != CODING_RESULT_SUCCESS)
7400 error ("Code conversion error: %d", coding.result);
b73bfc1c 7401
df7492f9
KH
7402 return (BUFFERP (dst_object)
7403 ? make_number (coding.produced_char)
7404 : coding.dst_object);
4031e2bf 7405}
78108bcd 7406
4ed46869 7407
4031e2bf 7408DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7409 3, 4, "r\nzCoding system: ",
48b0f3ae 7410 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7411When called from a program, takes four arguments:
7412 START, END, CODING-SYSTEM, and DESTINATION.
7413START and END are buffer positions.
8844fa83 7414
df7492f9
KH
7415Optional 4th arguments DESTINATION specifies where the decoded text goes.
7416If nil, the region between START and END is replace by the decoded text.
7417If buffer, the decoded text is inserted in the buffer.
7418If t, the decoded text is returned.
8844fa83 7419
48b0f3ae
PJ
7420This function sets `last-coding-system-used' to the precise coding system
7421used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7422not fully specified.)
7423It returns the length of the decoded text. */)
df7492f9
KH
7424 (start, end, coding_system, destination)
7425 Lisp_Object start, end, coding_system, destination;
4031e2bf 7426{
df7492f9 7427 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 7428}
8844fa83 7429
3a73fa5d 7430DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7431 3, 4, "r\nzCoding system: ",
7432 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7433When called from a program, takes three arguments:
7434START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 7435
df7492f9
KH
7436Optional 4th arguments DESTINATION specifies where the encoded text goes.
7437If nil, the region between START and END is replace by the encoded text.
7438If buffer, the encoded text is inserted in the buffer.
7439If t, the encoded text is returned.
2391eaa4 7440
48b0f3ae
PJ
7441This function sets `last-coding-system-used' to the precise coding system
7442used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7443not fully specified.)
7444It returns the length of the encoded text. */)
df7492f9
KH
7445 (start, end, coding_system, destination)
7446 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7447{
df7492f9 7448 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
7449}
7450
7451Lisp_Object
df7492f9
KH
7452code_convert_string (string, coding_system, dst_object,
7453 encodep, nocopy, norecord)
7454 Lisp_Object string, coding_system, dst_object;
7455 int encodep, nocopy, norecord;
b73bfc1c 7456{
4031e2bf 7457 struct coding_system coding;
df7492f9 7458 EMACS_INT chars, bytes;
ec6d2bb8 7459
b7826503 7460 CHECK_STRING (string);
d46c5b12 7461 if (NILP (coding_system))
4956c225 7462 {
df7492f9
KH
7463 if (! norecord)
7464 Vlast_coding_system_used = Qno_conversion;
7465 if (NILP (dst_object))
7466 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 7467 }
b73bfc1c 7468
df7492f9
KH
7469 if (NILP (coding_system))
7470 coding_system = Qno_conversion;
7471 else
7472 CHECK_CODING_SYSTEM (coding_system);
7473 if (NILP (dst_object))
7474 dst_object = Qt;
7475 else if (! EQ (dst_object, Qt))
7476 CHECK_BUFFER (dst_object);
73be902c 7477
df7492f9 7478 setup_coding_system (coding_system, &coding);
d46c5b12 7479 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
7480 chars = SCHARS (string);
7481 bytes = SBYTES (string);
df7492f9
KH
7482 if (encodep)
7483 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7484 else
7485 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7486 if (! norecord)
7487 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 7488
df7492f9
KH
7489 if (coding.result != CODING_RESULT_SUCCESS)
7490 error ("Code conversion error: %d", coding.result);
2391eaa4 7491
df7492f9
KH
7492 return (BUFFERP (dst_object)
7493 ? make_number (coding.produced_char)
7494 : coding.dst_object);
4ed46869 7495}
73be902c 7496
b73bfc1c 7497
ecec61c1 7498/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 7499 Do not set Vlast_coding_system_used.
4ed46869 7500
ec6d2bb8
KH
7501 This function is called only from macros DECODE_FILE and
7502 ENCODE_FILE, thus we ignore character composition. */
4ed46869 7503
ecec61c1
KH
7504Lisp_Object
7505code_convert_string_norecord (string, coding_system, encodep)
7506 Lisp_Object string, coding_system;
7507 int encodep;
4ed46869 7508{
0be8721c 7509 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
7510}
7511
4ed46869 7512
df7492f9
KH
7513DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7514 2, 4, 0,
7515 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7516
7517Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7518if the decoding operation is trivial.
ecec61c1 7519
df7492f9 7520Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 7521inserted in BUFFER instead of returned as a string. In this case,
df7492f9 7522the return value is BUFFER.
ecec61c1 7523
df7492f9
KH
7524This function sets `last-coding-system-used' to the precise coding system
7525used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7526not fully specified. */)
7527 (string, coding_system, nocopy, buffer)
7528 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 7529{
df7492f9
KH
7530 return code_convert_string (string, coding_system, buffer,
7531 0, ! NILP (nocopy), 0);
4ed46869
KH
7532}
7533
df7492f9
KH
7534DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7535 2, 4, 0,
7536 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7537
7538Optional third arg NOCOPY non-nil means it is OK to return STRING
7539itself if the encoding operation is trivial.
7540
7541Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 7542inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
7543the return value is BUFFER.
7544
7545This function sets `last-coding-system-used' to the precise coding system
7546used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7547not fully specified.) */)
7548 (string, coding_system, nocopy, buffer)
7549 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 7550{
df7492f9 7551 return code_convert_string (string, coding_system, buffer,
c197f191 7552 1, ! NILP (nocopy), 1);
4ed46869 7553}
df7492f9 7554
3a73fa5d 7555\f
4ed46869 7556DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7557 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7558Return the corresponding character. */)
7559 (code)
4ed46869 7560 Lisp_Object code;
4ed46869 7561{
df7492f9
KH
7562 Lisp_Object spec, attrs, val;
7563 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7564 int c;
4ed46869 7565
df7492f9
KH
7566 CHECK_NATNUM (code);
7567 c = XFASTINT (code);
7568 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7569 attrs = AREF (spec, 0);
4ed46869 7570
df7492f9
KH
7571 if (ASCII_BYTE_P (c)
7572 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7573 return code;
4ed46869 7574
df7492f9
KH
7575 val = CODING_ATTR_CHARSET_LIST (attrs);
7576 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
7577 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7578 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 7579
df7492f9
KH
7580 if (c <= 0x7F)
7581 charset = charset_roman;
7582 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 7583 {
df7492f9
KH
7584 charset = charset_kana;
7585 c -= 0x80;
4ed46869 7586 }
55ab7be3 7587 else
4ed46869 7588 {
004068e4 7589 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
7590
7591 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7592 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7593 error ("Invalid code: %d", code);
7594 SJIS_TO_JIS (c);
7595 charset = charset_kanji;
4ed46869 7596 }
df7492f9
KH
7597 c = DECODE_CHAR (charset, c);
7598 if (c < 0)
7599 error ("Invalid code: %d", code);
7600 return make_number (c);
93dec019 7601}
4ed46869 7602
48b0f3ae 7603
4ed46869 7604DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7605 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7606Return the corresponding code in SJIS. */)
7607 (ch)
df7492f9 7608 Lisp_Object ch;
4ed46869 7609{
df7492f9
KH
7610 Lisp_Object spec, attrs, charset_list;
7611 int c;
7612 struct charset *charset;
7613 unsigned code;
48b0f3ae 7614
df7492f9
KH
7615 CHECK_CHARACTER (ch);
7616 c = XFASTINT (ch);
7617 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7618 attrs = AREF (spec, 0);
7619
7620 if (ASCII_CHAR_P (c)
7621 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7622 return ch;
7623
7624 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7625 charset = char_charset (c, charset_list, &code);
7626 if (code == CHARSET_INVALID_CODE (charset))
7627 error ("Can't encode by shift_jis encoding: %d", c);
7628 JIS_TO_SJIS (code);
7629
7630 return make_number (code);
4ed46869
KH
7631}
7632
7633DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7634 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7635Return the corresponding character. */)
7636 (code)
4ed46869 7637 Lisp_Object code;
d46c5b12 7638{
df7492f9
KH
7639 Lisp_Object spec, attrs, val;
7640 struct charset *charset_roman, *charset_big5, *charset;
7641 int c;
6289dd10 7642
df7492f9
KH
7643 CHECK_NATNUM (code);
7644 c = XFASTINT (code);
7645 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7646 attrs = AREF (spec, 0);
4ed46869 7647
df7492f9
KH
7648 if (ASCII_BYTE_P (c)
7649 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7650 return code;
6289dd10 7651
df7492f9
KH
7652 val = CODING_ATTR_CHARSET_LIST (attrs);
7653 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7654 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 7655
df7492f9
KH
7656 if (c <= 0x7F)
7657 charset = charset_roman;
c28a9453
KH
7658 else
7659 {
df7492f9
KH
7660 int b1 = c >> 8, b2 = c & 0x7F;
7661 if (b1 < 0xA1 || b1 > 0xFE
7662 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7663 error ("Invalid code: %d", code);
7664 charset = charset_big5;
c28a9453 7665 }
df7492f9
KH
7666 c = DECODE_CHAR (charset, (unsigned )c);
7667 if (c < 0)
7668 error ("Invalid code: %d", code);
7669 return make_number (c);
d46c5b12 7670}
6289dd10 7671
4ed46869 7672DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7673 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7674Return the corresponding character code in Big5. */)
7675 (ch)
4ed46869
KH
7676 Lisp_Object ch;
7677{
df7492f9
KH
7678 Lisp_Object spec, attrs, charset_list;
7679 struct charset *charset;
7680 int c;
7681 unsigned code;
7682
7683 CHECK_CHARACTER (ch);
7684 c = XFASTINT (ch);
7685 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7686 attrs = AREF (spec, 0);
7687 if (ASCII_CHAR_P (c)
7688 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7689 return ch;
7690
7691 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7692 charset = char_charset (c, charset_list, &code);
7693 if (code == CHARSET_INVALID_CODE (charset))
7694 error ("Can't encode by Big5 encoding: %d", c);
7695
7696 return make_number (code);
4ed46869 7697}
48b0f3ae 7698
3a73fa5d 7699\f
1ba9e4ab
KH
7700DEFUN ("set-terminal-coding-system-internal",
7701 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7702 Sset_terminal_coding_system_internal, 1, 1, 0,
7703 doc: /* Internal use only. */)
7704 (coding_system)
b74e4686 7705 Lisp_Object coding_system;
4ed46869 7706{
b7826503 7707 CHECK_SYMBOL (coding_system);
df7492f9
KH
7708 setup_coding_system (Fcheck_coding_system (coding_system),
7709 &terminal_coding);
48b0f3ae 7710
70c22245 7711 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7712 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7713 /* Characer composition should be disabled. */
7714 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7715 terminal_coding.src_multibyte = 1;
7716 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7717 return Qnil;
7718}
7719
c4825358
KH
7720DEFUN ("set-safe-terminal-coding-system-internal",
7721 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7722 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7723 doc: /* Internal use only. */)
48b0f3ae 7724 (coding_system)
b74e4686 7725 Lisp_Object coding_system;
d46c5b12 7726{
b7826503 7727 CHECK_SYMBOL (coding_system);
c4825358
KH
7728 setup_coding_system (Fcheck_coding_system (coding_system),
7729 &safe_terminal_coding);
df7492f9
KH
7730 /* Characer composition should be disabled. */
7731 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7732 safe_terminal_coding.src_multibyte = 1;
7733 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7734 return Qnil;
7735}
4ed46869 7736
4ed46869
KH
7737DEFUN ("terminal-coding-system",
7738 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7739 doc: /* Return coding system specified for terminal output. */)
7740 ()
4ed46869 7741{
df7492f9 7742 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7743}
7744
1ba9e4ab
KH
7745DEFUN ("set-keyboard-coding-system-internal",
7746 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7747 Sset_keyboard_coding_system_internal, 1, 1, 0,
7748 doc: /* Internal use only. */)
7749 (coding_system)
4ed46869
KH
7750 Lisp_Object coding_system;
7751{
b7826503 7752 CHECK_SYMBOL (coding_system);
df7492f9
KH
7753 setup_coding_system (Fcheck_coding_system (coding_system),
7754 &keyboard_coding);
7755 /* Characer composition should be disabled. */
7756 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7757 return Qnil;
7758}
7759
7760DEFUN ("keyboard-coding-system",
7761 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7762 doc: /* Return coding system specified for decoding keyboard input. */)
7763 ()
4ed46869 7764{
df7492f9 7765 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7766}
7767
4ed46869 7768\f
a5d301df
KH
7769DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7770 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7771 doc: /* Choose a coding system for an operation based on the target name.
7772The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7773DECODING-SYSTEM is the coding system to use for decoding
7774\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7775for encoding (in case OPERATION does encoding).
05e6f5dc 7776
48b0f3ae
PJ
7777The first argument OPERATION specifies an I/O primitive:
7778 For file I/O, `insert-file-contents' or `write-region'.
7779 For process I/O, `call-process', `call-process-region', or `start-process'.
7780 For network I/O, `open-network-stream'.
05e6f5dc 7781
48b0f3ae
PJ
7782The remaining arguments should be the same arguments that were passed
7783to the primitive. Depending on which primitive, one of those arguments
7784is selected as the TARGET. For example, if OPERATION does file I/O,
7785whichever argument specifies the file name is TARGET.
05e6f5dc 7786
48b0f3ae
PJ
7787TARGET has a meaning which depends on OPERATION:
7788 For file I/O, TARGET is a file name.
7789 For process I/O, TARGET is a process name.
7790 For network I/O, TARGET is a service name or a port number
05e6f5dc 7791
48b0f3ae
PJ
7792This function looks up what specified for TARGET in,
7793`file-coding-system-alist', `process-coding-system-alist',
7794or `network-coding-system-alist' depending on OPERATION.
7795They may specify a coding system, a cons of coding systems,
7796or a function symbol to call.
7797In the last case, we call the function with one argument,
7798which is a list of all the arguments given to this function.
7799
7800usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7801 (nargs, args)
4ed46869
KH
7802 int nargs;
7803 Lisp_Object *args;
6b89e3aa 7804{
4ed46869
KH
7805 Lisp_Object operation, target_idx, target, val;
7806 register Lisp_Object chain;
177c0ea7 7807
4ed46869
KH
7808 if (nargs < 2)
7809 error ("Too few arguments");
7810 operation = args[0];
7811 if (!SYMBOLP (operation)
7812 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7813 error ("Invalid first arguement");
4ed46869
KH
7814 if (nargs < 1 + XINT (target_idx))
7815 error ("Too few arguments for operation: %s",
8f924df7 7816 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
7817 target = args[XINT (target_idx) + 1];
7818 if (!(STRINGP (target)
7819 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7820 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7821
2e34157c
RS
7822 chain = ((EQ (operation, Qinsert_file_contents)
7823 || EQ (operation, Qwrite_region))
02ba4723 7824 ? Vfile_coding_system_alist
2e34157c 7825 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7826 ? Vnetwork_coding_system_alist
7827 : Vprocess_coding_system_alist));
4ed46869
KH
7828 if (NILP (chain))
7829 return Qnil;
7830
03699b14 7831 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 7832 {
f44d27ce 7833 Lisp_Object elt;
6b89e3aa 7834
df7492f9 7835 elt = XCAR (chain);
4ed46869
KH
7836 if (CONSP (elt)
7837 && ((STRINGP (target)
03699b14
KR
7838 && STRINGP (XCAR (elt))
7839 && fast_string_match (XCAR (elt), target) >= 0)
7840 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 7841 {
03699b14 7842 val = XCDR (elt);
b19fd4c5
KH
7843 /* Here, if VAL is both a valid coding system and a valid
7844 function symbol, we return VAL as a coding system. */
02ba4723
KH
7845 if (CONSP (val))
7846 return val;
7847 if (! SYMBOLP (val))
7848 return Qnil;
7849 if (! NILP (Fcoding_system_p (val)))
7850 return Fcons (val, val);
b19fd4c5 7851 if (! NILP (Ffboundp (val)))
6b89e3aa 7852 {
b19fd4c5
KH
7853 val = call1 (val, Flist (nargs, args));
7854 if (CONSP (val))
7855 return val;
7856 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7857 return Fcons (val, val);
6b89e3aa 7858 }
02ba4723 7859 return Qnil;
6b89e3aa
KH
7860 }
7861 }
4ed46869 7862 return Qnil;
6b89e3aa
KH
7863}
7864
df7492f9 7865DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7866 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7867 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 7868If multiple coding systems belongs to the same category,
a3181084
DL
7869all but the first one are ignored.
7870
7871usage: (set-coding-system-priority ...) */)
df7492f9
KH
7872 (nargs, args)
7873 int nargs;
7874 Lisp_Object *args;
7875{
7876 int i, j;
7877 int changed[coding_category_max];
7878 enum coding_category priorities[coding_category_max];
7879
7880 bzero (changed, sizeof changed);
6b89e3aa 7881
df7492f9 7882 for (i = j = 0; i < nargs; i++)
6b89e3aa 7883 {
df7492f9
KH
7884 enum coding_category category;
7885 Lisp_Object spec, attrs;
6b89e3aa 7886
df7492f9
KH
7887 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7888 attrs = AREF (spec, 0);
7889 category = XINT (CODING_ATTR_CATEGORY (attrs));
7890 if (changed[category])
7891 /* Ignore this coding system because a coding system of the
7892 same category already had a higher priority. */
7893 continue;
7894 changed[category] = 1;
7895 priorities[j++] = category;
7896 if (coding_categories[category].id >= 0
7897 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7898 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 7899 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 7900 }
6b89e3aa 7901
df7492f9
KH
7902 /* Now we have decided top J priorities. Reflect the order of the
7903 original priorities to the remaining priorities. */
6b89e3aa 7904
df7492f9 7905 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 7906 {
df7492f9
KH
7907 while (j < coding_category_max
7908 && changed[coding_priorities[j]])
7909 j++;
7910 if (j == coding_category_max)
7911 abort ();
7912 priorities[i] = coding_priorities[j];
7913 }
6b89e3aa 7914
df7492f9 7915 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 7916
ff563fce
KH
7917 /* Update `coding-category-list'. */
7918 Vcoding_category_list = Qnil;
7919 for (i = coding_category_max - 1; i >= 0; i--)
7920 Vcoding_category_list
7921 = Fcons (AREF (Vcoding_category_table, priorities[i]),
7922 Vcoding_category_list);
6b89e3aa 7923
df7492f9 7924 return Qnil;
6b89e3aa
KH
7925}
7926
df7492f9
KH
7927DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7928 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7929 doc: /* Return a list of coding systems ordered by their priorities.
7930HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7931 (highestp)
7932 Lisp_Object highestp;
d46c5b12
KH
7933{
7934 int i;
df7492f9 7935 Lisp_Object val;
6b89e3aa 7936
df7492f9 7937 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7938 {
df7492f9
KH
7939 enum coding_category category = coding_priorities[i];
7940 int id = coding_categories[category].id;
7941 Lisp_Object attrs;
068a9dbd 7942
df7492f9
KH
7943 if (id < 0)
7944 continue;
7945 attrs = CODING_ID_ATTRS (id);
7946 if (! NILP (highestp))
7947 return CODING_ATTR_BASE_NAME (attrs);
7948 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7949 }
7950 return Fnreverse (val);
7951}
068a9dbd 7952
f0064e1f 7953static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
7954
7955static Lisp_Object
df7492f9
KH
7956make_subsidiaries (base)
7957 Lisp_Object base;
068a9dbd 7958{
df7492f9 7959 Lisp_Object subsidiaries;
8f924df7 7960 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
7961 char *buf = (char *) alloca (base_name_len + 6);
7962 int i;
068a9dbd 7963
8f924df7 7964 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
7965 subsidiaries = Fmake_vector (make_number (3), Qnil);
7966 for (i = 0; i < 3; i++)
068a9dbd 7967 {
df7492f9
KH
7968 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7969 ASET (subsidiaries, i, intern (buf));
068a9dbd 7970 }
df7492f9 7971 return subsidiaries;
068a9dbd
KH
7972}
7973
7974
df7492f9
KH
7975DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7976 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7977 doc: /* For internal use only.
7978usage: (define-coding-system-internal ...) */)
df7492f9
KH
7979 (nargs, args)
7980 int nargs;
7981 Lisp_Object *args;
068a9dbd 7982{
df7492f9
KH
7983 Lisp_Object name;
7984 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7985 Lisp_Object attrs; /* Vector of attributes. */
7986 Lisp_Object eol_type;
7987 Lisp_Object aliases;
7988 Lisp_Object coding_type, charset_list, safe_charsets;
7989 enum coding_category category;
7990 Lisp_Object tail, val;
7991 int max_charset_id = 0;
7992 int i;
068a9dbd 7993
df7492f9
KH
7994 if (nargs < coding_arg_max)
7995 goto short_args;
068a9dbd 7996
df7492f9 7997 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 7998
df7492f9
KH
7999 name = args[coding_arg_name];
8000 CHECK_SYMBOL (name);
8001 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8002
df7492f9
KH
8003 val = args[coding_arg_mnemonic];
8004 if (! STRINGP (val))
8005 CHECK_CHARACTER (val);
8006 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8007
df7492f9
KH
8008 coding_type = args[coding_arg_coding_type];
8009 CHECK_SYMBOL (coding_type);
8010 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8011
df7492f9
KH
8012 charset_list = args[coding_arg_charset_list];
8013 if (SYMBOLP (charset_list))
8014 {
8015 if (EQ (charset_list, Qiso_2022))
8016 {
8017 if (! EQ (coding_type, Qiso_2022))
8018 error ("Invalid charset-list");
8019 charset_list = Viso_2022_charset_list;
8020 }
8021 else if (EQ (charset_list, Qemacs_mule))
8022 {
8023 if (! EQ (coding_type, Qemacs_mule))
8024 error ("Invalid charset-list");
8025 charset_list = Vemacs_mule_charset_list;
8026 }
8027 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8028 if (max_charset_id < XFASTINT (XCAR (tail)))
8029 max_charset_id = XFASTINT (XCAR (tail));
8030 }
068a9dbd
KH
8031 else
8032 {
df7492f9
KH
8033 charset_list = Fcopy_sequence (charset_list);
8034 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
068a9dbd 8035 {
df7492f9
KH
8036 struct charset *charset;
8037
8038 val = Fcar (tail);
8039 CHECK_CHARSET_GET_CHARSET (val, charset);
8040 if (EQ (coding_type, Qiso_2022)
8041 ? CHARSET_ISO_FINAL (charset) < 0
8042 : EQ (coding_type, Qemacs_mule)
8043 ? CHARSET_EMACS_MULE_ID (charset) < 0
8044 : 0)
8045 error ("Can't handle charset `%s'",
8f924df7 8046 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8047
8f924df7 8048 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8049 if (max_charset_id < charset->id)
8050 max_charset_id = charset->id;
068a9dbd
KH
8051 }
8052 }
df7492f9 8053 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8054
df7492f9
KH
8055 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8056 make_number (255));
8057 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8058 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8059 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8060
584948ac 8061 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8062
df7492f9
KH
8063 val = args[coding_arg_decode_translation_table];
8064 if (! NILP (val))
8065 CHECK_CHAR_TABLE (val);
8066 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8067
df7492f9
KH
8068 val = args[coding_arg_encode_translation_table];
8069 if (! NILP (val))
8070 CHECK_CHAR_TABLE (val);
8071 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8072
df7492f9
KH
8073 val = args[coding_arg_post_read_conversion];
8074 CHECK_SYMBOL (val);
8075 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8076
df7492f9
KH
8077 val = args[coding_arg_pre_write_conversion];
8078 CHECK_SYMBOL (val);
8079 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 8080
df7492f9
KH
8081 val = args[coding_arg_default_char];
8082 if (NILP (val))
8083 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8084 else
8085 {
8f924df7 8086 CHECK_CHARACTER (val);
df7492f9
KH
8087 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8088 }
4031e2bf 8089
8f924df7
KH
8090 val = args[coding_arg_for_unibyte];
8091 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 8092
df7492f9
KH
8093 val = args[coding_arg_plist];
8094 CHECK_LIST (val);
8095 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 8096
df7492f9
KH
8097 if (EQ (coding_type, Qcharset))
8098 {
4187a77d 8099 Lisp_Object list;
c7c66a95
KH
8100 /* Generate a lisp vector of 256 elements. Each element is nil,
8101 integer, or a list of charset IDs.
3a73fa5d 8102
c7c66a95
KH
8103 If Nth element is nil, the byte code N is invalid in this
8104 coding system.
4ed46869 8105
c7c66a95
KH
8106 If Nth element is a number NUM, N is the first byte of a
8107 charset whose ID is NUM.
4ed46869 8108
c7c66a95
KH
8109 If Nth element is a list of charset IDs, N is the first byte
8110 of one of them. The list is sorted by dimensions of the
8111 charsets. A charset of smaller dimension comes firtst.
8112 */
4187a77d
KH
8113 for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
8114 {
8115 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
5f1cd180 8116
4187a77d
KH
8117 if (charset->method == CHARSET_METHOD_SUPERSET)
8118 {
8119 val = CHARSET_SUPERSET (charset);
8120 for (; CONSP (val); val = XCDR (val))
8f924df7 8121 list = Fcons (XCAR (XCAR (val)), list);
4187a77d
KH
8122 }
8123 else
8124 list = Fcons (XCAR (tail), list);
8125 }
ec6d2bb8 8126
df7492f9 8127 val = Fmake_vector (make_number (256), Qnil);
4ed46869 8128
4187a77d 8129 for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
df7492f9 8130 {
c7c66a95
KH
8131 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8132 int dim = CHARSET_DIMENSION (charset);
8133 int idx = (dim - 1) * 4;
4ed46869 8134
584948ac
KH
8135 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8136 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 8137
15d143f7
KH
8138 for (i = charset->code_space[idx];
8139 i <= charset->code_space[idx + 1]; i++)
8140 {
c7c66a95
KH
8141 Lisp_Object tmp, tmp2;
8142 int dim2;
ec6d2bb8 8143
c7c66a95
KH
8144 tmp = AREF (val, i);
8145 if (NILP (tmp))
8146 tmp = XCAR (tail);
8147 else if (NUMBERP (tmp))
8148 {
8149 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8150 if (dim < dim2)
c7c66a95 8151 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8152 else
8153 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8154 }
15d143f7 8155 else
c7c66a95
KH
8156 {
8157 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8158 {
8159 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8160 if (dim < dim2)
8161 break;
8162 }
8163 if (NILP (tmp2))
8164 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8165 else
8166 {
8167 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8168 XSETCAR (tmp2, XCAR (tail));
8169 }
8170 }
8171 ASET (val, i, tmp);
15d143f7 8172 }
df7492f9
KH
8173 }
8174 ASET (attrs, coding_attr_charset_valids, val);
8175 category = coding_category_charset;
8176 }
8177 else if (EQ (coding_type, Qccl))
8178 {
8179 Lisp_Object valids;
ecec61c1 8180
df7492f9
KH
8181 if (nargs < coding_arg_ccl_max)
8182 goto short_args;
ecec61c1 8183
df7492f9
KH
8184 val = args[coding_arg_ccl_decoder];
8185 CHECK_CCL_PROGRAM (val);
8186 if (VECTORP (val))
8187 val = Fcopy_sequence (val);
8188 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 8189
df7492f9
KH
8190 val = args[coding_arg_ccl_encoder];
8191 CHECK_CCL_PROGRAM (val);
8192 if (VECTORP (val))
8193 val = Fcopy_sequence (val);
8194 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 8195
df7492f9
KH
8196 val = args[coding_arg_ccl_valids];
8197 valids = Fmake_string (make_number (256), make_number (0));
8198 for (tail = val; !NILP (tail); tail = Fcdr (tail))
8199 {
8dcbea82 8200 int from, to;
ecec61c1 8201
df7492f9
KH
8202 val = Fcar (tail);
8203 if (INTEGERP (val))
8dcbea82
KH
8204 {
8205 from = to = XINT (val);
8206 if (from < 0 || from > 255)
8207 args_out_of_range_3 (val, make_number (0), make_number (255));
8208 }
df7492f9
KH
8209 else
8210 {
df7492f9 8211 CHECK_CONS (val);
8f924df7
KH
8212 CHECK_NATNUM_CAR (val);
8213 CHECK_NATNUM_CDR (val);
df7492f9 8214 from = XINT (XCAR (val));
8f924df7 8215 if (from > 255)
8dcbea82
KH
8216 args_out_of_range_3 (XCAR (val),
8217 make_number (0), make_number (255));
df7492f9 8218 to = XINT (XCDR (val));
8dcbea82
KH
8219 if (to < from || to > 255)
8220 args_out_of_range_3 (XCDR (val),
8221 XCAR (val), make_number (255));
df7492f9 8222 }
8dcbea82 8223 for (i = from; i <= to; i++)
8f924df7 8224 SSET (valids, i, 1);
df7492f9
KH
8225 }
8226 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 8227
df7492f9 8228 category = coding_category_ccl;
55ab7be3 8229 }
df7492f9 8230 else if (EQ (coding_type, Qutf_16))
55ab7be3 8231 {
df7492f9 8232 Lisp_Object bom, endian;
4ed46869 8233
584948ac 8234 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 8235
df7492f9
KH
8236 if (nargs < coding_arg_utf16_max)
8237 goto short_args;
4ed46869 8238
df7492f9
KH
8239 bom = args[coding_arg_utf16_bom];
8240 if (! NILP (bom) && ! EQ (bom, Qt))
8241 {
8242 CHECK_CONS (bom);
8f924df7
KH
8243 val = XCAR (bom);
8244 CHECK_CODING_SYSTEM (val);
8245 val = XCDR (bom);
8246 CHECK_CODING_SYSTEM (val);
df7492f9
KH
8247 }
8248 ASET (attrs, coding_attr_utf_16_bom, bom);
8249
8250 endian = args[coding_arg_utf16_endian];
b49a1807
KH
8251 CHECK_SYMBOL (endian);
8252 if (NILP (endian))
8253 endian = Qbig;
8254 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 8255 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
8256 ASET (attrs, coding_attr_utf_16_endian, endian);
8257
8258 category = (CONSP (bom)
8259 ? coding_category_utf_16_auto
8260 : NILP (bom)
b49a1807 8261 ? (EQ (endian, Qbig)
df7492f9
KH
8262 ? coding_category_utf_16_be_nosig
8263 : coding_category_utf_16_le_nosig)
b49a1807 8264 : (EQ (endian, Qbig)
df7492f9
KH
8265 ? coding_category_utf_16_be
8266 : coding_category_utf_16_le));
8267 }
8268 else if (EQ (coding_type, Qiso_2022))
8269 {
8270 Lisp_Object initial, reg_usage, request, flags;
4776e638 8271 int i;
1397dc18 8272
df7492f9
KH
8273 if (nargs < coding_arg_iso2022_max)
8274 goto short_args;
8275
8276 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8277 CHECK_VECTOR (initial);
8278 for (i = 0; i < 4; i++)
8279 {
8280 val = Faref (initial, make_number (i));
8281 if (! NILP (val))
8282 {
584948ac
KH
8283 struct charset *charset;
8284
8285 CHECK_CHARSET_GET_CHARSET (val, charset);
8286 ASET (initial, i, make_number (CHARSET_ID (charset)));
8287 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8288 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8289 }
8290 else
8291 ASET (initial, i, make_number (-1));
8292 }
8293
8294 reg_usage = args[coding_arg_iso2022_reg_usage];
8295 CHECK_CONS (reg_usage);
8f924df7
KH
8296 CHECK_NUMBER_CAR (reg_usage);
8297 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
8298
8299 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8300 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8301 {
df7492f9 8302 int id;
8f924df7 8303 Lisp_Object tmp;
df7492f9
KH
8304
8305 val = Fcar (tail);
8306 CHECK_CONS (val);
8f924df7
KH
8307 tmp = XCAR (val);
8308 CHECK_CHARSET_GET_ID (tmp, id);
8309 CHECK_NATNUM_CDR (val);
df7492f9
KH
8310 if (XINT (XCDR (val)) >= 4)
8311 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 8312 XSETCAR (val, make_number (id));
1397dc18 8313 }
4ed46869 8314
df7492f9
KH
8315 flags = args[coding_arg_iso2022_flags];
8316 CHECK_NATNUM (flags);
8317 i = XINT (flags);
8318 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8319 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8320
8321 ASET (attrs, coding_attr_iso_initial, initial);
8322 ASET (attrs, coding_attr_iso_usage, reg_usage);
8323 ASET (attrs, coding_attr_iso_request, request);
8324 ASET (attrs, coding_attr_iso_flags, flags);
8325 setup_iso_safe_charsets (attrs);
8326
8327 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8328 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8329 | CODING_ISO_FLAG_SINGLE_SHIFT))
8330 ? coding_category_iso_7_else
8331 : EQ (args[coding_arg_charset_list], Qiso_2022)
8332 ? coding_category_iso_7
8333 : coding_category_iso_7_tight);
8334 else
8335 {
8336 int id = XINT (AREF (initial, 1));
8337
c6fb6e98 8338 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8339 || EQ (args[coding_arg_charset_list], Qiso_2022)
8340 || id < 0)
8341 ? coding_category_iso_8_else
8342 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8343 ? coding_category_iso_8_1
8344 : coding_category_iso_8_2);
8345 }
0ce7886f
KH
8346 if (category != coding_category_iso_8_1
8347 && category != coding_category_iso_8_2)
8348 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8349 }
8350 else if (EQ (coding_type, Qemacs_mule))
c28a9453 8351 {
df7492f9
KH
8352 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8353 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8354 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 8355 category = coding_category_emacs_mule;
c28a9453 8356 }
df7492f9 8357 else if (EQ (coding_type, Qshift_jis))
c28a9453 8358 {
df7492f9
KH
8359
8360 struct charset *charset;
8361
8362 if (XINT (Flength (charset_list)) != 3)
8363 error ("There should be just three charsets");
8364
8365 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8366 if (CHARSET_DIMENSION (charset) != 1)
8367 error ("Dimension of charset %s is not one",
8f924df7 8368 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8369 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8370 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8371
8372 charset_list = XCDR (charset_list);
8373 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8374 if (CHARSET_DIMENSION (charset) != 1)
8375 error ("Dimension of charset %s is not one",
8f924df7 8376 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
8377
8378 charset_list = XCDR (charset_list);
8379 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8380 if (CHARSET_DIMENSION (charset) != 2)
8381 error ("Dimension of charset %s is not two",
8f924df7 8382 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
8383
8384 category = coding_category_sjis;
8385 Vsjis_coding_system = name;
c28a9453 8386 }
df7492f9
KH
8387 else if (EQ (coding_type, Qbig5))
8388 {
8389 struct charset *charset;
4ed46869 8390
df7492f9
KH
8391 if (XINT (Flength (charset_list)) != 2)
8392 error ("There should be just two charsets");
8393
8394 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8395 if (CHARSET_DIMENSION (charset) != 1)
8396 error ("Dimension of charset %s is not one",
8f924df7 8397 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8398 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8399 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8400
8401 charset_list = XCDR (charset_list);
8402 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8403 if (CHARSET_DIMENSION (charset) != 2)
8404 error ("Dimension of charset %s is not two",
8f924df7 8405 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 8406
df7492f9
KH
8407 category = coding_category_big5;
8408 Vbig5_coding_system = name;
8409 }
8410 else if (EQ (coding_type, Qraw_text))
c28a9453 8411 {
584948ac
KH
8412 category = coding_category_raw_text;
8413 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 8414 }
df7492f9 8415 else if (EQ (coding_type, Qutf_8))
4ed46869 8416 {
584948ac
KH
8417 category = coding_category_utf_8;
8418 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 8419 }
df7492f9
KH
8420 else if (EQ (coding_type, Qundecided))
8421 category = coding_category_undecided;
4ed46869 8422 else
df7492f9 8423 error ("Invalid coding system type: %s",
8f924df7 8424 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 8425
df7492f9 8426 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
8427 CODING_ATTR_PLIST (attrs)
8428 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8429 CODING_ATTR_PLIST (attrs)));
c4825358 8430
df7492f9
KH
8431 eol_type = args[coding_arg_eol_type];
8432 if (! NILP (eol_type)
8433 && ! EQ (eol_type, Qunix)
8434 && ! EQ (eol_type, Qdos)
8435 && ! EQ (eol_type, Qmac))
8436 error ("Invalid eol-type");
4ed46869 8437
df7492f9 8438 aliases = Fcons (name, Qnil);
4ed46869 8439
df7492f9
KH
8440 if (NILP (eol_type))
8441 {
8442 eol_type = make_subsidiaries (name);
8443 for (i = 0; i < 3; i++)
1397dc18 8444 {
df7492f9
KH
8445 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8446
8447 this_name = AREF (eol_type, i);
8448 this_aliases = Fcons (this_name, Qnil);
8449 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8450 this_spec = Fmake_vector (make_number (3), attrs);
8451 ASET (this_spec, 1, this_aliases);
8452 ASET (this_spec, 2, this_eol_type);
8453 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8454 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8455 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8456 Vcoding_system_alist);
1397dc18 8457 }
d46c5b12 8458 }
4ed46869 8459
df7492f9
KH
8460 spec_vec = Fmake_vector (make_number (3), attrs);
8461 ASET (spec_vec, 1, aliases);
8462 ASET (spec_vec, 2, eol_type);
48b0f3ae 8463
df7492f9
KH
8464 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8465 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8466 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8467 Vcoding_system_alist);
48b0f3ae 8468
df7492f9
KH
8469 {
8470 int id = coding_categories[category].id;
48b0f3ae 8471
df7492f9
KH
8472 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8473 setup_coding_system (name, &coding_categories[category]);
8474 }
48b0f3ae 8475
d46c5b12 8476 return Qnil;
48b0f3ae 8477
df7492f9
KH
8478 short_args:
8479 return Fsignal (Qwrong_number_of_arguments,
8480 Fcons (intern ("define-coding-system-internal"),
8481 make_number (nargs)));
d46c5b12 8482}
4ed46869 8483
da7db224 8484/* Fixme: should this record the alias relationships for
e1c23804 8485 diagnostics? Should it update coding-system-list? */
df7492f9
KH
8486DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8487 Sdefine_coding_system_alias, 2, 2, 0,
8488 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8489 (alias, coding_system)
8490 Lisp_Object alias, coding_system;
66cfb530 8491{
df7492f9 8492 Lisp_Object spec, aliases, eol_type;
4ed46869 8493
df7492f9
KH
8494 CHECK_SYMBOL (alias);
8495 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8496 aliases = AREF (spec, 1);
8497 while (!NILP (XCDR (aliases)))
8498 aliases = XCDR (aliases);
8f924df7 8499 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 8500
df7492f9
KH
8501 eol_type = AREF (spec, 2);
8502 if (VECTORP (eol_type))
4ed46869 8503 {
df7492f9
KH
8504 Lisp_Object subsidiaries;
8505 int i;
4ed46869 8506
df7492f9
KH
8507 subsidiaries = make_subsidiaries (alias);
8508 for (i = 0; i < 3; i++)
8509 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8510 AREF (eol_type, i));
8511
8512 ASET (spec, 2, subsidiaries);
4ed46869 8513 }
df7492f9
KH
8514
8515 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
8516 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8517 Vcoding_system_alist);
66cfb530 8518
4ed46869
KH
8519 return Qnil;
8520}
8521
df7492f9
KH
8522DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8523 1, 1, 0,
8524 doc: /* Return the base of CODING-SYSTEM.
da7db224 8525Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
8526 (coding_system)
8527 Lisp_Object coding_system;
d46c5b12 8528{
df7492f9 8529 Lisp_Object spec, attrs;
d46c5b12 8530
df7492f9
KH
8531 if (NILP (coding_system))
8532 return (Qno_conversion);
8533 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8534 attrs = AREF (spec, 0);
8535 return CODING_ATTR_BASE_NAME (attrs);
8536}
1397dc18 8537
df7492f9
KH
8538DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8539 1, 1, 0,
8540 doc: "Return the property list of CODING-SYSTEM.")
8541 (coding_system)
8542 Lisp_Object coding_system;
8543{
8544 Lisp_Object spec, attrs;
1397dc18 8545
df7492f9
KH
8546 if (NILP (coding_system))
8547 coding_system = Qno_conversion;
8548 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8549 attrs = AREF (spec, 0);
8550 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
8551}
8552
df7492f9
KH
8553
8554DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8555 1, 1, 0,
da7db224 8556 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
8557 (coding_system)
8558 Lisp_Object coding_system;
66cfb530 8559{
df7492f9 8560 Lisp_Object spec;
84d60297 8561
df7492f9
KH
8562 if (NILP (coding_system))
8563 coding_system = Qno_conversion;
8564 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 8565 return AREF (spec, 1);
df7492f9 8566}
66cfb530 8567
df7492f9
KH
8568DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8569 Scoding_system_eol_type, 1, 1, 0,
8570 doc: /* Return eol-type of CODING-SYSTEM.
8571An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 8572
df7492f9
KH
8573Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8574and CR respectively.
66cfb530 8575
df7492f9
KH
8576A vector value indicates that a format of end-of-line should be
8577detected automatically. Nth element of the vector is the subsidiary
8578coding system whose eol-type is N. */)
6b89e3aa
KH
8579 (coding_system)
8580 Lisp_Object coding_system;
8581{
df7492f9
KH
8582 Lisp_Object spec, eol_type;
8583 int n;
6b89e3aa 8584
df7492f9
KH
8585 if (NILP (coding_system))
8586 coding_system = Qno_conversion;
8587 if (! CODING_SYSTEM_P (coding_system))
8588 return Qnil;
8589 spec = CODING_SYSTEM_SPEC (coding_system);
8590 eol_type = AREF (spec, 2);
8591 if (VECTORP (eol_type))
8592 return Fcopy_sequence (eol_type);
8593 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8594 return make_number (n);
6b89e3aa
KH
8595}
8596
4ed46869
KH
8597#endif /* emacs */
8598
8599\f
1397dc18 8600/*** 12. Postamble ***/
4ed46869 8601
dfcf069d 8602void
4ed46869
KH
8603init_coding_once ()
8604{
8605 int i;
8606
df7492f9
KH
8607 for (i = 0; i < coding_category_max; i++)
8608 {
8609 coding_categories[i].id = -1;
8610 coding_priorities[i] = i;
8611 }
4ed46869
KH
8612
8613 /* ISO2022 specific initialize routine. */
8614 for (i = 0; i < 0x20; i++)
b73bfc1c 8615 iso_code_class[i] = ISO_control_0;
4ed46869
KH
8616 for (i = 0x21; i < 0x7F; i++)
8617 iso_code_class[i] = ISO_graphic_plane_0;
8618 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 8619 iso_code_class[i] = ISO_control_1;
4ed46869
KH
8620 for (i = 0xA1; i < 0xFF; i++)
8621 iso_code_class[i] = ISO_graphic_plane_1;
8622 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8623 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8624 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8625 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8626 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8627 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8628 iso_code_class[ISO_CODE_ESC] = ISO_escape;
8629 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8630 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8631 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8632
df7492f9
KH
8633 for (i = 0; i < 256; i++)
8634 {
8635 emacs_mule_bytes[i] = 1;
8636 }
7c78e542
KH
8637 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8638 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8639 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8640 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
8641}
8642
8643#ifdef emacs
8644
dfcf069d 8645void
e0e989f6
KH
8646syms_of_coding ()
8647{
df7492f9 8648 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
8649 {
8650 Lisp_Object args[2];
8651 args[0] = QCtest;
8652 args[1] = Qeq;
8653 Vcoding_system_hash_table = Fmake_hash_table (2, args);
8654 }
df7492f9
KH
8655
8656 staticpro (&Vsjis_coding_system);
8657 Vsjis_coding_system = Qnil;
e0e989f6 8658
df7492f9
KH
8659 staticpro (&Vbig5_coding_system);
8660 Vbig5_coding_system = Qnil;
8661
8662 staticpro (&Vcode_conversion_work_buf_list);
8663 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 8664
df7492f9
KH
8665 staticpro (&Vcode_conversion_reused_work_buf);
8666 Vcode_conversion_reused_work_buf = Qnil;
8667
8668 DEFSYM (Qcharset, "charset");
8669 DEFSYM (Qtarget_idx, "target-idx");
8670 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
8671 Fset (Qcoding_system_history, Qnil);
8672
9ce27fde 8673 /* Target FILENAME is the first argument. */
e0e989f6 8674 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 8675 /* Target FILENAME is the third argument. */
e0e989f6
KH
8676 Fput (Qwrite_region, Qtarget_idx, make_number (2));
8677
df7492f9 8678 DEFSYM (Qcall_process, "call-process");
9ce27fde 8679 /* Target PROGRAM is the first argument. */
e0e989f6
KH
8680 Fput (Qcall_process, Qtarget_idx, make_number (0));
8681
df7492f9 8682 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 8683 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8684 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8685
df7492f9 8686 DEFSYM (Qstart_process, "start-process");
9ce27fde 8687 /* Target PROGRAM is the third argument. */
e0e989f6
KH
8688 Fput (Qstart_process, Qtarget_idx, make_number (2));
8689
df7492f9 8690 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 8691 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
8692 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8693
df7492f9
KH
8694 DEFSYM (Qcoding_system, "coding-system");
8695 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 8696
df7492f9
KH
8697 DEFSYM (Qeol_type, "eol-type");
8698 DEFSYM (Qunix, "unix");
8699 DEFSYM (Qdos, "dos");
4ed46869 8700
df7492f9
KH
8701 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8702 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8703 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8704 DEFSYM (Qdefault_char, "default-char");
8705 DEFSYM (Qundecided, "undecided");
8706 DEFSYM (Qno_conversion, "no-conversion");
8707 DEFSYM (Qraw_text, "raw-text");
4ed46869 8708
df7492f9 8709 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 8710
df7492f9 8711 DEFSYM (Qutf_8, "utf-8");
8f924df7 8712 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 8713
df7492f9 8714 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
8715 DEFSYM (Qbig, "big");
8716 DEFSYM (Qlittle, "little");
27901516 8717
df7492f9
KH
8718 DEFSYM (Qshift_jis, "shift-jis");
8719 DEFSYM (Qbig5, "big5");
4ed46869 8720
df7492f9 8721 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 8722
df7492f9 8723 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
8724 Fput (Qcoding_system_error, Qerror_conditions,
8725 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8726 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 8727 build_string ("Invalid coding system"));
4ed46869 8728
05e6f5dc
KH
8729 /* Intern this now in case it isn't already done.
8730 Setting this variable twice is harmless.
8731 But don't staticpro it here--that is done in alloc.c. */
8732 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 8733
df7492f9 8734 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8735 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8736 DEFSYM (Qtranslation_table_id, "translation-table-id");
8737 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8738 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 8739
df7492f9 8740 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 8741
df7492f9 8742 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 8743
01378f49
KH
8744 DEFSYM (QCcategory, ":category");
8745
df7492f9
KH
8746 Vcoding_category_table
8747 = Fmake_vector (make_number (coding_category_max), Qnil);
8748 staticpro (&Vcoding_category_table);
8749 /* Followings are target of code detection. */
8750 ASET (Vcoding_category_table, coding_category_iso_7,
8751 intern ("coding-category-iso-7"));
8752 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8753 intern ("coding-category-iso-7-tight"));
8754 ASET (Vcoding_category_table, coding_category_iso_8_1,
8755 intern ("coding-category-iso-8-1"));
8756 ASET (Vcoding_category_table, coding_category_iso_8_2,
8757 intern ("coding-category-iso-8-2"));
8758 ASET (Vcoding_category_table, coding_category_iso_7_else,
8759 intern ("coding-category-iso-7-else"));
8760 ASET (Vcoding_category_table, coding_category_iso_8_else,
8761 intern ("coding-category-iso-8-else"));
8762 ASET (Vcoding_category_table, coding_category_utf_8,
8763 intern ("coding-category-utf-8"));
8764 ASET (Vcoding_category_table, coding_category_utf_16_be,
8765 intern ("coding-category-utf-16-be"));
ff563fce
KH
8766 ASET (Vcoding_category_table, coding_category_utf_16_auto,
8767 intern ("coding-category-utf-16-auto"));
df7492f9
KH
8768 ASET (Vcoding_category_table, coding_category_utf_16_le,
8769 intern ("coding-category-utf-16-le"));
8770 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8771 intern ("coding-category-utf-16-be-nosig"));
8772 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8773 intern ("coding-category-utf-16-le-nosig"));
8774 ASET (Vcoding_category_table, coding_category_charset,
8775 intern ("coding-category-charset"));
8776 ASET (Vcoding_category_table, coding_category_sjis,
8777 intern ("coding-category-sjis"));
8778 ASET (Vcoding_category_table, coding_category_big5,
8779 intern ("coding-category-big5"));
8780 ASET (Vcoding_category_table, coding_category_ccl,
8781 intern ("coding-category-ccl"));
8782 ASET (Vcoding_category_table, coding_category_emacs_mule,
8783 intern ("coding-category-emacs-mule"));
8784 /* Followings are NOT target of code detection. */
8785 ASET (Vcoding_category_table, coding_category_raw_text,
8786 intern ("coding-category-raw-text"));
8787 ASET (Vcoding_category_table, coding_category_undecided,
8788 intern ("coding-category-undecided"));
ecf488bc 8789
4ed46869
KH
8790 defsubr (&Scoding_system_p);
8791 defsubr (&Sread_coding_system);
8792 defsubr (&Sread_non_nil_coding_system);
8793 defsubr (&Scheck_coding_system);
8794 defsubr (&Sdetect_coding_region);
d46c5b12 8795 defsubr (&Sdetect_coding_string);
05e6f5dc 8796 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 8797 defsubr (&Sunencodable_char_position);
df7492f9 8798 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8799 defsubr (&Sdecode_coding_region);
8800 defsubr (&Sencode_coding_region);
8801 defsubr (&Sdecode_coding_string);
8802 defsubr (&Sencode_coding_string);
8803 defsubr (&Sdecode_sjis_char);
8804 defsubr (&Sencode_sjis_char);
8805 defsubr (&Sdecode_big5_char);
8806 defsubr (&Sencode_big5_char);
1ba9e4ab 8807 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8808 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8809 defsubr (&Sterminal_coding_system);
1ba9e4ab 8810 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8811 defsubr (&Skeyboard_coding_system);
a5d301df 8812 defsubr (&Sfind_operation_coding_system);
df7492f9 8813 defsubr (&Sset_coding_system_priority);
6b89e3aa 8814 defsubr (&Sdefine_coding_system_internal);
df7492f9
KH
8815 defsubr (&Sdefine_coding_system_alias);
8816 defsubr (&Scoding_system_base);
8817 defsubr (&Scoding_system_plist);
8818 defsubr (&Scoding_system_aliases);
8819 defsubr (&Scoding_system_eol_type);
8820 defsubr (&Scoding_system_priority_list);
4ed46869 8821
4608c386 8822 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8823 doc: /* List of coding systems.
8824
8825Do not alter the value of this variable manually. This variable should be
df7492f9 8826updated by the functions `define-coding-system' and
48b0f3ae 8827`define-coding-system-alias'. */);
4608c386
KH
8828 Vcoding_system_list = Qnil;
8829
8830 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8831 doc: /* Alist of coding system names.
8832Each element is one element list of coding system name.
8833This variable is given to `completing-read' as TABLE argument.
8834
8835Do not alter the value of this variable manually. This variable should be
8836updated by the functions `make-coding-system' and
8837`define-coding-system-alias'. */);
4608c386
KH
8838 Vcoding_system_alist = Qnil;
8839
4ed46869 8840 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8841 doc: /* List of coding-categories (symbols) ordered by priority.
8842
8843On detecting a coding system, Emacs tries code detection algorithms
8844associated with each coding-category one by one in this order. When
8845one algorithm agrees with a byte sequence of source text, the coding
8846system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8847 {
8848 int i;
8849
8850 Vcoding_category_list = Qnil;
df7492f9 8851 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8852 Vcoding_category_list
d46c5b12
KH
8853 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8854 Vcoding_category_list);
4ed46869
KH
8855 }
8856
8857 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8858 doc: /* Specify the coding system for read operations.
8859It is useful to bind this variable with `let', but do not set it globally.
8860If the value is a coding system, it is used for decoding on read operation.
8861If not, an appropriate element is used from one of the coding system alists:
8862There are three such tables, `file-coding-system-alist',
8863`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8864 Vcoding_system_for_read = Qnil;
8865
8866 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8867 doc: /* Specify the coding system for write operations.
8868Programs bind this variable with `let', but you should not set it globally.
8869If the value is a coding system, it is used for encoding of output,
8870when writing it to a file and when sending it to a file or subprocess.
8871
8872If this does not specify a coding system, an appropriate element
8873is used from one of the coding system alists:
8874There are three such tables, `file-coding-system-alist',
8875`process-coding-system-alist', and `network-coding-system-alist'.
8876For output to files, if the above procedure does not specify a coding system,
8877the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8878 Vcoding_system_for_write = Qnil;
8879
8880 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8881 doc: /*
8882Coding system used in the latest file or process I/O. */);
4ed46869
KH
8883 Vlast_coding_system_used = Qnil;
8884
9ce27fde 8885 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8886 doc: /*
8887*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8888See info node `Coding Systems' and info node `Text and Binary' concerning
8889such conversion. */);
9ce27fde
KH
8890 inhibit_eol_conversion = 0;
8891
ed29121d 8892 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8893 doc: /*
8894Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8895Bind it to t if the process output is to be treated as if it were a file
8896read from some filesystem. */);
ed29121d
EZ
8897 inherit_process_coding_system = 0;
8898
02ba4723 8899 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8900 doc: /*
8901Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8902The format is ((PATTERN . VAL) ...),
8903where PATTERN is a regular expression matching a file name,
8904VAL is a coding system, a cons of coding systems, or a function symbol.
8905If VAL is a coding system, it is used for both decoding and encoding
8906the file contents.
8907If VAL is a cons of coding systems, the car part is used for decoding,
8908and the cdr part is used for encoding.
8909If VAL is a function symbol, the function must return a coding system
0192762c
DL
8910or a cons of coding systems which are used as above. The function gets
8911the arguments with which `find-operation-coding-system' was called.
48b0f3ae
PJ
8912
8913See also the function `find-operation-coding-system'
8914and the variable `auto-coding-alist'. */);
02ba4723
KH
8915 Vfile_coding_system_alist = Qnil;
8916
8917 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8918 doc: /*
8919Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8920The format is ((PATTERN . VAL) ...),
8921where PATTERN is a regular expression matching a program name,
8922VAL is a coding system, a cons of coding systems, or a function symbol.
8923If VAL is a coding system, it is used for both decoding what received
8924from the program and encoding what sent to the program.
8925If VAL is a cons of coding systems, the car part is used for decoding,
8926and the cdr part is used for encoding.
8927If VAL is a function symbol, the function must return a coding system
8928or a cons of coding systems which are used as above.
8929
8930See also the function `find-operation-coding-system'. */);
02ba4723
KH
8931 Vprocess_coding_system_alist = Qnil;
8932
8933 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8934 doc: /*
8935Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8936The format is ((PATTERN . VAL) ...),
8937where PATTERN is a regular expression matching a network service name
8938or is a port number to connect to,
8939VAL is a coding system, a cons of coding systems, or a function symbol.
8940If VAL is a coding system, it is used for both decoding what received
8941from the network stream and encoding what sent to the network stream.
8942If VAL is a cons of coding systems, the car part is used for decoding,
8943and the cdr part is used for encoding.
8944If VAL is a function symbol, the function must return a coding system
8945or a cons of coding systems which are used as above.
8946
8947See also the function `find-operation-coding-system'. */);
02ba4723 8948 Vnetwork_coding_system_alist = Qnil;
4ed46869 8949
68c45bf0 8950 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8951 doc: /* Coding system to use with system messages.
8952Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8953 Vlocale_coding_system = Qnil;
8954
005f0d35 8955 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8956 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8957 doc: /*
8958*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8959 eol_mnemonic_unix = build_string (":");
4ed46869 8960
7722baf9 8961 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8962 doc: /*
8963*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8964 eol_mnemonic_dos = build_string ("\\");
4ed46869 8965
7722baf9 8966 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8967 doc: /*
8968*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8969 eol_mnemonic_mac = build_string ("/");
4ed46869 8970
7722baf9 8971 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8972 doc: /*
8973*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8974 eol_mnemonic_undecided = build_string (":");
4ed46869 8975
84fbb8a0 8976 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8977 doc: /*
8978*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8979 Venable_character_translation = Qt;
bdd9fb48 8980
f967223b 8981 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8982 &Vstandard_translation_table_for_decode,
8983 doc: /* Table for translating characters while decoding. */);
f967223b 8984 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8985
f967223b 8986 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8987 &Vstandard_translation_table_for_encode,
8988 doc: /* Table for translating characters while encoding. */);
f967223b 8989 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8990
df7492f9 8991 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8992 doc: /* Alist of charsets vs revision numbers.
8993While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8994designate it with the escape sequence identifying revision (cdr part
8995of the element). */);
8996 Vcharset_revision_table = Qnil;
02ba4723
KH
8997
8998 DEFVAR_LISP ("default-process-coding-system",
8999 &Vdefault_process_coding_system,
48b0f3ae
PJ
9000 doc: /* Cons of coding systems used for process I/O by default.
9001The car part is used for decoding a process output,
9002the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 9003 Vdefault_process_coding_system = Qnil;
c4825358 9004
3f003981 9005 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
9006 doc: /*
9007Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
9008This is a vector of length 256.
9009If Nth element is non-nil, the existence of code N in a file
9010\(or output of subprocess) doesn't prevent it to be detected as
9011a coding system of ISO 2022 variant which has a flag
9012`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9013or reading output of a subprocess.
9014Only the 128th through 159th elements have a meaning. */);
3f003981 9015 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
9016
9017 DEFVAR_LISP ("select-safe-coding-system-function",
9018 &Vselect_safe_coding_system_function,
df7492f9
KH
9019 doc: /*
9020Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
9021
9022If set, this function is called to force a user to select a proper
9023coding system which can encode the text in the case that a default
9024coding system used in each operation can't encode the text.
9025
9026The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
9027 Vselect_safe_coding_system_function = Qnil;
9028
5d5bf4d8
KH
9029 DEFVAR_BOOL ("coding-system-require-warning",
9030 &coding_system_require_warning,
9031 doc: /* Internal use only.
6b89e3aa
KH
9032If non-nil, on writing a file, `select-safe-coding-system-function' is
9033called even if `coding-system-for-write' is non-nil. The command
9034`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
9035 coding_system_require_warning = 0;
9036
9037
22ab2303 9038 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 9039 &inhibit_iso_escape_detection,
df7492f9
KH
9040 doc: /*
9041If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
9042
9043By default, on reading a file, Emacs tries to detect how the text is
9044encoded. This code detection is sensitive to escape sequences. If
9045the sequence is valid as ISO2022, the code is determined as one of
9046the ISO2022 encodings, and the file is decoded by the corresponding
9047coding system (e.g. `iso-2022-7bit').
9048
9049However, there may be a case that you want to read escape sequences in
9050a file as is. In such a case, you can set this variable to non-nil.
9051Then, as the code detection ignores any escape sequences, no file is
9052detected as encoded in some ISO2022 encoding. The result is that all
9053escape sequences become visible in a buffer.
9054
9055The default value is nil, and it is strongly recommended not to change
9056it. That is because many Emacs Lisp source files that contain
9057non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9058in Emacs's distribution, and they won't be decoded correctly on
9059reading if you suppress escape sequence detection.
9060
9061The other way to read escape sequences in a file without decoding is
9062to explicitly specify some coding system that doesn't use ISO2022's
9063escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 9064 inhibit_iso_escape_detection = 0;
002fdb44
DL
9065
9066 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
9067 doc: /* Char table for translating self-inserting characters.
9068This is applied to the result of input methods, not their input. See also
9069`keyboard-translate-table'. */);
002fdb44 9070 Vtranslation_table_for_input = Qnil;
8f924df7 9071
2c78b7e1
KH
9072 {
9073 Lisp_Object args[coding_arg_max];
8f924df7 9074 Lisp_Object plist[16];
2c78b7e1
KH
9075 int i;
9076
9077 for (i = 0; i < coding_arg_max; i++)
9078 args[i] = Qnil;
9079
9080 plist[0] = intern (":name");
9081 plist[1] = args[coding_arg_name] = Qno_conversion;
9082 plist[2] = intern (":mnemonic");
9083 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9084 plist[4] = intern (":coding-type");
9085 plist[5] = args[coding_arg_coding_type] = Qraw_text;
9086 plist[6] = intern (":ascii-compatible-p");
9087 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9088 plist[8] = intern (":default-char");
9089 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
9090 plist[10] = intern (":for-unibyte");
9091 plist[11] = args[coding_arg_for_unibyte] = Qt;
9092 plist[12] = intern (":docstring");
9093 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
9094\n\
9095When you visit a file with this coding, the file is read into a\n\
9096unibyte buffer as is, thus each byte of a file is treated as a\n\
9097character.");
8f924df7
KH
9098 plist[14] = intern (":eol-type");
9099 plist[15] = args[coding_arg_eol_type] = Qunix;
9100 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1
KH
9101 Fdefine_coding_system_internal (coding_arg_max, args);
9102 }
9103
9104 setup_coding_system (Qno_conversion, &keyboard_coding);
9105 setup_coding_system (Qno_conversion, &terminal_coding);
9106 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
9107
9108 {
9109 int i;
9110
9111 for (i = 0; i < coding_category_max; i++)
9112 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9113 }
4ed46869
KH
9114}
9115
68c45bf0
PE
9116char *
9117emacs_strerror (error_number)
9118 int error_number;
9119{
9120 char *str;
9121
ca9c0567 9122 synchronize_system_messages_locale ();
68c45bf0
PE
9123 str = strerror (error_number);
9124
9125 if (! NILP (Vlocale_coding_system))
9126 {
9127 Lisp_Object dec = code_convert_string_norecord (build_string (str),
9128 Vlocale_coding_system,
9129 0);
d5db4077 9130 str = (char *) SDATA (dec);
68c45bf0
PE
9131 }
9132
9133 return str;
9134}
9135
4ed46869 9136#endif /* emacs */