Rename term_init to init_tty.
[bpt/emacs.git] / src / coding.c
CommitLineData
4ed46869 1/* Coding system handler (conversion, detection, and etc).
f1ce3dcf 2 Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
58f99379 4 Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
4ed46869 5
369314dc
KH
6This file is part of GNU Emacs.
7
8GNU Emacs is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
4ed46869 12
369314dc
KH
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
4ed46869 17
369314dc
KH
18You should have received a copy of the GNU General Public License
19along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
20the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21Boston, MA 02110-1301, USA. */
4ed46869
KH
22
23/*** TABLE OF CONTENTS ***
24
b73bfc1c 25 0. General comments
4ed46869 26 1. Preamble
0ef69138 27 2. Emacs' internal format (emacs-mule) handlers
4ed46869
KH
28 3. ISO2022 handlers
29 4. Shift-JIS and BIG5 handlers
1397dc18
KH
30 5. CCL handlers
31 6. End-of-line handlers
32 7. C library functions
33 8. Emacs Lisp library functions
34 9. Post-amble
4ed46869
KH
35
36*/
37
b73bfc1c
KH
38/*** 0. General comments ***/
39
40
cfb43547 41/*** GENERAL NOTE on CODING SYSTEMS ***
4ed46869 42
cfb43547 43 A coding system is an encoding mechanism for one or more character
4ed46869
KH
44 sets. Here's a list of coding systems which Emacs can handle. When
45 we say "decode", it means converting some other coding system to
cfb43547 46 Emacs' internal format (emacs-mule), and when we say "encode",
0ef69138
KH
47 it means converting the coding system emacs-mule to some other
48 coding system.
4ed46869 49
0ef69138 50 0. Emacs' internal format (emacs-mule)
4ed46869 51
cfb43547 52 Emacs itself holds a multi-lingual character in buffers and strings
f4dee582 53 in a special format. Details are described in section 2.
4ed46869
KH
54
55 1. ISO2022
56
57 The most famous coding system for multiple character sets. X's
f4dee582
RS
58 Compound Text, various EUCs (Extended Unix Code), and coding
59 systems used in Internet communication such as ISO-2022-JP are
60 all variants of ISO2022. Details are described in section 3.
4ed46869
KH
61
62 2. SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 63
4ed46869
KH
64 A coding system to encode character sets: ASCII, JISX0201, and
65 JISX0208. Widely used for PC's in Japan. Details are described in
f4dee582 66 section 4.
4ed46869
KH
67
68 3. BIG5
69
cfb43547
DL
70 A coding system to encode the character sets ASCII and Big5. Widely
71 used for Chinese (mainly in Taiwan and Hong Kong). Details are
f4dee582
RS
72 described in section 4. In this file, when we write "BIG5"
73 (all uppercase), we mean the coding system, and when we write
74 "Big5" (capitalized), we mean the character set.
4ed46869 75
27901516
KH
76 4. Raw text
77
cfb43547
DL
78 A coding system for text containing random 8-bit code. Emacs does
79 no code conversion on such text except for end-of-line format.
27901516
KH
80
81 5. Other
4ed46869 82
cfb43547
DL
83 If a user wants to read/write text encoded in a coding system not
84 listed above, he can supply a decoder and an encoder for it as CCL
4ed46869
KH
85 (Code Conversion Language) programs. Emacs executes the CCL program
86 while reading/writing.
87
d46c5b12
KH
88 Emacs represents a coding system by a Lisp symbol that has a property
89 `coding-system'. But, before actually using the coding system, the
4ed46869 90 information about it is set in a structure of type `struct
f4dee582 91 coding_system' for rapid processing. See section 6 for more details.
4ed46869
KH
92
93*/
94
95/*** GENERAL NOTES on END-OF-LINE FORMAT ***
96
cfb43547
DL
97 How end-of-line of text is encoded depends on the operating system.
98 For instance, Unix's format is just one byte of `line-feed' code,
f4dee582 99 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
100 `line-feed' codes. MacOS's format is usually one byte of
101 `carriage-return'.
4ed46869 102
cfb43547
DL
103 Since text character encoding and end-of-line encoding are
104 independent, any coding system described above can have any
105 end-of-line format. So Emacs has information about end-of-line
106 format in each coding-system. See section 6 for more details.
4ed46869
KH
107
108*/
109
110/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
111
112 These functions check if a text between SRC and SRC_END is encoded
113 in the coding system category XXX. Each returns an integer value in
cfb43547 114 which appropriate flag bits for the category XXX are set. The flag
4ed46869 115 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
cfb43547 116 template for these functions. If MULTIBYTEP is nonzero, 8-bit codes
0a28aafb 117 of the range 0x80..0x9F are in multibyte form. */
4ed46869
KH
118#if 0
119int
0a28aafb 120detect_coding_emacs_mule (src, src_end, multibytep)
4ed46869 121 unsigned char *src, *src_end;
0a28aafb 122 int multibytep;
4ed46869
KH
123{
124 ...
125}
126#endif
127
128/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
129
b73bfc1c
KH
130 These functions decode SRC_BYTES length of unibyte text at SOURCE
131 encoded in CODING to Emacs' internal format. The resulting
132 multibyte text goes to a place pointed to by DESTINATION, the length
133 of which should not exceed DST_BYTES.
d46c5b12 134
cfb43547
DL
135 These functions set the information about original and decoded texts
136 in the members `produced', `produced_char', `consumed', and
137 `consumed_char' of the structure *CODING. They also set the member
138 `result' to one of CODING_FINISH_XXX indicating how the decoding
139 finished.
d46c5b12 140
cfb43547 141 DST_BYTES zero means that the source area and destination area are
d46c5b12 142 overlapped, which means that we can produce a decoded text until it
cfb43547 143 reaches the head of the not-yet-decoded source text.
d46c5b12 144
cfb43547 145 Below is a template for these functions. */
4ed46869 146#if 0
b73bfc1c 147static void
d46c5b12 148decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
4ed46869 149 struct coding_system *coding;
5bdca8af
DN
150 const unsigned char *source;
151 unsigned char *destination;
4ed46869 152 int src_bytes, dst_bytes;
4ed46869
KH
153{
154 ...
155}
156#endif
157
158/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
159
cfb43547 160 These functions encode SRC_BYTES length text at SOURCE from Emacs'
b73bfc1c
KH
161 internal multibyte format to CODING. The resulting unibyte text
162 goes to a place pointed to by DESTINATION, the length of which
163 should not exceed DST_BYTES.
d46c5b12 164
cfb43547
DL
165 These functions set the information about original and encoded texts
166 in the members `produced', `produced_char', `consumed', and
167 `consumed_char' of the structure *CODING. They also set the member
168 `result' to one of CODING_FINISH_XXX indicating how the encoding
169 finished.
d46c5b12 170
cfb43547
DL
171 DST_BYTES zero means that the source area and destination area are
172 overlapped, which means that we can produce encoded text until it
173 reaches the head of the not-yet-encoded source text.
d46c5b12 174
cfb43547 175 Below is a template for these functions. */
4ed46869 176#if 0
b73bfc1c 177static void
d46c5b12 178encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
179 struct coding_system *coding;
180 unsigned char *source, *destination;
181 int src_bytes, dst_bytes;
4ed46869
KH
182{
183 ...
184}
185#endif
186
187/*** COMMONLY USED MACROS ***/
188
b73bfc1c
KH
/* The macros ONE_MORE_BYTE and TWO_MORE_BYTES safely get one and two
   bytes, respectively, from the source text and advance `src' past
   them.  If there are not enough bytes in the source, they set
   coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
   `label_end_of_loop' without consuming anything.  The caller should
   set variables `coding', `src' and `src_end' to appropriate
   pointers in advance.  These macros are called from decoding
   routines `decode_coding_XXX', thus it is assumed that the source
   text is unibyte.  */

#define ONE_MORE_BYTE(c1)                                       \
  do {                                                          \
    if (src >= src_end)                                         \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    (c1) = *src++;                                              \
  } while (0)
206
b73bfc1c
KH
/* Fetch the next two source bytes into C1 and C2 and advance `src'
   past them.  If fewer than two bytes remain before `src_end', set
   coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
   `label_end_of_loop' without consuming anything.  The remaining
   length is computed as a pointer difference so that no pointer
   beyond `src_end' is ever formed.  */

#define TWO_MORE_BYTES(c1, c2)                                  \
  do {                                                          \
    if (src_end - src < 2)                                      \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    (c1) = *src++;                                              \
    (c2) = *src++;                                              \
  } while (0)
217
4ed46869 218
0a28aafb
KH
/* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
   form if MULTIBYTEP is nonzero.  In that case a
   LEADING_CODE_8_BIT_CONTROL byte is followed by (8-bit code + 0x20),
   and C1 is set to the 8-bit code itself.  If the source ends right
   after the leading code, that is reported as insufficient source
   (coding->result is set and control jumps to `label_end_of_loop')
   instead of reading the byte at `src_end'.  */

#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
  do {                                                          \
    if (src >= src_end)                                         \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    (c1) = *src++;                                              \
    if ((multibytep) && (c1) == LEADING_CODE_8_BIT_CONTROL)     \
      {                                                         \
        /* The trailing byte must exist before we read it.  */  \
        if (src >= src_end)                                     \
          {                                                     \
            coding->result = CODING_FINISH_INSUFFICIENT_SRC;    \
            goto label_end_of_loop;                             \
          }                                                     \
        (c1) = *src++ - 0x20;                                   \
      }                                                         \
  } while (0)
233
b73bfc1c
KH
/* Set C to the next character of the source text at `src' and advance
   `src' past it.  If no source is left, set coding->result to
   CODING_FINISH_INSUFFICIENT_SRC and jump to `label_end_of_loop'.
   The caller should set variables `coding', `src', `src_end', and
   `translation_table' in advance.  This macro is used in encoding
   routines `encode_coding_XXX', thus it assumes that the source text
   is in multibyte form except for 8-bit characters.  8-bit
   characters are in multibyte form if coding->src_multibyte is
   nonzero, else they are represented by a single byte.  When
   `translation_table' is non-nil, C is passed through
   translate_char before being handed back.  */

#define ONE_MORE_CHAR(c)                                        \
  do {                                                          \
    int len = src_end - src;                                    \
    int bytes;                                                  \
    if (len <= 0)                                               \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    if (! coding->src_multibyte                                 \
        && ! UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))      \
      {                                                         \
        /* A lone byte that is not a valid multibyte form.  */  \
        c = *src;                                               \
        bytes = 1;                                              \
      }                                                         \
    else                                                        \
      c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
    if (!NILP (translation_table))                              \
      c = translate_char (translation_table, c, -1, 0, 0);      \
    src += bytes;                                               \
  } while (0)
262
4ed46869 263
/* Produce the multibyte form of character C at `dst' and count it in
   coding->produced_char.  If there is not enough space at `dst', set
   coding->result to CODING_FINISH_INSUFFICIENT_DST and jump to
   `label_end_of_loop'.

   While a composition sequence is being decoded, C may be a component
   of the current composition (e.g. an ALTCHAR).  For every
   composition method other than COMPOSITION_RELATIVE, C is recorded
   in coding->cmp_data->data; it is also written to `dst' only when
   there is no composition in progress or the method is
   COMPOSITION_RELATIVE or COMPOSITION_WITH_RULE.

   This macro is used in decoding routines.  */

#define EMIT_CHAR(c)                                                    \
  do {                                                                  \
    if (! (COMPOSING_P (coding)                                         \
           && coding->composing != COMPOSITION_RELATIVE                 \
           && coding->composing != COMPOSITION_WITH_RULE))              \
      {                                                                 \
        int bytes = CHAR_BYTES (c);                                     \
        if ((dst_bytes ? dst_end : src) - dst < bytes)                  \
          {                                                             \
            coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
            goto label_end_of_loop;                                     \
          }                                                             \
        dst += CHAR_STRING (c, dst);                                    \
        coding->produced_char += 1;                                     \
      }                                                                 \
                                                                        \
    if (COMPOSING_P (coding))                                           \
      {                                                                 \
        if (coding->composing != COMPOSITION_RELATIVE)                  \
          {                                                             \
            CODING_ADD_COMPOSITION_COMPONENT (coding, c);               \
            coding->composition_rule_follows                            \
              = (coding->composing != COMPOSITION_WITH_ALTCHARS);       \
          }                                                             \
      }                                                                 \
  } while (0)
298
4ed46869 299
b73bfc1c
KH
/* Store the single byte C at `dst' and advance `dst'.  The output
   limit is `dst_end' when `dst_bytes' is nonzero, otherwise `src'.
   If `dst' has reached that limit, set coding->result to
   CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.  */

#define EMIT_ONE_BYTE(c)                                        \
  do {                                                          \
    if (dst >= (dst_bytes ? dst_end : src))                     \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    *dst++ = (c);                                               \
  } while (0)
309
/* Store the two bytes C1 and C2 at `dst' and advance `dst' past them.
   The output limit is `dst_end' when `dst_bytes' is nonzero,
   otherwise `src'.  If fewer than two bytes are available, set
   coding->result to CODING_FINISH_INSUFFICIENT_DST and jump to
   `label_end_of_loop' without writing anything.  The free space is
   computed as a pointer difference so that no pointer beyond the
   limit is formed.  */

#define EMIT_TWO_BYTES(c1, c2)                                  \
  do {                                                          \
    if ((dst_bytes ? dst_end : src) - dst < 2)                  \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    *dst++ = (c1);                                              \
    *dst++ = (c2);                                              \
  } while (0)
319
/* Copy the bytes in the range FROM (inclusive) to TO (exclusive) to
   `dst', advancing both `dst' and FROM; FROM must therefore be a
   modifiable pointer variable and equals TO afterwards.  The output
   limit is `dst_end' when `dst_bytes' is nonzero, otherwise `src'.
   If the range does not fit, set coding->result to
   CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'
   without writing anything.  */

#define EMIT_BYTES(from, to)                                            \
  do {                                                                  \
    if ((dst_bytes ? dst_end : src) - dst < (to) - (from))              \
      {                                                                 \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
        goto label_end_of_loop;                                         \
      }                                                                 \
    while ((from) < (to))                                               \
      *dst++ = *(from)++;                                               \
  } while (0)
330
331\f
332/*** 1. Preamble ***/
333
68c45bf0
PE
334#ifdef emacs
335#include <config.h>
336#endif
337
4ed46869
KH
338#include <stdio.h>
339
340#ifdef emacs
341
4ed46869
KH
342#include "lisp.h"
343#include "buffer.h"
344#include "charset.h"
ec6d2bb8 345#include "composite.h"
4ed46869
KH
346#include "ccl.h"
347#include "coding.h"
348#include "window.h"
66638433 349#include "intervals.h"
b8299c66
KL
350#include "frame.h"
351#include "termhooks.h"
4ed46869
KH
352
353#else /* not emacs */
354
355#include "mulelib.h"
356
357#endif /* not emacs */
358
359Lisp_Object Qcoding_system, Qeol_type;
360Lisp_Object Qbuffer_file_coding_system;
361Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
27901516 362Lisp_Object Qno_conversion, Qundecided;
bb0115a2 363Lisp_Object Qcoding_system_history;
05e6f5dc 364Lisp_Object Qsafe_chars;
1397dc18 365Lisp_Object Qvalid_codes;
4ed46869
KH
366
367extern Lisp_Object Qinsert_file_contents, Qwrite_region;
368Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
369Lisp_Object Qstart_process, Qopen_network_stream;
370Lisp_Object Qtarget_idx;
371
a362520d
KH
372/* If a symbol has this property, evaluate the value to define the
373 symbol as a coding system. */
374Lisp_Object Qcoding_system_define_form;
375
d46c5b12
KH
376Lisp_Object Vselect_safe_coding_system_function;
377
5d5bf4d8
KH
378int coding_system_require_warning;
379
7722baf9
EZ
380/* Mnemonic string for each format of end-of-line. */
381Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
382/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 383 decided. */
7722baf9 384Lisp_Object eol_mnemonic_undecided;
4ed46869 385
9ce27fde
KH
386/* Format of end-of-line decided by system. This is CODING_EOL_LF on
387 Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
388int system_eol_type;
389
4ed46869
KH
390#ifdef emacs
391
6b89e3aa
KH
392/* Information about which coding system is safe for which chars.
393 The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
394
395 GENERIC-LIST is a list of generic coding systems which can encode
396 any characters.
397
398 NON-GENERIC-ALIST is an alist of non generic coding systems vs the
399 corresponding char table that contains safe chars. */
400Lisp_Object Vcoding_system_safe_chars;
401
4608c386
KH
402Lisp_Object Vcoding_system_list, Vcoding_system_alist;
403
404Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 405
d46c5b12
KH
406/* Coding system emacs-mule and raw-text are for converting only
407 end-of-line format. */
408Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 409
ecf488bc
DL
410Lisp_Object Qutf_8;
411
4ed46869
KH
412/* Coding-systems are handed between Emacs Lisp programs and C internal
413 routines by the following three variables. */
414/* Coding-system for reading files and receiving data from process. */
415Lisp_Object Vcoding_system_for_read;
416/* Coding-system for writing files and sending data to process. */
417Lisp_Object Vcoding_system_for_write;
418/* Coding-system actually used in the latest I/O. */
419Lisp_Object Vlast_coding_system_used;
420
c4825358 421/* A vector of length 256 which contains information about special
94487c4e 422 Latin codes (especially for dealing with Microsoft codes). */
3f003981 423Lisp_Object Vlatin_extra_code_table;
c4825358 424
9ce27fde
KH
425/* Flag to inhibit code conversion of end-of-line format. */
426int inhibit_eol_conversion;
427
74383408
KH
428/* Flag to inhibit ISO2022 escape sequence detection. */
429int inhibit_iso_escape_detection;
430
ed29121d
EZ
431/* Flag to make buffer-file-coding-system inherit from process-coding. */
432int inherit_process_coding_system;
433
c4825358
KH
434/* Coding system to be used to encode text for terminal display when
435 terminal coding system is nil. */
436struct coding_system safe_terminal_coding;
437
6bc51348
KH
438/* Default coding system to be used to write a file. */
439struct coding_system default_buffer_file_coding;
440
02ba4723
KH
441Lisp_Object Vfile_coding_system_alist;
442Lisp_Object Vprocess_coding_system_alist;
443Lisp_Object Vnetwork_coding_system_alist;
4ed46869 444
68c45bf0
PE
445Lisp_Object Vlocale_coding_system;
446
4ed46869
KH
447#endif /* emacs */
448
d46c5b12 449Lisp_Object Qcoding_category, Qcoding_category_index;
4ed46869
KH
450
451/* List of symbols `coding-category-xxx' ordered by priority. */
452Lisp_Object Vcoding_category_list;
453
d46c5b12
KH
454/* Table of coding categories (Lisp symbols). */
455Lisp_Object Vcoding_category_table;
4ed46869
KH
456
457/* Table of names of symbol for each coding-category. */
458char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
0ef69138 459 "coding-category-emacs-mule",
4ed46869
KH
460 "coding-category-sjis",
461 "coding-category-iso-7",
d46c5b12 462 "coding-category-iso-7-tight",
4ed46869
KH
463 "coding-category-iso-8-1",
464 "coding-category-iso-8-2",
7717c392
KH
465 "coding-category-iso-7-else",
466 "coding-category-iso-8-else",
89fa8b36 467 "coding-category-ccl",
4ed46869 468 "coding-category-big5",
fa42c37f
KH
469 "coding-category-utf-8",
470 "coding-category-utf-16-be",
471 "coding-category-utf-16-le",
27901516 472 "coding-category-raw-text",
89fa8b36 473 "coding-category-binary"
4ed46869
KH
474};
475
66cfb530 476/* Table of pointers to coding systems corresponding to each coding
d46c5b12
KH
477 category. */
478struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
479
66cfb530 480/* Table of coding category masks. Nth element is a mask for a coding
8ca3766a 481 category of which priority is Nth. */
66cfb530
KH
482static
483int coding_priorities[CODING_CATEGORY_IDX_MAX];
484
f967223b
KH
485/* Flag to tell if we look up translation table on character code
486 conversion. */
84fbb8a0 487Lisp_Object Venable_character_translation;
f967223b
KH
488/* Standard translation table to look up on decoding (reading). */
489Lisp_Object Vstandard_translation_table_for_decode;
490/* Standard translation table to look up on encoding (writing). */
491Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 492
f967223b
KH
493Lisp_Object Qtranslation_table;
494Lisp_Object Qtranslation_table_id;
495Lisp_Object Qtranslation_table_for_decode;
496Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
497
498/* Alist of charsets vs revision number. */
499Lisp_Object Vcharset_revision_alist;
500
02ba4723
KH
501/* Default coding systems used for process I/O. */
502Lisp_Object Vdefault_process_coding_system;
503
002fdb44
DL
504/* Char table for translating Quail and self-inserting input. */
505Lisp_Object Vtranslation_table_for_input;
506
b843d1ae
KH
507/* Global flag to tell that we can't call post-read-conversion and
508 pre-write-conversion functions. Usually the value is zero, but it
509 is set to 1 temporarily while such functions are running. This is
510 to avoid infinite recursive call. */
511static int inhibit_pre_post_conversion;
512
05e6f5dc
KH
513Lisp_Object Qchar_coding_system;
514
6b89e3aa
KH
515/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check
516 its validity. */
05e6f5dc
KH
517
518Lisp_Object
6b89e3aa
KH
519coding_safe_chars (coding_system)
520 Lisp_Object coding_system;
05e6f5dc
KH
521{
522 Lisp_Object coding_spec, plist, safe_chars;
93dec019 523
6b89e3aa 524 coding_spec = Fget (coding_system, Qcoding_system);
05e6f5dc
KH
525 plist = XVECTOR (coding_spec)->contents[3];
526 safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
527 return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
528}
529
/* Nonzero if character C is safe according to SAFE_CHARS, which is
   either Qt (every character is safe) or a char table whose entry
   for C is non-nil.  SAFE_CHARS may be evaluated twice.  */

#define CODING_SAFE_CHAR_P(safe_chars, c) \
  (EQ ((safe_chars), Qt) || !NILP (CHAR_TABLE_REF ((safe_chars), (c))))
532
4ed46869 533\f
0ef69138 534/*** 2. Emacs internal format (emacs-mule) handlers ***/
4ed46869 535
aa72b389
KH
536/* Emacs' internal format for representation of multiple character
537 sets is a kind of multi-byte encoding, i.e. characters are
538 represented by variable-length sequences of one-byte codes.
b73bfc1c
KH
539
540 ASCII characters and control characters (e.g. `tab', `newline') are
541 represented by one-byte sequences which are their ASCII codes, in
542 the range 0x00 through 0x7F.
543
544 8-bit characters of the range 0x80..0x9F are represented by
545 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
546 code + 0x20).
547
548 8-bit characters of the range 0xA0..0xFF are represented by
549 one-byte sequences which are their 8-bit code.
550
551 The other characters are represented by a sequence of `base
552 leading-code', optional `extended leading-code', and one or two
553 `position-code's. The length of the sequence is determined by the
aa72b389 554 base leading-code. Leading-code takes the range 0x81 through 0x9D,
b73bfc1c
KH
555 whereas extended leading-code and position-code take the range 0xA0
556 through 0xFF. See `charset.h' for more details about leading-code
557 and position-code.
f4dee582 558
4ed46869 559 --- CODE RANGE of Emacs' internal format ---
b73bfc1c
KH
560 character set range
561 ------------- -----
562 ascii 0x00..0x7F
563 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
564 eight-bit-graphic 0xA0..0xFF
aa72b389 565 ELSE 0x81..0x9D + [0xA0..0xFF]+
4ed46869
KH
566 ---------------------------------------------
567
aa72b389
KH
568 As this is the internal character representation, the format is
569 usually not used externally (i.e. in a file or in a data sent to a
570 process). But, it is possible to have a text externally in this
571 format (i.e. by encoding by the coding system `emacs-mule').
572
573 In that case, a sequence of one-byte codes has a slightly different
574 form.
575
ae5145c2 576 Firstly, all characters in eight-bit-control are represented by
aa72b389
KH
577 one-byte sequences which are their 8-bit code.
578
579 Next, character composition data are represented by the byte
580 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
581 where,
582 METHOD is 0xF0 plus one of composition method (enum
583 composition_method),
584
ae5145c2 585 BYTES is 0xA0 plus the byte length of these composition data,
aa72b389 586
ae5145c2 587 CHARS is 0xA0 plus the number of characters composed by these
aa72b389
KH
588 data,
589
8ca3766a 590 COMPONENTs are characters of multibyte form or composition
aa72b389
KH
591 rules encoded by two-byte of ASCII codes.
592
593 In addition, for backward compatibility, the following formats are
594 also recognized as composition data on decoding.
595
596 0x80 MSEQ ...
597 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
598
599 Here,
600 MSEQ is a multibyte form but in these special format:
601 ASCII: 0xA0 ASCII_CODE+0x80,
602 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
603 RULE is a one byte code of the range 0xA0..0xF0 that
604 represents a composition rule.
4ed46869
KH
605 */
606
607enum emacs_code_class_type emacs_code_class[256];
608
4ed46869
KH
609/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
610 Check if a text is encoded in Emacs' internal format. If it is,
d46c5b12 611 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
4ed46869 612
0a28aafb
KH
613static int
614detect_coding_emacs_mule (src, src_end, multibytep)
b73bfc1c 615 unsigned char *src, *src_end;
0a28aafb 616 int multibytep;
4ed46869
KH
617{
618 unsigned char c;
619 int composing = 0;
b73bfc1c
KH
620 /* Dummy for ONE_MORE_BYTE. */
621 struct coding_system dummy_coding;
622 struct coding_system *coding = &dummy_coding;
4ed46869 623
b73bfc1c 624 while (1)
4ed46869 625 {
0a28aafb 626 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
4ed46869
KH
627
628 if (composing)
629 {
630 if (c < 0xA0)
631 composing = 0;
b73bfc1c
KH
632 else if (c == 0xA0)
633 {
0a28aafb 634 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
b73bfc1c
KH
635 c &= 0x7F;
636 }
4ed46869
KH
637 else
638 c -= 0x20;
639 }
640
b73bfc1c 641 if (c < 0x20)
4ed46869 642 {
4ed46869
KH
643 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
644 return 0;
b73bfc1c
KH
645 }
646 else if (c >= 0x80 && c < 0xA0)
647 {
648 if (c == 0x80)
649 /* Old leading code for a composite character. */
650 composing = 1;
651 else
652 {
653 unsigned char *src_base = src - 1;
654 int bytes;
4ed46869 655
b73bfc1c
KH
656 if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
657 bytes))
658 return 0;
659 src = src_base + bytes;
660 }
661 }
662 }
663 label_end_of_loop:
664 return CODING_CATEGORY_MASK_EMACS_MULE;
665}
4ed46869 666
4ed46869 667
aa72b389
KH
/* Record the starting position START and METHOD of one composition.
   A four-slot header is reserved at the current end of
   CODING->cmp_data->data: slot 0 is set to -1, slot 1 to START plus
   cmp_data->char_offset, and slot 3 to METHOD; slot 2 is left
   untouched here.  CODING->cmp_data_start remembers where the header
   begins.  */

#define CODING_ADD_COMPOSITION_START(coding, start, method)     \
  do {                                                          \
    struct composition_data *cmp_data = (coding)->cmp_data;     \
    int *data = cmp_data->data + cmp_data->used;                \
    (coding)->cmp_data_start = cmp_data->used;                  \
    data[0] = -1;                                               \
    data[1] = cmp_data->char_offset + (start);                  \
    data[3] = (int) (method);                                   \
    cmp_data->used += 4;                                        \
  } while (0)
680
/* Record the ending position END of the current composition.  In the
   header that starts at CODING->cmp_data_start, slot 0 receives the
   number of slots used since that start and slot 2 receives END plus
   cmp_data->char_offset.  */

#define CODING_ADD_COMPOSITION_END(coding, end)                         \
  do {                                                                  \
    struct composition_data *cmp_data = (coding)->cmp_data;             \
    int *data = cmp_data->data + (coding)->cmp_data_start;              \
    data[0] = cmp_data->used - (coding)->cmp_data_start;                \
    data[2] = cmp_data->char_offset + (end);                            \
  } while (0)
690
/* Record one COMPONENT (alternate character or composition rule) at
   the end of CODING->cmp_data->data.  When the current composition
   has thereby grown to COMPOSITION_DATA_MAX_BUNCH_LENGTH slots, it is
   closed at CODING->produced_char and CODING->composing is reset to
   COMPOSITION_NO.  */

#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
  do {                                                                  \
    (coding)->cmp_data->data[(coding)->cmp_data->used++] = (component); \
    if ((coding)->cmp_data->used - (coding)->cmp_data_start             \
        == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
      {                                                                 \
        CODING_ADD_COMPOSITION_END (coding, (coding)->produced_char);   \
        (coding)->composing = COMPOSITION_NO;                           \
      }                                                                 \
  } while (0)
aa72b389
KH
703
704
/* Yield the byte that `src' points at and advance `src' by one.  When
   `src' has already reached `src_end', yield -1 instead and leave
   `src' where it is.  */

#define SAFE_ONE_MORE_BYTE() (src < src_end ? *src++ : -1)
709
710
/* Decode a character represented as a component of composition
   sequence of Emacs 20 style at SRC.  Set C to that character, store
   its multibyte form sequence at P, and set P to the end of that
   sequence.  If no valid character is found, set C to -1.

   Two component forms are handled:
     ASCII: 0xA0 followed by ASCII_CODE+0x80,
     other: LEADING_CODE+0x20 followed by the remaining bytes.
   The caller must have `src', `src_end' and `coding' in scope.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                        \
  do {                                                                  \
    int bytes;                                                          \
                                                                        \
    c = SAFE_ONE_MORE_BYTE ();                                          \
    if (c < 0)                                                          \
      break;                                                            \
    if (CHAR_HEAD_P (c))                                                \
      c = -1;                                                           \
    else if (c == 0xA0)                                                 \
      {                                                                 \
        /* ASCII component: the next byte is ASCII_CODE + 0x80, so      \
           subtract 0x80 (not 0xA0) to recover the ASCII code.  */      \
        c = SAFE_ONE_MORE_BYTE ();                                      \
        if (c < 0xA0)                                                   \
          c = -1;                                                       \
        else                                                            \
          {                                                             \
            c -= 0x80;                                                  \
            *p++ = c;                                                   \
          }                                                             \
      }                                                                 \
    else if (BASE_LEADING_CODE_P (c - 0x20))                            \
      {                                                                 \
        unsigned char *p0 = p;                                          \
                                                                        \
        /* Undo the +0x20 applied to the leading code, then copy the    \
           remaining bytes of the sequence unchanged.  */               \
        c -= 0x20;                                                      \
        *p++ = c;                                                       \
        bytes = BYTES_BY_CHAR_HEAD (c);                                 \
        while (--bytes)                                                 \
          {                                                             \
            c = SAFE_ONE_MORE_BYTE ();                                  \
            if (c < 0)                                                  \
              break;                                                    \
            *p++ = c;                                                   \
          }                                                             \
        if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)              \
            || (coding->flags /* We are recovering a file.  */          \
                && p0[0] == LEADING_CODE_8_BIT_CONTROL                  \
                && ! CHAR_HEAD_P (p0[1])))                              \
          c = STRING_CHAR (p0, bytes);                                  \
        else                                                            \
          c = -1;                                                       \
      }                                                                 \
    else                                                                \
      c = -1;                                                           \
  } while (0)
761
762
/* Decode a composition rule represented as a component of an Emacs 20
   style composition sequence at SRC.  The rule is one byte whose
   value minus 0xA0 must lie in 0..80; it is split into GREF
   (quotient by 9) and NREF (remainder) and C is set to
   COMPOSITION_ENCODE_RULE (GREF, NREF).  If no valid rule is found
   (including when the source is exhausted), set C to -1.  The caller
   must have `gref' and `nref' in scope.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
  do {                                                  \
    c = SAFE_ONE_MORE_BYTE () - 0xA0;                   \
    if (c >= 0 && c < 81)                               \
      {                                                 \
        gref = c / 9;                                   \
        nref = c % 9;                                   \
        c = COMPOSITION_ENCODE_RULE (gref, nref);       \
      }                                                 \
    else                                                \
      c = -1;                                           \
  } while (0)
779
780
781/* Decode composition sequence encoded by `emacs-mule' at the source
782 pointed by SRC. SRC_END is the end of source. Store information
783 of the composition in CODING->cmp_data.
784
785 For backward compatibility, decode also a composition sequence of
786 Emacs 20 style. In that case, the composition sequence contains
787 characters that should be extracted into a buffer or string. Store
788 those characters at *DESTINATION in multibyte form.
789
790 If we encounter an invalid byte sequence, return 0.
791 If we encounter an insufficient source or destination, or
792 insufficient space in CODING->cmp_data, return 1.
793 Otherwise, return consumed bytes in the source.
794
795*/
796static INLINE int
797decode_composition_emacs_mule (coding, src, src_end,
798 destination, dst_end, dst_bytes)
799 struct coding_system *coding;
5bdca8af
DN
800 const unsigned char *src, *src_end;
801 unsigned char **destination, *dst_end;
aa72b389
KH
802 int dst_bytes;
803{
804 unsigned char *dst = *destination;
805 int method, data_len, nchars;
5bdca8af 806 const unsigned char *src_base = src++;
8ca3766a 807 /* Store components of composition. */
aa72b389
KH
808 int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
809 int ncomponent;
810 /* Store multibyte form of characters to be composed. This is for
811 Emacs 20 style composition sequence. */
812 unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
813 unsigned char *bufp = buf;
814 int c, i, gref, nref;
815
816 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
817 >= COMPOSITION_DATA_SIZE)
818 {
819 coding->result = CODING_FINISH_INSUFFICIENT_CMP;
820 return -1;
821 }
822
823 ONE_MORE_BYTE (c);
824 if (c - 0xF0 >= COMPOSITION_RELATIVE
825 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
826 {
827 int with_rule;
828
829 method = c - 0xF0;
830 with_rule = (method == COMPOSITION_WITH_RULE
831 || method == COMPOSITION_WITH_RULE_ALTCHARS);
832 ONE_MORE_BYTE (c);
833 data_len = c - 0xA0;
834 if (data_len < 4
835 || src_base + data_len > src_end)
836 return 0;
837 ONE_MORE_BYTE (c);
838 nchars = c - 0xA0;
839 if (c < 1)
840 return 0;
841 for (ncomponent = 0; src < src_base + data_len; ncomponent++)
842 {
b1887814
RS
843 /* If it is longer than this, it can't be valid. */
844 if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
845 return 0;
846
aa72b389
KH
847 if (ncomponent % 2 && with_rule)
848 {
849 ONE_MORE_BYTE (gref);
850 gref -= 32;
851 ONE_MORE_BYTE (nref);
852 nref -= 32;
853 c = COMPOSITION_ENCODE_RULE (gref, nref);
854 }
855 else
856 {
857 int bytes;
fd3ae0b9
KH
858 if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
859 || (coding->flags /* We are recovering a file. */
860 && src[0] == LEADING_CODE_8_BIT_CONTROL
861 && ! CHAR_HEAD_P (src[1])))
aa72b389
KH
862 c = STRING_CHAR (src, bytes);
863 else
864 c = *src, bytes = 1;
865 src += bytes;
866 }
867 component[ncomponent] = c;
868 }
869 }
870 else
871 {
872 /* This may be an old Emacs 20 style format. See the comment at
873 the section 2 of this file. */
874 while (src < src_end && !CHAR_HEAD_P (*src)) src++;
875 if (src == src_end
876 && !(coding->mode & CODING_MODE_LAST_BLOCK))
877 goto label_end_of_loop;
878
879 src_end = src;
880 src = src_base + 1;
881 if (c < 0xC0)
882 {
883 method = COMPOSITION_RELATIVE;
884 for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
885 {
886 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
887 if (c < 0)
888 break;
889 component[ncomponent++] = c;
890 }
891 if (ncomponent < 2)
892 return 0;
893 nchars = ncomponent;
894 }
895 else if (c == 0xFF)
896 {
897 method = COMPOSITION_WITH_RULE;
898 src++;
899 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
900 if (c < 0)
901 return 0;
902 component[0] = c;
903 for (ncomponent = 1;
904 ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
905 {
906 DECODE_EMACS_MULE_COMPOSITION_RULE (c);
907 if (c < 0)
908 break;
909 component[ncomponent++] = c;
910 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
911 if (c < 0)
912 break;
913 component[ncomponent++] = c;
914 }
915 if (ncomponent < 3)
916 return 0;
917 nchars = (ncomponent + 1) / 2;
918 }
919 else
920 return 0;
921 }
922
923 if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
924 {
925 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
926 for (i = 0; i < ncomponent; i++)
927 CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
93dec019 928 CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
aa72b389
KH
929 if (buf < bufp)
930 {
931 unsigned char *p = buf;
932 EMIT_BYTES (p, bufp);
933 *destination += bufp - buf;
934 coding->produced_char += nchars;
935 }
936 return (src - src_base);
937 }
938 label_end_of_loop:
939 return -1;
940}
941
b73bfc1c 942/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 943
b73bfc1c
KH
944static void
945decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
946 struct coding_system *coding;
5bdca8af
DN
947 const unsigned char *source;
948 unsigned char *destination;
b73bfc1c
KH
949 int src_bytes, dst_bytes;
950{
5bdca8af
DN
951 const unsigned char *src = source;
952 const unsigned char *src_end = source + src_bytes;
b73bfc1c
KH
953 unsigned char *dst = destination;
954 unsigned char *dst_end = destination + dst_bytes;
955 /* SRC_BASE remembers the start position in source in each loop.
956 The loop will be exited when there's not enough source code, or
957 when there's not enough destination area to produce a
958 character. */
5bdca8af 959 const unsigned char *src_base;
4ed46869 960
b73bfc1c 961 coding->produced_char = 0;
8a33cf7b 962 while ((src_base = src) < src_end)
b73bfc1c 963 {
5bdca8af
DN
964 unsigned char tmp[MAX_MULTIBYTE_LENGTH];
965 const unsigned char *p;
b73bfc1c 966 int bytes;
ec6d2bb8 967
4af310db
EZ
968 if (*src == '\r')
969 {
2bcdf662 970 int c = *src++;
4af310db 971
4af310db
EZ
972 if (coding->eol_type == CODING_EOL_CR)
973 c = '\n';
974 else if (coding->eol_type == CODING_EOL_CRLF)
975 {
976 ONE_MORE_BYTE (c);
977 if (c != '\n')
978 {
4af310db
EZ
979 src--;
980 c = '\r';
981 }
982 }
983 *dst++ = c;
984 coding->produced_char++;
985 continue;
986 }
987 else if (*src == '\n')
988 {
989 if ((coding->eol_type == CODING_EOL_CR
990 || coding->eol_type == CODING_EOL_CRLF)
991 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
992 {
993 coding->result = CODING_FINISH_INCONSISTENT_EOL;
994 goto label_end_of_loop;
995 }
996 *dst++ = *src++;
997 coding->produced_char++;
998 continue;
999 }
3089d25c 1000 else if (*src == 0x80 && coding->cmp_data)
aa72b389
KH
1001 {
1002 /* Start of composition data. */
1003 int consumed = decode_composition_emacs_mule (coding, src, src_end,
1004 &dst, dst_end,
1005 dst_bytes);
1006 if (consumed < 0)
1007 goto label_end_of_loop;
1008 else if (consumed > 0)
1009 {
1010 src += consumed;
1011 continue;
1012 }
1013 bytes = CHAR_STRING (*src, tmp);
1014 p = tmp;
1015 src++;
1016 }
fd3ae0b9
KH
1017 else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
1018 || (coding->flags /* We are recovering a file. */
1019 && src[0] == LEADING_CODE_8_BIT_CONTROL
1020 && ! CHAR_HEAD_P (src[1])))
b73bfc1c
KH
1021 {
1022 p = src;
1023 src += bytes;
1024 }
1025 else
1026 {
6eced09c
KH
1027 int i, c;
1028
1029 bytes = BYTES_BY_CHAR_HEAD (*src);
b73bfc1c 1030 src++;
6eced09c
KH
1031 for (i = 1; i < bytes; i++)
1032 {
1033 ONE_MORE_BYTE (c);
1034 if (CHAR_HEAD_P (c))
1035 break;
1036 }
1037 if (i < bytes)
1038 {
1039 bytes = CHAR_STRING (*src_base, tmp);
1040 p = tmp;
1041 src = src_base + 1;
1042 }
1043 else
1044 {
1045 p = src_base;
1046 }
b73bfc1c
KH
1047 }
1048 if (dst + bytes >= (dst_bytes ? dst_end : src))
1049 {
1050 coding->result = CODING_FINISH_INSUFFICIENT_DST;
4ed46869
KH
1051 break;
1052 }
b73bfc1c
KH
1053 while (bytes--) *dst++ = *p++;
1054 coding->produced_char++;
4ed46869 1055 }
4af310db 1056 label_end_of_loop:
b73bfc1c
KH
1057 coding->consumed = coding->consumed_char = src_base - source;
1058 coding->produced = dst - destination;
4ed46869
KH
1059}
1060
b73bfc1c 1061
aa72b389
KH
/* Encode the composition data stored at DATA into a special byte
   sequence starting with 0x80 and append it at DST.  The sequence is

        0x80, 0xF0 + METHOD, 0xA0 + BYTES, 0xA0 + CHARS, COMPONENT ...

   where BYTES is the length of the whole sequence and CHARS is
   DATA[2] - DATA[1].  Afterwards advance CODING->cmp_data_start past
   this bunch, moving on to the next CODING->cmp_data block when the
   current one is used up.

   The macro uses the caller's `dst', `dst_end', `dst_bytes' and
   `src', and jumps to the caller's `label_end_of_loop' with
   CODING->result set to CODING_FINISH_INSUFFICIENT_DST when the
   sequence does not fit.  */

#define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
  do {                                                                  \
    unsigned char cmp_buf[1024];                                        \
    unsigned char *cmp_out = cmp_buf;                                   \
    unsigned char *cmp_tail = cmp_buf + 4;                              \
    int cmp_len = data[0];                                              \
    int cmp_idx;                                                        \
                                                                        \
    /* Header; slot 2 is filled in once the length is known.  */        \
    cmp_buf[0] = 0x80;                                                  \
    cmp_buf[1] = 0xF0 + data[3];                /* METHOD */            \
    cmp_buf[3] = 0xA0 + (data[2] - data[1]);    /* COMPOSED-CHARS */    \
    if (data[3] != COMPOSITION_WITH_RULE                                \
        && data[3] != COMPOSITION_WITH_RULE_ALTCHARS)                   \
      {                                                                 \
        /* Components are characters only.  */                          \
        cmp_idx = 4;                                                    \
        while (cmp_idx < cmp_len)                                       \
          {                                                             \
            cmp_tail += CHAR_STRING (data[cmp_idx], cmp_tail);          \
            cmp_idx++;                                                  \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Components alternate: CHAR, then (RULE, CHAR) pairs.  */     \
        cmp_tail += CHAR_STRING (data[4], cmp_tail);                    \
        for (cmp_idx = 5; cmp_idx < cmp_len; cmp_idx += 2)              \
          {                                                             \
            int gref, nref;                                             \
            COMPOSITION_DECODE_RULE (data[cmp_idx], gref, nref);        \
            cmp_tail[0] = 0x20 + gref;                                  \
            cmp_tail[1] = 0x20 + nref;                                  \
            cmp_tail += 2;                                              \
            cmp_tail += CHAR_STRING (data[cmp_idx + 1], cmp_tail);      \
          }                                                             \
      }                                                                 \
    cmp_buf[2] = 0xA0 + (cmp_tail - cmp_buf);   /* COMPONENTS-BYTES */  \
                                                                        \
    if (dst + (cmp_tail - cmp_buf) + 4 > (dst_bytes ? dst_end : src))   \
      {                                                                 \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
        goto label_end_of_loop;                                         \
      }                                                                 \
    for (; cmp_out < cmp_tail; cmp_out++)                               \
      *dst++ = *cmp_out;                                                \
    coding->cmp_data_start += data[0];                                  \
    if (coding->cmp_data_start == coding->cmp_data->used                \
        && coding->cmp_data->next)                                      \
      {                                                                 \
        coding->cmp_data = coding->cmp_data->next;                      \
        coding->cmp_data_start = 0;                                     \
      }                                                                 \
  } while (0)
93dec019 1111
aa72b389 1112
a4244313 1113static void encode_eol P_ ((struct coding_system *, const unsigned char *,
aa72b389
KH
1114 unsigned char *, int, int));
1115
1116static void
1117encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1118 struct coding_system *coding;
5bdca8af
DN
1119 const unsigned char *source;
1120 unsigned char *destination;
aa72b389
KH
1121 int src_bytes, dst_bytes;
1122{
5bdca8af
DN
1123 const unsigned char *src = source;
1124 const unsigned char *src_end = source + src_bytes;
aa72b389
KH
1125 unsigned char *dst = destination;
1126 unsigned char *dst_end = destination + dst_bytes;
5bdca8af 1127 const unsigned char *src_base;
aa72b389
KH
1128 int c;
1129 int char_offset;
1130 int *data;
1131
1132 Lisp_Object translation_table;
1133
1134 translation_table = Qnil;
1135
1136 /* Optimization for the case that there's no composition. */
1137 if (!coding->cmp_data || coding->cmp_data->used == 0)
1138 {
1139 encode_eol (coding, source, destination, src_bytes, dst_bytes);
1140 return;
1141 }
1142
1143 char_offset = coding->cmp_data->char_offset;
1144 data = coding->cmp_data->data + coding->cmp_data_start;
1145 while (1)
1146 {
1147 src_base = src;
1148
1149 /* If SRC starts a composition, encode the information about the
1150 composition in advance. */
1151 if (coding->cmp_data_start < coding->cmp_data->used
1152 && char_offset + coding->consumed_char == data[1])
1153 {
1154 ENCODE_COMPOSITION_EMACS_MULE (coding, data);
1155 char_offset = coding->cmp_data->char_offset;
1156 data = coding->cmp_data->data + coding->cmp_data_start;
1157 }
1158
1159 ONE_MORE_CHAR (c);
1160 if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1161 || coding->eol_type == CODING_EOL_CR))
1162 {
1163 if (coding->eol_type == CODING_EOL_CRLF)
1164 EMIT_TWO_BYTES ('\r', c);
1165 else
1166 EMIT_ONE_BYTE ('\r');
1167 }
1168 else if (SINGLE_BYTE_CHAR_P (c))
fd3ae0b9
KH
1169 {
1170 if (coding->flags && ! ASCII_BYTE_P (c))
1171 {
1172 /* As we are auto saving, retain the multibyte form for
1173 8-bit chars. */
1174 unsigned char buf[MAX_MULTIBYTE_LENGTH];
1175 int bytes = CHAR_STRING (c, buf);
1176
1177 if (bytes == 1)
1178 EMIT_ONE_BYTE (buf[0]);
1179 else
1180 EMIT_TWO_BYTES (buf[0], buf[1]);
1181 }
1182 else
1183 EMIT_ONE_BYTE (c);
1184 }
aa72b389
KH
1185 else
1186 EMIT_BYTES (src_base, src);
1187 coding->consumed_char++;
1188 }
1189 label_end_of_loop:
1190 coding->consumed = src_base - source;
1191 coding->produced = coding->produced_char = dst - destination;
1192 return;
1193}
b73bfc1c 1194
4ed46869
KH
1195\f
1196/*** 3. ISO2022 handlers ***/
1197
1198/* The following note describes the coding system ISO2022 briefly.
39787efd 1199 Since the intention of this note is to help understand the
cfb43547 1200 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 1201 SIMPLIFIED. For thorough understanding, please refer to the
cfb43547
DL
1202 original document of ISO2022. This is equivalent to the standard
1203 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
1204
1205 ISO2022 provides many mechanisms to encode several character sets
cfb43547 1206 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
1207 is encoded using bytes less than 128. This may make the encoded
1208 text a little bit longer, but the text passes more easily through
cfb43547 1209 several types of gateway, some of which strip off the MSB (Most
8ca3766a 1210 Significant Bit).
b73bfc1c 1211
cfb43547
DL
1212 There are two kinds of character sets: control character sets and
1213 graphic character sets. The former contain control characters such
4ed46869 1214 as `newline' and `escape' to provide control functions (control
39787efd 1215 functions are also provided by escape sequences). The latter
cfb43547 1216 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
1217 two control character sets and many graphic character sets.
1218
1219 Graphic character sets are classified into one of the following
39787efd
KH
1220 four classes, according to the number of bytes (DIMENSION) and
1221 number of characters in one dimension (CHARS) of the set:
1222 - DIMENSION1_CHARS94
1223 - DIMENSION1_CHARS96
1224 - DIMENSION2_CHARS94
1225 - DIMENSION2_CHARS96
1226
1227 In addition, each character set is assigned an identification tag,
cfb43547 1228 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
1229 hereafter). The <F> of each character set is decided by ECMA(*)
1230 when it is registered in ISO. The code range of <F> is 0x30..0x7F
1231 (0x30..0x3F are for private use only).
4ed46869
KH
1232
1233 Note (*): ECMA = European Computer Manufacturers Association
1234
cfb43547 1235 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
1236 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1237 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1238 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1239 o DIMENSION2_CHARS96 -- none for the moment
1240
39787efd 1241 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
1242 C0 [0x00..0x1F] -- control character plane 0
1243 GL [0x20..0x7F] -- graphic character plane 0
1244 C1 [0x80..0x9F] -- control character plane 1
1245 GR [0xA0..0xFF] -- graphic character plane 1
1246
1247 A control character set is directly designated and invoked to C0 or
39787efd
KH
1248 C1 by an escape sequence. The most common case is that:
1249 - ISO646's control character set is designated/invoked to C0, and
1250 - ISO6429's control character set is designated/invoked to C1,
1251 and usually these designations/invocations are omitted in encoded
1252 text. In a 7-bit environment, only C0 can be used, and a control
1253 character for C1 is encoded by an appropriate escape sequence to
1254 fit into the environment. All control characters for C1 are
1255 defined to have corresponding escape sequences.
4ed46869
KH
1256
1257 A graphic character set is at first designated to one of four
1258 graphic registers (G0 through G3), then these graphic registers are
1259 invoked to GL or GR. These designations and invocations can be
1260 done independently. The most common case is that G0 is invoked to
39787efd
KH
1261 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
1262 these invocations and designations are omitted in encoded text.
1263 In a 7-bit environment, only GL can be used.
4ed46869 1264
39787efd
KH
1265 When a graphic character set of CHARS94 is invoked to GL, codes
1266 0x20 and 0x7F of the GL area work as control characters SPACE and
1267 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1268 be used.
4ed46869
KH
1269
1270 There are two ways of invocation: locking-shift and single-shift.
1271 With locking-shift, the invocation lasts until the next different
39787efd
KH
1272 invocation, whereas with single-shift, the invocation affects the
1273 following character only and doesn't affect the locking-shift
1274 state. Invocations are done by the following control characters or
1275 escape sequences:
4ed46869
KH
1276
1277 ----------------------------------------------------------------------
39787efd 1278 abbrev function cntrl escape seq description
4ed46869 1279 ----------------------------------------------------------------------
39787efd
KH
1280 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
1281 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
1282 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
1283 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
1284 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
1285 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
1286 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
1287 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
1288 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 1289 ----------------------------------------------------------------------
39787efd
KH
1290 (*) These are not used by any known coding system.
1291
1292 Control characters for these functions are defined by macros
1293 ISO_CODE_XXX in `coding.h'.
4ed46869 1294
39787efd 1295 Designations are done by the following escape sequences:
4ed46869
KH
1296 ----------------------------------------------------------------------
1297 escape sequence description
1298 ----------------------------------------------------------------------
1299 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
1300 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
1301 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
1302 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
1303 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
1304 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
1305 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
1306 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
1307 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
1308 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
1309 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
1310 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
1311 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
1312 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
1313 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
1314 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
1315 ----------------------------------------------------------------------
1316
1317 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 1318 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
1319
1320 Note (*): Although these designations are not allowed in ISO2022,
1321 Emacs accepts them on decoding, and produces them on encoding
39787efd 1322 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
1323 7-bit environment, non-locking-shift, and non-single-shift.
1324
1325 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
39787efd 1326 '(' can be omitted. We refer to this as "short-form" hereafter.
4ed46869 1327
cfb43547 1328 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
1329 same multilingual text in ISO2022. Actually, there exist many
1330 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
1331 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1332 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
1333 localized platforms), and all of these are variants of ISO2022.
1334
1335 In addition to the above, Emacs handles two more kinds of escape
1336 sequences: ISO6429's direction specification and Emacs' private
1337 sequence for specifying character composition.
1338
39787efd 1339 ISO6429's direction specification takes the following form:
4ed46869
KH
1340 o CSI ']' -- end of the current direction
1341 o CSI '0' ']' -- end of the current direction
1342 o CSI '1' ']' -- start of left-to-right text
1343 o CSI '2' ']' -- start of right-to-left text
1344 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
1345 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1346
1347 Character composition specification takes the following form:
ec6d2bb8
KH
1348 o ESC '0' -- start relative composition
1349 o ESC '1' -- end composition
1350 o ESC '2' -- start rule-base composition (*)
1351 o ESC '3' -- start relative composition with alternate chars (**)
1352 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 1353 Since these are not standard escape sequences of any ISO standard,
cfb43547 1354 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 1355
cfb43547 1356 (*) This form is used only in Emacs 20.5 and older versions,
b73bfc1c 1357 but the newer versions can safely decode it.
cfb43547 1358 (**) This form is used only in Emacs 21.1 and newer versions,
b73bfc1c 1359 and the older versions can't decode it.
ec6d2bb8 1360
cfb43547 1361 Here's a list of example usages of these composition escape
b73bfc1c 1362 sequences (categorized by `enum composition_method').
ec6d2bb8 1363
b73bfc1c 1364 COMPOSITION_RELATIVE:
ec6d2bb8 1365 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 1366 COMPOSITION_WITH_RULE:
ec6d2bb8 1367 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 1368 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 1369 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 1370 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 1371 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
1372
1373enum iso_code_class_type iso_code_class[256];
1374
05e6f5dc
KH
1375#define CHARSET_OK(idx, charset, c) \
1376 (coding_system_table[idx] \
1377 && (charset == CHARSET_ASCII \
6b89e3aa 1378 || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
05e6f5dc
KH
1379 CODING_SAFE_CHAR_P (safe_chars, c))) \
1380 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1381 charset) \
1382 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
d46c5b12
KH
1383
1384#define SHIFT_OUT_OK(idx) \
1385 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1386
b6871cc7
KH
1387#define COMPOSITION_OK(idx) \
1388 (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1389
4ed46869 1390/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
cfb43547 1391 Check if a text is encoded in ISO2022. If it is, return an
4ed46869
KH
1392 integer in which appropriate flag bits any of:
1393 CODING_CATEGORY_MASK_ISO_7
d46c5b12 1394 CODING_CATEGORY_MASK_ISO_7_TIGHT
4ed46869
KH
1395 CODING_CATEGORY_MASK_ISO_8_1
1396 CODING_CATEGORY_MASK_ISO_8_2
7717c392
KH
1397 CODING_CATEGORY_MASK_ISO_7_ELSE
1398 CODING_CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
1399 are set. If a code which should never appear in ISO2022 is found,
1400 returns 0. */
1401
0a28aafb
KH
1402static int
1403detect_coding_iso2022 (src, src_end, multibytep)
4ed46869 1404 unsigned char *src, *src_end;
0a28aafb 1405 int multibytep;
4ed46869 1406{
d46c5b12
KH
1407 int mask = CODING_CATEGORY_MASK_ISO;
1408 int mask_found = 0;
f46869e4 1409 int reg[4], shift_out = 0, single_shifting = 0;
da55a2b7 1410 int c, c1, charset;
b73bfc1c
KH
1411 /* Dummy for ONE_MORE_BYTE. */
1412 struct coding_system dummy_coding;
1413 struct coding_system *coding = &dummy_coding;
05e6f5dc 1414 Lisp_Object safe_chars;
3f003981 1415
d46c5b12 1416 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
3f003981 1417 while (mask && src < src_end)
4ed46869 1418 {
0a28aafb 1419 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
8d239c89 1420 retry:
4ed46869
KH
1421 switch (c)
1422 {
1423 case ISO_CODE_ESC:
74383408
KH
1424 if (inhibit_iso_escape_detection)
1425 break;
f46869e4 1426 single_shifting = 0;
0a28aafb 1427 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
d46c5b12 1428 if (c >= '(' && c <= '/')
4ed46869 1429 {
bf9cdd4e 1430 /* Designation sequence for a charset of dimension 1. */
0a28aafb 1431 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
d46c5b12
KH
1432 if (c1 < ' ' || c1 >= 0x80
1433 || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1434 /* Invalid designation sequence. Just ignore. */
1435 break;
1436 reg[(c - '(') % 4] = charset;
bf9cdd4e
KH
1437 }
1438 else if (c == '$')
1439 {
1440 /* Designation sequence for a charset of dimension 2. */
0a28aafb 1441 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
bf9cdd4e
KH
1442 if (c >= '@' && c <= 'B')
1443 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
d46c5b12 1444 reg[0] = charset = iso_charset_table[1][0][c];
bf9cdd4e 1445 else if (c >= '(' && c <= '/')
bcf26d6a 1446 {
0a28aafb 1447 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
d46c5b12
KH
1448 if (c1 < ' ' || c1 >= 0x80
1449 || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1450 /* Invalid designation sequence. Just ignore. */
1451 break;
1452 reg[(c - '(') % 4] = charset;
bcf26d6a 1453 }
bf9cdd4e 1454 else
d46c5b12
KH
1455 /* Invalid designation sequence. Just ignore. */
1456 break;
1457 }
ae9ff118 1458 else if (c == 'N' || c == 'O')
d46c5b12 1459 {
ae9ff118
KH
1460 /* ESC <Fe> for SS2 or SS3. */
1461 mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 1462 break;
4ed46869 1463 }
ec6d2bb8
KH
1464 else if (c >= '0' && c <= '4')
1465 {
1466 /* ESC <Fp> for start/end composition. */
b6871cc7
KH
1467 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1468 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1469 else
1470 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1471 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1472 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1473 else
1474 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1475 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1476 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1477 else
1478 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1479 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1480 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1481 else
1482 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1483 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1484 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1485 else
1486 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1487 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1488 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1489 else
1490 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
ec6d2bb8
KH
1491 break;
1492 }
bf9cdd4e 1493 else
d46c5b12
KH
1494 /* Invalid escape sequence. Just ignore. */
1495 break;
1496
1497 /* We found a valid designation sequence for CHARSET. */
1498 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
05e6f5dc
KH
1499 c = MAKE_CHAR (charset, 0, 0);
1500 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
d46c5b12
KH
1501 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1502 else
1503 mask &= ~CODING_CATEGORY_MASK_ISO_7;
05e6f5dc 1504 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
d46c5b12
KH
1505 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1506 else
1507 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
05e6f5dc 1508 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
ae9ff118
KH
1509 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1510 else
d46c5b12 1511 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
05e6f5dc 1512 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
ae9ff118
KH
1513 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1514 else
d46c5b12 1515 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
1516 break;
1517
4ed46869 1518 case ISO_CODE_SO:
74383408
KH
1519 if (inhibit_iso_escape_detection)
1520 break;
f46869e4 1521 single_shifting = 0;
d46c5b12
KH
1522 if (shift_out == 0
1523 && (reg[1] >= 0
1524 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1525 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1526 {
1527 /* Locking shift out. */
1528 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1529 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1530 }
e0e989f6 1531 break;
93dec019 1532
d46c5b12 1533 case ISO_CODE_SI:
74383408
KH
1534 if (inhibit_iso_escape_detection)
1535 break;
f46869e4 1536 single_shifting = 0;
d46c5b12
KH
1537 if (shift_out == 1)
1538 {
1539 /* Locking shift in. */
1540 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1541 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1542 }
1543 break;
1544
4ed46869 1545 case ISO_CODE_CSI:
f46869e4 1546 single_shifting = 0;
4ed46869
KH
1547 case ISO_CODE_SS2:
1548 case ISO_CODE_SS3:
3f003981
KH
1549 {
1550 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1551
74383408
KH
1552 if (inhibit_iso_escape_detection)
1553 break;
70c22245
KH
1554 if (c != ISO_CODE_CSI)
1555 {
d46c5b12
KH
1556 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1557 & CODING_FLAG_ISO_SINGLE_SHIFT)
70c22245 1558 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
1559 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1560 & CODING_FLAG_ISO_SINGLE_SHIFT)
70c22245 1561 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
f46869e4 1562 single_shifting = 1;
70c22245 1563 }
3f003981
KH
1564 if (VECTORP (Vlatin_extra_code_table)
1565 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1566 {
d46c5b12
KH
1567 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1568 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981 1569 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
1570 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1571 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981
KH
1572 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1573 }
1574 mask &= newmask;
d46c5b12 1575 mask_found |= newmask;
3f003981
KH
1576 }
1577 break;
4ed46869
KH
1578
1579 default:
1580 if (c < 0x80)
f46869e4
KH
1581 {
1582 single_shifting = 0;
1583 break;
1584 }
4ed46869 1585 else if (c < 0xA0)
c4825358 1586 {
f46869e4 1587 single_shifting = 0;
3f003981
KH
1588 if (VECTORP (Vlatin_extra_code_table)
1589 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 1590 {
3f003981
KH
1591 int newmask = 0;
1592
d46c5b12
KH
1593 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1594 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981 1595 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
1596 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1597 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981
KH
1598 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1599 mask &= newmask;
d46c5b12 1600 mask_found |= newmask;
c4825358 1601 }
3f003981
KH
1602 else
1603 return 0;
c4825358 1604 }
4ed46869
KH
1605 else
1606 {
d46c5b12 1607 mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
7717c392 1608 | CODING_CATEGORY_MASK_ISO_7_ELSE);
d46c5b12 1609 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
f46869e4
KH
1610 /* Check the length of succeeding codes of the range
1611 0xA0..0FF. If the byte length is odd, we exclude
1612 CODING_CATEGORY_MASK_ISO_8_2. We can check this only
1613 when we are not single shifting. */
b73bfc1c
KH
1614 if (!single_shifting
1615 && mask & CODING_CATEGORY_MASK_ISO_8_2)
f46869e4 1616 {
e17de821 1617 int i = 1;
8d239c89
KH
1618
1619 c = -1;
b73bfc1c
KH
1620 while (src < src_end)
1621 {
0a28aafb 1622 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
b73bfc1c
KH
1623 if (c < 0xA0)
1624 break;
1625 i++;
1626 }
1627
1628 if (i & 1 && src < src_end)
f46869e4
KH
1629 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1630 else
1631 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
8d239c89
KH
1632 if (c >= 0)
1633 /* This means that we have read one extra byte. */
1634 goto retry;
f46869e4 1635 }
4ed46869
KH
1636 }
1637 break;
1638 }
1639 }
b73bfc1c 1640 label_end_of_loop:
d46c5b12 1641 return (mask & mask_found);
4ed46869
KH
1642}
1643
b73bfc1c
KH
/* Return the character code made from charset CHARSET with 1st
   position code C1 and 2nd position code C2.  When the caller's
   variable `translation_table' is non-nil, the code is obtained
   through translate_char with that table instead of directly from
   MAKE_CHAR.  */

#define DECODE_ISO_CHARACTER(charset, c1, c2)                           \
  (! NILP (translation_table)                                           \
   ? translate_char (translation_table, -1, charset, c1, c2)            \
   : MAKE_CHAR (charset, c1, c2))
4ed46869
KH
1653
/* Record in CODING that the charset identified by DIMENSION, CHARS
   and FINAL_CHAR is designated to graphic register REG.

   The macro uses the caller's `coding' and `safe_chars' and jumps to
   the caller's `label_invalid_code' when
     - FINAL_CHAR is outside '0'..127,
     - no charset is found, or the charset is neither requested for
       REG nor safe (REG is then remembered as the last invalid
       designation register), or
     - ASCII is designated to register 0 right after an invalid
       designation to register 0.  */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
  do {                                                                     \
    int desig_charset, desig_char;                                         \
                                                                           \
    if (final_char < '0' || final_char >= 128)                             \
      goto label_invalid_code;                                             \
    desig_charset = ISO_CHARSET_TABLE (make_number (dimension),            \
                                       make_number (chars),                \
                                       make_number (final_char));          \
    desig_char = MAKE_CHAR (desig_charset, 0, 0);                          \
    if (desig_charset < 0                                                  \
        || ! (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding,               \
                                                     desig_charset) == reg \
              || CODING_SAFE_CHAR_P (safe_chars, desig_char)))             \
      {                                                                    \
        coding->spec.iso2022.last_invalid_designation_register = reg;      \
        goto label_invalid_code;                                           \
      }                                                                    \
    if (coding->spec.iso2022.last_invalid_designation_register == 0        \
        && reg == 0                                                        \
        && desig_charset == CHARSET_ASCII)                                 \
      {                                                                    \
        /* We should insert this designation sequence as is so             \
           that it is surely written back to a file.  */                   \
        coding->spec.iso2022.last_invalid_designation_register = -1;       \
        goto label_invalid_code;                                           \
      }                                                                    \
    coding->spec.iso2022.last_invalid_designation_register = -1;           \
    /* Under CODING_MODE_DIRECTION use the reverse charset, if any.  */    \
    if ((coding->mode & CODING_MODE_DIRECTION)                             \
        && CHARSET_REVERSE_CHARSET (desig_charset) >= 0)                   \
      desig_charset = CHARSET_REVERSE_CHARSET (desig_charset);             \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = desig_charset;             \
  } while (0)
1690
ec6d2bb8
KH
1691/* Allocate a memory block for storing information about compositions.
1692 The block is chained to the already allocated blocks. */
d46c5b12 1693
33fb63eb 1694void
ec6d2bb8 1695coding_allocate_composition_data (coding, char_offset)
d46c5b12 1696 struct coding_system *coding;
ec6d2bb8 1697 int char_offset;
d46c5b12 1698{
ec6d2bb8
KH
1699 struct composition_data *cmp_data
1700 = (struct composition_data *) xmalloc (sizeof *cmp_data);
1701
1702 cmp_data->char_offset = char_offset;
1703 cmp_data->used = 0;
1704 cmp_data->prev = coding->cmp_data;
1705 cmp_data->next = NULL;
1706 if (coding->cmp_data)
1707 coding->cmp_data->next = cmp_data;
1708 coding->cmp_data = cmp_data;
1709 coding->cmp_data_start = 0;
4307d534 1710 coding->composing = COMPOSITION_NO;
ec6d2bb8 1711}
d46c5b12 1712
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC.  The macro uses the caller's
   `coding' and `dst', and may jump to the caller's
   `label_end_of_loop'.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (coding->composing == COMPOSITION_DISABLED)                         \
      {                                                                    \
        /* Composition is disabled: emit the escape sequence itself.  */   \
        dst[0] = ISO_CODE_ESC;                                             \
        dst[1] = c1 & 0x7f;                                                \
        dst += 2;                                                          \
        coding->produced_char += 2;                                        \
      }                                                                    \
    else if (COMPOSING_P (coding))                                         \
      {                                                                    \
        /* We are already handling a composition.  If the method is        \
           one of the following two, the codes following the current       \
           escape sequence are actual characters stored in a buffer.  */   \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
            || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
          {                                                                \
            coding->composing = COMPOSITION_RELATIVE;                      \
            coding->composition_rule_follows = 0;                          \
          }                                                                \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* This is surely the start of a composition.  We must be sure     \
           that coding->cmp_data has enough space to store the             \
           information about the composition.  If not, terminate the       \
           current decoding loop, allocate one more memory block for       \
           coding->cmp_data in the caller, then start the decoding         \
           loop again.  We can't allocate memory here directly because     \
           it may cause buffer/string relocation.  */                      \
        if (!coding->cmp_data                                              \
            || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
                >= COMPOSITION_DATA_SIZE))                                 \
          {                                                                \
            coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
            goto label_end_of_loop;                                        \
          }                                                                \
        if (c1 == '0')                                                     \
          coding->composing = COMPOSITION_RELATIVE;                        \
        else if (c1 == '2')                                                \
          coding->composing = COMPOSITION_WITH_RULE;                       \
        else if (c1 == '3')                                                \
          coding->composing = COMPOSITION_WITH_ALTCHARS;                   \
        else                                                               \
          coding->composing = COMPOSITION_WITH_RULE_ALTCHARS;              \
        CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
                                      coding->composing);                  \
        coding->composition_rule_follows = 0;                              \
      }                                                                    \
  } while (0)
1765
/* Handle composition end sequence ESC 1.  C1 is the byte following
   ESC.  While a composition is in progress, record its end in
   coding->cmp_data and leave composing state; otherwise copy the two
   bytes of the escape sequence to DST unchanged.  */

#define DECODE_COMPOSITION_END(c1)					\
  do {									\
    if (COMPOSING_P (coding))						\
      {									\
        CODING_ADD_COMPOSITION_END (coding, coding->produced_char);	\
        coding->composing = COMPOSITION_NO;				\
      }									\
    else								\
      {									\
        /* Not composing: pass the sequence through as is.  */		\
        *dst++ = ISO_CODE_ESC;						\
        *dst++ = c1;							\
        coding->produced_char += 2;					\
      }									\
  } while (0)
1782
/* Decode a composition rule from the byte C1 (and maybe one more byte
   from SRC) and store one encoded composition rule in
   coding->cmp_data.

   C1 must be an lvalue; it is decremented by 32 in place.  The macro
   also uses the variable `c2' of the expanding function when the rule
   is in the two-byte format.  A value of C1 outside both formats
   stores the rule 0.  */

#define DECODE_COMPOSITION_RULE(c1)					\
  do {									\
    int rule = 0;							\
    (c1) -= 32;								\
    if (c1 >= 81)							\
      {									\
        if (c1 < 93)		/* new format (after ver.21) */		\
          {								\
            ONE_MORE_BYTE (c2);						\
            rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);		\
          }								\
      }									\
    else			/* old format (before ver.21) */	\
      {									\
        int gref = (c1) / 9;						\
        int nref = (c1) % 9;						\
        /* In the old format the value 4 stands for 10.  */		\
        gref = (gref == 4 ? 10 : gref);					\
        nref = (nref == 4 ? 10 : nref);					\
        rule = COMPOSITION_ENCODE_RULE (gref, nref);			\
      }									\
    CODING_ADD_COMPOSITION_COMPONENT (coding, rule);			\
    coding->composition_rule_follows = 0;				\
  } while (0)
88993dfd 1807
d46c5b12 1808
4ed46869
KH
1809/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1810
b73bfc1c 1811static void
d46c5b12 1812decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
4ed46869 1813 struct coding_system *coding;
5bdca8af
DN
1814 const unsigned char *source;
1815 unsigned char *destination;
4ed46869 1816 int src_bytes, dst_bytes;
4ed46869 1817{
5bdca8af
DN
1818 const unsigned char *src = source;
1819 const unsigned char *src_end = source + src_bytes;
4ed46869
KH
1820 unsigned char *dst = destination;
1821 unsigned char *dst_end = destination + dst_bytes;
4ed46869
KH
1822 /* Charsets invoked to graphic plane 0 and 1 respectively. */
1823 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1824 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
b73bfc1c
KH
1825 /* SRC_BASE remembers the start position in source in each loop.
1826 The loop will be exited when there's not enough source code
1827 (within macro ONE_MORE_BYTE), or when there's not enough
1828 destination area to produce a character (within macro
1829 EMIT_CHAR). */
5bdca8af 1830 const unsigned char *src_base;
b73bfc1c
KH
1831 int c, charset;
1832 Lisp_Object translation_table;
05e6f5dc
KH
1833 Lisp_Object safe_chars;
1834
6b89e3aa 1835 safe_chars = coding_safe_chars (coding->symbol);
bdd9fb48 1836
b73bfc1c
KH
1837 if (NILP (Venable_character_translation))
1838 translation_table = Qnil;
1839 else
1840 {
1841 translation_table = coding->translation_table_for_decode;
1842 if (NILP (translation_table))
1843 translation_table = Vstandard_translation_table_for_decode;
1844 }
4ed46869 1845
b73bfc1c
KH
1846 coding->result = CODING_FINISH_NORMAL;
1847
1848 while (1)
4ed46869 1849 {
85478bc6 1850 int c1, c2 = 0;
b73bfc1c
KH
1851
1852 src_base = src;
1853 ONE_MORE_BYTE (c1);
4ed46869 1854
ec6d2bb8 1855 /* We produce no character or one character. */
4ed46869
KH
1856 switch (iso_code_class [c1])
1857 {
1858 case ISO_0x20_or_0x7F:
ec6d2bb8
KH
1859 if (COMPOSING_P (coding) && coding->composition_rule_follows)
1860 {
1861 DECODE_COMPOSITION_RULE (c1);
b73bfc1c 1862 continue;
ec6d2bb8
KH
1863 }
1864 if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
4ed46869
KH
1865 {
1866 /* This is SPACE or DEL. */
b73bfc1c 1867 charset = CHARSET_ASCII;
4ed46869
KH
1868 break;
1869 }
1870 /* This is a graphic character, we fall down ... */
1871
1872 case ISO_graphic_plane_0:
ec6d2bb8 1873 if (COMPOSING_P (coding) && coding->composition_rule_follows)
b73bfc1c
KH
1874 {
1875 DECODE_COMPOSITION_RULE (c1);
1876 continue;
1877 }
1878 charset = charset0;
4ed46869
KH
1879 break;
1880
1881 case ISO_0xA0_or_0xFF:
d46c5b12
KH
1882 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1883 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
fb88bf2d 1884 goto label_invalid_code;
4ed46869
KH
1885 /* This is a graphic character, we fall down ... */
1886
1887 case ISO_graphic_plane_1:
b73bfc1c 1888 if (charset1 < 0)
fb88bf2d 1889 goto label_invalid_code;
b73bfc1c 1890 charset = charset1;
4ed46869
KH
1891 break;
1892
b73bfc1c 1893 case ISO_control_0:
ec6d2bb8
KH
1894 if (COMPOSING_P (coding))
1895 DECODE_COMPOSITION_END ('1');
1896
4ed46869
KH
1897 /* All ISO2022 control characters in this class have the
1898 same representation in Emacs internal format. */
d46c5b12
KH
1899 if (c1 == '\n'
1900 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1901 && (coding->eol_type == CODING_EOL_CR
1902 || coding->eol_type == CODING_EOL_CRLF))
1903 {
b73bfc1c
KH
1904 coding->result = CODING_FINISH_INCONSISTENT_EOL;
1905 goto label_end_of_loop;
d46c5b12 1906 }
b73bfc1c 1907 charset = CHARSET_ASCII;
4ed46869
KH
1908 break;
1909
b73bfc1c
KH
1910 case ISO_control_1:
1911 if (COMPOSING_P (coding))
1912 DECODE_COMPOSITION_END ('1');
1913 goto label_invalid_code;
1914
4ed46869 1915 case ISO_carriage_return:
ec6d2bb8
KH
1916 if (COMPOSING_P (coding))
1917 DECODE_COMPOSITION_END ('1');
1918
4ed46869 1919 if (coding->eol_type == CODING_EOL_CR)
b73bfc1c 1920 c1 = '\n';
4ed46869
KH
1921 else if (coding->eol_type == CODING_EOL_CRLF)
1922 {
1923 ONE_MORE_BYTE (c1);
b73bfc1c 1924 if (c1 != ISO_CODE_LF)
4ed46869
KH
1925 {
1926 src--;
b73bfc1c 1927 c1 = '\r';
4ed46869
KH
1928 }
1929 }
b73bfc1c 1930 charset = CHARSET_ASCII;
4ed46869
KH
1931 break;
1932
1933 case ISO_shift_out:
d46c5b12
KH
1934 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1935 || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1936 goto label_invalid_code;
4ed46869
KH
1937 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1938 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
b73bfc1c 1939 continue;
4ed46869
KH
1940
1941 case ISO_shift_in:
d46c5b12
KH
1942 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1943 goto label_invalid_code;
4ed46869
KH
1944 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1945 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
b73bfc1c 1946 continue;
4ed46869
KH
1947
1948 case ISO_single_shift_2_7:
1949 case ISO_single_shift_2:
d46c5b12
KH
1950 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1951 goto label_invalid_code;
4ed46869
KH
1952 /* SS2 is handled as an escape sequence of ESC 'N' */
1953 c1 = 'N';
1954 goto label_escape_sequence;
1955
1956 case ISO_single_shift_3:
d46c5b12
KH
1957 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1958 goto label_invalid_code;
4ed46869
KH
1959 /* SS2 is handled as an escape sequence of ESC 'O' */
1960 c1 = 'O';
1961 goto label_escape_sequence;
1962
1963 case ISO_control_sequence_introducer:
1964 /* CSI is handled as an escape sequence of ESC '[' ... */
1965 c1 = '[';
1966 goto label_escape_sequence;
1967
1968 case ISO_escape:
1969 ONE_MORE_BYTE (c1);
1970 label_escape_sequence:
1971 /* Escape sequences handled by Emacs are invocation,
1972 designation, direction specification, and character
1973 composition specification. */
1974 switch (c1)
1975 {
1976 case '&': /* revision of following character set */
1977 ONE_MORE_BYTE (c1);
1978 if (!(c1 >= '@' && c1 <= '~'))
d46c5b12 1979 goto label_invalid_code;
4ed46869
KH
1980 ONE_MORE_BYTE (c1);
1981 if (c1 != ISO_CODE_ESC)
d46c5b12 1982 goto label_invalid_code;
4ed46869
KH
1983 ONE_MORE_BYTE (c1);
1984 goto label_escape_sequence;
1985
1986 case '$': /* designation of 2-byte character set */
d46c5b12
KH
1987 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1988 goto label_invalid_code;
4ed46869
KH
1989 ONE_MORE_BYTE (c1);
1990 if (c1 >= '@' && c1 <= 'B')
1991 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 1992 or JISX0208.1980 */
4ed46869
KH
1993 DECODE_DESIGNATION (0, 2, 94, c1);
1994 }
1995 else if (c1 >= 0x28 && c1 <= 0x2B)
1996 { /* designation of DIMENSION2_CHARS94 character set */
1997 ONE_MORE_BYTE (c2);
1998 DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1999 }
2000 else if (c1 >= 0x2C && c1 <= 0x2F)
2001 { /* designation of DIMENSION2_CHARS96 character set */
2002 ONE_MORE_BYTE (c2);
2003 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
2004 }
2005 else
d46c5b12 2006 goto label_invalid_code;
b73bfc1c
KH
2007 /* We must update these variables now. */
2008 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2009 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2010 continue;
4ed46869
KH
2011
2012 case 'n': /* invocation of locking-shift-2 */
d46c5b12
KH
2013 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
2014 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2015 goto label_invalid_code;
4ed46869 2016 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
e0e989f6 2017 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
b73bfc1c 2018 continue;
4ed46869
KH
2019
2020 case 'o': /* invocation of locking-shift-3 */
d46c5b12
KH
2021 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
2022 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2023 goto label_invalid_code;
4ed46869 2024 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
e0e989f6 2025 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
b73bfc1c 2026 continue;
4ed46869
KH
2027
2028 case 'N': /* invocation of single-shift-2 */
d46c5b12
KH
2029 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2030 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2031 goto label_invalid_code;
4ed46869 2032 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
b73bfc1c 2033 ONE_MORE_BYTE (c1);
e7046a18
KH
2034 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2035 goto label_invalid_code;
4ed46869
KH
2036 break;
2037
2038 case 'O': /* invocation of single-shift-3 */
d46c5b12
KH
2039 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2040 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2041 goto label_invalid_code;
4ed46869 2042 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
b73bfc1c 2043 ONE_MORE_BYTE (c1);
e7046a18
KH
2044 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2045 goto label_invalid_code;
4ed46869
KH
2046 break;
2047
ec6d2bb8
KH
2048 case '0': case '2': case '3': case '4': /* start composition */
2049 DECODE_COMPOSITION_START (c1);
b73bfc1c 2050 continue;
4ed46869 2051
ec6d2bb8
KH
2052 case '1': /* end composition */
2053 DECODE_COMPOSITION_END (c1);
b73bfc1c 2054 continue;
4ed46869
KH
2055
2056 case '[': /* specification of direction */
d46c5b12
KH
2057 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2058 goto label_invalid_code;
4ed46869 2059 /* For the moment, nested direction is not supported.
d46c5b12 2060 So, `coding->mode & CODING_MODE_DIRECTION' zero means
8ca3766a 2061 left-to-right, and nonzero means right-to-left. */
4ed46869
KH
2062 ONE_MORE_BYTE (c1);
2063 switch (c1)
2064 {
2065 case ']': /* end of the current direction */
d46c5b12 2066 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
2067
2068 case '0': /* end of the current direction */
2069 case '1': /* start of left-to-right direction */
2070 ONE_MORE_BYTE (c1);
2071 if (c1 == ']')
d46c5b12 2072 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 2073 else
d46c5b12 2074 goto label_invalid_code;
4ed46869
KH
2075 break;
2076
2077 case '2': /* start of right-to-left direction */
2078 ONE_MORE_BYTE (c1);
2079 if (c1 == ']')
d46c5b12 2080 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 2081 else
d46c5b12 2082 goto label_invalid_code;
4ed46869
KH
2083 break;
2084
2085 default:
d46c5b12 2086 goto label_invalid_code;
4ed46869 2087 }
b73bfc1c 2088 continue;
4ed46869 2089
103e0180
KH
2090 case '%':
2091 if (COMPOSING_P (coding))
2092 DECODE_COMPOSITION_END ('1');
2093 ONE_MORE_BYTE (c1);
2094 if (c1 == '/')
2095 {
2096 /* CTEXT extended segment:
2097 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2098 We keep these bytes as is for the moment.
2099 They may be decoded by post-read-conversion. */
2100 int dim, M, L;
2101 int size, required;
2102 int produced_chars;
43e4a82f 2103
103e0180
KH
2104 ONE_MORE_BYTE (dim);
2105 ONE_MORE_BYTE (M);
2106 ONE_MORE_BYTE (L);
2107 size = ((M - 128) * 128) + (L - 128);
2108 required = 8 + size * 2;
2109 if (dst + required > (dst_bytes ? dst_end : src))
2110 goto label_end_of_loop;
2111 *dst++ = ISO_CODE_ESC;
2112 *dst++ = '%';
2113 *dst++ = '/';
2114 *dst++ = dim;
2115 produced_chars = 4;
2116 dst += CHAR_STRING (M, dst), produced_chars++;
2117 dst += CHAR_STRING (L, dst), produced_chars++;
2118 while (size-- > 0)
2119 {
2120 ONE_MORE_BYTE (c1);
2121 dst += CHAR_STRING (c1, dst), produced_chars++;
2122 }
2123 coding->produced_char += produced_chars;
2124 }
2125 else if (c1 == 'G')
2126 {
2127 unsigned char *d = dst;
2128 int produced_chars;
2129
2130 /* XFree86 extension for embedding UTF-8 in CTEXT:
2131 ESC % G --UTF-8-BYTES-- ESC % @
2132 We keep these bytes as is for the moment.
2133 They may be decoded by post-read-conversion. */
2134 if (d + 6 > (dst_bytes ? dst_end : src))
2135 goto label_end_of_loop;
2136 *d++ = ISO_CODE_ESC;
2137 *d++ = '%';
2138 *d++ = 'G';
2139 produced_chars = 3;
2140 while (d + 1 < (dst_bytes ? dst_end : src))
2141 {
2142 ONE_MORE_BYTE (c1);
2143 if (c1 == ISO_CODE_ESC
2144 && src + 1 < src_end
2145 && src[0] == '%'
2146 && src[1] == '@')
47dc91ad
KH
2147 {
2148 src += 2;
2149 break;
2150 }
103e0180
KH
2151 d += CHAR_STRING (c1, d), produced_chars++;
2152 }
2153 if (d + 3 > (dst_bytes ? dst_end : src))
2154 goto label_end_of_loop;
2155 *d++ = ISO_CODE_ESC;
2156 *d++ = '%';
2157 *d++ = '@';
2158 dst = d;
2159 coding->produced_char += produced_chars + 3;
2160 }
2161 else
2162 goto label_invalid_code;
2163 continue;
2164
4ed46869 2165 default:
d46c5b12
KH
2166 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2167 goto label_invalid_code;
4ed46869
KH
2168 if (c1 >= 0x28 && c1 <= 0x2B)
2169 { /* designation of DIMENSION1_CHARS94 character set */
2170 ONE_MORE_BYTE (c2);
2171 DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2172 }
2173 else if (c1 >= 0x2C && c1 <= 0x2F)
2174 { /* designation of DIMENSION1_CHARS96 character set */
2175 ONE_MORE_BYTE (c2);
2176 DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2177 }
2178 else
b73bfc1c
KH
2179 goto label_invalid_code;
2180 /* We must update these variables now. */
2181 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2182 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2183 continue;
4ed46869 2184 }
b73bfc1c 2185 }
4ed46869 2186
b73bfc1c
KH
2187 /* Now we know CHARSET and 1st position code C1 of a character.
2188 Produce a multibyte sequence for that character while getting
2189 2nd position code C2 if necessary. */
2190 if (CHARSET_DIMENSION (charset) == 2)
2191 {
2192 ONE_MORE_BYTE (c2);
2193 if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2194 /* C2 is not in a valid range. */
2195 goto label_invalid_code;
4ed46869 2196 }
b73bfc1c
KH
2197 c = DECODE_ISO_CHARACTER (charset, c1, c2);
2198 EMIT_CHAR (c);
4ed46869
KH
2199 continue;
2200
b73bfc1c
KH
2201 label_invalid_code:
2202 coding->errors++;
2203 if (COMPOSING_P (coding))
2204 DECODE_COMPOSITION_END ('1');
4ed46869 2205 src = src_base;
b73bfc1c 2206 c = *src++;
2d4430a8
KH
2207 if (! NILP (translation_table))
2208 c = translate_char (translation_table, c, 0, 0, 0);
b73bfc1c 2209 EMIT_CHAR (c);
4ed46869 2210 }
fb88bf2d 2211
b73bfc1c
KH
2212 label_end_of_loop:
2213 coding->consumed = coding->consumed_char = src_base - source;
d46c5b12 2214 coding->produced = dst - destination;
b73bfc1c 2215 return;
4ed46869
KH
2216}
2217
b73bfc1c 2218
f4dee582 2219/* ISO2022 encoding stuff. */
4ed46869
KH
2220
2221/*
f4dee582 2222 It is not enough to say just "ISO2022" on encoding, we have to
cfb43547 2223 specify more details. In Emacs, each ISO2022 coding system
4ed46869 2224 variant has the following specifications:
8ca3766a 2225 1. Initial designation to G0 through G3.
4ed46869
KH
2226 2. Allows short-form designation?
2227 3. ASCII should be designated to G0 before control characters?
2228 4. ASCII should be designated to G0 at end of line?
2229 5. 7-bit environment or 8-bit environment?
2230 6. Use locking-shift?
2231 7. Use Single-shift?
2232 And the following two are only for Japanese:
2233 8. Use ASCII in place of JIS0201-1976-Roman?
2234 9. Use JISX0208-1983 in place of JISX0208-1978?
2235 These specifications are encoded in `coding->flags' as flag bits
2236 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
f4dee582 2237 details.
4ed46869
KH
2238*/
2239
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG at DST, and increment DST.  If <final-char> of CHARSET is
   '@', 'A', or 'B' and the coding system CODING allows, produce
   designation sequence of short-form.

   When CODING records a revision number below 255 for CHARSET, the
   sequence ESC & <'@' + revision> is produced first.  The designation
   is also recorded in CODING.  */

#define ENCODE_DESIGNATION(charset, reg, coding)			\
  do {									\
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);	\
    /* Intermediate characters selecting G0..G3, for 94- and		\
       96-character sets respectively.  */				\
    char *intermediate_char_94 = "()*+";				\
    char *intermediate_char_96 = ",-./";				\
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);	\
									\
    if (revision < 255)							\
      {									\
        *dst++ = ISO_CODE_ESC;						\
        *dst++ = '&';							\
        *dst++ = '@' + revision;					\
      }									\
    *dst++ = ISO_CODE_ESC;						\
    if (CHARSET_DIMENSION (charset) != 1)				\
      {									\
        *dst++ = '$';							\
        if (CHARSET_CHARS (charset) != 94)				\
          *dst++ = (unsigned char) (intermediate_char_96[reg]);		\
        else if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)		\
                 || reg != 0						\
                 || final_char < '@' || final_char > 'B')		\
          /* The short form (no intermediate character) is not		\
             applicable here.  */					\
          *dst++ = (unsigned char) (intermediate_char_94[reg]);		\
      }									\
    else if (CHARSET_CHARS (charset) == 94)				\
      *dst++ = (unsigned char) (intermediate_char_94[reg]);		\
    else								\
      *dst++ = (unsigned char) (intermediate_char_96[reg]);		\
    *dst++ = final_char;						\
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;		\
  } while (0)
2282
/* The following two macros produce codes (control character or escape
   sequence) for ISO2022 single-shift functions (single-shift-2 and
   single-shift-3).  With CODING_FLAG_ISO_SEVEN_BITS the two-byte
   escape sequence is produced, otherwise the single control byte.
   Both mark CODING as being in single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2				\
  do {							\
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)	\
      {							\
        *dst++ = ISO_CODE_ESC;				\
        *dst++ = 'N';					\
      }							\
    else						\
      {							\
        *dst++ = ISO_CODE_SS2;				\
      }							\
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;	\
  } while (0)

#define ENCODE_SINGLE_SHIFT_3				\
  do {							\
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)	\
      {							\
        *dst++ = ISO_CODE_ESC;				\
        *dst++ = 'O';					\
      }							\
    else						\
      {							\
        *dst++ = ISO_CODE_SS3;				\
      }							\
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;	\
  } while (0)
2304
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING which graphic register is now invoked to graphic
   plane 0.  */

#define ENCODE_SHIFT_IN					\
  do {							\
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;		\
    *dst++ = ISO_CODE_SI;				\
  } while (0)

#define ENCODE_SHIFT_OUT				\
  do {							\
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;		\
    *dst++ = ISO_CODE_SO;				\
  } while (0)

#define ENCODE_LOCKING_SHIFT_2				\
  do {							\
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;		\
    *dst++ = ISO_CODE_ESC;				\
    *dst++ = 'n';					\
  } while (0)

#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;		\
    *dst++ = ISO_CODE_ESC;				\
    *dst++ = 'o';					\
  } while (0)
2332
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   The body loops: as long as CHARSET is neither single-shifted nor
   invoked to graphic plane 0 or 1, encode_invocation_designation is
   called and the tests are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			\
  do {									\
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))			\
      {									\
        /* A single-shift was just produced; this byte completes	\
           it.  */							\
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)		\
                  ? (c1 & 0x7F) : (c1 | 0x80));				\
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;			\
        break;								\
      }									\
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))		\
      {									\
        *dst++ = c1 & 0x7F;						\
        break;								\
      }									\
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))		\
      {									\
        *dst++ = c1 | 0x80;						\
        break;								\
      }									\
    /* Since CHARSET is not yet invoked to any graphic planes, we	\
       must invoke it, or, at first, designate it to some graphic	\
       register.  Then repeat the loop to actually produce the		\
       character.  */							\
    dst = encode_invocation_designation (charset, coding, dst);		\
  } while (1)
2365
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   The body loops: as long as CHARSET is neither single-shifted nor
   invoked to graphic plane 0 or 1, encode_invocation_designation is
   called and the tests are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)		\
  do {									\
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))			\
      {									\
        /* A single-shift was just produced; these two bytes		\
           complete it.  */						\
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)			\
          {								\
            *dst++ = c1 & 0x7F;						\
            *dst++ = c2 & 0x7F;						\
          }								\
        else								\
          {								\
            *dst++ = c1 | 0x80;						\
            *dst++ = c2 | 0x80;						\
          }								\
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;			\
        break;								\
      }									\
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))		\
      {									\
        *dst++ = c1 & 0x7F;						\
        *dst++ = c2 & 0x7F;						\
        break;								\
      }									\
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))		\
      {									\
        *dst++ = c1 | 0x80;						\
        *dst++ = c2 | 0x80;						\
        break;								\
      }									\
    /* Since CHARSET is not yet invoked to any graphic planes, we	\
       must invoke it, or, at first, designate it to some graphic	\
       register.  Then repeat the loop to actually produce the		\
       character.  */							\
    dst = encode_invocation_designation (charset, coding, dst);		\
  } while (1)
2398
/* Produce ISO2022 codes for character C at DST.  C is split into its
   charset and position codes.  A character of an undefined charset is
   written as its raw position code(s).  Otherwise the character is
   encoded by the DIMENSION1 or DIMENSION2 macro, after substituting
   the JISX0201 Roman set for ASCII or JISX0208-1978 for JISX0208 when
   the corresponding flag of CODING is set.  */

#define ENCODE_ISO_CHARACTER(c)						\
  do {									\
    int charset, c1, c2;						\
									\
    SPLIT_CHAR (c, charset, c1, c2);					\
    if (! CHARSET_DEFINED_P (charset))					\
      {									\
        *dst++ = c1;							\
        if (c2 >= 0)							\
          *dst++ = c2;							\
      }									\
    else if (CHARSET_DIMENSION (charset) == 1)				\
      {									\
        if (charset == CHARSET_ASCII					\
            && coding->flags & CODING_FLAG_ISO_USE_ROMAN)		\
          charset = charset_latin_jisx0201;				\
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);			\
      }									\
    else								\
      {									\
        if (charset == charset_jisx0208					\
            && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)		\
          charset = charset_jisx0208_1978;				\
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);		\
      }									\
  } while (0)
2428
2429
/* Instead of encoding character C, produce one or two `?'s: one
   CODING_REPLACEMENT_CHARACTER is always produced, and a second one
   when the charset of C has a width greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)					\
  do {									\
    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);		\
    if (CHARSET_WIDTH (CHAR_CHARSET (c)) <= 1)				\
      break;								\
    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);		\
  } while (0)
bdd9fb48 2438
05e6f5dc 2439
4ed46869
KH
2440/* Produce designation and invocation codes at a place pointed by DST
2441 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
2442 Return new DST. */
2443
2444unsigned char *
2445encode_invocation_designation (charset, coding, dst)
2446 int charset;
2447 struct coding_system *coding;
2448 unsigned char *dst;
2449{
2450 int reg; /* graphic register number */
2451
2452 /* At first, check designations. */
2453 for (reg = 0; reg < 4; reg++)
2454 if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2455 break;
2456
2457 if (reg >= 4)
2458 {
2459 /* CHARSET is not yet designated to any graphic registers. */
2460 /* At first check the requested designation. */
2461 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1ba9e4ab
KH
2462 if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2463 /* Since CHARSET requests no special designation, designate it
2464 to graphic register 0. */
4ed46869
KH
2465 reg = 0;
2466
2467 ENCODE_DESIGNATION (charset, reg, coding);
2468 }
2469
2470 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2471 && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2472 {
2473 /* Since the graphic register REG is not invoked to any graphic
2474 planes, invoke it to graphic plane 0. */
2475 switch (reg)
2476 {
2477 case 0: /* graphic register 0 */
2478 ENCODE_SHIFT_IN;
2479 break;
2480
2481 case 1: /* graphic register 1 */
2482 ENCODE_SHIFT_OUT;
2483 break;
2484
2485 case 2: /* graphic register 2 */
2486 if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2487 ENCODE_SINGLE_SHIFT_2;
2488 else
2489 ENCODE_LOCKING_SHIFT_2;
2490 break;
2491
2492 case 3: /* graphic register 3 */
2493 if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2494 ENCODE_SINGLE_SHIFT_3;
2495 else
2496 ENCODE_LOCKING_SHIFT_3;
2497 break;
2498 }
2499 }
b73bfc1c 2500
4ed46869
KH
2501 return dst;
2502}
2503
/* Produce 2-byte codes for encoded composition rule RULE.  The first
   byte is 32 + 81 + GREF and the second is 32 + NREF, where GREF and
   NREF are obtained from RULE by COMPOSITION_DECODE_RULE.  */

#define ENCODE_COMPOSITION_RULE(rule)				\
  do {								\
    int gref, nref;						\
    COMPOSITION_DECODE_RULE (rule, gref, nref);			\
    dst[0] = 32 + 81 + gref;					\
    dst[1] = 32 + nref;						\
    dst += 2;							\
  } while (0)
2513
/* Produce codes for indicating the start of a composition sequence
   (ESC 0, ESC 3, or ESC 4).  DATA points to an array of integers
   which specify information about the composition.  See the comment
   in coding.h for the format of DATA.

   DATA[3] becomes the current composition method of CODING.  For a
   method other than COMPOSITION_RELATIVE, CODING->cmp_data_index is
   set to the fifth element of the current composition data.  */

#define ENCODE_COMPOSITION_START(coding, data)				\
  do {									\
    coding->composing = data[3];					\
    *dst++ = ISO_CODE_ESC;						\
    if (coding->composing != COMPOSITION_RELATIVE)			\
      {									\
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)		\
          *dst++ = '3';							\
        else								\
          *dst++ = '4';							\
        coding->cmp_data_index = coding->cmp_data_start + 4;		\
        coding->composition_rule_follows = 0;				\
      }									\
    else								\
      *dst++ = '0';							\
  } while (0)
2533
/* Produce codes for indicating the end of the current composition
   (ESC 1).  CODING->cmp_data_start is advanced by DATA[0]; when that
   exhausts the current composition data block and another block
   follows, move on to the start of the next block.  */

#define ENCODE_COMPOSITION_END(coding, data)				\
  do {									\
    dst[0] = ISO_CODE_ESC;						\
    dst[1] = '1';							\
    dst += 2;								\
    coding->composing = COMPOSITION_NO;					\
    coding->cmp_data_start += data[0];					\
    if (coding->cmp_data->next						\
        && coding->cmp_data_start == coding->cmp_data->used)		\
      {									\
        coding->cmp_data = coding->cmp_data->next;			\
        coding->cmp_data_start = 0;					\
      }									\
  } while (0)
2549
/* Produce composition start sequence ESC 0.  Here, this sequence
   doesn't mean the start of a new composition but means that we have
   just produced components (alternate chars and composition rules) of
   the composition and the actual text follows in SRC.  The composing
   state of CODING is switched to COMPOSITION_RELATIVE.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)	\
  do {						\
    coding->composing = COMPOSITION_RELATIVE;	\
    dst[0] = ISO_CODE_ESC;			\
    dst[1] = '0';				\
    dst += 2;					\
  } while (0)
4ed46869
KH
2561
/* The following three macros produce codes for indicating direction
   of text.  They use the variables `coding' and `dst' of the function
   in which they are expanded and advance DST past the produced bytes.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER produces CSI: the two bytes
   ESC '[' when CODING_FLAG_ISO_SEVEN_BITS is set in CODING->flags
   (tested as a bit, like the other 7-bit/8-bit choices in this file),
   otherwise the single byte ISO_CODE_CSI.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R produce CSI followed
   by `2 ]' and `0 ]' respectively.  All three are statements.  */

#define ENCODE_CONTROL_SEQUENCE_INTRODUCER		\
  do {							\
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)	\
      {							\
        *dst++ = ISO_CODE_ESC;				\
        *dst++ = '[';					\
      }							\
    else						\
      *dst++ = ISO_CODE_CSI;				\
  } while (0)

#define ENCODE_DIRECTION_R2L			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    *dst++ = '2';				\
    *dst++ = ']';				\
  } while (0)

#define ENCODE_DIRECTION_L2R			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    *dst++ = '0';				\
    *dst++ = ']';				\
  } while (0)
4ed46869
KH
2577
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state: shift in if graphic plane 0
   does not invoke register 0, then re-designate every register whose
   current designation differs from its (non-negative) initial
   designation.  */

#define ENCODE_RESET_PLANE_AND_REGISTER					    \
  do {									    \
    int reg = 0;							    \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)			    \
      ENCODE_SHIFT_IN;							    \
    while (reg < 4)							    \
      {									    \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0	    \
            && (CODING_SPEC_ISO_DESIGNATION (coding, reg)		    \
                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))	    \
          ENCODE_DESIGNATION						    \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
        reg++;								    \
      }									    \
  } while (0)
2592
bdd9fb48 2593/* Produce designation sequences of charsets in the line started from
b73bfc1c 2594 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
2595
2596 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
2597 find all the necessary designations. */
2598
b73bfc1c
KH
2599static unsigned char *
2600encode_designation_at_bol (coding, translation_table, src, src_end, dst)
e0e989f6 2601 struct coding_system *coding;
b73bfc1c 2602 Lisp_Object translation_table;
5bdca8af
DN
2603 const unsigned char *src, *src_end;
2604 unsigned char *dst;
e0e989f6 2605{
bdd9fb48
KH
2606 int charset, c, found = 0, reg;
2607 /* Table of charsets to be designated to each graphic register. */
2608 int r[4];
bdd9fb48
KH
2609
2610 for (reg = 0; reg < 4; reg++)
2611 r[reg] = -1;
2612
b73bfc1c 2613 while (found < 4)
e0e989f6 2614 {
b73bfc1c
KH
2615 ONE_MORE_CHAR (c);
2616 if (c == '\n')
2617 break;
93dec019 2618
b73bfc1c 2619 charset = CHAR_CHARSET (c);
e0e989f6 2620 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
d46c5b12 2621 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
bdd9fb48
KH
2622 {
2623 found++;
2624 r[reg] = charset;
2625 }
bdd9fb48
KH
2626 }
2627
b73bfc1c 2628 label_end_of_loop:
bdd9fb48
KH
2629 if (found)
2630 {
2631 for (reg = 0; reg < 4; reg++)
2632 if (r[reg] >= 0
2633 && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2634 ENCODE_DESIGNATION (r[reg], reg, coding);
e0e989f6 2635 }
b73bfc1c
KH
2636
2637 return dst;
e0e989f6
KH
2638}
2639
4ed46869
KH
2640/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
   Encode the SRC_BYTES bytes at SOURCE into DESTINATION using the
   ISO 2022 settings held in CODING.  On exit CODING->consumed,
   CODING->produced and CODING->produced_char are stored from the
   final source/destination positions, CODING->consumed_char has been
   advanced once per character handled by the main path, and
   CODING->errors has been incremented for every non-ASCII single-byte
   character that was copied through unencoded.  CODING->result is
   set to CODING_FINISH_INSUFFICIENT_DST when the loop stops because
   the destination is (nearly) full.  */
2641
b73bfc1c 2642static void
d46c5b12 2643encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
4ed46869 2644 struct coding_system *coding;
5bdca8af
DN
2645 const unsigned char *source;
2646 unsigned char *destination;
4ed46869 2647 int src_bytes, dst_bytes;
4ed46869 2648{
5bdca8af
DN
2649 const unsigned char *src = source;
2650 const unsigned char *src_end = source + src_bytes;
4ed46869
KH
2651 unsigned char *dst = destination;
2652 unsigned char *dst_end = destination + dst_bytes;
b73bfc1c 2653 /* Since the maximum bytes produced by each loop is 20, we subtract 19
4ed46869
KH
2654 from DST_END to assure overflow checking is necessary only at the
2655 head of loop. */
b73bfc1c
KH
2656 unsigned char *adjusted_dst_end = dst_end - 19;
2657 /* SRC_BASE remembers the start position in source in each loop.
2658 The loop will be exited when there's not enough source text to
2659 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2660 there's not enough destination area to produce encoded codes
2661 (within macro EMIT_BYTES). */
5bdca8af 2662 const unsigned char *src_base;
b73bfc1c
KH
2663 int c;
2664 Lisp_Object translation_table;
05e6f5dc
KH
2665 Lisp_Object safe_chars;
2666
/* A coding system flagged CODING_FLAG_ISO_SAFE always runs with
   CODING_MODE_INHIBIT_UNENCODABLE_CHAR turned on.  */
0eecad43
KH
2667 if (coding->flags & CODING_FLAG_ISO_SAFE)
2668 coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2669
6b89e3aa 2670 safe_chars = coding_safe_chars (coding->symbol);
bdd9fb48 2671
/* Pick the translation table: none when character translation is
   disabled, else the coding system's own table, falling back to the
   standard one when that is nil.  */
b73bfc1c
KH
2672 if (NILP (Venable_character_translation))
2673 translation_table = Qnil;
2674 else
2675 {
2676 translation_table = coding->translation_table_for_encode;
2677 if (NILP (translation_table))
2678 translation_table = Vstandard_translation_table_for_encode;
2679 }
4ed46869 2680
d46c5b12 2681 coding->consumed_char = 0;
b73bfc1c
KH
2682 coding->errors = 0;
2683 while (1)
4ed46869 2684 {
b73bfc1c
KH
2685 src_base = src;
2686
/* With a zero DST_BYTES the limit is measured against SRC rather
   than DST_END.  NOTE(review): presumably this is the case where
   source and destination share one buffer -- confirm against the
   callers.  */
2687 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2688 {
2689 coding->result = CODING_FINISH_INSUFFICIENT_DST;
2690 break;
2691 }
4ed46869 2692
e0e989f6
KH
2693 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2694 && CODING_SPEC_ISO_BOL (coding))
2695 {
bdd9fb48 2696 /* We have to produce designation sequences if any now. */
b73bfc1c
KH
2697 dst = encode_designation_at_bol (coding, translation_table,
2698 src, src_end, dst);
e0e989f6
KH
2699 CODING_SPEC_ISO_BOL (coding) = 0;
2700 }
2701
ec6d2bb8
KH
2702 /* Check composition start and end. */
2703 if (coding->composing != COMPOSITION_DISABLED
2704 && coding->cmp_data_start < coding->cmp_data->used)
4ed46869 2705 {
ec6d2bb8
KH
2706 struct composition_data *cmp_data = coding->cmp_data;
2707 int *data = cmp_data->data + coding->cmp_data_start;
2708 int this_pos = cmp_data->char_offset + coding->consumed_char;
2709
2710 if (coding->composing == COMPOSITION_RELATIVE)
4ed46869 2711 {
ec6d2bb8
KH
2712 if (this_pos == data[2])
2713 {
2714 ENCODE_COMPOSITION_END (coding, data);
/* CMP_DATA and DATA are re-read from CODING right after the macro.
   NOTE(review): presumably ENCODE_COMPOSITION_END can advance
   coding->cmp_data / coding->cmp_data_start -- confirm in the macro
   definition.  */
2715 cmp_data = coding->cmp_data;
2716 data = cmp_data->data + coding->cmp_data_start;
2717 }
4ed46869 2718 }
ec6d2bb8 2719 else if (COMPOSING_P (coding))
4ed46869 2720 {
ec6d2bb8
KH
2721 /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR */
2722 if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2723 /* We have consumed components of the composition.
8ca3766a 2724 What follows in SRC is the composition's base
ec6d2bb8
KH
2725 text. */
2726 ENCODE_COMPOSITION_FAKE_START (coding);
2727 else
4ed46869 2728 {
ec6d2bb8
KH
/* This C shadows the function-level C; it holds the next component
   taken from the composition data, not a character read from SRC.  */
2729 int c = cmp_data->data[coding->cmp_data_index++];
2730 if (coding->composition_rule_follows)
2731 {
2732 ENCODE_COMPOSITION_RULE (c);
2733 coding->composition_rule_follows = 0;
2734 }
2735 else
2736 {
0eecad43 2737 if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
05e6f5dc
KH
2738 && ! CODING_SAFE_CHAR_P (safe_chars, c))
2739 ENCODE_UNSAFE_CHARACTER (c);
2740 else
2741 ENCODE_ISO_CHARACTER (c);
ec6d2bb8
KH
2742 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2743 coding->composition_rule_follows = 1;
2744 }
4ed46869
KH
2745 continue;
2746 }
ec6d2bb8
KH
2747 }
2748 if (!COMPOSING_P (coding))
2749 {
2750 if (this_pos == data[1])
4ed46869 2751 {
ec6d2bb8
KH
2752 ENCODE_COMPOSITION_START (coding, data);
2753 continue;
4ed46869 2754 }
4ed46869
KH
2755 }
2756 }
ec6d2bb8 2757
b73bfc1c 2758 ONE_MORE_CHAR (c);
4ed46869 2759
b73bfc1c
KH
2760 /* Now encode the character C. */
2761 if (c < 0x20 || c == 0x7F)
2762 {
2763 if (c == '\r')
19a8d9e0 2764 {
b73bfc1c
KH
2765 if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2766 {
2767 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2768 ENCODE_RESET_PLANE_AND_REGISTER;
2769 *dst++ = c;
/* This `continue' skips the coding->consumed_char++ at the bottom of
   the loop, so a CR written here is not counted there.  */
2770 continue;
2771 }
2772 /* fall down to treat '\r' as '\n' ... */
2773 c = '\n';
19a8d9e0 2774 }
b73bfc1c 2775 if (c == '\n')
19a8d9e0 2776 {
b73bfc1c
KH
2777 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2778 ENCODE_RESET_PLANE_AND_REGISTER;
2779 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2780 bcopy (coding->spec.iso2022.initial_designation,
2781 coding->spec.iso2022.current_designation,
2782 sizeof coding->spec.iso2022.initial_designation);
/* Write the line end in the form selected by coding->eol_type;
   an undecided EOL type is written as LF.  */
2783 if (coding->eol_type == CODING_EOL_LF
2784 || coding->eol_type == CODING_EOL_UNDECIDED)
2785 *dst++ = ISO_CODE_LF;
2786 else if (coding->eol_type == CODING_EOL_CRLF)
2787 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2788 else
2789 *dst++ = ISO_CODE_CR;
2790 CODING_SPEC_ISO_BOL (coding) = 1;
19a8d9e0 2791 }
93dec019 2792 else
19a8d9e0 2793 {
b73bfc1c
KH
2794 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2795 ENCODE_RESET_PLANE_AND_REGISTER;
2796 *dst++ = c;
19a8d9e0 2797 }
4ed46869 2798 }
b73bfc1c 2799 else if (ASCII_BYTE_P (c))
05e6f5dc 2800 ENCODE_ISO_CHARACTER (c);
b73bfc1c 2801 else if (SINGLE_BYTE_CHAR_P (c))
88993dfd 2802 {
/* A single-byte character that is not ASCII is copied through
   unchanged and counted as an error.  */
b73bfc1c
KH
2803 *dst++ = c;
2804 coding->errors++;
88993dfd 2805 }
0eecad43 2806 else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
05e6f5dc
KH
2807 && ! CODING_SAFE_CHAR_P (safe_chars, c))
2808 ENCODE_UNSAFE_CHARACTER (c);
b73bfc1c 2809 else
05e6f5dc 2810 ENCODE_ISO_CHARACTER (c);
b73bfc1c
KH
2811
2812 coding->consumed_char++;
84fbb8a0 2813 }
b73bfc1c
KH
2814
2815 label_end_of_loop:
/* The consumed byte count is taken from SRC_BASE (the start of the
   last iteration), not from SRC.  */
2816 coding->consumed = src_base - source;
d46c5b12 2817 coding->produced = coding->produced_char = dst - destination;
4ed46869
KH
2818}
2819
2820\f
2821/*** 4. SJIS and BIG5 handlers ***/
2822
cfb43547 2823/* Although SJIS and BIG5 are not ISO coding systems, they are used
4ed46869
KH
2824 quite widely. So, for the moment, Emacs supports them in the bare
2825 C code. But, in the future, they may be supported only by CCL. */
2826
2827/* SJIS is a coding system encoding three character sets: ASCII, right
2828 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
2829 as is. A character of charset katakana-jisx0201 is encoded by
2830 "position-code + 0x80". A character of charset japanese-jisx0208
2831 is encoded in 2-byte but two position-codes are divided and shifted
cfb43547 2832 so that it fits in the range below.
4ed46869
KH
2833
2834 --- CODE RANGE of SJIS ---
2835 (character set) (range)
2836 ASCII 0x00 .. 0x7F
682169fe 2837 KATAKANA-JISX0201 0xA1 .. 0xDF
c28a9453 2838 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 2839 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
2840 -------------------------------
2841
2842*/
2843
2844/* BIG5 is a coding system encoding two character sets: ASCII and
2845 Big5. An ASCII character is encoded as is. Big5 is a two-byte
cfb43547 2846 character set and is encoded in two bytes.
4ed46869
KH
2847
2848 --- CODE RANGE of BIG5 ---
2849 (character set) (range)
2850 ASCII 0x00 .. 0x7F
2851 Big5 (1st byte) 0xA1 .. 0xFE
2852 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
2853 --------------------------
2854
2855 Since the number of characters in Big5 is larger than maximum
2856 characters in Emacs' charset (96x96), it can't be handled as one
2857 charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
2858 and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
2859 contains frequently used characters and the latter contains less
2860 frequently used characters. */
2861
2862/* Macros to decode or encode a character of Big5 in BIG5. B1 and B2
2863 are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
f458a8e0 2864 C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
4ed46869
KH
2865 format. CHARSET is `charset_big5_1' or `charset_big5_2'. */
2866
/* Number of Big5 characters which have the same code in 1st byte,
   i.e. the number of valid 2nd bytes: 0xA1..0xFE (0xFF - 0xA1 = 94)
   plus 0x40..0x7E (0x7F - 0x40 = 63).  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Decode the BIG5 byte pair B1, B2.  Store `charset_big5_1' or
   `charset_big5_2' in CHARSET and the two position-codes in C1 and C2.
   C1/C2 may be the same lvalues as B1/B2, because the linear index is
   computed and the charset chosen before either output is written.
   Arguments are evaluated more than once, so they must be free of side
   effects; a caller variable named `temp' must not be passed.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((b1) - 0xA1) * BIG5_SAME_ROW                                   \
        + (b2) - ((b2) < 0x7F ? 0x40 : 0x62);                           \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Inverse of DECODE_BIG5: encode position-codes C1, C2 of CHARSET
   (`charset_big5_1' or `charset_big5_2') into the BIG5 byte pair B1,
   B2.  B1/B2 may be the same lvalues as C1/C2.  The same caveats about
   repeated argument evaluation and the name `temp' apply.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2894
2895/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2896 Check if a text is encoded in SJIS. If it is, return
2897 CODING_CATEGORY_MASK_SJIS, else return 0. */
2898
0a28aafb
KH
2899static int
2900detect_coding_sjis (src, src_end, multibytep)
4ed46869 2901 unsigned char *src, *src_end;
0a28aafb 2902 int multibytep;
4ed46869 2903{
b73bfc1c
KH
2904 int c;
2905 /* Dummy for ONE_MORE_BYTE. */
2906 struct coding_system dummy_coding;
2907 struct coding_system *coding = &dummy_coding;
4ed46869 2908
b73bfc1c 2909 while (1)
4ed46869 2910 {
0a28aafb 2911 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
682169fe
KH
2912 if (c < 0x80)
2913 continue;
2914 if (c == 0x80 || c == 0xA0 || c > 0xEF)
2915 return 0;
2916 if (c <= 0x9F || c >= 0xE0)
4ed46869 2917 {
682169fe
KH
2918 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2919 if (c < 0x40 || c == 0x7F || c > 0xFC)
4ed46869
KH
2920 return 0;
2921 }
2922 }
b73bfc1c 2923 label_end_of_loop:
4ed46869
KH
2924 return CODING_CATEGORY_MASK_SJIS;
2925}
2926
2927/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2928 Check if a text is encoded in BIG5. If it is, return
2929 CODING_CATEGORY_MASK_BIG5, else return 0. */
2930
0a28aafb
KH
2931static int
2932detect_coding_big5 (src, src_end, multibytep)
4ed46869 2933 unsigned char *src, *src_end;
0a28aafb 2934 int multibytep;
4ed46869 2935{
b73bfc1c
KH
2936 int c;
2937 /* Dummy for ONE_MORE_BYTE. */
2938 struct coding_system dummy_coding;
2939 struct coding_system *coding = &dummy_coding;
4ed46869 2940
b73bfc1c 2941 while (1)
4ed46869 2942 {
0a28aafb 2943 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
682169fe
KH
2944 if (c < 0x80)
2945 continue;
2946 if (c < 0xA1 || c > 0xFE)
2947 return 0;
2948 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2949 if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2950 return 0;
4ed46869 2951 }
b73bfc1c 2952 label_end_of_loop:
4ed46869
KH
2953 return CODING_CATEGORY_MASK_BIG5;
2954}
2955
fa42c37f
KH
2956/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2957 Check if a text is encoded in UTF-8. If it is, return
2958 CODING_CATEGORY_MASK_UTF_8, else return 0. */
2959
2960#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
2961#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
2962#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2963#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2964#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2965#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2966#define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2967
0a28aafb
KH
2968static int
2969detect_coding_utf_8 (src, src_end, multibytep)
fa42c37f 2970 unsigned char *src, *src_end;
0a28aafb 2971 int multibytep;
fa42c37f
KH
2972{
2973 unsigned char c;
2974 int seq_maybe_bytes;
b73bfc1c
KH
2975 /* Dummy for ONE_MORE_BYTE. */
2976 struct coding_system dummy_coding;
2977 struct coding_system *coding = &dummy_coding;
fa42c37f 2978
b73bfc1c 2979 while (1)
fa42c37f 2980 {
0a28aafb 2981 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
fa42c37f
KH
2982 if (UTF_8_1_OCTET_P (c))
2983 continue;
2984 else if (UTF_8_2_OCTET_LEADING_P (c))
2985 seq_maybe_bytes = 1;
2986 else if (UTF_8_3_OCTET_LEADING_P (c))
2987 seq_maybe_bytes = 2;
2988 else if (UTF_8_4_OCTET_LEADING_P (c))
2989 seq_maybe_bytes = 3;
2990 else if (UTF_8_5_OCTET_LEADING_P (c))
2991 seq_maybe_bytes = 4;
2992 else if (UTF_8_6_OCTET_LEADING_P (c))
2993 seq_maybe_bytes = 5;
2994 else
2995 return 0;
2996
2997 do
2998 {
0a28aafb 2999 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
fa42c37f
KH
3000 if (!UTF_8_EXTRA_OCTET_P (c))
3001 return 0;
3002 seq_maybe_bytes--;
3003 }
3004 while (seq_maybe_bytes > 0);
3005 }
3006
b73bfc1c 3007 label_end_of_loop:
fa42c37f
KH
3008 return CODING_CATEGORY_MASK_UTF_8;
3009}
3010
3011/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3012 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
3013 Little Endian (otherwise). If it is, return
3014 CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE,
3015 else return 0. */
3016
/* Nonzero if the 16-bit value VAL is one of the two code units that
   are never valid in UTF-16 text here (0xFFFE and 0xFFFF).  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit value VAL is a high (leading) surrogate,
   i.e. lies in 0xD800..0xDBFF.  The top six bits identify the
   surrogate class, hence the 0xFC00 mask.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit value VAL is a low (trailing) surrogate,
   i.e. lies in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
3026
0a28aafb
KH
3027static int
3028detect_coding_utf_16 (src, src_end, multibytep)
fa42c37f 3029 unsigned char *src, *src_end;
0a28aafb 3030 int multibytep;
fa42c37f 3031{
b73bfc1c 3032 unsigned char c1, c2;
1c7457e2 3033 /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */
b73bfc1c
KH
3034 struct coding_system dummy_coding;
3035 struct coding_system *coding = &dummy_coding;
fa42c37f 3036
0a28aafb
KH
3037 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
3038 ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
b73bfc1c
KH
3039
3040 if ((c1 == 0xFF) && (c2 == 0xFE))
fa42c37f 3041 return CODING_CATEGORY_MASK_UTF_16_LE;
b73bfc1c 3042 else if ((c1 == 0xFE) && (c2 == 0xFF))
fa42c37f
KH
3043 return CODING_CATEGORY_MASK_UTF_16_BE;
3044
b73bfc1c 3045 label_end_of_loop:
fa42c37f
KH
3046 return 0;
3047}
3048
4ed46869
KH
3049/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3050 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3051
b73bfc1c 3052static void
4ed46869 3053decode_coding_sjis_big5 (coding, source, destination,
d46c5b12 3054 src_bytes, dst_bytes, sjis_p)
4ed46869 3055 struct coding_system *coding;
5bdca8af
DN
3056 const unsigned char *source;
3057 unsigned char *destination;
4ed46869 3058 int src_bytes, dst_bytes;
4ed46869
KH
3059 int sjis_p;
3060{
5bdca8af
DN
3061 const unsigned char *src = source;
3062 const unsigned char *src_end = source + src_bytes;
4ed46869
KH
3063 unsigned char *dst = destination;
3064 unsigned char *dst_end = destination + dst_bytes;
b73bfc1c
KH
3065 /* SRC_BASE remembers the start position in source in each loop.
3066 The loop will be exited when there's not enough source code
3067 (within macro ONE_MORE_BYTE), or when there's not enough
3068 destination area to produce a character (within macro
3069 EMIT_CHAR). */
5bdca8af 3070 const unsigned char *src_base;
b73bfc1c 3071 Lisp_Object translation_table;
a5d301df 3072
b73bfc1c
KH
3073 if (NILP (Venable_character_translation))
3074 translation_table = Qnil;
3075 else
3076 {
3077 translation_table = coding->translation_table_for_decode;
3078 if (NILP (translation_table))
3079 translation_table = Vstandard_translation_table_for_decode;
3080 }
4ed46869 3081
d46c5b12 3082 coding->produced_char = 0;
b73bfc1c 3083 while (1)
4ed46869 3084 {
85478bc6 3085 int c, charset, c1, c2 = 0;
b73bfc1c
KH
3086
3087 src_base = src;
3088 ONE_MORE_BYTE (c1);
3089
3090 if (c1 < 0x80)
4ed46869 3091 {
b73bfc1c
KH
3092 charset = CHARSET_ASCII;
3093 if (c1 < 0x20)
4ed46869 3094 {
b73bfc1c 3095 if (c1 == '\r')
d46c5b12 3096 {
b73bfc1c 3097 if (coding->eol_type == CODING_EOL_CRLF)
d46c5b12 3098 {
b73bfc1c
KH
3099 ONE_MORE_BYTE (c2);
3100 if (c2 == '\n')
3101 c1 = c2;
b73bfc1c
KH
3102 else
3103 /* To process C2 again, SRC is subtracted by 1. */
3104 src--;
d46c5b12 3105 }
b73bfc1c
KH
3106 else if (coding->eol_type == CODING_EOL_CR)
3107 c1 = '\n';
3108 }
3109 else if (c1 == '\n'
3110 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3111 && (coding->eol_type == CODING_EOL_CR
3112 || coding->eol_type == CODING_EOL_CRLF))
3113 {
3114 coding->result = CODING_FINISH_INCONSISTENT_EOL;
3115 goto label_end_of_loop;
d46c5b12 3116 }
4ed46869 3117 }
4ed46869 3118 }
54f78171 3119 else
b73bfc1c 3120 {
4ed46869
KH
3121 if (sjis_p)
3122 {
682169fe 3123 if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
b73bfc1c 3124 goto label_invalid_code;
682169fe 3125 if (c1 <= 0x9F || c1 >= 0xE0)
fb88bf2d 3126 {
54f78171
KH
3127 /* SJIS -> JISX0208 */
3128 ONE_MORE_BYTE (c2);
b73bfc1c
KH
3129 if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
3130 goto label_invalid_code;
3131 DECODE_SJIS (c1, c2, c1, c2);
3132 charset = charset_jisx0208;
5e34de15 3133 }
fb88bf2d 3134 else
b73bfc1c
KH
3135 /* SJIS -> JISX0201-Kana */
3136 charset = charset_katakana_jisx0201;
4ed46869 3137 }
fb88bf2d 3138 else
fb88bf2d 3139 {
54f78171 3140 /* BIG5 -> Big5 */
682169fe 3141 if (c1 < 0xA0 || c1 > 0xFE)
b73bfc1c
KH
3142 goto label_invalid_code;
3143 ONE_MORE_BYTE (c2);
3144 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3145 goto label_invalid_code;
3146 DECODE_BIG5 (c1, c2, charset, c1, c2);
4ed46869
KH
3147 }
3148 }
4ed46869 3149
b73bfc1c
KH
3150 c = DECODE_ISO_CHARACTER (charset, c1, c2);
3151 EMIT_CHAR (c);
fb88bf2d
KH
3152 continue;
3153
b73bfc1c
KH
3154 label_invalid_code:
3155 coding->errors++;
4ed46869 3156 src = src_base;
b73bfc1c
KH
3157 c = *src++;
3158 EMIT_CHAR (c);
fb88bf2d 3159 }
d46c5b12 3160
b73bfc1c
KH
3161 label_end_of_loop:
3162 coding->consumed = coding->consumed_char = src_base - source;
d46c5b12 3163 coding->produced = dst - destination;
b73bfc1c 3164 return;
4ed46869
KH
3165}
3166
3167/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
3168 This function can encode charsets `ascii', `katakana-jisx0201',
3169 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3170 are sure that all these charsets are registered as official charset
4ed46869
KH
3171 (i.e. do not have extended leading-codes). Characters of other
3172 charsets are produced without any encoding. If SJIS_P is 1, encode
3173 SJIS text, else encode BIG5 text. */
3174
b73bfc1c 3175static void
4ed46869 3176encode_coding_sjis_big5 (coding, source, destination,
d46c5b12 3177 src_bytes, dst_bytes, sjis_p)
4ed46869
KH
3178 struct coding_system *coding;
3179 unsigned char *source, *destination;
3180 int src_bytes, dst_bytes;
4ed46869
KH
3181 int sjis_p;
3182{
3183 unsigned char *src = source;
3184 unsigned char *src_end = source + src_bytes;
3185 unsigned char *dst = destination;
3186 unsigned char *dst_end = destination + dst_bytes;
b73bfc1c
KH
3187 /* SRC_BASE remembers the start position in source in each loop.
3188 The loop will be exited when there's not enough source text to
3189 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3190 there's not enough destination area to produce encoded codes
3191 (within macro EMIT_BYTES). */
3192 unsigned char *src_base;
3193 Lisp_Object translation_table;
4ed46869 3194
b73bfc1c
KH
3195 if (NILP (Venable_character_translation))
3196 translation_table = Qnil;
3197 else
4ed46869 3198 {
39658efc 3199 translation_table = coding->translation_table_for_encode;
b73bfc1c 3200 if (NILP (translation_table))
39658efc 3201 translation_table = Vstandard_translation_table_for_encode;
b73bfc1c 3202 }
a5d301df 3203
b73bfc1c
KH
3204 while (1)
3205 {
3206 int c, charset, c1, c2;
4ed46869 3207
b73bfc1c
KH
3208 src_base = src;
3209 ONE_MORE_CHAR (c);
93dec019 3210
b73bfc1c
KH
3211 /* Now encode the character C. */
3212 if (SINGLE_BYTE_CHAR_P (c))
3213 {
3214 switch (c)
4ed46869 3215 {
b73bfc1c 3216 case '\r':
7371fe0a 3217 if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
b73bfc1c
KH
3218 {
3219 EMIT_ONE_BYTE (c);
3220 break;
3221 }
3222 c = '\n';
3223 case '\n':
3224 if (coding->eol_type == CODING_EOL_CRLF)
3225 {
3226 EMIT_TWO_BYTES ('\r', c);
3227 break;
3228 }
3229 else if (coding->eol_type == CODING_EOL_CR)
3230 c = '\r';
3231 default:
3232 EMIT_ONE_BYTE (c);
3233 }
3234 }
3235 else
3236 {
3237 SPLIT_CHAR (c, charset, c1, c2);
3238 if (sjis_p)
3239 {
3240 if (charset == charset_jisx0208
3241 || charset == charset_jisx0208_1978)
3242 {
3243 ENCODE_SJIS (c1, c2, c1, c2);
3244 EMIT_TWO_BYTES (c1, c2);
3245 }
39658efc
KH
3246 else if (charset == charset_katakana_jisx0201)
3247 EMIT_ONE_BYTE (c1 | 0x80);
fc53a214
KH
3248 else if (charset == charset_latin_jisx0201)
3249 EMIT_ONE_BYTE (c1);
0eecad43
KH
3250 else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3251 {
3252 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3253 if (CHARSET_WIDTH (charset) > 1)
3254 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3255 }
b73bfc1c
KH
3256 else
3257 /* There's no way other than producing the internal
3258 codes as is. */
3259 EMIT_BYTES (src_base, src);
4ed46869 3260 }
4ed46869 3261 else
b73bfc1c
KH
3262 {
3263 if (charset == charset_big5_1 || charset == charset_big5_2)
3264 {
3265 ENCODE_BIG5 (charset, c1, c2, c1, c2);
3266 EMIT_TWO_BYTES (c1, c2);
3267 }
0eecad43
KH
3268 else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3269 {
3270 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3271 if (CHARSET_WIDTH (charset) > 1)
3272 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3273 }
b73bfc1c
KH
3274 else
3275 /* There's no way other than producing the internal
3276 codes as is. */
3277 EMIT_BYTES (src_base, src);
3278 }
4ed46869 3279 }
b73bfc1c 3280 coding->consumed_char++;
4ed46869
KH
3281 }
3282
b73bfc1c
KH
3283 label_end_of_loop:
3284 coding->consumed = src_base - source;
d46c5b12 3285 coding->produced = coding->produced_char = dst - destination;
4ed46869
KH
3286}
3287
3288\f
1397dc18
KH
3289/*** 5. CCL handlers ***/
3290
3291/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3292 Check if a text is encoded in a coding system of which
3293 encoder/decoder are written in CCL program. If it is, return
3294 CODING_CATEGORY_MASK_CCL, else return 0. */
3295
0a28aafb
KH
3296static int
3297detect_coding_ccl (src, src_end, multibytep)
1397dc18 3298 unsigned char *src, *src_end;
0a28aafb 3299 int multibytep;
1397dc18
KH
3300{
3301 unsigned char *valid;
b73bfc1c
KH
3302 int c;
3303 /* Dummy for ONE_MORE_BYTE. */
3304 struct coding_system dummy_coding;
3305 struct coding_system *coding = &dummy_coding;
1397dc18
KH
3306
3307 /* No coding system is assigned to coding-category-ccl. */
3308 if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3309 return 0;
3310
3311 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
b73bfc1c 3312 while (1)
1397dc18 3313 {
0a28aafb 3314 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
b73bfc1c
KH
3315 if (! valid[c])
3316 return 0;
1397dc18 3317 }
b73bfc1c 3318 label_end_of_loop:
1397dc18
KH
3319 return CODING_CATEGORY_MASK_CCL;
3320}
3321
3322\f
3323/*** 6. End-of-line handlers ***/
4ed46869 3324
b73bfc1c 3325/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 3326
b73bfc1c 3327static void
d46c5b12 3328decode_eol (coding, source, destination, src_bytes, dst_bytes)
4ed46869 3329 struct coding_system *coding;
5bdca8af
DN
3330 const unsigned char *source;
3331 unsigned char *destination;
4ed46869 3332 int src_bytes, dst_bytes;
4ed46869 3333{
5bdca8af 3334 const unsigned char *src = source;
4ed46869 3335 unsigned char *dst = destination;
5bdca8af 3336 const unsigned char *src_end = src + src_bytes;
b73bfc1c
KH
3337 unsigned char *dst_end = dst + dst_bytes;
3338 Lisp_Object translation_table;
3339 /* SRC_BASE remembers the start position in source in each loop.
3340 The loop will be exited when there's not enough source code
3341 (within macro ONE_MORE_BYTE), or when there's not enough
3342 destination area to produce a character (within macro
3343 EMIT_CHAR). */
5bdca8af 3344 const unsigned char *src_base;
b73bfc1c
KH
3345 int c;
3346
3347 translation_table = Qnil;
4ed46869
KH
3348 switch (coding->eol_type)
3349 {
3350 case CODING_EOL_CRLF:
b73bfc1c 3351 while (1)
d46c5b12 3352 {
b73bfc1c
KH
3353 src_base = src;
3354 ONE_MORE_BYTE (c);
3355 if (c == '\r')
fb88bf2d 3356 {
b73bfc1c
KH
3357 ONE_MORE_BYTE (c);
3358 if (c != '\n')
3359 {
b73bfc1c
KH
3360 src--;
3361 c = '\r';
3362 }
fb88bf2d 3363 }
b73bfc1c
KH
3364 else if (c == '\n'
3365 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
d46c5b12 3366 {
b73bfc1c
KH
3367 coding->result = CODING_FINISH_INCONSISTENT_EOL;
3368 goto label_end_of_loop;
d46c5b12 3369 }
b73bfc1c 3370 EMIT_CHAR (c);
d46c5b12 3371 }
b73bfc1c
KH
3372 break;
3373
3374 case CODING_EOL_CR:
3375 while (1)
d46c5b12 3376 {
b73bfc1c
KH
3377 src_base = src;
3378 ONE_MORE_BYTE (c);
3379 if (c == '\n')
3380 {
3381 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3382 {
3383 coding->result = CODING_FINISH_INCONSISTENT_EOL;
3384 goto label_end_of_loop;
3385 }
3386 }
3387 else if (c == '\r')
3388 c = '\n';
3389 EMIT_CHAR (c);
d46c5b12 3390 }
4ed46869
KH
3391 break;
3392
b73bfc1c
KH
3393 default: /* no need for EOL handling */
3394 while (1)
d46c5b12 3395 {
b73bfc1c
KH
3396 src_base = src;
3397 ONE_MORE_BYTE (c);
3398 EMIT_CHAR (c);
d46c5b12 3399 }
4ed46869
KH
3400 }
3401
b73bfc1c
KH
3402 label_end_of_loop:
3403 coding->consumed = coding->consumed_char = src_base - source;
3404 coding->produced = dst - destination;
3405 return;
4ed46869
KH
3406}
3407
3408/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
b73bfc1c 3409 format of end-of-line according to `coding->eol_type'. It also
8ca3766a 3410 convert multibyte form 8-bit characters to unibyte if
b73bfc1c
KH
3411 CODING->src_multibyte is nonzero. If `coding->mode &
3412 CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3413 also means end-of-line. */
4ed46869 3414
b73bfc1c 3415static void
d46c5b12 3416encode_eol (coding, source, destination, src_bytes, dst_bytes)
4ed46869 3417 struct coding_system *coding;
a4244313
KR
3418 const unsigned char *source;
3419 unsigned char *destination;
4ed46869 3420 int src_bytes, dst_bytes;
4ed46869 3421{
a4244313 3422 const unsigned char *src = source;
4ed46869 3423 unsigned char *dst = destination;
a4244313 3424 const unsigned char *src_end = src + src_bytes;
b73bfc1c
KH
3425 unsigned char *dst_end = dst + dst_bytes;
3426 Lisp_Object translation_table;
3427 /* SRC_BASE remembers the start position in source in each loop.
3428 The loop will be exited when there's not enough source text to
3429 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3430 there's not enough destination area to produce encoded codes
3431 (within macro EMIT_BYTES). */
a4244313
KR
3432 const unsigned char *src_base;
3433 unsigned char *tmp;
b73bfc1c
KH
3434 int c;
3435 int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3436
3437 translation_table = Qnil;
3438 if (coding->src_multibyte
3439 && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3440 {
3441 src_end--;
3442 src_bytes--;
3443 coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3444 }
fb88bf2d 3445
d46c5b12
KH
3446 if (coding->eol_type == CODING_EOL_CRLF)
3447 {
b73bfc1c 3448 while (src < src_end)
d46c5b12 3449 {
b73bfc1c 3450 src_base = src;
d46c5b12 3451 c = *src++;
b73bfc1c
KH
3452 if (c >= 0x20)
3453 EMIT_ONE_BYTE (c);
3454 else if (c == '\n' || (c == '\r' && selective_display))
3455 EMIT_TWO_BYTES ('\r', '\n');
d46c5b12 3456 else
b73bfc1c 3457 EMIT_ONE_BYTE (c);
d46c5b12 3458 }
ff2b1ea9 3459 src_base = src;
b73bfc1c 3460 label_end_of_loop:
005f0d35 3461 ;
d46c5b12
KH
3462 }
3463 else
4ed46869 3464 {
78a629d2 3465 if (!dst_bytes || src_bytes <= dst_bytes)
4ed46869 3466 {
b73bfc1c
KH
3467 safe_bcopy (src, dst, src_bytes);
3468 src_base = src_end;
3469 dst += src_bytes;
d46c5b12 3470 }
d46c5b12 3471 else
b73bfc1c
KH
3472 {
3473 if (coding->src_multibyte
3474 && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3475 dst_bytes--;
3476 safe_bcopy (src, dst, dst_bytes);
3477 src_base = src + dst_bytes;
3478 dst = destination + dst_bytes;
3479 coding->result = CODING_FINISH_INSUFFICIENT_DST;
3480 }
993824c9 3481 if (coding->eol_type == CODING_EOL_CR)
d46c5b12 3482 {
a4244313
KR
3483 for (tmp = destination; tmp < dst; tmp++)
3484 if (*tmp == '\n') *tmp = '\r';
d46c5b12 3485 }
b73bfc1c 3486 else if (selective_display)
d46c5b12 3487 {
a4244313
KR
3488 for (tmp = destination; tmp < dst; tmp++)
3489 if (*tmp == '\r') *tmp = '\n';
4ed46869 3490 }
4ed46869 3491 }
b73bfc1c
KH
3492 if (coding->src_multibyte)
3493 dst = destination + str_as_unibyte (destination, dst - destination);
4ed46869 3494
b73bfc1c
KH
3495 coding->consumed = src_base - source;
3496 coding->produced = dst - destination;
78a629d2 3497 coding->produced_char = coding->produced;
4ed46869
KH
3498}
3499
3500\f
1397dc18 3501/*** 7. C library functions ***/
4ed46869 3502
cfb43547 3503/* In Emacs Lisp, a coding system is represented by a Lisp symbol which
4ed46869 3504 has a property `coding-system'. The value of this property is a
cfb43547 3505 vector of length 5 (called the coding-vector). Among elements of
4ed46869
KH
3506 this vector, the first (element[0]) and the fifth (element[4])
3507 carry important information for decoding/encoding. Before
3508 decoding/encoding, this information should be set in fields of a
3509 structure of type `coding_system'.
3510
cfb43547 3511 The value of the property `coding-system' can be a symbol of another
4ed46869
KH
3512 subsidiary coding-system. In that case, Emacs gets coding-vector
3513 from that symbol.
3514
3515 `element[0]' contains information to be set in `coding->type'. The
3516 value and its meaning is as follows:
3517
0ef69138
KH
3518 0 -- coding_type_emacs_mule
3519 1 -- coding_type_sjis
3520 2 -- coding_type_iso2022
3521 3 -- coding_type_big5
3522 4 -- coding_type_ccl encoder/decoder written in CCL
3523 nil -- coding_type_no_conversion
3524 t -- coding_type_undecided (automatic conversion on decoding,
3525 no-conversion on encoding)
4ed46869
KH
3526
3527 `element[4]' contains information to be set in `coding->flags' and
3528 `coding->spec'. The meaning varies by `coding->type'.
3529
3530 If `coding->type' is `coding_type_iso2022', element[4] is a vector
3531 of length 32 (of which the first 13 sub-elements are used now).
3532 Meanings of these sub-elements are:
3533
3534 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3535 If the value is an integer of valid charset, the charset is
3536 assumed to be designated to graphic register N initially.
3537
3538 If the value is negative, it is the negated value of a charset which
3539 reserves graphic register N, which means that the charset is
3540 not designated initially but should be designated to graphic
3541 register N just before encoding a character in that charset.
3542
3543 If the value is nil, graphic register N is never used on
3544 encoding.
93dec019 3545
4ed46869
KH
3546 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3547 Each value takes t or nil. See the section ISO2022 of
3548 `coding.h' for more information.
3549
3550 If `coding->type' is `coding_type_big5', element[4] is t to denote
3551 BIG5-ETen or nil to denote BIG5-HKU.
3552
3553 If `coding->type' takes any other value, element[4] is ignored.
3554
cfb43547 3555 Emacs Lisp's coding systems also carry information about format of
4ed46869
KH
3556 end-of-line in a value of property `eol-type'. If the value is
3557 integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3558 means CODING_EOL_CR. If it is not integer, it should be a vector
3559 of subsidiary coding systems of which property `eol-type' has one
cfb43547 3560 of the above values.
4ed46869
KH
3561
3562*/
3563
3564/* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3565 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, set up
3566 CODING so that no conversion is necessary and return -1, else
3567 return 0. */
3568
3569int
e0e989f6
KH
3570setup_coding_system (coding_system, coding)
3571 Lisp_Object coding_system;
4ed46869
KH
3572 struct coding_system *coding;
3573{
d46c5b12 3574 Lisp_Object coding_spec, coding_type, eol_type, plist;
4608c386 3575 Lisp_Object val;
4ed46869 3576
c07c8e12
KH
3577 /* At first, zero clear all members. */
3578 bzero (coding, sizeof (struct coding_system));
3579
d46c5b12 3580 /* Initialize some fields required for all kinds of coding systems. */
774324d6 3581 coding->symbol = coding_system;
d46c5b12
KH
3582 coding->heading_ascii = -1;
3583 coding->post_read_conversion = coding->pre_write_conversion = Qnil;
ec6d2bb8
KH
3584 coding->composing = COMPOSITION_DISABLED;
3585 coding->cmp_data = NULL;
1f5dbf34
KH
3586
3587 if (NILP (coding_system))
3588 goto label_invalid_coding_system;
3589
4608c386 3590 coding_spec = Fget (coding_system, Qcoding_system);
1f5dbf34 3591
4608c386
KH
3592 if (!VECTORP (coding_spec)
3593 || XVECTOR (coding_spec)->size != 5
3594 || !CONSP (XVECTOR (coding_spec)->contents[3]))
4ed46869 3595 goto label_invalid_coding_system;
4608c386 3596
d46c5b12
KH
3597 eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3598 if (VECTORP (eol_type))
3599 {
3600 coding->eol_type = CODING_EOL_UNDECIDED;
3601 coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3602 }
3603 else if (XFASTINT (eol_type) == 1)
3604 {
3605 coding->eol_type = CODING_EOL_CRLF;
3606 coding->common_flags
3607 = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3608 }
3609 else if (XFASTINT (eol_type) == 2)
3610 {
3611 coding->eol_type = CODING_EOL_CR;
3612 coding->common_flags
3613 = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3614 }
3615 else
3616 coding->eol_type = CODING_EOL_LF;
3617
3618 coding_type = XVECTOR (coding_spec)->contents[0];
3619 /* Try short cut. */
3620 if (SYMBOLP (coding_type))
3621 {
3622 if (EQ (coding_type, Qt))
3623 {
3624 coding->type = coding_type_undecided;
3625 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3626 }
3627 else
3628 coding->type = coding_type_no_conversion;
9b96232f
KH
3629 /* Initialize this member. Any thing other than
3630 CODING_CATEGORY_IDX_UTF_16_BE and
3631 CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3632 special treatment in detect_eol. */
3633 coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3634
d46c5b12
KH
3635 return 0;
3636 }
3637
d46c5b12
KH
3638 /* Get values of coding system properties:
3639 `post-read-conversion', `pre-write-conversion',
f967223b 3640 `translation-table-for-decode', `translation-table-for-encode'. */
4608c386 3641 plist = XVECTOR (coding_spec)->contents[3];
b843d1ae 3642 /* Pre & post conversion functions should be disabled if
8ca3766a 3643 inhibit_eol_conversion is nonzero. This is the case that a code
b843d1ae
KH
3644 conversion function is called while those functions are running. */
3645 if (! inhibit_pre_post_conversion)
3646 {
3647 coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3648 coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3649 }
f967223b 3650 val = Fplist_get (plist, Qtranslation_table_for_decode);
4608c386 3651 if (SYMBOLP (val))
f967223b
KH
3652 val = Fget (val, Qtranslation_table_for_decode);
3653 coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3654 val = Fplist_get (plist, Qtranslation_table_for_encode);
4608c386 3655 if (SYMBOLP (val))
f967223b
KH
3656 val = Fget (val, Qtranslation_table_for_encode);
3657 coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
d46c5b12
KH
3658 val = Fplist_get (plist, Qcoding_category);
3659 if (!NILP (val))
3660 {
3661 val = Fget (val, Qcoding_category_index);
3662 if (INTEGERP (val))
3663 coding->category_idx = XINT (val);
3664 else
3665 goto label_invalid_coding_system;
3666 }
3667 else
3668 goto label_invalid_coding_system;
93dec019 3669
ec6d2bb8
KH
3670 /* If the coding system has non-nil `composition' property, enable
3671 composition handling. */
3672 val = Fplist_get (plist, Qcomposition);
3673 if (!NILP (val))
3674 coding->composing = COMPOSITION_NO;
3675
d46c5b12 3676 switch (XFASTINT (coding_type))
4ed46869
KH
3677 {
3678 case 0:
0ef69138 3679 coding->type = coding_type_emacs_mule;
aa72b389
KH
3680 coding->common_flags
3681 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
c952af22
KH
3682 if (!NILP (coding->post_read_conversion))
3683 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3684 if (!NILP (coding->pre_write_conversion))
3685 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4ed46869
KH
3686 break;
3687
3688 case 1:
3689 coding->type = coding_type_sjis;
c952af22
KH
3690 coding->common_flags
3691 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869
KH
3692 break;
3693
3694 case 2:
3695 coding->type = coding_type_iso2022;
c952af22
KH
3696 coding->common_flags
3697 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3698 {
70c22245 3699 Lisp_Object val, temp;
4ed46869 3700 Lisp_Object *flags;
d46c5b12 3701 int i, charset, reg_bits = 0;
4ed46869 3702
4608c386 3703 val = XVECTOR (coding_spec)->contents[4];
f44d27ce 3704
4ed46869
KH
3705 if (!VECTORP (val) || XVECTOR (val)->size != 32)
3706 goto label_invalid_coding_system;
3707
3708 flags = XVECTOR (val)->contents;
3709 coding->flags
3710 = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3711 | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3712 | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3713 | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3714 | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3715 | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3716 | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3717 | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
e0e989f6
KH
3718 | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3719 | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
c4825358
KH
3720 | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3721 | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3f003981 3722 | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
c4825358 3723 );
4ed46869
KH
3724
3725 /* Invoke graphic register 0 to plane 0. */
3726 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3727 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
3728 CODING_SPEC_ISO_INVOCATION (coding, 1)
3729 = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3730 /* Not single shifting at first. */
6e85d753 3731 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
e0e989f6 3732 /* Beginning of buffer should also be regarded as bol. */
6e85d753 3733 CODING_SPEC_ISO_BOL (coding) = 1;
4ed46869 3734
70c22245
KH
3735 for (charset = 0; charset <= MAX_CHARSET; charset++)
3736 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3737 val = Vcharset_revision_alist;
3738 while (CONSP (val))
3739 {
03699b14 3740 charset = get_charset_id (Fcar_safe (XCAR (val)));
70c22245 3741 if (charset >= 0
03699b14 3742 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
70c22245
KH
3743 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3744 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
03699b14 3745 val = XCDR (val);
70c22245
KH
3746 }
3747
4ed46869
KH
3748 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3749 FLAGS[REG] can be one of below:
3750 integer CHARSET: CHARSET occupies register I,
3751 t: designate nothing to REG initially, but can be used
3752 by any charsets,
3753 list of integer, nil, or t: designate the first
3754 element (if integer) to REG initially, the remaining
3755 elements (if integer) is designated to REG on request,
d46c5b12 3756 if an element is t, REG can be used by any charsets,
4ed46869 3757 nil: REG is never used. */
467e7675 3758 for (charset = 0; charset <= MAX_CHARSET; charset++)
1ba9e4ab
KH
3759 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3760 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
4ed46869
KH
3761 for (i = 0; i < 4; i++)
3762 {
87323294
PJ
3763 if ((INTEGERP (flags[i])
3764 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
e0e989f6 3765 || (charset = get_charset_id (flags[i])) >= 0)
4ed46869
KH
3766 {
3767 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3768 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3769 }
3770 else if (EQ (flags[i], Qt))
3771 {
3772 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
d46c5b12
KH
3773 reg_bits |= 1 << i;
3774 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
4ed46869
KH
3775 }
3776 else if (CONSP (flags[i]))
3777 {
84d60297
RS
3778 Lisp_Object tail;
3779 tail = flags[i];
4ed46869 3780
d46c5b12 3781 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
87323294
PJ
3782 if ((INTEGERP (XCAR (tail))
3783 && (charset = XINT (XCAR (tail)),
3784 CHARSET_VALID_P (charset)))
03699b14 3785 || (charset = get_charset_id (XCAR (tail))) >= 0)
4ed46869
KH
3786 {
3787 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3788 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3789 }
3790 else
3791 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
03699b14 3792 tail = XCDR (tail);
4ed46869
KH
3793 while (CONSP (tail))
3794 {
87323294
PJ
3795 if ((INTEGERP (XCAR (tail))
3796 && (charset = XINT (XCAR (tail)),
3797 CHARSET_VALID_P (charset)))
03699b14 3798 || (charset = get_charset_id (XCAR (tail))) >= 0)
70c22245
KH
3799 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3800 = i;
03699b14 3801 else if (EQ (XCAR (tail), Qt))
d46c5b12 3802 reg_bits |= 1 << i;
03699b14 3803 tail = XCDR (tail);
4ed46869
KH
3804 }
3805 }
3806 else
3807 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
93dec019 3808
4ed46869
KH
3809 CODING_SPEC_ISO_DESIGNATION (coding, i)
3810 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3811 }
3812
d46c5b12 3813 if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
4ed46869
KH
3814 {
3815 /* REG 1 can be used only by locking shift in 7-bit env. */
3816 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
d46c5b12 3817 reg_bits &= ~2;
4ed46869
KH
3818 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3819 /* Without any shifting, only REG 0 and 1 can be used. */
d46c5b12 3820 reg_bits &= 3;
4ed46869
KH
3821 }
3822
d46c5b12
KH
3823 if (reg_bits)
3824 for (charset = 0; charset <= MAX_CHARSET; charset++)
6e85d753 3825 {
928a85c1 3826 if (CHARSET_DEFINED_P (charset)
96148065
KH
3827 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3828 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
d46c5b12
KH
3829 {
3830 /* There exist some default graphic registers to be
96148065 3831 used by CHARSET. */
d46c5b12
KH
3832
3833 /* We had better avoid designating a charset of
3834 CHARS96 to REG 0 as far as possible. */
3835 if (CHARSET_CHARS (charset) == 96)
3836 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3837 = (reg_bits & 2
3838 ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3839 else
3840 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3841 = (reg_bits & 1
3842 ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3843 }
6e85d753 3844 }
4ed46869 3845 }
c952af22 3846 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
d46c5b12 3847 coding->spec.iso2022.last_invalid_designation_register = -1;
4ed46869
KH
3848 break;
3849
3850 case 3:
3851 coding->type = coding_type_big5;
c952af22
KH
3852 coding->common_flags
3853 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3854 coding->flags
4608c386 3855 = (NILP (XVECTOR (coding_spec)->contents[4])
4ed46869
KH
3856 ? CODING_FLAG_BIG5_HKU
3857 : CODING_FLAG_BIG5_ETEN);
3858 break;
3859
3860 case 4:
3861 coding->type = coding_type_ccl;
c952af22
KH
3862 coding->common_flags
3863 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3864 {
84d60297 3865 val = XVECTOR (coding_spec)->contents[4];
ef4ced28
KH
3866 if (! CONSP (val)
3867 || setup_ccl_program (&(coding->spec.ccl.decoder),
03699b14 3868 XCAR (val)) < 0
ef4ced28 3869 || setup_ccl_program (&(coding->spec.ccl.encoder),
03699b14 3870 XCDR (val)) < 0)
4ed46869 3871 goto label_invalid_coding_system;
1397dc18
KH
3872
3873 bzero (coding->spec.ccl.valid_codes, 256);
3874 val = Fplist_get (plist, Qvalid_codes);
3875 if (CONSP (val))
3876 {
3877 Lisp_Object this;
3878
03699b14 3879 for (; CONSP (val); val = XCDR (val))
1397dc18 3880 {
03699b14 3881 this = XCAR (val);
1397dc18
KH
3882 if (INTEGERP (this)
3883 && XINT (this) >= 0 && XINT (this) < 256)
3884 coding->spec.ccl.valid_codes[XINT (this)] = 1;
3885 else if (CONSP (this)
03699b14
KR
3886 && INTEGERP (XCAR (this))
3887 && INTEGERP (XCDR (this)))
1397dc18 3888 {
03699b14
KR
3889 int start = XINT (XCAR (this));
3890 int end = XINT (XCDR (this));
1397dc18
KH
3891
3892 if (start >= 0 && start <= end && end < 256)
e133c8fa 3893 while (start <= end)
1397dc18
KH
3894 coding->spec.ccl.valid_codes[start++] = 1;
3895 }
3896 }
3897 }
4ed46869 3898 }
c952af22 3899 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
aaaf0b1e 3900 coding->spec.ccl.cr_carryover = 0;
1c3478b0 3901 coding->spec.ccl.eight_bit_carryover[0] = 0;
4ed46869
KH
3902 break;
3903
27901516
KH
3904 case 5:
3905 coding->type = coding_type_raw_text;
3906 break;
3907
4ed46869 3908 default:
d46c5b12 3909 goto label_invalid_coding_system;
4ed46869
KH
3910 }
3911 return 0;
3912
3913 label_invalid_coding_system:
3914 coding->type = coding_type_no_conversion;
d46c5b12 3915 coding->category_idx = CODING_CATEGORY_IDX_BINARY;
c952af22 3916 coding->common_flags = 0;
dec137e5 3917 coding->eol_type = CODING_EOL_LF;
d46c5b12 3918 coding->pre_write_conversion = coding->post_read_conversion = Qnil;
4ed46869
KH
3919 return -1;
3920}
3921
ec6d2bb8
KH
3922/* Free memory blocks allocated for storing composition information. */
3923
3924void
3925coding_free_composition_data (coding)
3926 struct coding_system *coding;
3927{
3928 struct composition_data *cmp_data = coding->cmp_data, *next;
3929
3930 if (!cmp_data)
3931 return;
3932 /* Memory blocks are chained. At first, rewind to the first, then,
3933 free blocks one by one. */
3934 while (cmp_data->prev)
3935 cmp_data = cmp_data->prev;
3936 while (cmp_data)
3937 {
3938 next = cmp_data->next;
3939 xfree (cmp_data);
3940 cmp_data = next;
3941 }
3942 coding->cmp_data = NULL;
3943}
3944
3945/* Set `char_offset' member of all memory blocks pointed by
3946 coding->cmp_data to POS. */
3947
3948void
3949coding_adjust_composition_offset (coding, pos)
3950 struct coding_system *coding;
3951 int pos;
3952{
3953 struct composition_data *cmp_data;
3954
3955 for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3956 cmp_data->char_offset = pos;
3957}
3958
54f78171
KH
3959/* Setup raw-text or one of its subsidiaries in the structure
3960 coding_system CODING according to the already setup value eol_type
3961 in CODING. CODING should be setup for some coding system in
3962 advance. */
3963
3964void
3965setup_raw_text_coding_system (coding)
3966 struct coding_system *coding;
3967{
3968 if (coding->type != coding_type_raw_text)
3969 {
3970 coding->symbol = Qraw_text;
3971 coding->type = coding_type_raw_text;
3972 if (coding->eol_type != CODING_EOL_UNDECIDED)
3973 {
84d60297
RS
3974 Lisp_Object subsidiaries;
3975 subsidiaries = Fget (Qraw_text, Qeol_type);
54f78171
KH
3976
3977 if (VECTORP (subsidiaries)
3978 && XVECTOR (subsidiaries)->size == 3)
3979 coding->symbol
3980 = XVECTOR (subsidiaries)->contents[coding->eol_type];
3981 }
716e0b0a 3982 setup_coding_system (coding->symbol, coding);
54f78171
KH
3983 }
3984 return;
3985}
3986
4ed46869
KH
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, BIG5, UTF-8, and
   UTF-16.  But it's impossible to distinguish some coding systems
   accurately because they use the same range of codes.  So, at first,
   coding systems are grouped into the following categories:
3992
0ef69138 3993 o coding-category-emacs-mule
4ed46869
KH
3994
3995 The category for a coding system which has the same code range
3996 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 3997 symbol) `emacs-mule' by default.
4ed46869
KH
3998
3999 o coding-category-sjis
4000
4001 The category for a coding system which has the same code range
4002 as SJIS. Assigned the coding-system (Lisp
7717c392 4003 symbol) `japanese-shift-jis' by default.
4ed46869
KH
4004
4005 o coding-category-iso-7
4006
4007 The category for a coding system which has the same code range
7717c392 4008 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
4009 shift and single shift functions. This can encode/decode all
4010 charsets. Assigned the coding-system (Lisp symbol)
4011 `iso-2022-7bit' by default.
4012
4013 o coding-category-iso-7-tight
4014
4015 Same as coding-category-iso-7 except that this can
4016 encode/decode only the specified charsets.
4ed46869
KH
4017
4018 o coding-category-iso-8-1
4019
4020 The category for a coding system which has the same code range
4021 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4022 for DIMENSION1 charset. This doesn't use any locking shift
4023 and single shift functions. Assigned the coding-system (Lisp
4024 symbol) `iso-latin-1' by default.
4ed46869
KH
4025
4026 o coding-category-iso-8-2
4027
4028 The category for a coding system which has the same code range
4029 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4030 for DIMENSION2 charset. This doesn't use any locking shift
4031 and single shift functions. Assigned the coding-system (Lisp
4032 symbol) `japanese-iso-8bit' by default.
4ed46869 4033
7717c392 4034 o coding-category-iso-7-else
4ed46869
KH
4035
4036 The category for a coding system which has the same code range
8ca3766a 4037 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
4038 single shift functions. Assigned the coding-system (Lisp
4039 symbol) `iso-2022-7bit-lock' by default.
4040
4041 o coding-category-iso-8-else
4042
4043 The category for a coding system which has the same code range
8ca3766a 4044 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
4045 single shift functions. Assigned the coding-system (Lisp
4046 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
4047
4048 o coding-category-big5
4049
4050 The category for a coding system which has the same code range
4051 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 4052 `cn-big5' by default.
4ed46869 4053
fa42c37f
KH
4054 o coding-category-utf-8
4055
4056 The category for a coding system which has the same code range
38b92c42 4057 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
4058 symbol) `utf-8' by default.
4059
4060 o coding-category-utf-16-be
4061
4062 The category for a coding system in which a text has an
4063 Unicode signature (cf. Unicode Standard) in the order of BIG
4064 endian at the head. Assigned the coding-system (Lisp symbol)
4065 `utf-16-be' by default.
4066
4067 o coding-category-utf-16-le
4068
4069 The category for a coding system in which a text has an
4070 Unicode signature (cf. Unicode Standard) in the order of
4071 LITTLE endian at the head. Assigned the coding-system (Lisp
4072 symbol) `utf-16-le' by default.
4073
1397dc18
KH
4074 o coding-category-ccl
4075
4076 The category for a coding system of which encoder/decoder is
4077 written in CCL programs. The default value is nil, i.e., no
4078 coding system is assigned.
4079
4ed46869
KH
4080 o coding-category-binary
4081
4082 The category for a coding system not categorized in any of the
4083 above. Assigned the coding-system (Lisp symbol)
e0e989f6 4084 `no-conversion' by default.
4ed46869
KH
4085
4086 Each of them is a Lisp symbol and the value is an actual
cfb43547 4087 `coding-system' (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
4088 What Emacs does actually is to detect a category of coding system.
4089 Then, it uses a `coding-system' assigned to it. If Emacs can't
cfb43547 4090 decide a single possible category, it selects a category of the
4ed46869
KH
4091 highest priority. Priorities of categories are also specified by a
4092 user in a Lisp variable `coding-category-list'.
4093
4094*/
4095
66cfb530
KH
4096static
4097int ascii_skip_code[256];
4098
d46c5b12 4099/* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4ed46869
KH
4100 If it detects possible coding systems, return an integer in which
4101 appropriate flag bits are set. Flag bits are defined by macros
fa42c37f
KH
4102 CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
4103 it should point the table `coding_priorities'. In that case, only
4104 the flag bit for a coding system of the highest priority is set in
0a28aafb
KH
4105 the returned value. If MULTIBYTEP is nonzero, 8-bit codes of the
4106 range 0x80..0x9F are in multibyte form.
4ed46869 4107
d46c5b12
KH
4108 How many ASCII characters are at the head is returned as *SKIP. */
4109
4110static int
0a28aafb 4111detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
d46c5b12
KH
4112 unsigned char *source;
4113 int src_bytes, *priorities, *skip;
0a28aafb 4114 int multibytep;
4ed46869
KH
4115{
4116 register unsigned char c;
d46c5b12 4117 unsigned char *src = source, *src_end = source + src_bytes;
fa42c37f 4118 unsigned int mask, utf16_examined_p, iso2022_examined_p;
da55a2b7 4119 int i;
4ed46869
KH
4120
4121 /* At first, skip all ASCII characters and control characters except
4122 for three ISO2022 specific control characters. */
66cfb530
KH
4123 ascii_skip_code[ISO_CODE_SO] = 0;
4124 ascii_skip_code[ISO_CODE_SI] = 0;
4125 ascii_skip_code[ISO_CODE_ESC] = 0;
4126
bcf26d6a 4127 label_loop_detect_coding:
66cfb530 4128 while (src < src_end && ascii_skip_code[*src]) src++;
d46c5b12 4129 *skip = src - source;
4ed46869
KH
4130
4131 if (src >= src_end)
4132 /* We found nothing other than ASCII. There's nothing to do. */
d46c5b12 4133 return 0;
4ed46869 4134
8a8147d6 4135 c = *src;
4ed46869
KH
4136 /* The text seems to be encoded in some multilingual coding system.
4137 Now, try to find in which coding system the text is encoded. */
4138 if (c < 0x80)
bcf26d6a
KH
4139 {
4140 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4141 /* C is an ISO2022 specific control code of C0. */
0a28aafb 4142 mask = detect_coding_iso2022 (src, src_end, multibytep);
1b2af4b0 4143 if (mask == 0)
d46c5b12
KH
4144 {
4145 /* No valid ISO2022 code follows C. Try again. */
4146 src++;
66cfb530
KH
4147 if (c == ISO_CODE_ESC)
4148 ascii_skip_code[ISO_CODE_ESC] = 1;
4149 else
4150 ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
d46c5b12
KH
4151 goto label_loop_detect_coding;
4152 }
4153 if (priorities)
fa42c37f
KH
4154 {
4155 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4156 {
4157 if (mask & priorities[i])
4158 return priorities[i];
4159 }
4160 return CODING_CATEGORY_MASK_RAW_TEXT;
4161 }
bcf26d6a 4162 }
d46c5b12 4163 else
c4825358 4164 {
d46c5b12 4165 int try;
4ed46869 4166
0a28aafb 4167 if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
67091e59 4168 c = src[1] - 0x20;
0a28aafb 4169
d46c5b12
KH
4170 if (c < 0xA0)
4171 {
4172 /* C is the first byte of SJIS character code,
fa42c37f
KH
4173 or a leading-code of Emacs' internal format (emacs-mule),
4174 or the first byte of UTF-16. */
4175 try = (CODING_CATEGORY_MASK_SJIS
4176 | CODING_CATEGORY_MASK_EMACS_MULE
4177 | CODING_CATEGORY_MASK_UTF_16_BE
4178 | CODING_CATEGORY_MASK_UTF_16_LE);
d46c5b12
KH
4179
4180 /* Or, if C is a special latin extra code,
93dec019 4181 or is an ISO2022 specific control code of C1 (SS2 or SS3),
d46c5b12
KH
4182 or is an ISO2022 control-sequence-introducer (CSI),
4183 we should also consider the possibility of ISO2022 codings. */
4184 if ((VECTORP (Vlatin_extra_code_table)
4185 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4186 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4187 || (c == ISO_CODE_CSI
4188 && (src < src_end
4189 && (*src == ']'
4190 || ((*src == '0' || *src == '1' || *src == '2')
4191 && src + 1 < src_end
4192 && src[1] == ']')))))
4193 try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4194 | CODING_CATEGORY_MASK_ISO_8BIT);
4195 }
c4825358 4196 else
d46c5b12
KH
4197 /* C is a character of ISO2022 in graphic plane right,
4198 or a SJIS's 1-byte character code (i.e. JISX0201),
fa42c37f
KH
4199 or the first byte of BIG5's 2-byte code,
4200 or the first byte of UTF-8/16. */
d46c5b12
KH
4201 try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4202 | CODING_CATEGORY_MASK_ISO_8BIT
4203 | CODING_CATEGORY_MASK_SJIS
fa42c37f
KH
4204 | CODING_CATEGORY_MASK_BIG5
4205 | CODING_CATEGORY_MASK_UTF_8
4206 | CODING_CATEGORY_MASK_UTF_16_BE
4207 | CODING_CATEGORY_MASK_UTF_16_LE);
d46c5b12 4208
1397dc18
KH
4209 /* Or, we may have to consider the possibility of CCL. */
4210 if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4211 && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4212 ->spec.ccl.valid_codes)[c])
4213 try |= CODING_CATEGORY_MASK_CCL;
4214
d46c5b12 4215 mask = 0;
fa42c37f 4216 utf16_examined_p = iso2022_examined_p = 0;
d46c5b12
KH
4217 if (priorities)
4218 {
4219 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4220 {
fa42c37f
KH
4221 if (!iso2022_examined_p
4222 && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4223 {
0192762c 4224 mask |= detect_coding_iso2022 (src, src_end, multibytep);
fa42c37f
KH
4225 iso2022_examined_p = 1;
4226 }
5ab13dd0 4227 else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
0a28aafb 4228 mask |= detect_coding_sjis (src, src_end, multibytep);
fa42c37f 4229 else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
0a28aafb 4230 mask |= detect_coding_utf_8 (src, src_end, multibytep);
fa42c37f
KH
4231 else if (!utf16_examined_p
4232 && (priorities[i] & try &
4233 CODING_CATEGORY_MASK_UTF_16_BE_LE))
4234 {
0a28aafb 4235 mask |= detect_coding_utf_16 (src, src_end, multibytep);
fa42c37f
KH
4236 utf16_examined_p = 1;
4237 }
5ab13dd0 4238 else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
0a28aafb 4239 mask |= detect_coding_big5 (src, src_end, multibytep);
5ab13dd0 4240 else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
0a28aafb 4241 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
89fa8b36 4242 else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
0a28aafb 4243 mask |= detect_coding_ccl (src, src_end, multibytep);
5ab13dd0 4244 else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
fa42c37f 4245 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
5ab13dd0 4246 else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
fa42c37f
KH
4247 mask |= CODING_CATEGORY_MASK_BINARY;
4248 if (mask & priorities[i])
4249 return priorities[i];
d46c5b12
KH
4250 }
4251 return CODING_CATEGORY_MASK_RAW_TEXT;
4252 }
4253 if (try & CODING_CATEGORY_MASK_ISO)
0a28aafb 4254 mask |= detect_coding_iso2022 (src, src_end, multibytep);
d46c5b12 4255 if (try & CODING_CATEGORY_MASK_SJIS)
0a28aafb 4256 mask |= detect_coding_sjis (src, src_end, multibytep);
d46c5b12 4257 if (try & CODING_CATEGORY_MASK_BIG5)
0a28aafb 4258 mask |= detect_coding_big5 (src, src_end, multibytep);
fa42c37f 4259 if (try & CODING_CATEGORY_MASK_UTF_8)
0a28aafb 4260 mask |= detect_coding_utf_8 (src, src_end, multibytep);
fa42c37f 4261 if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
0a28aafb 4262 mask |= detect_coding_utf_16 (src, src_end, multibytep);
d46c5b12 4263 if (try & CODING_CATEGORY_MASK_EMACS_MULE)
0a28aafb 4264 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
1397dc18 4265 if (try & CODING_CATEGORY_MASK_CCL)
0a28aafb 4266 mask |= detect_coding_ccl (src, src_end, multibytep);
c4825358 4267 }
5ab13dd0 4268 return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4ed46869
KH
4269}
4270
4271/* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4272 The information of the detected coding system is set in CODING. */
4273
4274void
4275detect_coding (coding, src, src_bytes)
4276 struct coding_system *coding;
a4244313 4277 const unsigned char *src;
4ed46869
KH
4278 int src_bytes;
4279{
d46c5b12 4280 unsigned int idx;
da55a2b7 4281 int skip, mask;
84d60297 4282 Lisp_Object val;
4ed46869 4283
84d60297 4284 val = Vcoding_category_list;
64c1e55f
KH
4285 mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4286 coding->src_multibyte);
d46c5b12 4287 coding->heading_ascii = skip;
4ed46869 4288
d46c5b12
KH
4289 if (!mask) return;
4290
4291 /* We found a single coding system of the highest priority in MASK. */
4292 idx = 0;
4293 while (mask && ! (mask & 1)) mask >>= 1, idx++;
4294 if (! mask)
4295 idx = CODING_CATEGORY_IDX_RAW_TEXT;
4ed46869 4296
f5c1dd0d 4297 val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
d46c5b12
KH
4298
4299 if (coding->eol_type != CODING_EOL_UNDECIDED)
27901516 4300 {
84d60297 4301 Lisp_Object tmp;
d46c5b12 4302
84d60297 4303 tmp = Fget (val, Qeol_type);
d46c5b12
KH
4304 if (VECTORP (tmp))
4305 val = XVECTOR (tmp)->contents[coding->eol_type];
4ed46869 4306 }
b73bfc1c
KH
4307
4308 /* Setup this new coding system while preserving some slots. */
4309 {
4310 int src_multibyte = coding->src_multibyte;
4311 int dst_multibyte = coding->dst_multibyte;
4312
4313 setup_coding_system (val, coding);
4314 coding->src_multibyte = src_multibyte;
4315 coding->dst_multibyte = dst_multibyte;
4316 coding->heading_ascii = skip;
4317 }
4ed46869
KH
4318}
4319
d46c5b12
KH
4320/* Detect how end-of-line of a text of length SRC_BYTES pointed by
4321 SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4322 CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4323
4324 How many non-eol characters are at the head is returned as *SKIP. */
4ed46869 4325
bc4bc72a
RS
4326#define MAX_EOL_CHECK_COUNT 3
4327
d46c5b12
KH
4328static int
4329detect_eol_type (source, src_bytes, skip)
4330 unsigned char *source;
4331 int src_bytes, *skip;
4ed46869 4332{
d46c5b12 4333 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 4334 unsigned char c;
bc4bc72a
RS
4335 int total = 0; /* How many end-of-lines are found so far. */
4336 int eol_type = CODING_EOL_UNDECIDED;
4337 int this_eol_type;
4ed46869 4338
d46c5b12
KH
4339 *skip = 0;
4340
bc4bc72a 4341 while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4ed46869
KH
4342 {
4343 c = *src++;
bc4bc72a 4344 if (c == '\n' || c == '\r')
4ed46869 4345 {
d46c5b12
KH
4346 if (*skip == 0)
4347 *skip = src - 1 - source;
bc4bc72a
RS
4348 total++;
4349 if (c == '\n')
4350 this_eol_type = CODING_EOL_LF;
4351 else if (src >= src_end || *src != '\n')
4352 this_eol_type = CODING_EOL_CR;
4ed46869 4353 else
bc4bc72a
RS
4354 this_eol_type = CODING_EOL_CRLF, src++;
4355
4356 if (eol_type == CODING_EOL_UNDECIDED)
4357 /* This is the first end-of-line. */
4358 eol_type = this_eol_type;
4359 else if (eol_type != this_eol_type)
d46c5b12
KH
4360 {
4361 /* The found type is different from what found before. */
4362 eol_type = CODING_EOL_INCONSISTENT;
4363 break;
4364 }
4ed46869
KH
4365 }
4366 }
bc4bc72a 4367
d46c5b12
KH
4368 if (*skip == 0)
4369 *skip = src_end - source;
85a02ca4 4370 return eol_type;
4ed46869
KH
4371}
4372
fa42c37f
KH
4373/* Like detect_eol_type, but detect EOL type in 2-octet
4374 big-endian/little-endian format for coding systems utf-16-be and
4375 utf-16-le. */
4376
4377static int
4378detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4379 unsigned char *source;
cfb43547 4380 int src_bytes, *skip, big_endian_p;
fa42c37f
KH
4381{
4382 unsigned char *src = source, *src_end = src + src_bytes;
4383 unsigned int c1, c2;
4384 int total = 0; /* How many end-of-lines are found so far. */
4385 int eol_type = CODING_EOL_UNDECIDED;
4386 int this_eol_type;
4387 int msb, lsb;
4388
4389 if (big_endian_p)
4390 msb = 0, lsb = 1;
4391 else
4392 msb = 1, lsb = 0;
4393
4394 *skip = 0;
4395
4396 while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4397 {
4398 c1 = (src[msb] << 8) | (src[lsb]);
4399 src += 2;
4400
4401 if (c1 == '\n' || c1 == '\r')
4402 {
4403 if (*skip == 0)
4404 *skip = src - 2 - source;
4405 total++;
4406 if (c1 == '\n')
4407 {
4408 this_eol_type = CODING_EOL_LF;
4409 }
4410 else
4411 {
4412 if ((src + 1) >= src_end)
4413 {
4414 this_eol_type = CODING_EOL_CR;
4415 }
4416 else
4417 {
4418 c2 = (src[msb] << 8) | (src[lsb]);
4419 if (c2 == '\n')
4420 this_eol_type = CODING_EOL_CRLF, src += 2;
4421 else
4422 this_eol_type = CODING_EOL_CR;
4423 }
4424 }
4425
4426 if (eol_type == CODING_EOL_UNDECIDED)
4427 /* This is the first end-of-line. */
4428 eol_type = this_eol_type;
4429 else if (eol_type != this_eol_type)
4430 {
4431 /* The found type is different from what found before. */
4432 eol_type = CODING_EOL_INCONSISTENT;
4433 break;
4434 }
4435 }
4436 }
4437
4438 if (*skip == 0)
4439 *skip = src_end - source;
4440 return eol_type;
4441}
4442
4ed46869
KH
4443/* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4444 is encoded. If it detects an appropriate format of end-of-line, it
4445 sets the information in *CODING. */
4446
4447void
4448detect_eol (coding, src, src_bytes)
4449 struct coding_system *coding;
a4244313 4450 const unsigned char *src;
4ed46869
KH
4451 int src_bytes;
4452{
4608c386 4453 Lisp_Object val;
d46c5b12 4454 int skip;
fa42c37f
KH
4455 int eol_type;
4456
4457 switch (coding->category_idx)
4458 {
4459 case CODING_CATEGORY_IDX_UTF_16_BE:
4460 eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4461 break;
4462 case CODING_CATEGORY_IDX_UTF_16_LE:
4463 eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4464 break;
4465 default:
4466 eol_type = detect_eol_type (src, src_bytes, &skip);
4467 break;
4468 }
d46c5b12
KH
4469
4470 if (coding->heading_ascii > skip)
4471 coding->heading_ascii = skip;
4472 else
4473 skip = coding->heading_ascii;
4ed46869 4474
0ef69138 4475 if (eol_type == CODING_EOL_UNDECIDED)
4ed46869 4476 return;
27901516
KH
4477 if (eol_type == CODING_EOL_INCONSISTENT)
4478 {
4479#if 0
4480 /* This code is suppressed until we find a better way to
992f23f2 4481 distinguish raw text file and binary file. */
27901516
KH
4482
4483 /* If we have already detected that the coding is raw-text, the
4484 coding should actually be no-conversion. */
4485 if (coding->type == coding_type_raw_text)
4486 {
4487 setup_coding_system (Qno_conversion, coding);
4488 return;
4489 }
4490 /* Else, let's decode only text code anyway. */
4491#endif /* 0 */
1b2af4b0 4492 eol_type = CODING_EOL_LF;
27901516
KH
4493 }
4494
4608c386 4495 val = Fget (coding->symbol, Qeol_type);
4ed46869 4496 if (VECTORP (val) && XVECTOR (val)->size == 3)
d46c5b12 4497 {
b73bfc1c
KH
4498 int src_multibyte = coding->src_multibyte;
4499 int dst_multibyte = coding->dst_multibyte;
1cd6b64c 4500 struct composition_data *cmp_data = coding->cmp_data;
b73bfc1c 4501
d46c5b12 4502 setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
b73bfc1c
KH
4503 coding->src_multibyte = src_multibyte;
4504 coding->dst_multibyte = dst_multibyte;
d46c5b12 4505 coding->heading_ascii = skip;
1cd6b64c 4506 coding->cmp_data = cmp_data;
d46c5b12
KH
4507 }
4508}
4509
/* Slack, in bytes, added to every conversion buffer size estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoding with CODING (a pointer to struct
   coding_system) may expand the source: 3 for ISO 2022, the CCL
   decoder's own buf_magnification for CCL, 2 otherwise.  CODING is
   parenthesized so that any pointer expression may be passed; it is
   evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                             \
  ((coding)->type == coding_type_iso2022                        \
   ? 3                                                          \
   : ((coding)->type == coding_type_ccl                         \
      ? (coding)->spec.ccl.decoder.buf_magnification            \
      : 2))
d46c5b12
KH
4518
4519/* Return maximum size (bytes) of a buffer enough for decoding
4520 SRC_BYTES of text encoded in CODING. */
4521
4522int
4523decoding_buffer_size (coding, src_bytes)
4524 struct coding_system *coding;
4525 int src_bytes;
4526{
4527 return (src_bytes * DECODING_BUFFER_MAG (coding)
4528 + CONVERSION_BUFFER_EXTRA_ROOM);
4529}
4530
4531/* Return maximum size (bytes) of a buffer enough for encoding
4532 SRC_BYTES of text to CODING. */
4533
4534int
4535encoding_buffer_size (coding, src_bytes)
4536 struct coding_system *coding;
4537 int src_bytes;
4538{
4539 int magnification;
4540
4541 if (coding->type == coding_type_ccl)
a84f1519
KH
4542 {
4543 magnification = coding->spec.ccl.encoder.buf_magnification;
4544 if (coding->eol_type == CODING_EOL_CRLF)
4545 magnification *= 2;
4546 }
b73bfc1c 4547 else if (CODING_REQUIRE_ENCODING (coding))
d46c5b12 4548 magnification = 3;
b73bfc1c
KH
4549 else
4550 magnification = 1;
d46c5b12
KH
4551
4552 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4553}
4554
73be902c
KH
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes allocated for DATA.  */
  int on_stack;                 /* 1 if DATA was allocated by alloca.  */
  unsigned char *data;          /* The buffer memory itself.  */
};
d46c5b12 4562
73be902c
KH
/* Allocate LEN bytes of memory for BUF (a struct conversion_buffer
   lvalue, not a pointer).  Requests smaller than MAX_ALLOCA are taken
   from the stack with alloca, larger ones from the heap with xmalloc;
   BUF.on_stack records which.  This has to be a macro so that alloca
   runs in the caller's frame.  Both arguments are parenthesized so
   that arbitrary expressions may be passed, but LEN is evaluated more
   than once and must not have side effects.  */
#define allocate_conversion_buffer(buf, len)                    \
  do {                                                          \
    if ((len) < MAX_ALLOCA)                                     \
      {                                                         \
        (buf).data = (unsigned char *) alloca (len);            \
        (buf).on_stack = 1;                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        (buf).data = (unsigned char *) xmalloc (len);           \
        (buf).on_stack = 0;                                     \
      }                                                         \
    (buf).size = (len);                                         \
  } while (0)
d46c5b12 4578
73be902c
KH
4579/* Double the allocated memory for *BUF. */
4580static void
4581extend_conversion_buffer (buf)
4582 struct conversion_buffer *buf;
d46c5b12 4583{
73be902c 4584 if (buf->on_stack)
d46c5b12 4585 {
73be902c
KH
4586 unsigned char *save = buf->data;
4587 buf->data = (unsigned char *) xmalloc (buf->size * 2);
4588 bcopy (save, buf->data, buf->size);
4589 buf->on_stack = 0;
d46c5b12 4590 }
73be902c
KH
4591 else
4592 {
4593 buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4594 }
4595 buf->size *= 2;
4596}
4597
4598/* Free the allocated memory for BUF if it is not on stack. */
4599static void
4600free_conversion_buffer (buf)
4601 struct conversion_buffer *buf;
4602{
4603 if (!buf->on_stack)
4604 xfree (buf->data);
d46c5b12
KH
4605}
4606
4607int
4608ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4609 struct coding_system *coding;
4610 unsigned char *source, *destination;
4611 int src_bytes, dst_bytes, encodep;
4612{
4613 struct ccl_program *ccl
4614 = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
1c3478b0 4615 unsigned char *dst = destination;
d46c5b12 4616
bd64290d 4617 ccl->suppress_error = coding->suppress_error;
ae9ff118 4618 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
aaaf0b1e 4619 if (encodep)
80e0ca99
KH
4620 {
4621 /* On encoding, EOL format is converted within ccl_driver. For
4622 that, setup proper information in the structure CCL. */
4623 ccl->eol_type = coding->eol_type;
4624 if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4625 ccl->eol_type = CODING_EOL_LF;
4626 ccl->cr_consumed = coding->spec.ccl.cr_carryover;
b671ed5e 4627 ccl->eight_bit_control = coding->dst_multibyte;
80e0ca99 4628 }
b671ed5e
KH
4629 else
4630 ccl->eight_bit_control = 1;
7272d75c 4631 ccl->multibyte = coding->src_multibyte;
1c3478b0
KH
4632 if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4633 {
4634 /* Move carryover bytes to DESTINATION. */
4635 unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4636 while (*p)
4637 *dst++ = *p++;
4638 coding->spec.ccl.eight_bit_carryover[0] = 0;
4639 if (dst_bytes)
4640 dst_bytes -= dst - destination;
4641 }
4642
4643 coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4644 &(coding->consumed))
4645 + dst - destination);
4646
b73bfc1c 4647 if (encodep)
80e0ca99
KH
4648 {
4649 coding->produced_char = coding->produced;
4650 coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4651 }
ade8d05e
KH
4652 else if (!ccl->eight_bit_control)
4653 {
4654 /* The produced bytes forms a valid multibyte sequence. */
4655 coding->produced_char
4656 = multibyte_chars_in_text (destination, coding->produced);
4657 coding->spec.ccl.eight_bit_carryover[0] = 0;
4658 }
b73bfc1c
KH
4659 else
4660 {
1c3478b0
KH
4661 /* On decoding, the destination should always multibyte. But,
4662 CCL program might have been generated an invalid multibyte
4663 sequence. Here we make such a sequence valid as
4664 multibyte. */
b73bfc1c
KH
4665 int bytes
4666 = dst_bytes ? dst_bytes : source + coding->consumed - destination;
1c3478b0
KH
4667
4668 if ((coding->consumed < src_bytes
4669 || !ccl->last_block)
4670 && coding->produced >= 1
4671 && destination[coding->produced - 1] >= 0x80)
4672 {
4673 /* We should not convert the tailing 8-bit codes to
4674 multibyte form even if they doesn't form a valid
4675 multibyte sequence. They may form a valid sequence in
4676 the next call. */
4677 int carryover = 0;
4678
4679 if (destination[coding->produced - 1] < 0xA0)
4680 carryover = 1;
4681 else if (coding->produced >= 2)
4682 {
4683 if (destination[coding->produced - 2] >= 0x80)
4684 {
4685 if (destination[coding->produced - 2] < 0xA0)
4686 carryover = 2;
4687 else if (coding->produced >= 3
4688 && destination[coding->produced - 3] >= 0x80
4689 && destination[coding->produced - 3] < 0xA0)
4690 carryover = 3;
4691 }
4692 }
4693 if (carryover > 0)
4694 {
4695 BCOPY_SHORT (destination + coding->produced - carryover,
4696 coding->spec.ccl.eight_bit_carryover,
4697 carryover);
4698 coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4699 coding->produced -= carryover;
4700 }
4701 }
b73bfc1c
KH
4702 coding->produced = str_as_multibyte (destination, bytes,
4703 coding->produced,
4704 &(coding->produced_char));
4705 }
69f76525 4706
d46c5b12
KH
4707 switch (ccl->status)
4708 {
4709 case CCL_STAT_SUSPEND_BY_SRC:
73be902c 4710 coding->result = CODING_FINISH_INSUFFICIENT_SRC;
d46c5b12
KH
4711 break;
4712 case CCL_STAT_SUSPEND_BY_DST:
73be902c 4713 coding->result = CODING_FINISH_INSUFFICIENT_DST;
d46c5b12 4714 break;
9864ebce
KH
4715 case CCL_STAT_QUIT:
4716 case CCL_STAT_INVALID_CMD:
73be902c 4717 coding->result = CODING_FINISH_INTERRUPT;
9864ebce 4718 break;
d46c5b12 4719 default:
73be902c 4720 coding->result = CODING_FINISH_NORMAL;
d46c5b12
KH
4721 break;
4722 }
73be902c 4723 return coding->result;
4ed46869
KH
4724}
4725
aaaf0b1e
KH
4726/* Decode EOL format of the text at PTR of BYTES length destructively
4727 according to CODING->eol_type. This is called after the CCL
4728 program produced a decoded text at PTR. If we do CRLF->LF
4729 conversion, update CODING->produced and CODING->produced_char. */
4730
4731static void
4732decode_eol_post_ccl (coding, ptr, bytes)
4733 struct coding_system *coding;
4734 unsigned char *ptr;
4735 int bytes;
4736{
4737 Lisp_Object val, saved_coding_symbol;
4738 unsigned char *pend = ptr + bytes;
4739 int dummy;
4740
4741 /* Remember the current coding system symbol. We set it back when
4742 an inconsistent EOL is found so that `last-coding-system-used' is
4743 set to the coding system that doesn't specify EOL conversion. */
4744 saved_coding_symbol = coding->symbol;
4745
4746 coding->spec.ccl.cr_carryover = 0;
4747 if (coding->eol_type == CODING_EOL_UNDECIDED)
4748 {
4749 /* Here, to avoid the call of setup_coding_system, we directly
4750 call detect_eol_type. */
4751 coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
74b01b80
EZ
4752 if (coding->eol_type == CODING_EOL_INCONSISTENT)
4753 coding->eol_type = CODING_EOL_LF;
4754 if (coding->eol_type != CODING_EOL_UNDECIDED)
4755 {
4756 val = Fget (coding->symbol, Qeol_type);
4757 if (VECTORP (val) && XVECTOR (val)->size == 3)
4758 coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4759 }
aaaf0b1e
KH
4760 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4761 }
4762
74b01b80
EZ
4763 if (coding->eol_type == CODING_EOL_LF
4764 || coding->eol_type == CODING_EOL_UNDECIDED)
aaaf0b1e
KH
4765 {
4766 /* We have nothing to do. */
4767 ptr = pend;
4768 }
4769 else if (coding->eol_type == CODING_EOL_CRLF)
4770 {
4771 unsigned char *pstart = ptr, *p = ptr;
4772
4773 if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4774 && *(pend - 1) == '\r')
4775 {
4776 /* If the last character is CR, we can't handle it here
4777 because LF will be in the not-yet-decoded source text.
9861e777 4778 Record that the CR is not yet processed. */
aaaf0b1e
KH
4779 coding->spec.ccl.cr_carryover = 1;
4780 coding->produced--;
4781 coding->produced_char--;
4782 pend--;
4783 }
4784 while (ptr < pend)
4785 {
4786 if (*ptr == '\r')
4787 {
4788 if (ptr + 1 < pend && *(ptr + 1) == '\n')
4789 {
4790 *p++ = '\n';
4791 ptr += 2;
4792 }
4793 else
4794 {
4795 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4796 goto undo_eol_conversion;
4797 *p++ = *ptr++;
4798 }
4799 }
4800 else if (*ptr == '\n'
4801 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4802 goto undo_eol_conversion;
4803 else
4804 *p++ = *ptr++;
4805 continue;
4806
4807 undo_eol_conversion:
4808 /* We have faced with inconsistent EOL format at PTR.
4809 Convert all LFs before PTR back to CRLFs. */
4810 for (p--, ptr--; p >= pstart; p--)
4811 {
4812 if (*p == '\n')
4813 *ptr-- = '\n', *ptr-- = '\r';
4814 else
4815 *ptr-- = *p;
4816 }
4817 /* If carryover is recorded, cancel it because we don't
4818 convert CRLF anymore. */
4819 if (coding->spec.ccl.cr_carryover)
4820 {
4821 coding->spec.ccl.cr_carryover = 0;
4822 coding->produced++;
4823 coding->produced_char++;
4824 pend++;
4825 }
4826 p = ptr = pend;
4827 coding->eol_type = CODING_EOL_LF;
4828 coding->symbol = saved_coding_symbol;
4829 }
4830 if (p < pend)
4831 {
4832 /* As each two-byte sequence CRLF was converted to LF, (PEND
4833 - P) is the number of deleted characters. */
4834 coding->produced -= pend - p;
4835 coding->produced_char -= pend - p;
4836 }
4837 }
4838 else /* i.e. coding->eol_type == CODING_EOL_CR */
4839 {
4840 unsigned char *p = ptr;
4841
4842 for (; ptr < pend; ptr++)
4843 {
4844 if (*ptr == '\r')
4845 *ptr = '\n';
4846 else if (*ptr == '\n'
4847 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4848 {
4849 for (; p < ptr; p++)
4850 {
4851 if (*p == '\n')
4852 *p = '\r';
4853 }
4854 ptr = pend;
4855 coding->eol_type = CODING_EOL_LF;
4856 coding->symbol = saved_coding_symbol;
4857 }
4858 }
4859 }
4860}
4861
4ed46869
KH
4862/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
4863 decoding, it may detect coding system and format of end-of-line if
b73bfc1c
KH
4864 those are not yet decided. The source should be unibyte, the
4865 result is multibyte if CODING->dst_multibyte is nonzero, else
4866 unibyte. */
4ed46869
KH
4867
4868int
d46c5b12 4869decode_coding (coding, source, destination, src_bytes, dst_bytes)
4ed46869 4870 struct coding_system *coding;
a4244313
KR
4871 const unsigned char *source;
4872 unsigned char *destination;
4ed46869 4873 int src_bytes, dst_bytes;
4ed46869 4874{
9861e777
EZ
4875 int extra = 0;
4876
0ef69138 4877 if (coding->type == coding_type_undecided)
4ed46869
KH
4878 detect_coding (coding, source, src_bytes);
4879
aaaf0b1e
KH
4880 if (coding->eol_type == CODING_EOL_UNDECIDED
4881 && coding->type != coding_type_ccl)
8844fa83
KH
4882 {
4883 detect_eol (coding, source, src_bytes);
4884 /* We had better recover the original eol format if we
8ca3766a 4885 encounter an inconsistent eol format while decoding. */
8844fa83
KH
4886 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4887 }
4ed46869 4888
b73bfc1c
KH
4889 coding->produced = coding->produced_char = 0;
4890 coding->consumed = coding->consumed_char = 0;
4891 coding->errors = 0;
4892 coding->result = CODING_FINISH_NORMAL;
4893
4ed46869
KH
4894 switch (coding->type)
4895 {
4ed46869 4896 case coding_type_sjis:
b73bfc1c
KH
4897 decode_coding_sjis_big5 (coding, source, destination,
4898 src_bytes, dst_bytes, 1);
4ed46869
KH
4899 break;
4900
4901 case coding_type_iso2022:
b73bfc1c
KH
4902 decode_coding_iso2022 (coding, source, destination,
4903 src_bytes, dst_bytes);
4ed46869
KH
4904 break;
4905
4906 case coding_type_big5:
b73bfc1c
KH
4907 decode_coding_sjis_big5 (coding, source, destination,
4908 src_bytes, dst_bytes, 0);
4909 break;
4910
4911 case coding_type_emacs_mule:
4912 decode_coding_emacs_mule (coding, source, destination,
4913 src_bytes, dst_bytes);
4ed46869
KH
4914 break;
4915
4916 case coding_type_ccl:
aaaf0b1e
KH
4917 if (coding->spec.ccl.cr_carryover)
4918 {
9861e777
EZ
4919 /* Put the CR which was not processed by the previous call
4920 of decode_eol_post_ccl in DESTINATION. It will be
4921 decoded together with the following LF by the call to
4922 decode_eol_post_ccl below. */
aaaf0b1e
KH
4923 *destination = '\r';
4924 coding->produced++;
4925 coding->produced_char++;
4926 dst_bytes--;
9861e777 4927 extra = coding->spec.ccl.cr_carryover;
aaaf0b1e 4928 }
9861e777 4929 ccl_coding_driver (coding, source, destination + extra,
b73bfc1c 4930 src_bytes, dst_bytes, 0);
aaaf0b1e 4931 if (coding->eol_type != CODING_EOL_LF)
9861e777
EZ
4932 {
4933 coding->produced += extra;
4934 coding->produced_char += extra;
4935 decode_eol_post_ccl (coding, destination, coding->produced);
4936 }
d46c5b12
KH
4937 break;
4938
b73bfc1c
KH
4939 default:
4940 decode_eol (coding, source, destination, src_bytes, dst_bytes);
4941 }
4942
4943 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
e7c9eef9 4944 && coding->mode & CODING_MODE_LAST_BLOCK
b73bfc1c
KH
4945 && coding->consumed == src_bytes)
4946 coding->result = CODING_FINISH_NORMAL;
4947
4948 if (coding->mode & CODING_MODE_LAST_BLOCK
4949 && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4950 {
a4244313 4951 const unsigned char *src = source + coding->consumed;
b73bfc1c
KH
4952 unsigned char *dst = destination + coding->produced;
4953
4954 src_bytes -= coding->consumed;
bb10be8b 4955 coding->errors++;
b73bfc1c
KH
4956 if (COMPOSING_P (coding))
4957 DECODE_COMPOSITION_END ('1');
4958 while (src_bytes--)
d46c5b12 4959 {
b73bfc1c
KH
4960 int c = *src++;
4961 dst += CHAR_STRING (c, dst);
4962 coding->produced_char++;
d46c5b12 4963 }
b73bfc1c
KH
4964 coding->consumed = coding->consumed_char = src - source;
4965 coding->produced = dst - destination;
73be902c 4966 coding->result = CODING_FINISH_NORMAL;
4ed46869
KH
4967 }
4968
b73bfc1c
KH
4969 if (!coding->dst_multibyte)
4970 {
4971 coding->produced = str_as_unibyte (destination, coding->produced);
4972 coding->produced_char = coding->produced;
4973 }
4ed46869 4974
b73bfc1c
KH
4975 return coding->result;
4976}
52d41803 4977
b73bfc1c
KH
4978/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The
4979 multibyteness of the source is CODING->src_multibyte, the
4980 multibyteness of the result is always unibyte. */
4ed46869
KH
4981
4982int
d46c5b12 4983encode_coding (coding, source, destination, src_bytes, dst_bytes)
4ed46869 4984 struct coding_system *coding;
a4244313
KR
4985 const unsigned char *source;
4986 unsigned char *destination;
4ed46869 4987 int src_bytes, dst_bytes;
4ed46869 4988{
b73bfc1c
KH
4989 coding->produced = coding->produced_char = 0;
4990 coding->consumed = coding->consumed_char = 0;
4991 coding->errors = 0;
4992 coding->result = CODING_FINISH_NORMAL;
4ed46869 4993
d46c5b12
KH
4994 switch (coding->type)
4995 {
4ed46869 4996 case coding_type_sjis:
b73bfc1c
KH
4997 encode_coding_sjis_big5 (coding, source, destination,
4998 src_bytes, dst_bytes, 1);
4ed46869
KH
4999 break;
5000
5001 case coding_type_iso2022:
b73bfc1c
KH
5002 encode_coding_iso2022 (coding, source, destination,
5003 src_bytes, dst_bytes);
4ed46869
KH
5004 break;
5005
5006 case coding_type_big5:
b73bfc1c
KH
5007 encode_coding_sjis_big5 (coding, source, destination,
5008 src_bytes, dst_bytes, 0);
5009 break;
5010
5011 case coding_type_emacs_mule:
5012 encode_coding_emacs_mule (coding, source, destination,
5013 src_bytes, dst_bytes);
4ed46869
KH
5014 break;
5015
5016 case coding_type_ccl:
b73bfc1c
KH
5017 ccl_coding_driver (coding, source, destination,
5018 src_bytes, dst_bytes, 1);
d46c5b12
KH
5019 break;
5020
b73bfc1c
KH
5021 default:
5022 encode_eol (coding, source, destination, src_bytes, dst_bytes);
5023 }
5024
73be902c
KH
5025 if (coding->mode & CODING_MODE_LAST_BLOCK
5026 && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
b73bfc1c 5027 {
a4244313 5028 const unsigned char *src = source + coding->consumed;
b73bfc1c
KH
5029 unsigned char *dst = destination + coding->produced;
5030
5031 if (coding->type == coding_type_iso2022)
5032 ENCODE_RESET_PLANE_AND_REGISTER;
5033 if (COMPOSING_P (coding))
5034 *dst++ = ISO_CODE_ESC, *dst++ = '1';
5035 if (coding->consumed < src_bytes)
d46c5b12 5036 {
b73bfc1c
KH
5037 int len = src_bytes - coding->consumed;
5038
fabf4a91 5039 BCOPY_SHORT (src, dst, len);
b73bfc1c
KH
5040 if (coding->src_multibyte)
5041 len = str_as_unibyte (dst, len);
5042 dst += len;
5043 coding->consumed = src_bytes;
d46c5b12 5044 }
b73bfc1c 5045 coding->produced = coding->produced_char = dst - destination;
73be902c 5046 coding->result = CODING_FINISH_NORMAL;
4ed46869
KH
5047 }
5048
bb10be8b
KH
5049 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
5050 && coding->consumed == src_bytes)
5051 coding->result = CODING_FINISH_NORMAL;
5052
b73bfc1c 5053 return coding->result;
4ed46869
KH
5054}
5055
fb88bf2d
KH
5056/* Scan text in the region between *BEG and *END (byte positions),
5057 skip characters which we don't have to decode by coding system
5058 CODING at the head and tail, then set *BEG and *END to the region
5059 of the text we actually have to convert. The caller should move
b73bfc1c
KH
5060 the gap out of the region in advance if the region is from a
5061 buffer.
4ed46869 5062
d46c5b12
KH
5063 If STR is not NULL, *BEG and *END are indices into STR. */
5064
5065static void
5066shrink_decoding_region (beg, end, coding, str)
5067 int *beg, *end;
5068 struct coding_system *coding;
5069 unsigned char *str;
5070{
fb88bf2d 5071 unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
d46c5b12 5072 int eol_conversion;
88993dfd 5073 Lisp_Object translation_table;
d46c5b12
KH
5074
5075 if (coding->type == coding_type_ccl
5076 || coding->type == coding_type_undecided
b73bfc1c
KH
5077 || coding->eol_type != CODING_EOL_LF
5078 || !NILP (coding->post_read_conversion)
5079 || coding->composing != COMPOSITION_DISABLED)
d46c5b12
KH
5080 {
5081 /* We can't skip any data. */
5082 return;
5083 }
b73bfc1c
KH
5084 if (coding->type == coding_type_no_conversion
5085 || coding->type == coding_type_raw_text
5086 || coding->type == coding_type_emacs_mule)
d46c5b12 5087 {
fb88bf2d
KH
5088 /* We need no conversion, but don't have to skip any data here.
5089 Decoding routine handles them effectively anyway. */
d46c5b12
KH
5090 return;
5091 }
5092
88993dfd
KH
5093 translation_table = coding->translation_table_for_decode;
5094 if (NILP (translation_table) && !NILP (Venable_character_translation))
5095 translation_table = Vstandard_translation_table_for_decode;
5096 if (CHAR_TABLE_P (translation_table))
5097 {
5098 int i;
5099 for (i = 0; i < 128; i++)
5100 if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5101 break;
5102 if (i < 128)
fa46990e 5103 /* Some ASCII character should be translated. We give up
88993dfd
KH
5104 shrinking. */
5105 return;
5106 }
5107
b73bfc1c 5108 if (coding->heading_ascii >= 0)
d46c5b12
KH
5109 /* Detection routine has already found how much we can skip at the
5110 head. */
5111 *beg += coding->heading_ascii;
5112
5113 if (str)
5114 {
5115 begp_orig = begp = str + *beg;
5116 endp_orig = endp = str + *end;
5117 }
5118 else
5119 {
fb88bf2d 5120 begp_orig = begp = BYTE_POS_ADDR (*beg);
d46c5b12
KH
5121 endp_orig = endp = begp + *end - *beg;
5122 }
5123
fa46990e
DL
5124 eol_conversion = (coding->eol_type == CODING_EOL_CR
5125 || coding->eol_type == CODING_EOL_CRLF);
5126
d46c5b12
KH
5127 switch (coding->type)
5128 {
d46c5b12
KH
5129 case coding_type_sjis:
5130 case coding_type_big5:
5131 /* We can skip all ASCII characters at the head. */
5132 if (coding->heading_ascii < 0)
5133 {
5134 if (eol_conversion)
de9d083c 5135 while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
d46c5b12
KH
5136 else
5137 while (begp < endp && *begp < 0x80) begp++;
5138 }
5139 /* We can skip all ASCII characters at the tail except for the
5140 second byte of SJIS or BIG5 code. */
5141 if (eol_conversion)
de9d083c 5142 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
d46c5b12
KH
5143 else
5144 while (begp < endp && endp[-1] < 0x80) endp--;
ee59c65f
RS
5145 /* Do not consider LF as ascii if preceded by CR, since that
5146 confuses eol decoding. */
5147 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5148 endp++;
d46c5b12
KH
5149 if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5150 endp++;
5151 break;
5152
b73bfc1c 5153 case coding_type_iso2022:
622fece5
KH
5154 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5155 /* We can't skip any data. */
5156 break;
d46c5b12
KH
5157 if (coding->heading_ascii < 0)
5158 {
d46c5b12
KH
5159 /* We can skip all ASCII characters at the head except for a
5160 few control codes. */
5161 while (begp < endp && (c = *begp) < 0x80
5162 && c != ISO_CODE_CR && c != ISO_CODE_SO
5163 && c != ISO_CODE_SI && c != ISO_CODE_ESC
5164 && (!eol_conversion || c != ISO_CODE_LF))
5165 begp++;
5166 }
5167 switch (coding->category_idx)
5168 {
5169 case CODING_CATEGORY_IDX_ISO_8_1:
5170 case CODING_CATEGORY_IDX_ISO_8_2:
5171 /* We can skip all ASCII characters at the tail. */
5172 if (eol_conversion)
de9d083c 5173 while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
d46c5b12
KH
5174 else
5175 while (begp < endp && endp[-1] < 0x80) endp--;
ee59c65f
RS
5176 /* Do not consider LF as ascii if preceded by CR, since that
5177 confuses eol decoding. */
5178 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5179 endp++;
d46c5b12
KH
5180 break;
5181
5182 case CODING_CATEGORY_IDX_ISO_7:
5183 case CODING_CATEGORY_IDX_ISO_7_TIGHT:
de79a6a5 5184 {
8ca3766a 5185 /* We can skip all characters at the tail except for 8-bit
de79a6a5
KH
5186 codes and ESC and the following 2-byte at the tail. */
5187 unsigned char *eight_bit = NULL;
5188
5189 if (eol_conversion)
5190 while (begp < endp
5191 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5192 {
5193 if (!eight_bit && c & 0x80) eight_bit = endp;
5194 endp--;
5195 }
5196 else
5197 while (begp < endp
5198 && (c = endp[-1]) != ISO_CODE_ESC)
5199 {
5200 if (!eight_bit && c & 0x80) eight_bit = endp;
5201 endp--;
5202 }
5203 /* Do not consider LF as ascii if preceded by CR, since that
5204 confuses eol decoding. */
5205 if (begp < endp && endp < endp_orig
5206 && endp[-1] == '\r' && endp[0] == '\n')
5207 endp++;
5208 if (begp < endp && endp[-1] == ISO_CODE_ESC)
5209 {
5210 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5211 /* This is an ASCII designation sequence. We can
5212 surely skip the tail. But, if we have
5213 encountered an 8-bit code, skip only the codes
5214 after that. */
5215 endp = eight_bit ? eight_bit : endp + 2;
5216 else
5217 /* Hmmm, we can't skip the tail. */
5218 endp = endp_orig;
5219 }
5220 else if (eight_bit)
5221 endp = eight_bit;
5222 }
d46c5b12 5223 }
b73bfc1c
KH
5224 break;
5225
5226 default:
5227 abort ();
d46c5b12
KH
5228 }
5229 *beg += begp - begp_orig;
5230 *end += endp - endp_orig;
5231 return;
5232}
5233
5234/* Like shrink_decoding_region but for encoding. */
5235
5236static void
5237shrink_encoding_region (beg, end, coding, str)
5238 int *beg, *end;
5239 struct coding_system *coding;
5240 unsigned char *str;
5241{
5242 unsigned char *begp_orig, *begp, *endp_orig, *endp;
5243 int eol_conversion;
88993dfd 5244 Lisp_Object translation_table;
d46c5b12 5245
b73bfc1c
KH
5246 if (coding->type == coding_type_ccl
5247 || coding->eol_type == CODING_EOL_CRLF
5248 || coding->eol_type == CODING_EOL_CR
87323294 5249 || (coding->cmp_data && coding->cmp_data->used > 0))
d46c5b12 5250 {
b73bfc1c
KH
5251 /* We can't skip any data. */
5252 return;
5253 }
5254 if (coding->type == coding_type_no_conversion
5255 || coding->type == coding_type_raw_text
5256 || coding->type == coding_type_emacs_mule
5257 || coding->type == coding_type_undecided)
5258 {
5259 /* We need no conversion, but don't have to skip any data here.
5260 Encoding routine handles them effectively anyway. */
d46c5b12
KH
5261 return;
5262 }
5263
88993dfd
KH
5264 translation_table = coding->translation_table_for_encode;
5265 if (NILP (translation_table) && !NILP (Venable_character_translation))
5266 translation_table = Vstandard_translation_table_for_encode;
5267 if (CHAR_TABLE_P (translation_table))
5268 {
5269 int i;
5270 for (i = 0; i < 128; i++)
5271 if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5272 break;
5273 if (i < 128)
8ca3766a 5274 /* Some ASCII character should be translated. We give up
88993dfd
KH
5275 shrinking. */
5276 return;
5277 }
5278
d46c5b12
KH
5279 if (str)
5280 {
5281 begp_orig = begp = str + *beg;
5282 endp_orig = endp = str + *end;
5283 }
5284 else
5285 {
fb88bf2d 5286 begp_orig = begp = BYTE_POS_ADDR (*beg);
d46c5b12
KH
5287 endp_orig = endp = begp + *end - *beg;
5288 }
5289
5290 eol_conversion = (coding->eol_type == CODING_EOL_CR
5291 || coding->eol_type == CODING_EOL_CRLF);
5292
5293 /* Here, we don't have to check coding->pre_write_conversion because
5294 the caller is expected to have handled it already. */
5295 switch (coding->type)
5296 {
d46c5b12 5297 case coding_type_iso2022:
622fece5
KH
5298 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5299 /* We can't skip any data. */
5300 break;
d46c5b12
KH
5301 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5302 {
93dec019 5303 unsigned char *bol = begp;
d46c5b12
KH
5304 while (begp < endp && *begp < 0x80)
5305 {
5306 begp++;
5307 if (begp[-1] == '\n')
5308 bol = begp;
5309 }
5310 begp = bol;
5311 goto label_skip_tail;
5312 }
5313 /* fall down ... */
5314
b73bfc1c
KH
5315 case coding_type_sjis:
5316 case coding_type_big5:
d46c5b12
KH
5317 /* We can skip all ASCII characters at the head and tail. */
5318 if (eol_conversion)
5319 while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5320 else
5321 while (begp < endp && *begp < 0x80) begp++;
5322 label_skip_tail:
5323 if (eol_conversion)
5324 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5325 else
5326 while (begp < endp && *(endp - 1) < 0x80) endp--;
5327 break;
b73bfc1c
KH
5328
5329 default:
5330 abort ();
d46c5b12
KH
5331 }
5332
5333 *beg += begp - begp_orig;
5334 *end += endp - endp_orig;
5335 return;
5336}
5337
88993dfd
KH
/* Shrinking the conversion region has some overhead of its own, so it
   is attempted only for regions longer than this many bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region *BEG .. *END for CODING (and STR, if any) with
   shrink_encoding_region when ENCODEP is nonzero, else with
   shrink_decoding_region, provided the region is longer than the
   threshold above.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
      {                                                                 \
        if (encodep)                                                    \
          shrink_encoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_decoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
5351
b843d1ae 5352static Lisp_Object
1c7457e2
KH
5353code_convert_region_unwind (arg)
5354 Lisp_Object arg;
b843d1ae
KH
5355{
5356 inhibit_pre_post_conversion = 0;
1c7457e2 5357 Vlast_coding_system_used = arg;
b843d1ae
KH
5358 return Qnil;
5359}
5360
ec6d2bb8
KH
5361/* Store information about all compositions in the range FROM and TO
5362 of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
5363 buffer or a string, defaults to the current buffer. */
5364
5365void
5366coding_save_composition (coding, from, to, obj)
5367 struct coding_system *coding;
5368 int from, to;
5369 Lisp_Object obj;
5370{
5371 Lisp_Object prop;
5372 int start, end;
5373
91bee881
KH
5374 if (coding->composing == COMPOSITION_DISABLED)
5375 return;
5376 if (!coding->cmp_data)
5377 coding_allocate_composition_data (coding, from);
ec6d2bb8
KH
5378 if (!find_composition (from, to, &start, &end, &prop, obj)
5379 || end > to)
5380 return;
5381 if (start < from
5382 && (!find_composition (end, to, &start, &end, &prop, obj)
5383 || end > to))
5384 return;
5385 coding->composing = COMPOSITION_NO;
ec6d2bb8
KH
5386 do
5387 {
5388 if (COMPOSITION_VALID_P (start, end, prop))
5389 {
5390 enum composition_method method = COMPOSITION_METHOD (prop);
5391 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5392 >= COMPOSITION_DATA_SIZE)
5393 coding_allocate_composition_data (coding, from);
5394 /* For relative composition, we remember start and end
5395 positions, for the other compositions, we also remember
5396 components. */
5397 CODING_ADD_COMPOSITION_START (coding, start - from, method);
5398 if (method != COMPOSITION_RELATIVE)
5399 {
5400 /* We must store a*/
5401 Lisp_Object val, ch;
5402
5403 val = COMPOSITION_COMPONENTS (prop);
5404 if (CONSP (val))
5405 while (CONSP (val))
5406 {
5407 ch = XCAR (val), val = XCDR (val);
5408 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5409 }
5410 else if (VECTORP (val) || STRINGP (val))
5411 {
5412 int len = (VECTORP (val)
d5db4077 5413 ? XVECTOR (val)->size : SCHARS (val));
ec6d2bb8
KH
5414 int i;
5415 for (i = 0; i < len; i++)
5416 {
5417 ch = (STRINGP (val)
5418 ? Faref (val, make_number (i))
5419 : XVECTOR (val)->contents[i]);
5420 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5421 }
5422 }
5423 else /* INTEGERP (val) */
5424 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5425 }
5426 CODING_ADD_COMPOSITION_END (coding, end - from);
5427 }
5428 start = end;
5429 }
5430 while (start < to
5431 && find_composition (start, to, &start, &end, &prop, obj)
5432 && end <= to);
5433
5434 /* Make coding->cmp_data point to the first memory block. */
5435 while (coding->cmp_data->prev)
5436 coding->cmp_data = coding->cmp_data->prev;
5437 coding->cmp_data_start = 0;
5438}
5439
5440/* Reflect the saved information about compositions to OBJ.
8ca3766a 5441 CODING->cmp_data points to a memory block for the information. OBJ
ec6d2bb8
KH
5442 is a buffer or a string, defaults to the current buffer. */
5443
33fb63eb 5444void
ec6d2bb8
KH
5445coding_restore_composition (coding, obj)
5446 struct coding_system *coding;
5447 Lisp_Object obj;
5448{
5449 struct composition_data *cmp_data = coding->cmp_data;
5450
5451 if (!cmp_data)
5452 return;
5453
5454 while (cmp_data->prev)
5455 cmp_data = cmp_data->prev;
5456
5457 while (cmp_data)
5458 {
5459 int i;
5460
78108bcd
KH
5461 for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5462 i += cmp_data->data[i])
ec6d2bb8
KH
5463 {
5464 int *data = cmp_data->data + i;
5465 enum composition_method method = (enum composition_method) data[3];
5466 Lisp_Object components;
5467
4307d534
KH
5468 if (data[0] < 0 || i + data[0] > cmp_data->used)
5469 /* Invalid composition data. */
5470 break;
5471
ec6d2bb8
KH
5472 if (method == COMPOSITION_RELATIVE)
5473 components = Qnil;
5474 else
5475 {
5476 int len = data[0] - 4, j;
5477 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5478
b6871cc7
KH
5479 if (method == COMPOSITION_WITH_RULE_ALTCHARS
5480 && len % 2 == 0)
5481 len --;
09721b31
KH
5482 if (len < 1)
5483 /* Invalid composition data. */
5484 break;
ec6d2bb8
KH
5485 for (j = 0; j < len; j++)
5486 args[j] = make_number (data[4 + j]);
5487 components = (method == COMPOSITION_WITH_ALTCHARS
316d4bf9
SM
5488 ? Fstring (len, args)
5489 : Fvector (len, args));
ec6d2bb8
KH
5490 }
5491 compose_text (data[1], data[2], components, Qnil, obj);
5492 }
5493 cmp_data = cmp_data->next;
5494 }
5495}
5496
d46c5b12 5497/* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
   text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
   coding system CODING, and return the status code of code conversion
   (currently, this value has no meaning; both return statements of
   this function return 0).

   How many characters (and bytes) are converted to how many
   characters (and bytes) is recorded in members of the structure
   CODING.

   If REPLACE is nonzero, we do various things as if the original text
   is deleted and a new text is inserted.  See the comments in
   replace_range (insdel.c) to know what we are doing.

   If REPLACE is zero, it is assumed that the source text is unibyte.
   Otherwise, it is assumed that the source text is multibyte.  */
4ed46869
KH
5512
5513int
6e44253b
KH
5514code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5515 int from, from_byte, to, to_byte, encodep, replace;
4ed46869 5516 struct coding_system *coding;
4ed46869 5517{
fb88bf2d 5518 int len = to - from, len_byte = to_byte - from_byte;
72d1a715 5519 int nchars_del = 0, nbytes_del = 0;
fb88bf2d 5520 int require, inserted, inserted_byte;
/* HEAD_SKIP and TAIL_SKIP are assigned only when the region is shrunk
   below; they are read only when TOTAL_SKIP is positive.  */
4b39528c 5521 int head_skip, tail_skip, total_skip = 0;
84d60297 5522 Lisp_Object saved_coding_symbol;
fb88bf2d 5523 int first = 1;
fb88bf2d 5524 unsigned char *src, *dst;
84d60297 5525 Lisp_Object deletion;
e133c8fa 5526 int orig_point = PT, orig_len = len;
6abb9bd9 5527 int prev_Z;
b73bfc1c
KH
5528 int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5529
84d60297 5530 deletion = Qnil;
8844fa83 5531 saved_coding_symbol = coding->symbol;
d46c5b12 5532
/* If point is strictly inside the region, move it to FROM for the
   duration of the conversion.  */
83fa074f 5533 if (from < PT && PT < to)
e133c8fa
KH
5534 {
5535 TEMP_SET_PT_BOTH (from, from_byte);
5536 orig_point = from;
5537 }
83fa074f 5538
6e44253b 5539 if (replace)
d46c5b12 5540 {
fb88bf2d 5541 int saved_from = from;
e077cc80 5542 int saved_inhibit_modification_hooks;
fb88bf2d 5543
d46c5b12 5544 prepare_to_modify_buffer (from, to, &from);
/* prepare_to_modify_buffer may have changed FROM; recompute the
   other positions from it.  */
fb88bf2d
KH
5545 if (saved_from != from)
5546 {
5547 to = from + len;
b73bfc1c 5548 from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
fb88bf2d
KH
5549 len_byte = to_byte - from_byte;
5550 }
e077cc80
KH
5551
5552 /* The code conversion routine can not preserve text properties
   for now.  So, we must remove all text properties in the
   region.  Here, we must suppress all modification hooks.  */
5555 saved_inhibit_modification_hooks = inhibit_modification_hooks;
5556 inhibit_modification_hooks = 1;
5557 Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5558 inhibit_modification_hooks = saved_inhibit_modification_hooks;
d46c5b12 5559 }
d46c5b12
KH
5560
5561 if (! encodep && CODING_REQUIRE_DETECTION (coding))
5562 {
12410ef1 5563 /* We must detect encoding of text and eol format. */
d46c5b12
KH
5564
5565 if (from < GPT && to > GPT)
5566 move_gap_both (from, from_byte);
5567 if (coding->type == coding_type_undecided)
5568 {
fb88bf2d 5569 detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
d46c5b12 5570 if (coding->type == coding_type_undecided)
62b3ef1d
KH
5571 {
5572 /* It seems that the text contains only ASCII, but we
   should not leave it undecided because the deeper
   decoding routine (decode_coding) tries to detect the
   encodings again in vain.  */
5576 coding->type = coding_type_emacs_mule;
5577 coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
d280ccb6
KH
5578 /* As emacs-mule decoder will handle composition, we
   need this setting to allocate coding->cmp_data
   later.  */
5581 coding->composing = COMPOSITION_NO;
62b3ef1d 5582 }
d46c5b12 5583 }
aaaf0b1e
KH
5584 if (coding->eol_type == CODING_EOL_UNDECIDED
5585 && coding->type != coding_type_ccl)
d46c5b12 5586 {
d46c5b12
KH
5587 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5588 if (coding->eol_type == CODING_EOL_UNDECIDED)
5589 coding->eol_type = CODING_EOL_LF;
5590 /* We had better recover the original eol format if we
   encounter an inconsistent eol format while decoding.  */
d46c5b12
KH
5592 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5593 }
5594 }
5595
d46c5b12
KH
5596 /* Now we convert the text. */
5597
5598 /* For encoding, we must process pre-write-conversion in advance. */
b73bfc1c
KH
5599 if (! inhibit_pre_post_conversion
5600 && encodep
d46c5b12
KH
5601 && SYMBOLP (coding->pre_write_conversion)
5602 && ! NILP (Ffboundp (coding->pre_write_conversion)))
5603 {
2b4f9037
KH
5604 /* The function in pre-write-conversion may put a new text in a
   new buffer.  */
0007bdd0
KH
5606 struct buffer *prev = current_buffer;
5607 Lisp_Object new;
d46c5b12 5608
1c7457e2 5609 record_unwind_protect (code_convert_region_unwind,
24a948a7 5610 Vlast_coding_system_used);
b843d1ae
KH
5611 /* We should not call any more pre-write/post-read-conversion
   functions while this pre-write-conversion is running.  */
5613 inhibit_pre_post_conversion = 1;
b39f748c
AS
5614 call2 (coding->pre_write_conversion,
5615 make_number (from), make_number (to));
b843d1ae
KH
5616 inhibit_pre_post_conversion = 0;
5617 /* Discard the unwind protect. */
5618 specpdl_ptr--;
5619
d46c5b12
KH
5620 if (current_buffer != prev)
5621 {
/* The function left us in another buffer: replace the region of the
   original buffer with the accessible text of that buffer, kill that
   buffer, and recompute the positions.  */
5622 len = ZV - BEGV;
0007bdd0 5623 new = Fcurrent_buffer ();
d46c5b12 5624 set_buffer_internal_1 (prev);
7dae4502 5625 del_range_2 (from, from_byte, to, to_byte, 0);
e133c8fa 5626 TEMP_SET_PT_BOTH (from, from_byte);
0007bdd0
KH
5627 insert_from_buffer (XBUFFER (new), 1, len, 0);
5628 Fkill_buffer (new);
e133c8fa
KH
5629 if (orig_point >= to)
5630 orig_point += len - orig_len;
5631 else if (orig_point > from)
5632 orig_point = from;
5633 orig_len = len;
d46c5b12 5634 to = from + len;
b73bfc1c
KH
5635 from_byte = CHAR_TO_BYTE (from);
5636 to_byte = CHAR_TO_BYTE (to);
d46c5b12 5637 len_byte = to_byte - from_byte;
e133c8fa 5638 TEMP_SET_PT_BOTH (from, from_byte);
d46c5b12
KH
5639 }
5640 }
5641
/* When replacing, remember what is being deleted: the text itself if
   the undo list is not t, otherwise only the character and byte
   counts.  These are passed to adjust_after_replace or
   adjust_after_replace_noundo below.  */
12410ef1 5642 if (replace)
72d1a715
RS
5643 {
5644 if (! EQ (current_buffer->undo_list, Qt))
5645 deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5646 else
5647 {
5648 nchars_del = to - from;
5649 nbytes_del = to_byte - from_byte;
5650 }
5651 }
12410ef1 5652
ec6d2bb8
KH
5653 if (coding->composing != COMPOSITION_DISABLED)
5654 {
5655 if (encodep)
5656 coding_save_composition (coding, from, to, Fcurrent_buffer ());
5657 else
5658 coding_allocate_composition_data (coding, from);
5659 }
fb88bf2d 5660
ce559e6f
KH
5661 /* Try to skip the heading and trailing ASCIIs.  We can't skip them
   if we must run CCL program or there are compositions to
   encode.  */
5664 if (coding->type != coding_type_ccl
5665 && (! coding->cmp_data || coding->cmp_data->used == 0))
4956c225
KH
5666 {
5667 int from_byte_orig = from_byte, to_byte_orig = to_byte;
ec6d2bb8 5668
4956c225
KH
5669 if (from < GPT && GPT < to)
5670 move_gap_both (from, from_byte);
5671 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
/* Nothing is left to convert and nothing else has to run: report the
   whole text as produced and return early.  */
5672 if (from_byte == to_byte
5673 && (encodep || NILP (coding->post_read_conversion))
5674 && ! CODING_REQUIRE_FLUSHING (coding))
5675 {
5676 coding->produced = len_byte;
5677 coding->produced_char = len;
5678 if (!replace)
5679 /* We must record and adjust for this new text now. */
5680 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
ce559e6f 5681 coding_free_composition_data (coding);
4956c225
KH
5682 return 0;
5683 }
5684
/* The same skip amounts are applied to the character and the byte
   counts.  */
5685 head_skip = from_byte - from_byte_orig;
5686 tail_skip = to_byte_orig - to_byte;
5687 total_skip = head_skip + tail_skip;
5688 from += head_skip;
5689 to -= tail_skip;
5690 len -= total_skip; len_byte -= total_skip;
5691 }
d46c5b12 5692
8ca3766a 5693 /* For conversion, we must put the gap before the text in addition to
   making the gap larger for efficient decoding.  The required gap
   size starts from 2000 which is the magic number used in make_gap.
   But, after one batch of conversion, it will be incremented if we
   find that it is not enough.  */
d46c5b12
KH
5698 require = 2000;
5699
5700 if (GAP_SIZE < require)
5701 make_gap (require - GAP_SIZE);
5702 move_gap_both (from, from_byte);
5703
d46c5b12 5704 inserted = inserted_byte = 0;
fb88bf2d
KH
5705
/* Take the source text out of the buffer contents by counting it as
   part of the gap.  */
5706 GAP_SIZE += len_byte;
5707 ZV -= len;
5708 Z -= len;
5709 ZV_BYTE -= len_byte;
5710 Z_BYTE -= len_byte;
5711
d9f9a1bc
GM
5712 if (GPT - BEG < BEG_UNCHANGED)
5713 BEG_UNCHANGED = GPT - BEG;
5714 if (Z - GPT < END_UNCHANGED)
5715 END_UNCHANGED = Z - GPT;
f2558efd 5716
b73bfc1c
KH
5717 if (!encodep && coding->src_multibyte)
5718 {
5719 /* Decoding routines expect that the source text is unibyte.
   We must convert 8-bit characters of multibyte form to
   unibyte.  */
5722 int len_byte_orig = len_byte;
5723 len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5724 if (len_byte < len_byte_orig)
5725 safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5726 len_byte);
5727 coding->src_multibyte = 0;
5728 }
5729
d46c5b12
KH
5730 for (;;)
5731 {
fb88bf2d 5732 int result;
d46c5b12 5733
ec6d2bb8 5734 /* The buffer memory is now:
   +--------+converted-text+---------+-------original-text-------+---+
   |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
            |<---------------------- GAP ----------------------->|  */
ec6d2bb8
KH
5738 src = GAP_END_ADDR - len_byte;
5739 dst = GPT_ADDR + inserted_byte;
5740
d46c5b12 5741 if (encodep)
fb88bf2d 5742 result = encode_coding (coding, src, dst, len_byte, 0);
d46c5b12 5743 else
0e79d667
RS
5744 {
5745 if (coding->composing != COMPOSITION_DISABLED)
5746 coding->cmp_data->char_offset = from + inserted;
5747 result = decode_coding (coding, src, dst, len_byte, 0);
5748 }
ec6d2bb8
KH
5749
5750 /* The buffer memory is now:
   +--------+-------converted-text----+--+------original-text----+---+
   |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
            |<---------------------- GAP ----------------------->|  */
ec6d2bb8 5754
d46c5b12
KH
5755 inserted += coding->produced_char;
5756 inserted_byte += coding->produced;
d46c5b12 5757 len_byte -= coding->consumed;
ec6d2bb8
KH
5758
5759 if (result == CODING_FINISH_INSUFFICIENT_CMP)
5760 {
5761 coding_allocate_composition_data (coding, from + inserted);
5762 continue;
5763 }
5764
fb88bf2d 5765 src += coding->consumed;
3636f7a3 5766 dst += coding->produced;
d46c5b12 5767
9864ebce
KH
5768 if (result == CODING_FINISH_NORMAL)
5769 {
5770 src += len_byte;
5771 break;
5772 }
d46c5b12
KH
5773 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5774 {
fb88bf2d 5775 unsigned char *pend = dst, *p = pend - inserted_byte;
38edf7d4 5776 Lisp_Object eol_type;
d46c5b12
KH
5777
5778 /* Encode LFs back to the original eol format (CR or CRLF). */
5779 if (coding->eol_type == CODING_EOL_CR)
5780 {
5781 while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5782 }
5783 else
5784 {
d46c5b12
KH
5785 int count = 0;
5786
fb88bf2d
KH
5787 while (p < pend) if (*p++ == '\n') count++;
5788 if (src - dst < count)
d46c5b12 5789 {
38edf7d4 5790 /* We don't have sufficient room for encoding LFs
   back to CRLF.  We must record converted and
   not-yet-converted text back to the buffer
   content, enlarge the gap, then record them out of
   the buffer contents again.  */
5795 int add = len_byte + inserted_byte;
5796
5797 GAP_SIZE -= add;
5798 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5799 GPT += inserted_byte; GPT_BYTE += inserted_byte;
5800 make_gap (count - GAP_SIZE);
5801 GAP_SIZE += add;
5802 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5803 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5804 /* Don't forget to update SRC, DST, and PEND. */
5805 src = GAP_END_ADDR - len_byte;
5806 dst = GPT_ADDR + inserted_byte;
5807 pend = dst;
d46c5b12 5808 }
d46c5b12
KH
5809 inserted += count;
5810 inserted_byte += count;
fb88bf2d
KH
5811 coding->produced += count;
/* Copy the converted text backward, putting a CR in front of each
   LF, until all COUNT LFs have been handled.  */
5812 p = dst = pend + count;
5813 while (count)
5814 {
5815 *--p = *--pend;
5816 if (*p == '\n') count--, *--p = '\r';
5817 }
d46c5b12
KH
5818 }
5819
5820 /* Suppress eol-format conversion in the further conversion. */
5821 coding->eol_type = CODING_EOL_LF;
5822
38edf7d4
KH
5823 /* Set the coding system symbol to that for Unix-like EOL. */
5824 eol_type = Fget (saved_coding_symbol, Qeol_type);
5825 if (VECTORP (eol_type)
5826 && XVECTOR (eol_type)->size == 3
5827 && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5828 coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5829 else
5830 coding->symbol = saved_coding_symbol;
93dec019 5831
fb88bf2d 5832 continue;
d46c5b12
KH
5833 }
5834 if (len_byte <= 0)
944bd420
KH
5835 {
/* All source bytes are consumed.  For a CCL coding system, run one
   more round with CODING_MODE_LAST_BLOCK set before finishing.  */
5836 if (coding->type != coding_type_ccl
5837 || coding->mode & CODING_MODE_LAST_BLOCK)
5838 break;
5839 coding->mode |= CODING_MODE_LAST_BLOCK;
5840 continue;
5841 }
d46c5b12
KH
5842 if (result == CODING_FINISH_INSUFFICIENT_SRC)
5843 {
5844 /* The source text ends in invalid codes.  Let's just
   make them valid buffer contents, and finish conversion.  */
70ad9fc4
GM
5846 if (multibyte_p)
5847 {
5848 unsigned char *start = dst;
93dec019 5849
70ad9fc4
GM
5850 inserted += len_byte;
5851 while (len_byte--)
5852 {
5853 int c = *src++;
5854 dst += CHAR_STRING (c, dst);
5855 }
5856
5857 inserted_byte += dst - start;
5858 }
5859 else
5860 {
5861 inserted += len_byte;
5862 inserted_byte += len_byte;
5863 while (len_byte--)
5864 *dst++ = *src++;
5865 }
d46c5b12
KH
5866 break;
5867 }
9864ebce
KH
5868 if (result == CODING_FINISH_INTERRUPT)
5869 {
5870 /* The conversion procedure was interrupted by a user. */
9864ebce
KH
5871 break;
5872 }
5873 /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
5874 if (coding->consumed < 1)
5875 {
5876 /* It's quite strange to require more memory without
   consuming any bytes.  Perhaps CCL program bug.  */
9864ebce
KH
5878 break;
5879 }
fb88bf2d
KH
5880 if (first)
5881 {
5882 /* We have just done the first batch of conversion which was
   stopped because of insufficient gap.  Let's reconsider the
   required gap size (i.e. SRC - DST) now.

   We have converted ORIG bytes (== coding->consumed) into
   NEW bytes (coding->produced).  To convert the remaining
   LEN bytes, we may need REQUIRE bytes of gap, where:
	REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
	REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
   Here, we are sure that NEW >= ORIG.  */
b3385c28
KH
5892
5893 if (coding->produced <= coding->consumed)
5894 {
5895 /* This happens because of CCL-based coding system with
   eol-type CRLF.  */
5897 require = 0;
5898 }
5899 else
5900 {
b3ebb2d4
KH
5901 float ratio = coding->produced - coding->consumed;
5902 ratio /= coding->consumed;
b3385c28
KH
5903 require = len_byte * ratio;
5904 }
fb88bf2d
KH
5905 first = 0;
5906 }
5907 if ((src - dst) < (require + 2000))
5908 {
5909 /* See the comment above the previous call of make_gap. */
5910 int add = len_byte + inserted_byte;
5911
5912 GAP_SIZE -= add;
5913 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5914 GPT += inserted_byte; GPT_BYTE += inserted_byte;
5915 make_gap (require + 2000);
5916 GAP_SIZE += add;
5917 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5918 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
fb88bf2d 5919 }
d46c5b12 5920 }
fb88bf2d
KH
5921 if (src - dst > 0) *dst = 0; /* Put an anchor. */
5922
b73bfc1c
KH
5923 if (encodep && coding->dst_multibyte)
5924 {
5925 /* The output is unibyte.  We must convert 8-bit characters to
   multibyte form.  */
5927 if (inserted_byte * 2 > GAP_SIZE)
5928 {
5929 GAP_SIZE -= inserted_byte;
5930 ZV += inserted_byte; Z += inserted_byte;
5931 ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5932 GPT += inserted_byte; GPT_BYTE += inserted_byte;
5933 make_gap (inserted_byte - GAP_SIZE);
5934 GAP_SIZE += inserted_byte;
5935 ZV -= inserted_byte; Z -= inserted_byte;
5936 ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5937 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5938 }
5939 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5940 }
7553d0e1 5941
93dec019 5942 /* If we shrank the conversion area, adjust it now. */
12410ef1
KH
5943 if (total_skip > 0)
5944 {
5945 if (tail_skip > 0)
5946 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5947 inserted += total_skip; inserted_byte += total_skip;
5948 GAP_SIZE += total_skip;
5949 GPT -= head_skip; GPT_BYTE -= head_skip;
5950 ZV -= total_skip; ZV_BYTE -= total_skip;
5951 Z -= total_skip; Z_BYTE -= total_skip;
5952 from -= head_skip; from_byte -= head_skip;
5953 to += tail_skip; to_byte += tail_skip;
5954 }
5955
/* INSERTED is recomputed from the change of Z across the adjust call
   rather than trusted from the conversion loop.  */
6abb9bd9 5956 prev_Z = Z;
72d1a715
RS
5957 if (! EQ (current_buffer->undo_list, Qt))
5958 adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5959 else
5960 adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5961 inserted, inserted_byte);
6abb9bd9 5962 inserted = Z - prev_Z;
4ed46869 5963
ec6d2bb8
KH
5964 if (!encodep && coding->cmp_data && coding->cmp_data->used)
5965 coding_restore_composition (coding, Fcurrent_buffer ());
5966 coding_free_composition_data (coding);
5967
b73bfc1c
KH
5968 if (! inhibit_pre_post_conversion
5969 && ! encodep && ! NILP (coding->post_read_conversion))
d46c5b12 5970 {
2b4f9037 5971 Lisp_Object val;
1c7457e2 5972 Lisp_Object saved_coding_system;
4ed46869 5973
e133c8fa
KH
5974 if (from != PT)
5975 TEMP_SET_PT_BOTH (from, from_byte);
6abb9bd9 5976 prev_Z = Z;
1c7457e2
KH
5977 record_unwind_protect (code_convert_region_unwind,
5978 Vlast_coding_system_used);
5979 saved_coding_system = Vlast_coding_system_used;
5980 Vlast_coding_system_used = coding->symbol;
b843d1ae
KH
5981 /* We should not call any more pre-write/post-read-conversion
   functions while this post-read-conversion is running.  */
5983 inhibit_pre_post_conversion = 1;
2b4f9037 5984 val = call1 (coding->post_read_conversion, make_number (inserted));
b843d1ae 5985 inhibit_pre_post_conversion = 0;
1c7457e2
KH
5986 coding->symbol = Vlast_coding_system_used;
5987 Vlast_coding_system_used = saved_coding_system;
b843d1ae
KH
5988 /* Discard the unwind protect. */
5989 specpdl_ptr--;
b7826503 5990 CHECK_NUMBER (val);
944bd420 5991 inserted += Z - prev_Z;
e133c8fa
KH
5992 }
5993
/* Put point back: shifted by the change in length if it was at or
   after the end of the original text, otherwise at FROM.  */
5994 if (orig_point >= from)
5995 {
5996 if (orig_point >= from + orig_len)
5997 orig_point += inserted - orig_len;
5998 else
5999 orig_point = from;
6000 TEMP_SET_PT (orig_point);
d46c5b12 6001 }
4ed46869 6002
ec6d2bb8
KH
6003 if (replace)
6004 {
6005 signal_after_change (from, to - from, inserted);
e19539f1 6006 update_compositions (from, from + inserted, CHECK_BORDER);
ec6d2bb8 6007 }
2b4f9037 6008
fb88bf2d 6009 {
12410ef1
KH
6010 coding->consumed = to_byte - from_byte;
6011 coding->consumed_char = to - from;
6012 coding->produced = inserted_byte;
6013 coding->produced_char = inserted;
fb88bf2d 6014 }
7553d0e1 6015
fb88bf2d 6016 return 0;
d46c5b12
KH
6017}
6018
2a47931b
KH
6019/* Name (or base name) of work buffer for code conversion. */
6020static Lisp_Object Vcode_conversion_workbuf_name;
6021
6022/* Set the current buffer to the working buffer prepared for
6023 code-conversion. MULTIBYTE specifies the multibyteness of the
6024 buffer. */
6025
6026static struct buffer *
6027set_conversion_work_buffer (multibyte)
6028 int multibyte;
6029{
6030 Lisp_Object buffer;
6031 struct buffer *buf;
6032
6033 buffer = Fget_buffer_create (Vcode_conversion_workbuf_name);
6034 buf = XBUFFER (buffer);
6035 delete_all_overlays (buf);
6036 buf->directory = current_buffer->directory;
6037 buf->read_only = Qnil;
6038 buf->filename = Qnil;
6039 buf->undo_list = Qt;
6040 eassert (buf->overlays_before == NULL);
6041 eassert (buf->overlays_after == NULL);
6042 set_buffer_internal (buf);
6043 if (BEG != BEGV || Z != ZV)
6044 Fwiden ();
6045 del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0);
6046 buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6047 return buf;
6048}
6049
d46c5b12 6050Lisp_Object
b73bfc1c
KH
6051run_pre_post_conversion_on_str (str, coding, encodep)
6052 Lisp_Object str;
6053 struct coding_system *coding;
6054 int encodep;
6055{
aed13378 6056 int count = SPECPDL_INDEX ();
cf3b32fc 6057 struct gcpro gcpro1, gcpro2;
b73bfc1c 6058 int multibyte = STRING_MULTIBYTE (str);
cf3b32fc 6059 Lisp_Object old_deactivate_mark;
b73bfc1c
KH
6060
6061 record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
1c7457e2
KH
6062 record_unwind_protect (code_convert_region_unwind,
6063 Vlast_coding_system_used);
cf3b32fc
RS
6064 /* It is not crucial to specbind this. */
6065 old_deactivate_mark = Vdeactivate_mark;
6066 GCPRO2 (str, old_deactivate_mark);
3fd9494b 6067
b73bfc1c
KH
6068 /* We must insert the contents of STR as is without
6069 unibyte<->multibyte conversion. For that, we adjust the
6070 multibyteness of the working buffer to that of STR. */
2a47931b 6071 set_conversion_work_buffer (multibyte);
3fd9494b 6072
b73bfc1c 6073 insert_from_string (str, 0, 0,
d5db4077 6074 SCHARS (str), SBYTES (str), 0);
b73bfc1c
KH
6075 UNGCPRO;
6076 inhibit_pre_post_conversion = 1;
6077 if (encodep)
6078 call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6079 else
6bac5b12 6080 {
1c7457e2 6081 Vlast_coding_system_used = coding->symbol;
6bac5b12
KH
6082 TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
6083 call1 (coding->post_read_conversion, make_number (Z - BEG));
1c7457e2 6084 coding->symbol = Vlast_coding_system_used;
6bac5b12 6085 }
b73bfc1c 6086 inhibit_pre_post_conversion = 0;
cf3b32fc 6087 Vdeactivate_mark = old_deactivate_mark;
78108bcd 6088 str = make_buffer_string (BEG, Z, 1);
b73bfc1c
KH
6089 return unbind_to (count, str);
6090}
6091
2a47931b
KH
6092
6093/* Run pre-write-conversion function of CODING on NCHARS/NBYTES
6094 text in *STR. *SIZE is the allocated bytes for STR. As it
6095 is intended that this function is called from encode_terminal_code,
6096 the pre-write-conversion function is run by safe_call and thus
6097 "Error during redisplay: ..." is logged when an error occurs.
6098
6099 Store the resulting text in *STR and set CODING->produced_char and
6100 CODING->produced to the number of characters and bytes
6101 respectively. If the size of *STR is too small, enlarge it by
6102 xrealloc and update *STR and *SIZE. */
6103
6104void
6105run_pre_write_conversin_on_c_str (str, size, nchars, nbytes, coding)
6106 unsigned char **str;
6107 int *size, nchars, nbytes;
6108 struct coding_system *coding;
6109{
6110 struct gcpro gcpro1, gcpro2;
6111 struct buffer *cur = current_buffer;
6112 Lisp_Object old_deactivate_mark, old_last_coding_system_used;
6113 Lisp_Object args[3];
6114
6115 /* It is not crucial to specbind this. */
6116 old_deactivate_mark = Vdeactivate_mark;
6117 old_last_coding_system_used = Vlast_coding_system_used;
6118 GCPRO2 (old_deactivate_mark, old_last_coding_system_used);
6119
6120 /* We must insert the contents of STR as is without
6121 unibyte<->multibyte conversion. For that, we adjust the
6122 multibyteness of the working buffer to that of STR. */
6123 set_conversion_work_buffer (coding->src_multibyte);
6124 insert_1_both (*str, nchars, nbytes, 0, 0, 0);
6125 UNGCPRO;
6126 inhibit_pre_post_conversion = 1;
6127 args[0] = coding->pre_write_conversion;
6128 args[1] = make_number (BEG);
6129 args[2] = make_number (Z);
6130 safe_call (3, args);
6131 inhibit_pre_post_conversion = 0;
6132 Vdeactivate_mark = old_deactivate_mark;
6133 Vlast_coding_system_used = old_last_coding_system_used;
6134 coding->produced_char = Z - BEG;
6135 coding->produced = Z_BYTE - BEG_BYTE;
6136 if (coding->produced > *size)
6137 {
6138 *size = coding->produced;
6139 *str = xrealloc (*str, *size);
6140 }
6141 if (BEG < GPT && GPT < Z)
6142 move_gap (BEG);
6143 bcopy (BEG_ADDR, *str, coding->produced);
6144 coding->src_multibyte
6145 = ! NILP (current_buffer->enable_multibyte_characters);
6146 set_buffer_internal (cur);
6147}
6148
6149
b73bfc1c
KH
6150Lisp_Object
6151decode_coding_string (str, coding, nocopy)
d46c5b12 6152 Lisp_Object str;
4ed46869 6153 struct coding_system *coding;
b73bfc1c 6154 int nocopy;
4ed46869 6155{
d46c5b12 6156 int len;
73be902c 6157 struct conversion_buffer buf;
da55a2b7 6158 int from, to_byte;
84d60297 6159 Lisp_Object saved_coding_symbol;
d46c5b12 6160 int result;
78108bcd 6161 int require_decoding;
73be902c
KH
6162 int shrinked_bytes = 0;
6163 Lisp_Object newstr;
2391eaa4 6164 int consumed, consumed_char, produced, produced_char;
4ed46869 6165
b73bfc1c 6166 from = 0;
d5db4077 6167 to_byte = SBYTES (str);
4ed46869 6168
8844fa83 6169 saved_coding_symbol = coding->symbol;
764ca8da
KH
6170 coding->src_multibyte = STRING_MULTIBYTE (str);
6171 coding->dst_multibyte = 1;
b73bfc1c 6172 if (CODING_REQUIRE_DETECTION (coding))
d46c5b12
KH
6173 {
6174 /* See the comments in code_convert_region. */
6175 if (coding->type == coding_type_undecided)
6176 {
d5db4077 6177 detect_coding (coding, SDATA (str), to_byte);
d46c5b12 6178 if (coding->type == coding_type_undecided)
d280ccb6
KH
6179 {
6180 coding->type = coding_type_emacs_mule;
6181 coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
6182 /* As emacs-mule decoder will handle composition, we
6183 need this setting to allocate coding->cmp_data
6184 later. */
6185 coding->composing = COMPOSITION_NO;
6186 }
d46c5b12 6187 }
aaaf0b1e
KH
6188 if (coding->eol_type == CODING_EOL_UNDECIDED
6189 && coding->type != coding_type_ccl)
d46c5b12
KH
6190 {
6191 saved_coding_symbol = coding->symbol;
d5db4077 6192 detect_eol (coding, SDATA (str), to_byte);
d46c5b12
KH
6193 if (coding->eol_type == CODING_EOL_UNDECIDED)
6194 coding->eol_type = CODING_EOL_LF;
6195 /* We had better recover the original eol format if we
8ca3766a 6196 encounter an inconsistent eol format while decoding. */
d46c5b12
KH
6197 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
6198 }
6199 }
4ed46869 6200
764ca8da
KH
6201 if (coding->type == coding_type_no_conversion
6202 || coding->type == coding_type_raw_text)
6203 coding->dst_multibyte = 0;
6204
78108bcd 6205 require_decoding = CODING_REQUIRE_DECODING (coding);
ec6d2bb8 6206
b73bfc1c 6207 if (STRING_MULTIBYTE (str))
d46c5b12 6208 {
b73bfc1c
KH
6209 /* Decoding routines expect the source text to be unibyte. */
6210 str = Fstring_as_unibyte (str);
d5db4077 6211 to_byte = SBYTES (str);
b73bfc1c 6212 nocopy = 1;
764ca8da 6213 coding->src_multibyte = 0;
b73bfc1c 6214 }
ec6d2bb8 6215
b73bfc1c 6216 /* Try to skip the heading and tailing ASCIIs. */
78108bcd 6217 if (require_decoding && coding->type != coding_type_ccl)
4956c225 6218 {
d5db4077 6219 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
4956c225
KH
6220 0);
6221 if (from == to_byte)
78108bcd 6222 require_decoding = 0;
d5db4077 6223 shrinked_bytes = from + (SBYTES (str) - to_byte);
4956c225 6224 }
b73bfc1c 6225
439ad9ea
KH
6226 if (!require_decoding
6227 && !(SYMBOLP (coding->post_read_conversion)
6228 && !NILP (Ffboundp (coding->post_read_conversion))))
78108bcd 6229 {
d5db4077
KR
6230 coding->consumed = SBYTES (str);
6231 coding->consumed_char = SCHARS (str);
78108bcd
KH
6232 if (coding->dst_multibyte)
6233 {
6234 str = Fstring_as_multibyte (str);
6235 nocopy = 1;
6236 }
d5db4077
KR
6237 coding->produced = SBYTES (str);
6238 coding->produced_char = SCHARS (str);
78108bcd
KH
6239 return (nocopy ? str : Fcopy_sequence (str));
6240 }
6241
6242 if (coding->composing != COMPOSITION_DISABLED)
6243 coding_allocate_composition_data (coding, from);
b73bfc1c 6244 len = decoding_buffer_size (coding, to_byte - from);
73be902c 6245 allocate_conversion_buffer (buf, len);
4ed46869 6246
2391eaa4 6247 consumed = consumed_char = produced = produced_char = 0;
73be902c 6248 while (1)
4ed46869 6249 {
d5db4077 6250 result = decode_coding (coding, SDATA (str) + from + consumed,
73be902c
KH
6251 buf.data + produced, to_byte - from - consumed,
6252 buf.size - produced);
6253 consumed += coding->consumed;
2391eaa4 6254 consumed_char += coding->consumed_char;
73be902c
KH
6255 produced += coding->produced;
6256 produced_char += coding->produced_char;
2391eaa4 6257 if (result == CODING_FINISH_NORMAL
c3912f23 6258 || result == CODING_FINISH_INTERRUPT
2391eaa4
KH
6259 || (result == CODING_FINISH_INSUFFICIENT_SRC
6260 && coding->consumed == 0))
73be902c
KH
6261 break;
6262 if (result == CODING_FINISH_INSUFFICIENT_CMP)
6263 coding_allocate_composition_data (coding, from + produced_char);
6264 else if (result == CODING_FINISH_INSUFFICIENT_DST)
6265 extend_conversion_buffer (&buf);
6266 else if (result == CODING_FINISH_INCONSISTENT_EOL)
6267 {
8844fa83
KH
6268 Lisp_Object eol_type;
6269
73be902c
KH
6270 /* Recover the original EOL format. */
6271 if (coding->eol_type == CODING_EOL_CR)
6272 {
6273 unsigned char *p;
6274 for (p = buf.data; p < buf.data + produced; p++)
6275 if (*p == '\n') *p = '\r';
6276 }
6277 else if (coding->eol_type == CODING_EOL_CRLF)
6278 {
6279 int num_eol = 0;
6280 unsigned char *p0, *p1;
6281 for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6282 if (*p0 == '\n') num_eol++;
6283 if (produced + num_eol >= buf.size)
6284 extend_conversion_buffer (&buf);
6285 for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6286 {
6287 *--p1 = *--p0;
6288 if (*p0 == '\n') *--p1 = '\r';
6289 }
6290 produced += num_eol;
6291 produced_char += num_eol;
93dec019 6292 }
8844fa83 6293 /* Suppress eol-format conversion in the further conversion. */
73be902c 6294 coding->eol_type = CODING_EOL_LF;
8844fa83
KH
6295
6296 /* Set the coding system symbol to that for Unix-like EOL. */
6297 eol_type = Fget (saved_coding_symbol, Qeol_type);
6298 if (VECTORP (eol_type)
6299 && XVECTOR (eol_type)->size == 3
6300 && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6301 coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6302 else
6303 coding->symbol = saved_coding_symbol;
6304
6305
73be902c 6306 }
4ed46869 6307 }
d46c5b12 6308
2391eaa4
KH
6309 coding->consumed = consumed;
6310 coding->consumed_char = consumed_char;
6311 coding->produced = produced;
6312 coding->produced_char = produced_char;
6313
78108bcd 6314 if (coding->dst_multibyte)
73be902c
KH
6315 newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6316 produced + shrinked_bytes);
78108bcd 6317 else
73be902c
KH
6318 newstr = make_uninit_string (produced + shrinked_bytes);
6319 if (from > 0)
a4244313
KR
6320 STRING_COPYIN (newstr, 0, SDATA (str), from);
6321 STRING_COPYIN (newstr, from, buf.data, produced);
73be902c 6322 if (shrinked_bytes > from)
a4244313
KR
6323 STRING_COPYIN (newstr, from + produced,
6324 SDATA (str) + to_byte,
6325 shrinked_bytes - from);
73be902c 6326 free_conversion_buffer (&buf);
b73bfc1c 6327
160a708c
KH
6328 coding->consumed += shrinked_bytes;
6329 coding->consumed_char += shrinked_bytes;
6330 coding->produced += shrinked_bytes;
6331 coding->produced_char += shrinked_bytes;
6332
b73bfc1c 6333 if (coding->cmp_data && coding->cmp_data->used)
73be902c 6334 coding_restore_composition (coding, newstr);
b73bfc1c
KH
6335 coding_free_composition_data (coding);
6336
6337 if (SYMBOLP (coding->post_read_conversion)
6338 && !NILP (Ffboundp (coding->post_read_conversion)))
73be902c 6339 newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
b73bfc1c 6340
73be902c 6341 return newstr;
b73bfc1c
KH
6342}
6343
6344Lisp_Object
6345encode_coding_string (str, coding, nocopy)
6346 Lisp_Object str;
6347 struct coding_system *coding;
6348 int nocopy;
6349{
6350 int len;
73be902c 6351 struct conversion_buffer buf;
b73bfc1c 6352 int from, to, to_byte;
b73bfc1c 6353 int result;
73be902c
KH
6354 int shrinked_bytes = 0;
6355 Lisp_Object newstr;
2391eaa4 6356 int consumed, consumed_char, produced, produced_char;
b73bfc1c
KH
6357
6358 if (SYMBOLP (coding->pre_write_conversion)
6359 && !NILP (Ffboundp (coding->pre_write_conversion)))
3bb917bf
KH
6360 {
6361 str = run_pre_post_conversion_on_str (str, coding, 1);
6362 /* As STR is just newly generated, we don't have to copy it
6363 anymore. */
6364 nocopy = 1;
6365 }
b73bfc1c
KH
6366
6367 from = 0;
d5db4077
KR
6368 to = SCHARS (str);
6369 to_byte = SBYTES (str);
b73bfc1c 6370
e2c06b17
KH
6371 /* Encoding routines determine the multibyteness of the source text
6372 by coding->src_multibyte. */
3bb917bf 6373 coding->src_multibyte = SCHARS (str) < SBYTES (str);
e2c06b17 6374 coding->dst_multibyte = 0;
b73bfc1c 6375 if (! CODING_REQUIRE_ENCODING (coding))
3bb917bf 6376 goto no_need_of_encoding;
826bfb8b 6377
b73bfc1c
KH
6378 if (coding->composing != COMPOSITION_DISABLED)
6379 coding_save_composition (coding, from, to, str);
ec6d2bb8 6380
ce559e6f
KH
6381 /* Try to skip the heading and tailing ASCIIs. We can't skip them
6382 if we must run CCL program or there are compositions to
6383 encode. */
6384 if (coding->type != coding_type_ccl
6385 && (! coding->cmp_data || coding->cmp_data->used == 0))
4956c225 6386 {
d5db4077 6387 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
4956c225
KH
6388 1);
6389 if (from == to_byte)
ce559e6f
KH
6390 {
6391 coding_free_composition_data (coding);
3bb917bf 6392 goto no_need_of_encoding;
ce559e6f 6393 }
d5db4077 6394 shrinked_bytes = from + (SBYTES (str) - to_byte);
4956c225 6395 }
b73bfc1c
KH
6396
6397 len = encoding_buffer_size (coding, to_byte - from);
73be902c
KH
6398 allocate_conversion_buffer (buf, len);
6399
2391eaa4 6400 consumed = consumed_char = produced = produced_char = 0;
73be902c
KH
6401 while (1)
6402 {
d5db4077 6403 result = encode_coding (coding, SDATA (str) + from + consumed,
73be902c
KH
6404 buf.data + produced, to_byte - from - consumed,
6405 buf.size - produced);
6406 consumed += coding->consumed;
2391eaa4 6407 consumed_char += coding->consumed_char;
13004bef 6408 produced += coding->produced;
2391eaa4
KH
6409 produced_char += coding->produced_char;
6410 if (result == CODING_FINISH_NORMAL
230779b9 6411 || result == CODING_FINISH_INTERRUPT
2391eaa4
KH
6412 || (result == CODING_FINISH_INSUFFICIENT_SRC
6413 && coding->consumed == 0))
73be902c
KH
6414 break;
6415 /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */
6416 extend_conversion_buffer (&buf);
6417 }
6418
2391eaa4
KH
6419 coding->consumed = consumed;
6420 coding->consumed_char = consumed_char;
6421 coding->produced = produced;
6422 coding->produced_char = produced_char;
6423
73be902c 6424 newstr = make_uninit_string (produced + shrinked_bytes);
b73bfc1c 6425 if (from > 0)
a4244313
KR
6426 STRING_COPYIN (newstr, 0, SDATA (str), from);
6427 STRING_COPYIN (newstr, from, buf.data, produced);
73be902c 6428 if (shrinked_bytes > from)
a4244313
KR
6429 STRING_COPYIN (newstr, from + produced,
6430 SDATA (str) + to_byte,
6431 shrinked_bytes - from);
73be902c
KH
6432
6433 free_conversion_buffer (&buf);
ec6d2bb8 6434 coding_free_composition_data (coding);
b73bfc1c 6435
73be902c 6436 return newstr;
3bb917bf
KH
6437
6438 no_need_of_encoding:
6439 coding->consumed = SBYTES (str);
6440 coding->consumed_char = SCHARS (str);
6441 if (STRING_MULTIBYTE (str))
6442 {
6443 if (nocopy)
6444 /* We are sure that STR doesn't contain a multibyte
6445 character. */
6446 STRING_SET_UNIBYTE (str);
6447 else
6448 {
6449 str = Fstring_as_unibyte (str);
6450 nocopy = 1;
6451 }
6452 }
6453 coding->produced = SBYTES (str);
6454 coding->produced_char = SCHARS (str);
6455 return (nocopy ? str : Fcopy_sequence (str));
4ed46869
KH
6456}
6457
6458\f
6459#ifdef emacs
1397dc18 6460/*** 8. Emacs Lisp library functions ***/
4ed46869 6461
4ed46869 6462DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae
PJ
6463 doc: /* Return t if OBJECT is nil or a coding-system.
6464See the documentation of `make-coding-system' for information
6465about coding-system objects. */)
6466 (obj)
4ed46869
KH
6467 Lisp_Object obj;
6468{
4608c386
KH
6469 if (NILP (obj))
6470 return Qt;
6471 if (!SYMBOLP (obj))
6472 return Qnil;
c2164d91
KH
6473 if (! NILP (Fget (obj, Qcoding_system_define_form)))
6474 return Qt;
4608c386
KH
6475 /* Get coding-spec vector for OBJ. */
6476 obj = Fget (obj, Qcoding_system);
6477 return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6478 ? Qt : Qnil);
4ed46869
KH
6479}
6480
9d991de8
RS
6481DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6482 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6483 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6484 (prompt)
4ed46869
KH
6485 Lisp_Object prompt;
6486{
e0e989f6 6487 Lisp_Object val;
9d991de8
RS
6488 do
6489 {
4608c386
KH
6490 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6491 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 6492 }
d5db4077 6493 while (SCHARS (val) == 0);
e0e989f6 6494 return (Fintern (val, Qnil));
4ed46869
KH
6495}
6496
9b787f3e 6497DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6498 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6499If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6500 (prompt, default_coding_system)
9b787f3e 6501 Lisp_Object prompt, default_coding_system;
4ed46869 6502{
f44d27ce 6503 Lisp_Object val;
9b787f3e 6504 if (SYMBOLP (default_coding_system))
57d25e6f 6505 default_coding_system = SYMBOL_NAME (default_coding_system);
4608c386 6506 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6507 Qt, Qnil, Qcoding_system_history,
6508 default_coding_system, Qnil);
d5db4077 6509 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6510}
6511
6512DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6513 1, 1, 0,
48b0f3ae
PJ
6514 doc: /* Check validity of CODING-SYSTEM.
6515If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
303cdc2d 6516It is valid if it is nil or a symbol with a non-nil `coding-system' property.
de1d1a40 6517The value of this property should be a vector of length 5. */)
48b0f3ae 6518 (coding_system)
4ed46869
KH
6519 Lisp_Object coding_system;
6520{
a362520d
KH
6521 Lisp_Object define_form;
6522
6523 define_form = Fget (coding_system, Qcoding_system_define_form);
6524 if (! NILP (define_form))
6525 {
6526 Fput (coding_system, Qcoding_system_define_form, Qnil);
6527 safe_eval (define_form);
6528 }
4ed46869
KH
6529 if (!NILP (Fcoding_system_p (coding_system)))
6530 return coding_system;
6531 while (1)
02ba4723 6532 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6533}
3a73fa5d 6534\f
d46c5b12 6535Lisp_Object
0a28aafb 6536detect_coding_system (src, src_bytes, highest, multibytep)
a4244313 6537 const unsigned char *src;
d46c5b12 6538 int src_bytes, highest;
0a28aafb 6539 int multibytep;
4ed46869
KH
6540{
6541 int coding_mask, eol_type;
d46c5b12
KH
6542 Lisp_Object val, tmp;
6543 int dummy;
4ed46869 6544
0a28aafb 6545 coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
d46c5b12
KH
6546 eol_type = detect_eol_type (src, src_bytes, &dummy);
6547 if (eol_type == CODING_EOL_INCONSISTENT)
25b02698 6548 eol_type = CODING_EOL_UNDECIDED;
4ed46869 6549
d46c5b12 6550 if (!coding_mask)
4ed46869 6551 {
27901516 6552 val = Qundecided;
d46c5b12 6553 if (eol_type != CODING_EOL_UNDECIDED)
4ed46869 6554 {
f44d27ce
RS
6555 Lisp_Object val2;
6556 val2 = Fget (Qundecided, Qeol_type);
4ed46869
KH
6557 if (VECTORP (val2))
6558 val = XVECTOR (val2)->contents[eol_type];
6559 }
80e803b4 6560 return (highest ? val : Fcons (val, Qnil));
4ed46869 6561 }
4ed46869 6562
d46c5b12
KH
6563 /* At first, gather possible coding systems in VAL. */
6564 val = Qnil;
fa42c37f 6565 for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
4ed46869 6566 {
fa42c37f
KH
6567 Lisp_Object category_val, category_index;
6568
6569 category_index = Fget (XCAR (tmp), Qcoding_category_index);
6570 category_val = Fsymbol_value (XCAR (tmp));
6571 if (!NILP (category_val)
6572 && NATNUMP (category_index)
6573 && (coding_mask & (1 << XFASTINT (category_index))))
4ed46869 6574 {
fa42c37f 6575 val = Fcons (category_val, val);
d46c5b12
KH
6576 if (highest)
6577 break;
4ed46869
KH
6578 }
6579 }
d46c5b12
KH
6580 if (!highest)
6581 val = Fnreverse (val);
4ed46869 6582
65059037 6583 /* Then, replace the elements with subsidiary coding systems. */
fa42c37f 6584 for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
4ed46869 6585 {
65059037
RS
6586 if (eol_type != CODING_EOL_UNDECIDED
6587 && eol_type != CODING_EOL_INCONSISTENT)
4ed46869 6588 {
d46c5b12 6589 Lisp_Object eol;
03699b14 6590 eol = Fget (XCAR (tmp), Qeol_type);
d46c5b12 6591 if (VECTORP (eol))
f3fbd155 6592 XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
4ed46869
KH
6593 }
6594 }
03699b14 6595 return (highest ? XCAR (val) : val);
93dec019 6596}
4ed46869 6597
d46c5b12
KH
6598DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6599 2, 3, 0,
40fd536c
KH
6600 doc: /* Detect how the byte sequence in the region is encoded.
6601Return a list of possible coding systems used on decoding a byte
6602sequence containing the bytes in the region between START and END when
6603the coding system `undecided' is specified. The list is ordered by
6604priority decided in the current language environment.
48b0f3ae
PJ
6605
6606If only ASCII characters are found, it returns a list of single element
6607`undecided' or its subsidiary coding system according to a detected
6608end-of-line format.
6609
6610If optional argument HIGHEST is non-nil, return the coding system of
6611highest priority. */)
6612 (start, end, highest)
d46c5b12
KH
6613 Lisp_Object start, end, highest;
6614{
6615 int from, to;
6616 int from_byte, to_byte;
682169fe 6617 int include_anchor_byte = 0;
6289dd10 6618
b7826503
PJ
6619 CHECK_NUMBER_COERCE_MARKER (start);
6620 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6621
d46c5b12
KH
6622 validate_region (&start, &end);
6623 from = XINT (start), to = XINT (end);
6624 from_byte = CHAR_TO_BYTE (from);
6625 to_byte = CHAR_TO_BYTE (to);
6289dd10 6626
d46c5b12
KH
6627 if (from < GPT && to >= GPT)
6628 move_gap_both (to, to_byte);
c210f766
KH
6629 /* If we an anchor byte `\0' follows the region, we include it in
6630 the detecting source. Then code detectors can handle the tailing
6631 byte sequence more accurately.
6632
7d0393cf 6633 Fix me: This is not a perfect solution. It is better that we
c210f766
KH
6634 add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6635 */
682169fe
KH
6636 if (to == Z || (to == GPT && GAP_SIZE > 0))
6637 include_anchor_byte = 1;
d46c5b12 6638 return detect_coding_system (BYTE_POS_ADDR (from_byte),
682169fe 6639 to_byte - from_byte + include_anchor_byte,
0a28aafb
KH
6640 !NILP (highest),
6641 !NILP (current_buffer
6642 ->enable_multibyte_characters));
d46c5b12 6643}
6289dd10 6644
d46c5b12
KH
6645DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6646 1, 2, 0,
eec1f3c7
KH
6647 doc: /* Detect how the byte sequence in STRING is encoded.
6648Return a list of possible coding systems used on decoding a byte
6649sequence containing the bytes in STRING when the coding system
6650`undecided' is specified. The list is ordered by priority decided in
6651the current language environment.
48b0f3ae
PJ
6652
6653If only ASCII characters are found, it returns a list of single element
6654`undecided' or its subsidiary coding system according to a detected
6655end-of-line format.
6656
6657If optional argument HIGHEST is non-nil, return the coding system of
6658highest priority. */)
6659 (string, highest)
d46c5b12
KH
6660 Lisp_Object string, highest;
6661{
b7826503 6662 CHECK_STRING (string);
4ed46869 6663
d5db4077 6664 return detect_coding_system (SDATA (string),
682169fe
KH
6665 /* "+ 1" is to include the anchor byte
6666 `\0'. With this, code detectors can
c210f766
KH
6667 handle the tailing bytes more
6668 accurately. */
d5db4077 6669 SBYTES (string) + 1,
0a28aafb
KH
6670 !NILP (highest),
6671 STRING_MULTIBYTE (string));
4ed46869
KH
6672}
6673
d12168d6 6674/* Subroutine for Ffind_coding_systems_region_internal.
05e6f5dc
KH
6675
6676 Return a list of coding systems that safely encode the multibyte
b666620c 6677 text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
05e6f5dc
KH
6678 possible coding systems. If it is nil, it means that we have not
6679 yet found any coding systems.
6680
12d5b185
KH
6681 WORK_TABLE a char-table of which element is set to t once the
6682 element is looked up.
05e6f5dc
KH
6683
6684 If a non-ASCII single byte char is found, set
6685 *single_byte_char_found to 1. */
6686
6687static Lisp_Object
6688find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6689 unsigned char *p, *pend;
6690 Lisp_Object safe_codings, work_table;
6691 int *single_byte_char_found;
6b89e3aa 6692{
f1ce3dcf 6693 int c, len;
6b89e3aa
KH
6694 Lisp_Object val, ch;
6695 Lisp_Object prev, tail;
177c0ea7 6696
12d5b185
KH
6697 if (NILP (safe_codings))
6698 goto done_safe_codings;
6b89e3aa
KH
6699 while (p < pend)
6700 {
6701 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6702 p += len;
6703 if (ASCII_BYTE_P (c))
6704 /* We can ignore ASCII characters here. */
6705 continue;
6706 if (SINGLE_BYTE_CHAR_P (c))
6707 *single_byte_char_found = 1;
6b89e3aa
KH
6708 /* Check the safe coding systems for C. */
6709 ch = make_number (c);
6710 val = Faref (work_table, ch);
6711 if (EQ (val, Qt))
6712 /* This element was already checked. Ignore it. */
6713 continue;
6714 /* Remember that we checked this element. */
6715 Faset (work_table, ch, Qt);
6716
6717 for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6718 {
b666620c
KH
6719 Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6720 int encodable;
6721
6722 elt = XCAR (tail);
6723 if (CONSP (XCDR (elt)))
6724 {
6725 /* This entry has this format now:
6726 ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6727 ACCEPT-LATIN-EXTRA ) */
6728 val = XCDR (elt);
6729 encodable = ! NILP (Faref (XCAR (val), ch));
6730 if (! encodable)
6731 {
6732 val = XCDR (val);
6733 translation_table = XCAR (val);
6734 hash_table = XCAR (XCDR (val));
6735 accept_latin_extra = XCAR (XCDR (XCDR (val)));
6736 }
6737 }
6738 else
6739 {
6740 /* This entry has this format now: ( CODING . SAFE-CHARS) */
6741 encodable = ! NILP (Faref (XCDR (elt), ch));
6742 if (! encodable)
6743 {
6744 /* Transform the format to:
6745 ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6746 ACCEPT-LATIN-EXTRA ) */
6747 val = Fget (XCAR (elt), Qcoding_system);
6748 translation_table
6749 = Fplist_get (AREF (val, 3),
6750 Qtranslation_table_for_encode);
6751 if (SYMBOLP (translation_table))
6752 translation_table = Fget (translation_table,
6753 Qtranslation_table);
6754 hash_table
6755 = (CHAR_TABLE_P (translation_table)
6756 ? XCHAR_TABLE (translation_table)->extras[1]
6757 : Qnil);
6758 accept_latin_extra
6759 = ((EQ (AREF (val, 0), make_number (2))
6760 && VECTORP (AREF (val, 4)))
58f99379 6761 ? AREF (AREF (val, 4), 16)
b666620c
KH
6762 : Qnil);
6763 XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6764 translation_table, hash_table,
6765 accept_latin_extra));
6766 }
6767 }
43e4a82f 6768
b666620c
KH
6769 if (! encodable
6770 && ((CHAR_TABLE_P (translation_table)
6771 && ! NILP (Faref (translation_table, ch)))
6772 || (HASH_TABLE_P (hash_table)
6773 && ! NILP (Fgethash (ch, hash_table, Qnil)))
6774 || (SINGLE_BYTE_CHAR_P (c)
6775 && ! NILP (accept_latin_extra)
6776 && VECTORP (Vlatin_extra_code_table)
6777 && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6778 encodable = 1;
6779 if (encodable)
6780 prev = tail;
6781 else
6b89e3aa 6782 {
7c695ab9 6783 /* Exclude this coding system from SAFE_CODINGS. */
6b89e3aa 6784 if (EQ (tail, safe_codings))
12d5b185
KH
6785 {
6786 safe_codings = XCDR (safe_codings);
6787 if (NILP (safe_codings))
6788 goto done_safe_codings;
6789 }
6b89e3aa
KH
6790 else
6791 XSETCDR (prev, XCDR (tail));
6792 }
6b89e3aa
KH
6793 }
6794 }
12d5b185
KH
6795
6796 done_safe_codings:
6797 /* If the above loop was terminated before P reaches PEND, it means
6798 SAFE_CODINGS was set to nil. If we have not yet found an
6799 non-ASCII single-byte char, check it now. */
6800 if (! *single_byte_char_found)
6801 while (p < pend)
6802 {
6803 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6804 p += len;
6805 if (! ASCII_BYTE_P (c)
6806 && SINGLE_BYTE_CHAR_P (c))
6807 {
6808 *single_byte_char_found = 1;
6809 break;
6810 }
6811 }
6b89e3aa
KH
6812 return safe_codings;
6813}
6814
067a6a66
KH
6815DEFUN ("find-coding-systems-region-internal",
6816 Ffind_coding_systems_region_internal,
6817 Sfind_coding_systems_region_internal, 2, 2, 0,
6b89e3aa
KH
6818 doc: /* Internal use only. */)
6819 (start, end)
6820 Lisp_Object start, end;
6821{
6822 Lisp_Object work_table, safe_codings;
6823 int non_ascii_p = 0;
6824 int single_byte_char_found = 0;
6825 const unsigned char *p1, *p1end, *p2, *p2end, *p;
6826
6827 if (STRINGP (start))
6828 {
6829 if (!STRING_MULTIBYTE (start))
6830 return Qt;
6831 p1 = SDATA (start), p1end = p1 + SBYTES (start);
6832 p2 = p2end = p1end;
6833 if (SCHARS (start) != SBYTES (start))
6834 non_ascii_p = 1;
6835 }
6836 else
6837 {
6838 int from, to, stop;
6839
6840 CHECK_NUMBER_COERCE_MARKER (start);
6841 CHECK_NUMBER_COERCE_MARKER (end);
6842 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6843 args_out_of_range (start, end);
6844 if (NILP (current_buffer->enable_multibyte_characters))
6845 return Qt;
6846 from = CHAR_TO_BYTE (XINT (start));
6847 to = CHAR_TO_BYTE (XINT (end));
6848 stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6849 p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6850 if (stop == to)
6851 p2 = p2end = p1end;
6852 else
6853 p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6854 if (XINT (end) - XINT (start) != to - from)
6855 non_ascii_p = 1;
6856 }
6857
6858 if (!non_ascii_p)
6859 {
6860 /* We are sure that the text contains no multibyte character.
6861 Check if it contains eight-bit-graphic. */
6862 p = p1;
6863 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6864 if (p == p1end)
6865 {
6866 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6867 if (p == p2end)
6868 return Qt;
6869 }
6870 }
6871
6872 /* The text contains non-ASCII characters. */
6873
6874 work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6875 safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6876
067a6a66
KH
6877 safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6878 &single_byte_char_found);
6b89e3aa 6879 if (p2 < p2end)
067a6a66
KH
6880 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6881 &single_byte_char_found);
6b89e3aa
KH
6882 if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6883 safe_codings = Qt;
6884 else
6885 {
6886 /* Turn safe_codings to a list of coding systems... */
6887 Lisp_Object val;
6888
6889 if (single_byte_char_found)
6890 /* ... and append these for eight-bit chars. */
6891 val = Fcons (Qraw_text,
6892 Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6893 else
6894 /* ... and append generic coding systems. */
6895 val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
177c0ea7 6896
6b89e3aa
KH
6897 for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6898 val = Fcons (XCAR (XCAR (safe_codings)), val);
6899 safe_codings = val;
6900 }
6901
6902 return safe_codings;
6903}
6904
6905
068a9dbd
KH
6906/* Search from position POS for such characters that are unencodable
6907 accoding to SAFE_CHARS, and return a list of their positions. P
6908 points where in the memory the character at POS exists. Limit the
6909 search at PEND or when Nth unencodable characters are found.
6910
6911 If SAFE_CHARS is a char table, an element for an unencodable
6912 character is nil.
6913
6914 If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6915
6916 Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6917 eight-bit-graphic characters are unencodable. */
6918
6919static Lisp_Object
6920unencodable_char_position (safe_chars, pos, p, pend, n)
6921 Lisp_Object safe_chars;
6922 int pos;
6923 unsigned char *p, *pend;
6924 int n;
6925{
6926 Lisp_Object pos_list;
6927
6928 pos_list = Qnil;
6929 while (p < pend)
6930 {
6931 int len;
6932 int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
7d0393cf 6933
068a9dbd
KH
6934 if (c >= 128
6935 && (CHAR_TABLE_P (safe_chars)
6936 ? NILP (CHAR_TABLE_REF (safe_chars, c))
6937 : (NILP (safe_chars) || c < 256)))
6938 {
6939 pos_list = Fcons (make_number (pos), pos_list);
6940 if (--n <= 0)
6941 break;
6942 }
6943 pos++;
6944 p += len;
6945 }
6946 return Fnreverse (pos_list);
6947}
6948
6949
6950DEFUN ("unencodable-char-position", Funencodable_char_position,
6951 Sunencodable_char_position, 3, 5, 0,
6952 doc: /*
6953Return position of first un-encodable character in a region.
6954START and END specfiy the region and CODING-SYSTEM specifies the
6955encoding to check. Return nil if CODING-SYSTEM does encode the region.
6956
6957If optional 4th argument COUNT is non-nil, it specifies at most how
6958many un-encodable characters to search. In this case, the value is a
6959list of positions.
6960
6961If optional 5th argument STRING is non-nil, it is a string to search
6962for un-encodable characters. In that case, START and END are indexes
6963to the string. */)
6964 (start, end, coding_system, count, string)
6965 Lisp_Object start, end, coding_system, count, string;
6966{
6967 int n;
6968 Lisp_Object safe_chars;
6969 struct coding_system coding;
6970 Lisp_Object positions;
6971 int from, to;
6972 unsigned char *p, *pend;
6973
6974 if (NILP (string))
6975 {
6976 validate_region (&start, &end);
6977 from = XINT (start);
6978 to = XINT (end);
6979 if (NILP (current_buffer->enable_multibyte_characters))
6980 return Qnil;
6981 p = CHAR_POS_ADDR (from);
200c93e2
KH
6982 if (to == GPT)
6983 pend = GPT_ADDR;
6984 else
6985 pend = CHAR_POS_ADDR (to);
068a9dbd
KH
6986 }
6987 else
6988 {
6989 CHECK_STRING (string);
6990 CHECK_NATNUM (start);
6991 CHECK_NATNUM (end);
6992 from = XINT (start);
6993 to = XINT (end);
6994 if (from > to
6995 || to > SCHARS (string))
6996 args_out_of_range_3 (string, start, end);
6997 if (! STRING_MULTIBYTE (string))
6998 return Qnil;
6999 p = SDATA (string) + string_char_to_byte (string, from);
7000 pend = SDATA (string) + string_char_to_byte (string, to);
7001 }
7002
7003 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7004
7005 if (NILP (count))
7006 n = 1;
7007 else
7008 {
7009 CHECK_NATNUM (count);
7010 n = XINT (count);
7011 }
7012
7013 if (coding.type == coding_type_no_conversion
7014 || coding.type == coding_type_raw_text)
7015 return Qnil;
7016
7017 if (coding.type == coding_type_undecided)
7018 safe_chars = Qnil;
7019 else
6b89e3aa 7020 safe_chars = coding_safe_chars (coding_system);
068a9dbd
KH
7021
7022 if (STRINGP (string)
7023 || from >= GPT || to <= GPT)
7024 positions = unencodable_char_position (safe_chars, from, p, pend, n);
7025 else
7026 {
7027 Lisp_Object args[2];
7028
7029 args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
96d2e64d 7030 n -= XINT (Flength (args[0]));
068a9dbd
KH
7031 if (n <= 0)
7032 positions = args[0];
7033 else
7034 {
7035 args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
7036 pend, n);
7037 positions = Fappend (2, args);
7038 }
7039 }
7040
7041 return (NILP (count) ? Fcar (positions) : positions);
7042}
7043
7044
4031e2bf
KH
7045Lisp_Object
7046code_convert_region1 (start, end, coding_system, encodep)
d46c5b12 7047 Lisp_Object start, end, coding_system;
4031e2bf 7048 int encodep;
3a73fa5d
RS
7049{
7050 struct coding_system coding;
da55a2b7 7051 int from, to;
3a73fa5d 7052
b7826503
PJ
7053 CHECK_NUMBER_COERCE_MARKER (start);
7054 CHECK_NUMBER_COERCE_MARKER (end);
7055 CHECK_SYMBOL (coding_system);
3a73fa5d 7056
d46c5b12
KH
7057 validate_region (&start, &end);
7058 from = XFASTINT (start);
7059 to = XFASTINT (end);
7060
3a73fa5d 7061 if (NILP (coding_system))
d46c5b12
KH
7062 return make_number (to - from);
7063
3a73fa5d 7064 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
d5db4077 7065 error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
3a73fa5d 7066
d46c5b12 7067 coding.mode |= CODING_MODE_LAST_BLOCK;
b73bfc1c
KH
7068 coding.src_multibyte = coding.dst_multibyte
7069 = !NILP (current_buffer->enable_multibyte_characters);
fb88bf2d
KH
7070 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
7071 &coding, encodep, 1);
f072a3e8 7072 Vlast_coding_system_used = coding.symbol;
fb88bf2d 7073 return make_number (coding.produced_char);
4031e2bf
KH
7074}
7075
7076DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7077 3, 3, "r\nzCoding system: ",
48b0f3ae
PJ
7078 doc: /* Decode the current region from the specified coding system.
7079When called from a program, takes three arguments:
7080START, END, and CODING-SYSTEM. START and END are buffer positions.
7081This function sets `last-coding-system-used' to the precise coding system
7082used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7083not fully specified.)
7084It returns the length of the decoded text. */)
7085 (start, end, coding_system)
4031e2bf
KH
7086 Lisp_Object start, end, coding_system;
7087{
7088 return code_convert_region1 (start, end, coding_system, 0);
3a73fa5d
RS
7089}
7090
7091DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7092 3, 3, "r\nzCoding system: ",
48b0f3ae
PJ
7093 doc: /* Encode the current region into the specified coding system.
7094When called from a program, takes three arguments:
7095START, END, and CODING-SYSTEM. START and END are buffer positions.
7096This function sets `last-coding-system-used' to the precise coding system
7097used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7098not fully specified.)
7099It returns the length of the encoded text. */)
7100 (start, end, coding_system)
d46c5b12 7101 Lisp_Object start, end, coding_system;
3a73fa5d 7102{
4031e2bf
KH
7103 return code_convert_region1 (start, end, coding_system, 1);
7104}
3a73fa5d 7105
4031e2bf
KH
7106Lisp_Object
7107code_convert_string1 (string, coding_system, nocopy, encodep)
7108 Lisp_Object string, coding_system, nocopy;
7109 int encodep;
7110{
7111 struct coding_system coding;
3a73fa5d 7112
b7826503
PJ
7113 CHECK_STRING (string);
7114 CHECK_SYMBOL (coding_system);
4ed46869 7115
d46c5b12 7116 if (NILP (coding_system))
4031e2bf 7117 return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4ed46869 7118
d46c5b12 7119 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
d5db4077 7120 error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
5f1cd180 7121
d46c5b12 7122 coding.mode |= CODING_MODE_LAST_BLOCK;
b73bfc1c
KH
7123 string = (encodep
7124 ? encode_coding_string (string, &coding, !NILP (nocopy))
7125 : decode_coding_string (string, &coding, !NILP (nocopy)));
f072a3e8 7126 Vlast_coding_system_used = coding.symbol;
ec6d2bb8
KH
7127
7128 return string;
4ed46869
KH
7129}
7130
4ed46869 7131DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
e0e989f6 7132 2, 3, 0,
48b0f3ae
PJ
7133 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7134Optional arg NOCOPY non-nil means it is OK to return STRING itself
7135if the decoding operation is trivial.
7136This function sets `last-coding-system-used' to the precise coding system
7137used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7138not fully specified.) */)
7139 (string, coding_system, nocopy)
e0e989f6 7140 Lisp_Object string, coding_system, nocopy;
4ed46869 7141{
f072a3e8 7142 return code_convert_string1 (string, coding_system, nocopy, 0);
4ed46869
KH
7143}
7144
7145DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
e0e989f6 7146 2, 3, 0,
48b0f3ae
PJ
7147 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7148Optional arg NOCOPY non-nil means it is OK to return STRING itself
7149if the encoding operation is trivial.
7150This function sets `last-coding-system-used' to the precise coding system
7151used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7152not fully specified.) */)
7153 (string, coding_system, nocopy)
e0e989f6 7154 Lisp_Object string, coding_system, nocopy;
4ed46869 7155{
f072a3e8 7156 return code_convert_string1 (string, coding_system, nocopy, 1);
4ed46869 7157}
4031e2bf 7158
ecec61c1 7159/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
7160 Do not set Vlast_coding_system_used.
7161
7162 This function is called only from macros DECODE_FILE and
7163 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
7164
7165Lisp_Object
7166code_convert_string_norecord (string, coding_system, encodep)
7167 Lisp_Object string, coding_system;
7168 int encodep;
7169{
7170 struct coding_system coding;
7171
b7826503
PJ
7172 CHECK_STRING (string);
7173 CHECK_SYMBOL (coding_system);
ecec61c1
KH
7174
7175 if (NILP (coding_system))
7176 return string;
7177
7178 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
d5db4077 7179 error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
ecec61c1 7180
ec6d2bb8 7181 coding.composing = COMPOSITION_DISABLED;
ecec61c1 7182 coding.mode |= CODING_MODE_LAST_BLOCK;
b73bfc1c
KH
7183 return (encodep
7184 ? encode_coding_string (string, &coding, 1)
7185 : decode_coding_string (string, &coding, 1));
ecec61c1 7186}
3a73fa5d 7187\f
4ed46869 7188DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7189 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7190Return the corresponding character. */)
7191 (code)
4ed46869
KH
7192 Lisp_Object code;
7193{
7194 unsigned char c1, c2, s1, s2;
7195 Lisp_Object val;
7196
b7826503 7197 CHECK_NUMBER (code);
4ed46869 7198 s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
55ab7be3
KH
7199 if (s1 == 0)
7200 {
c28a9453
KH
7201 if (s2 < 0x80)
7202 XSETFASTINT (val, s2);
7203 else if (s2 >= 0xA0 || s2 <= 0xDF)
b73bfc1c 7204 XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
c28a9453 7205 else
9da8350f 7206 error ("Invalid Shift JIS code: %x", XFASTINT (code));
55ab7be3
KH
7207 }
7208 else
7209 {
87323294 7210 if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
55ab7be3 7211 || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
9da8350f 7212 error ("Invalid Shift JIS code: %x", XFASTINT (code));
55ab7be3 7213 DECODE_SJIS (s1, s2, c1, c2);
b73bfc1c 7214 XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
55ab7be3 7215 }
4ed46869
KH
7216 return val;
7217}
7218
7219DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
7220 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7221Return the corresponding code in SJIS. */)
7222 (ch)
4ed46869
KH
7223 Lisp_Object ch;
7224{
bcf26d6a 7225 int charset, c1, c2, s1, s2;
4ed46869
KH
7226 Lisp_Object val;
7227
b7826503 7228 CHECK_NUMBER (ch);
4ed46869 7229 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
c28a9453
KH
7230 if (charset == CHARSET_ASCII)
7231 {
7232 val = ch;
7233 }
7234 else if (charset == charset_jisx0208
7235 && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
4ed46869
KH
7236 {
7237 ENCODE_SJIS (c1, c2, s1, s2);
bcf26d6a 7238 XSETFASTINT (val, (s1 << 8) | s2);
4ed46869 7239 }
55ab7be3
KH
7240 else if (charset == charset_katakana_jisx0201
7241 && c1 > 0x20 && c2 < 0xE0)
7242 {
7243 XSETFASTINT (val, c1 | 0x80);
7244 }
4ed46869 7245 else
55ab7be3 7246 error ("Can't encode to shift_jis: %d", XFASTINT (ch));
4ed46869
KH
7247 return val;
7248}
7249
7250DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7251 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7252Return the corresponding character. */)
7253 (code)
4ed46869
KH
7254 Lisp_Object code;
7255{
7256 int charset;
7257 unsigned char b1, b2, c1, c2;
7258 Lisp_Object val;
7259
b7826503 7260 CHECK_NUMBER (code);
4ed46869 7261 b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
c28a9453
KH
7262 if (b1 == 0)
7263 {
7264 if (b2 >= 0x80)
9da8350f 7265 error ("Invalid BIG5 code: %x", XFASTINT (code));
c28a9453
KH
7266 val = code;
7267 }
7268 else
7269 {
7270 if ((b1 < 0xA1 || b1 > 0xFE)
7271 || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
9da8350f 7272 error ("Invalid BIG5 code: %x", XFASTINT (code));
c28a9453 7273 DECODE_BIG5 (b1, b2, charset, c1, c2);
b73bfc1c 7274 XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
c28a9453 7275 }
4ed46869
KH
7276 return val;
7277}
7278
7279DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7280 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7281Return the corresponding character code in Big5. */)
7282 (ch)
4ed46869
KH
7283 Lisp_Object ch;
7284{
bcf26d6a 7285 int charset, c1, c2, b1, b2;
4ed46869
KH
7286 Lisp_Object val;
7287
b7826503 7288 CHECK_NUMBER (ch);
4ed46869 7289 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
c28a9453
KH
7290 if (charset == CHARSET_ASCII)
7291 {
7292 val = ch;
7293 }
7294 else if ((charset == charset_big5_1
7295 && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7296 || (charset == charset_big5_2
7297 && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
4ed46869
KH
7298 {
7299 ENCODE_BIG5 (charset, c1, c2, b1, b2);
bcf26d6a 7300 XSETFASTINT (val, (b1 << 8) | b2);
4ed46869
KH
7301 }
7302 else
c28a9453 7303 error ("Can't encode to Big5: %d", XFASTINT (ch));
4ed46869
KH
7304 return val;
7305}
3a73fa5d 7306\f
002fdb44 7307DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 7308 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 7309 doc: /* Internal use only. */)
68bba4e4 7310 (coding_system, display)
4ed46869 7311 Lisp_Object coding_system;
68bba4e4 7312 Lisp_Object display;
4ed46869 7313{
68bba4e4 7314 struct coding_system *terminal_coding = DISPLAY_TERMINAL_CODING (get_display (display, 1));
b7826503 7315 CHECK_SYMBOL (coding_system);
b8299c66 7316 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 7317 /* We had better not send unsafe characters to terminal. */
b8299c66 7318 terminal_coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
8ca3766a 7319 /* Character composition should be disabled. */
b8299c66 7320 terminal_coding->composing = COMPOSITION_DISABLED;
bd64290d 7321 /* Error notification should be suppressed. */
b8299c66
KL
7322 terminal_coding->suppress_error = 1;
7323 terminal_coding->src_multibyte = 1;
7324 terminal_coding->dst_multibyte = 0;
4ed46869
KH
7325 return Qnil;
7326}
7327
002fdb44 7328DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
48b0f3ae 7329 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7330 doc: /* Internal use only. */)
48b0f3ae 7331 (coding_system)
c4825358
KH
7332 Lisp_Object coding_system;
7333{
b7826503 7334 CHECK_SYMBOL (coding_system);
c4825358
KH
7335 setup_coding_system (Fcheck_coding_system (coding_system),
7336 &safe_terminal_coding);
8ca3766a 7337 /* Character composition should be disabled. */
ec6d2bb8 7338 safe_terminal_coding.composing = COMPOSITION_DISABLED;
bd64290d 7339 /* Error notification should be suppressed. */
b8299c66 7340 safe_terminal_coding.suppress_error = 1;
b73bfc1c
KH
7341 safe_terminal_coding.src_multibyte = 1;
7342 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7343 return Qnil;
7344}
7345
002fdb44 7346DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4
KL
7347 Sterminal_coding_system, 0, 1, 0,
7348 doc: /* Return coding system specified for terminal output on the given display.
7349DISPLAY may be a display id, a frame, or nil for the selected frame's display. */)
7350 (display)
7351 Lisp_Object display;
4ed46869 7352{
68bba4e4 7353 return DISPLAY_TERMINAL_CODING (get_display (display, 1))->symbol;
4ed46869
KH
7354}
7355
002fdb44 7356DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 7357 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 7358 doc: /* Internal use only. */)
68bba4e4 7359 (coding_system, display)
4ed46869 7360 Lisp_Object coding_system;
68bba4e4 7361 Lisp_Object display;
4ed46869 7362{
68bba4e4 7363 struct display *d = get_display (display, 1);
b7826503 7364 CHECK_SYMBOL (coding_system);
68bba4e4 7365
b8299c66 7366 setup_coding_system (Fcheck_coding_system (coding_system),
68bba4e4 7367 DISPLAY_KEYBOARD_CODING (d));
8ca3766a 7368 /* Character composition should be disabled. */
68bba4e4 7369 DISPLAY_KEYBOARD_CODING (d)->composing = COMPOSITION_DISABLED;
4ed46869
KH
7370 return Qnil;
7371}
7372
002fdb44 7373DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
68bba4e4 7374 Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 7375 doc: /* Return coding system specified for decoding keyboard input. */)
68bba4e4
KL
7376 (display)
7377 Lisp_Object display;
4ed46869 7378{
68bba4e4 7379 return DISPLAY_KEYBOARD_CODING (get_display (display, 1))->symbol;
4ed46869
KH
7380}
7381
7382\f
a5d301df
KH
7383DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7384 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7385 doc: /* Choose a coding system for an operation based on the target name.
7386The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7387DECODING-SYSTEM is the coding system to use for decoding
7388\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7389for encoding (in case OPERATION does encoding).
7390
7391The first argument OPERATION specifies an I/O primitive:
7392 For file I/O, `insert-file-contents' or `write-region'.
7393 For process I/O, `call-process', `call-process-region', or `start-process'.
7394 For network I/O, `open-network-stream'.
7395
7396The remaining arguments should be the same arguments that were passed
7397to the primitive. Depending on which primitive, one of those arguments
7398is selected as the TARGET. For example, if OPERATION does file I/O,
7399whichever argument specifies the file name is TARGET.
7400
7401TARGET has a meaning which depends on OPERATION:
7402 For file I/O, TARGET is a file name.
7403 For process I/O, TARGET is a process name.
7404 For network I/O, TARGET is a service name or a port number
7405
7406This function looks up what specified for TARGET in,
7407`file-coding-system-alist', `process-coding-system-alist',
7408or `network-coding-system-alist' depending on OPERATION.
7409They may specify a coding system, a cons of coding systems,
7410or a function symbol to call.
7411In the last case, we call the function with one argument,
7412which is a list of all the arguments given to this function.
7413
7414usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7415 (nargs, args)
4ed46869
KH
7416 int nargs;
7417 Lisp_Object *args;
7418{
7419 Lisp_Object operation, target_idx, target, val;
7420 register Lisp_Object chain;
7421
7422 if (nargs < 2)
7423 error ("Too few arguments");
7424 operation = args[0];
7425 if (!SYMBOLP (operation)
7426 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8ca3766a 7427 error ("Invalid first argument");
4ed46869
KH
7428 if (nargs < 1 + XINT (target_idx))
7429 error ("Too few arguments for operation: %s",
d5db4077 7430 SDATA (SYMBOL_NAME (operation)));
7f787cfd
KH
7431 /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7432 argument to write-region) is string, it must be treated as a
7433 target file name. */
7434 if (EQ (operation, Qwrite_region)
7435 && nargs > 5
7436 && STRINGP (args[5]))
d90ed3b4 7437 target_idx = make_number (4);
4ed46869
KH
7438 target = args[XINT (target_idx) + 1];
7439 if (!(STRINGP (target)
7440 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8ca3766a 7441 error ("Invalid argument %d", XINT (target_idx) + 1);
4ed46869 7442
2e34157c
RS
7443 chain = ((EQ (operation, Qinsert_file_contents)
7444 || EQ (operation, Qwrite_region))
02ba4723 7445 ? Vfile_coding_system_alist
2e34157c 7446 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7447 ? Vnetwork_coding_system_alist
7448 : Vprocess_coding_system_alist));
4ed46869
KH
7449 if (NILP (chain))
7450 return Qnil;
7451
03699b14 7452 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7453 {
f44d27ce 7454 Lisp_Object elt;
03699b14 7455 elt = XCAR (chain);
4ed46869
KH
7456
7457 if (CONSP (elt)
7458 && ((STRINGP (target)
03699b14
KR
7459 && STRINGP (XCAR (elt))
7460 && fast_string_match (XCAR (elt), target) >= 0)
7461 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7462 {
03699b14 7463 val = XCDR (elt);
b19fd4c5
KH
7464 /* Here, if VAL is both a valid coding system and a valid
7465 function symbol, we return VAL as a coding system. */
02ba4723
KH
7466 if (CONSP (val))
7467 return val;
7468 if (! SYMBOLP (val))
7469 return Qnil;
7470 if (! NILP (Fcoding_system_p (val)))
7471 return Fcons (val, val);
b19fd4c5
KH
7472 if (! NILP (Ffboundp (val)))
7473 {
7474 val = call1 (val, Flist (nargs, args));
7475 if (CONSP (val))
7476 return val;
7477 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7478 return Fcons (val, val);
7479 }
02ba4723
KH
7480 return Qnil;
7481 }
4ed46869
KH
7482 }
7483 return Qnil;
7484}
7485
1397dc18
KH
7486DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal,
7487 Supdate_coding_systems_internal, 0, 0, 0,
48b0f3ae
PJ
7488 doc: /* Update internal database for ISO2022 and CCL based coding systems.
7489When values of any coding categories are changed, you must
7490call this function. */)
7491 ()
d46c5b12
KH
7492{
7493 int i;
7494
fa42c37f 7495 for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
d46c5b12 7496 {
1397dc18
KH
7497 Lisp_Object val;
7498
f5c1dd0d 7499 val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
1397dc18
KH
7500 if (!NILP (val))
7501 {
7502 if (! coding_system_table[i])
7503 coding_system_table[i] = ((struct coding_system *)
7504 xmalloc (sizeof (struct coding_system)));
7505 setup_coding_system (val, coding_system_table[i]);
7506 }
7507 else if (coding_system_table[i])
7508 {
7509 xfree (coding_system_table[i]);
7510 coding_system_table[i] = NULL;
7511 }
d46c5b12 7512 }
1397dc18 7513
d46c5b12
KH
7514 return Qnil;
7515}
7516
66cfb530
KH
7517DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7518 Sset_coding_priority_internal, 0, 0, 0,
48b0f3ae
PJ
7519 doc: /* Update internal database for the current value of `coding-category-list'.
7520This function is internal use only. */)
7521 ()
66cfb530
KH
7522{
7523 int i = 0, idx;
84d60297
RS
7524 Lisp_Object val;
7525
7526 val = Vcoding_category_list;
66cfb530
KH
7527
7528 while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7529 {
03699b14 7530 if (! SYMBOLP (XCAR (val)))
66cfb530 7531 break;
03699b14 7532 idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
66cfb530
KH
7533 if (idx >= CODING_CATEGORY_IDX_MAX)
7534 break;
7535 coding_priorities[i++] = (1 << idx);
03699b14 7536 val = XCDR (val);
66cfb530
KH
7537 }
7538 /* If coding-category-list is valid and contains all coding
7539 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
fa42c37f 7540 the following code saves Emacs from crashing. */
66cfb530
KH
7541 while (i < CODING_CATEGORY_IDX_MAX)
7542 coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7543
7544 return Qnil;
7545}
7546
6b89e3aa
KH
7547DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7548 Sdefine_coding_system_internal, 1, 1, 0,
7549 doc: /* Register CODING-SYSTEM as a base coding system.
7550This function is internal use only. */)
7551 (coding_system)
7552 Lisp_Object coding_system;
7553{
7554 Lisp_Object safe_chars, slot;
7555
7556 if (NILP (Fcheck_coding_system (coding_system)))
7557 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7558 safe_chars = coding_safe_chars (coding_system);
7559 if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7560 error ("No valid safe-chars property for %s",
7561 SDATA (SYMBOL_NAME (coding_system)));
7562 if (EQ (safe_chars, Qt))
7563 {
7564 if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7565 XSETCAR (Vcoding_system_safe_chars,
7566 Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7567 }
7568 else
7569 {
7570 slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7571 if (NILP (slot))
7572 XSETCDR (Vcoding_system_safe_chars,
7573 nconc2 (XCDR (Vcoding_system_safe_chars),
7574 Fcons (Fcons (coding_system, safe_chars), Qnil)));
7575 else
7576 XSETCDR (slot, safe_chars);
7577 }
7578 return Qnil;
7579}
7580
4ed46869
KH
7581#endif /* emacs */
7582
7583\f
1397dc18 7584/*** 9. Post-amble ***/
4ed46869 7585
dfcf069d 7586void
4ed46869
KH
7587init_coding_once ()
7588{
7589 int i;
7590
93dec019 7591 /* Emacs' internal format specific initialize routine. */
4ed46869
KH
7592 for (i = 0; i <= 0x20; i++)
7593 emacs_code_class[i] = EMACS_control_code;
7594 emacs_code_class[0x0A] = EMACS_linefeed_code;
7595 emacs_code_class[0x0D] = EMACS_carriage_return_code;
7596 for (i = 0x21 ; i < 0x7F; i++)
7597 emacs_code_class[i] = EMACS_ascii_code;
7598 emacs_code_class[0x7F] = EMACS_control_code;
ec6d2bb8 7599 for (i = 0x80; i < 0xFF; i++)
4ed46869
KH
7600 emacs_code_class[i] = EMACS_invalid_code;
7601 emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7602 emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7603 emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7604 emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7605
7606 /* ISO2022 specific initialize routine. */
7607 for (i = 0; i < 0x20; i++)
b73bfc1c 7608 iso_code_class[i] = ISO_control_0;
4ed46869
KH
7609 for (i = 0x21; i < 0x7F; i++)
7610 iso_code_class[i] = ISO_graphic_plane_0;
7611 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 7612 iso_code_class[i] = ISO_control_1;
4ed46869
KH
7613 for (i = 0xA1; i < 0xFF; i++)
7614 iso_code_class[i] = ISO_graphic_plane_1;
7615 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7616 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7617 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7618 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7619 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7620 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7621 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7622 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7623 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7624 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7625
c4825358 7626 setup_coding_system (Qnil, &safe_terminal_coding);
6bc51348 7627 setup_coding_system (Qnil, &default_buffer_file_coding);
9ce27fde 7628
d46c5b12
KH
7629 bzero (coding_system_table, sizeof coding_system_table);
7630
66cfb530
KH
7631 bzero (ascii_skip_code, sizeof ascii_skip_code);
7632 for (i = 0; i < 128; i++)
7633 ascii_skip_code[i] = 1;
7634
9ce27fde
KH
7635#if defined (MSDOS) || defined (WINDOWSNT)
7636 system_eol_type = CODING_EOL_CRLF;
7637#else
7638 system_eol_type = CODING_EOL_LF;
7639#endif
b843d1ae
KH
7640
7641 inhibit_pre_post_conversion = 0;
e0e989f6
KH
7642}
7643
7644#ifdef emacs
7645
dfcf069d 7646void
e0e989f6
KH
7647syms_of_coding ()
7648{
2a47931b
KH
7649 staticpro (&Vcode_conversion_workbuf_name);
7650 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
7651
e0e989f6
KH
7652 Qtarget_idx = intern ("target-idx");
7653 staticpro (&Qtarget_idx);
7654
bb0115a2
RS
7655 Qcoding_system_history = intern ("coding-system-history");
7656 staticpro (&Qcoding_system_history);
7657 Fset (Qcoding_system_history, Qnil);
7658
9ce27fde 7659 /* Target FILENAME is the first argument. */
e0e989f6 7660 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 7661 /* Target FILENAME is the third argument. */
e0e989f6
KH
7662 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7663
7664 Qcall_process = intern ("call-process");
7665 staticpro (&Qcall_process);
9ce27fde 7666 /* Target PROGRAM is the first argument. */
e0e989f6
KH
7667 Fput (Qcall_process, Qtarget_idx, make_number (0));
7668
7669 Qcall_process_region = intern ("call-process-region");
7670 staticpro (&Qcall_process_region);
9ce27fde 7671 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7672 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7673
7674 Qstart_process = intern ("start-process");
7675 staticpro (&Qstart_process);
9ce27fde 7676 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7677 Fput (Qstart_process, Qtarget_idx, make_number (2));
7678
7679 Qopen_network_stream = intern ("open-network-stream");
7680 staticpro (&Qopen_network_stream);
9ce27fde 7681 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
7682 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7683
4ed46869
KH
7684 Qcoding_system = intern ("coding-system");
7685 staticpro (&Qcoding_system);
7686
7687 Qeol_type = intern ("eol-type");
7688 staticpro (&Qeol_type);
7689
7690 Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7691 staticpro (&Qbuffer_file_coding_system);
7692
7693 Qpost_read_conversion = intern ("post-read-conversion");
7694 staticpro (&Qpost_read_conversion);
7695
7696 Qpre_write_conversion = intern ("pre-write-conversion");
7697 staticpro (&Qpre_write_conversion);
7698
27901516
KH
7699 Qno_conversion = intern ("no-conversion");
7700 staticpro (&Qno_conversion);
7701
7702 Qundecided = intern ("undecided");
7703 staticpro (&Qundecided);
7704
4ed46869
KH
7705 Qcoding_system_p = intern ("coding-system-p");
7706 staticpro (&Qcoding_system_p);
7707
7708 Qcoding_system_error = intern ("coding-system-error");
7709 staticpro (&Qcoding_system_error);
7710
7711 Fput (Qcoding_system_error, Qerror_conditions,
7712 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7713 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 7714 build_string ("Invalid coding system"));
4ed46869 7715
d46c5b12
KH
7716 Qcoding_category = intern ("coding-category");
7717 staticpro (&Qcoding_category);
4ed46869
KH
7718 Qcoding_category_index = intern ("coding-category-index");
7719 staticpro (&Qcoding_category_index);
7720
d46c5b12
KH
7721 Vcoding_category_table
7722 = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7723 staticpro (&Vcoding_category_table);
4ed46869
KH
7724 {
7725 int i;
7726 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7727 {
d46c5b12
KH
7728 XVECTOR (Vcoding_category_table)->contents[i]
7729 = intern (coding_category_name[i]);
7730 Fput (XVECTOR (Vcoding_category_table)->contents[i],
7731 Qcoding_category_index, make_number (i));
4ed46869
KH
7732 }
7733 }
7734
6b89e3aa
KH
7735 Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7736 staticpro (&Vcoding_system_safe_chars);
7737
f967223b
KH
7738 Qtranslation_table = intern ("translation-table");
7739 staticpro (&Qtranslation_table);
b666620c 7740 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
bdd9fb48 7741
f967223b
KH
7742 Qtranslation_table_id = intern ("translation-table-id");
7743 staticpro (&Qtranslation_table_id);
84fbb8a0 7744
f967223b
KH
7745 Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7746 staticpro (&Qtranslation_table_for_decode);
a5d301df 7747
f967223b
KH
7748 Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7749 staticpro (&Qtranslation_table_for_encode);
a5d301df 7750
05e6f5dc
KH
7751 Qsafe_chars = intern ("safe-chars");
7752 staticpro (&Qsafe_chars);
7753
7754 Qchar_coding_system = intern ("char-coding-system");
7755 staticpro (&Qchar_coding_system);
7756
7757 /* Intern this now in case it isn't already done.
7758 Setting this variable twice is harmless.
7759 But don't staticpro it here--that is done in alloc.c. */
7760 Qchar_table_extra_slots = intern ("char-table-extra-slots");
7761 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
067a6a66 7762 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
70c22245 7763
1397dc18
KH
7764 Qvalid_codes = intern ("valid-codes");
7765 staticpro (&Qvalid_codes);
7766
9ce27fde
KH
7767 Qemacs_mule = intern ("emacs-mule");
7768 staticpro (&Qemacs_mule);
7769
d46c5b12
KH
7770 Qraw_text = intern ("raw-text");
7771 staticpro (&Qraw_text);
7772
ecf488bc
DL
7773 Qutf_8 = intern ("utf-8");
7774 staticpro (&Qutf_8);
7775
a362520d
KH
7776 Qcoding_system_define_form = intern ("coding-system-define-form");
7777 staticpro (&Qcoding_system_define_form);
7778
4ed46869
KH
7779 defsubr (&Scoding_system_p);
7780 defsubr (&Sread_coding_system);
7781 defsubr (&Sread_non_nil_coding_system);
7782 defsubr (&Scheck_coding_system);
7783 defsubr (&Sdetect_coding_region);
d46c5b12 7784 defsubr (&Sdetect_coding_string);
05e6f5dc 7785 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 7786 defsubr (&Sunencodable_char_position);
4ed46869
KH
7787 defsubr (&Sdecode_coding_region);
7788 defsubr (&Sencode_coding_region);
7789 defsubr (&Sdecode_coding_string);
7790 defsubr (&Sencode_coding_string);
7791 defsubr (&Sdecode_sjis_char);
7792 defsubr (&Sencode_sjis_char);
7793 defsubr (&Sdecode_big5_char);
7794 defsubr (&Sencode_big5_char);
1ba9e4ab 7795 defsubr (&Sset_terminal_coding_system_internal);
c4825358 7796 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 7797 defsubr (&Sterminal_coding_system);
1ba9e4ab 7798 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 7799 defsubr (&Skeyboard_coding_system);
a5d301df 7800 defsubr (&Sfind_operation_coding_system);
1397dc18 7801 defsubr (&Supdate_coding_systems_internal);
66cfb530 7802 defsubr (&Sset_coding_priority_internal);
6b89e3aa 7803 defsubr (&Sdefine_coding_system_internal);
4ed46869 7804
4608c386 7805 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
7806 doc: /* List of coding systems.
7807
7808Do not alter the value of this variable manually. This variable should be
7809updated by the functions `make-coding-system' and
7810`define-coding-system-alias'. */);
4608c386
KH
7811 Vcoding_system_list = Qnil;
7812
7813 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
7814 doc: /* Alist of coding system names.
7815Each element is one element list of coding system name.
7816This variable is given to `completing-read' as TABLE argument.
7817
7818Do not alter the value of this variable manually. This variable should be
7819updated by the functions `make-coding-system' and
7820`define-coding-system-alias'. */);
4608c386
KH
7821 Vcoding_system_alist = Qnil;
7822
4ed46869 7823 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
7824 doc: /* List of coding-categories (symbols) ordered by priority.
7825
7826On detecting a coding system, Emacs tries code detection algorithms
7827associated with each coding-category one by one in this order. When
7828one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
7829system bound to the corresponding coding-category is selected.
7830
42205607 7831Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
7832 {
7833 int i;
7834
7835 Vcoding_category_list = Qnil;
7836 for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7837 Vcoding_category_list
d46c5b12
KH
7838 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7839 Vcoding_category_list);
4ed46869
KH
7840 }
7841
7842 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
7843 doc: /* Specify the coding system for read operations.
7844It is useful to bind this variable with `let', but do not set it globally.
7845If the value is a coding system, it is used for decoding on read operation.
7846If not, an appropriate element is used from one of the coding system alists:
7847There are three such tables, `file-coding-system-alist',
7848`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
7849 Vcoding_system_for_read = Qnil;
7850
7851 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
7852 doc: /* Specify the coding system for write operations.
7853Programs bind this variable with `let', but you should not set it globally.
7854If the value is a coding system, it is used for encoding of output,
7855when writing it to a file and when sending it to a file or subprocess.
7856
7857If this does not specify a coding system, an appropriate element
7858is used from one of the coding system alists:
7859There are three such tables, `file-coding-system-alist',
7860`process-coding-system-alist', and `network-coding-system-alist'.
7861For output to files, if the above procedure does not specify a coding system,
7862the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
7863 Vcoding_system_for_write = Qnil;
7864
7865 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7c695ab9
DL
7866 doc: /* Coding system used in the latest file or process I/O.
7867Also set by `encode-coding-region', `decode-coding-region',
7868`encode-coding-string' and `decode-coding-string'. */);
4ed46869
KH
7869 Vlast_coding_system_used = Qnil;
7870
9ce27fde 7871 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
48b0f3ae
PJ
7872 doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7873See info node `Coding Systems' and info node `Text and Binary' concerning
7874such conversion. */);
9ce27fde
KH
7875 inhibit_eol_conversion = 0;
7876
ed29121d 7877 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
48b0f3ae
PJ
7878 doc: /* Non-nil means process buffer inherits coding system of process output.
7879Bind it to t if the process output is to be treated as if it were a file
7880read from some filesystem. */);
ed29121d
EZ
7881 inherit_process_coding_system = 0;
7882
02ba4723 7883 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
48b0f3ae
PJ
7884 doc: /* Alist to decide a coding system to use for a file I/O operation.
7885The format is ((PATTERN . VAL) ...),
7886where PATTERN is a regular expression matching a file name,
7887VAL is a coding system, a cons of coding systems, or a function symbol.
7888If VAL is a coding system, it is used for both decoding and encoding
7889the file contents.
7890If VAL is a cons of coding systems, the car part is used for decoding,
7891and the cdr part is used for encoding.
7892If VAL is a function symbol, the function must return a coding system
0192762c 7893or a cons of coding systems which are used as above. The function gets
ff955d90 7894the arguments with which `find-operation-coding-system' was called.
48b0f3ae
PJ
7895
7896See also the function `find-operation-coding-system'
7897and the variable `auto-coding-alist'. */);
02ba4723
KH
7898 Vfile_coding_system_alist = Qnil;
7899
7900 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
48b0f3ae
PJ
7901 doc: /* Alist to decide a coding system to use for a process I/O operation.
7902The format is ((PATTERN . VAL) ...),
7903where PATTERN is a regular expression matching a program name,
7904VAL is a coding system, a cons of coding systems, or a function symbol.
7905If VAL is a coding system, it is used for both decoding what received
7906from the program and encoding what sent to the program.
7907If VAL is a cons of coding systems, the car part is used for decoding,
7908and the cdr part is used for encoding.
7909If VAL is a function symbol, the function must return a coding system
7910or a cons of coding systems which are used as above.
7911
7912See also the function `find-operation-coding-system'. */);
02ba4723
KH
7913 Vprocess_coding_system_alist = Qnil;
7914
7915 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
48b0f3ae
PJ
7916 doc: /* Alist to decide a coding system to use for a network I/O operation.
7917The format is ((PATTERN . VAL) ...),
7918where PATTERN is a regular expression matching a network service name
7919or is a port number to connect to,
7920VAL is a coding system, a cons of coding systems, or a function symbol.
7921If VAL is a coding system, it is used for both decoding what received
7922from the network stream and encoding what sent to the network stream.
7923If VAL is a cons of coding systems, the car part is used for decoding,
7924and the cdr part is used for encoding.
7925If VAL is a function symbol, the function must return a coding system
7926or a cons of coding systems which are used as above.
7927
7928See also the function `find-operation-coding-system'. */);
02ba4723 7929 Vnetwork_coding_system_alist = Qnil;
4ed46869 7930
68c45bf0 7931 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
7932 doc: /* Coding system to use with system messages.
7933Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
7934 Vlocale_coding_system = Qnil;
7935
005f0d35 7936 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 7937 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
48b0f3ae 7938 doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 7939 eol_mnemonic_unix = build_string (":");
4ed46869 7940
7722baf9 7941 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
48b0f3ae 7942 doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 7943 eol_mnemonic_dos = build_string ("\\");
4ed46869 7944
7722baf9 7945 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
48b0f3ae 7946 doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 7947 eol_mnemonic_mac = build_string ("/");
4ed46869 7948
7722baf9 7949 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
48b0f3ae 7950 doc: /* *String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 7951 eol_mnemonic_undecided = build_string (":");
4ed46869 7952
84fbb8a0 7953 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
48b0f3ae 7954 doc: /* *Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 7955 Venable_character_translation = Qt;
bdd9fb48 7956
f967223b 7957 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
7958 &Vstandard_translation_table_for_decode,
7959 doc: /* Table for translating characters while decoding. */);
f967223b 7960 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 7961
f967223b 7962 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
7963 &Vstandard_translation_table_for_encode,
7964 doc: /* Table for translating characters while encoding. */);
f967223b 7965 Vstandard_translation_table_for_encode = Qnil;
4ed46869
KH
7966
7967 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
48b0f3ae
PJ
7968 doc: /* Alist of charsets vs revision numbers.
7969While encoding, if a charset (car part of an element) is found,
7970designate it with the escape sequence identifying revision (cdr part of the element). */);
4ed46869 7971 Vcharset_revision_alist = Qnil;
02ba4723
KH
7972
7973 DEFVAR_LISP ("default-process-coding-system",
7974 &Vdefault_process_coding_system,
48b0f3ae
PJ
7975 doc: /* Cons of coding systems used for process I/O by default.
7976The car part is used for decoding a process output,
7977the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 7978 Vdefault_process_coding_system = Qnil;
c4825358 7979
3f003981 7980 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
48b0f3ae
PJ
7981 doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7982This is a vector of length 256.
7983If Nth element is non-nil, the existence of code N in a file
7984\(or output of subprocess) doesn't prevent it to be detected as
7985a coding system of ISO 2022 variant which has a flag
7986`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7987or reading output of a subprocess.
7988Only 128th through 159th elements has a meaning. */);
3f003981 7989 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
7990
7991 DEFVAR_LISP ("select-safe-coding-system-function",
7992 &Vselect_safe_coding_system_function,
48b0f3ae
PJ
7993 doc: /* Function to call to select safe coding system for encoding a text.
7994
7995If set, this function is called to force a user to select a proper
7996coding system which can encode the text in the case that a default
7997coding system used in each operation can't encode the text.
7998
7999The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8000 Vselect_safe_coding_system_function = Qnil;
8001
5d5bf4d8
KH
8002 DEFVAR_BOOL ("coding-system-require-warning",
8003 &coding_system_require_warning,
8004 doc: /* Internal use only.
6b89e3aa
KH
8005If non-nil, on writing a file, `select-safe-coding-system-function' is
8006called even if `coding-system-for-write' is non-nil. The command
8007`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
8008 coding_system_require_warning = 0;
8009
8010
22ab2303 8011 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8012 &inhibit_iso_escape_detection,
48b0f3ae
PJ
8013 doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8014
8015By default, on reading a file, Emacs tries to detect how the text is
8016encoded. This code detection is sensitive to escape sequences. If
8017the sequence is valid as ISO2022, the code is determined as one of
8018the ISO2022 encodings, and the file is decoded by the corresponding
8019coding system (e.g. `iso-2022-7bit').
8020
8021However, there may be a case that you want to read escape sequences in
8022a file as is. In such a case, you can set this variable to non-nil.
8023Then, as the code detection ignores any escape sequences, no file is
8024detected as encoded in some ISO2022 encoding. The result is that all
8025escape sequences become visible in a buffer.
8026
8027The default value is nil, and it is strongly recommended not to change
8028it. That is because many Emacs Lisp source files that contain
8029non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8030in Emacs's distribution, and they won't be decoded correctly on
8031reading if you suppress escape sequence detection.
8032
8033The other way to read escape sequences in a file without decoding is
8034to explicitly specify some coding system that doesn't use ISO2022's
8035escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8036 inhibit_iso_escape_detection = 0;
002fdb44
DL
8037
8038 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
8039 doc: /* Char table for translating self-inserting characters.
8040This is applied to the result of input methods, not their input. See also
8041`keyboard-translate-table'. */);
002fdb44 8042 Vtranslation_table_for_input = Qnil;
4ed46869
KH
8043}
8044
68c45bf0
PE
8045char *
8046emacs_strerror (error_number)
8047 int error_number;
8048{
8049 char *str;
8050
ca9c0567 8051 synchronize_system_messages_locale ();
68c45bf0
PE
8052 str = strerror (error_number);
8053
8054 if (! NILP (Vlocale_coding_system))
8055 {
8056 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8057 Vlocale_coding_system,
8058 0);
d5db4077 8059 str = (char *) SDATA (dec);
68c45bf0
PE
8060 }
8061
8062 return str;
8063}
8064
4ed46869 8065#endif /* emacs */
c2f94ebc 8066
ab5796a9
MB
8067/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
8068 (do not change this comment) */