(encode_eol): Add null statement after label.
[bpt/emacs.git] / src / coding.c
CommitLineData
4ed46869 1/* Coding system handler (conversion, detection, and etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
4ed46869 4
369314dc
KH
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
4ed46869 11
369314dc
KH
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
4ed46869 16
369314dc
KH
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
22/*** TABLE OF CONTENTS ***
23
b73bfc1c 24 0. General comments
4ed46869 25 1. Preamble
0ef69138 26 2. Emacs' internal format (emacs-mule) handlers
4ed46869
KH
27 3. ISO2022 handlers
28 4. Shift-JIS and BIG5 handlers
1397dc18
KH
29 5. CCL handlers
30 6. End-of-line handlers
31 7. C library functions
32 8. Emacs Lisp library functions
33 9. Post-amble
4ed46869
KH
34
35*/
36
b73bfc1c
KH
37/*** 0. General comments ***/
38
39
4ed46869
KH
40/*** GENERAL NOTE on CODING SYSTEM ***
41
42 Coding system is an encoding mechanism of one or more character
43 sets. Here's a list of coding systems which Emacs can handle. When
44 we say "decode", it means converting some other coding system to
0ef69138
KH
45 Emacs' internal format (emacs-mule), and when we say "encode",
46 it means converting the coding system emacs-mule to some other
47 coding system.
4ed46869 48
0ef69138 49 0. Emacs' internal format (emacs-mule)
4ed46869
KH
50
51 Emacs itself holds a multi-lingual character in a buffer and a string
f4dee582 52 in a special format. Details are described in section 2.
4ed46869
KH
53
54 1. ISO2022
55
56 The most famous coding system for multiple character sets. X's
f4dee582
RS
57 Compound Text, various EUCs (Extended Unix Code), and coding
58 systems used in Internet communication such as ISO-2022-JP are
59 all variants of ISO2022. Details are described in section 3.
4ed46869
KH
60
61 2. SJIS (or Shift-JIS or MS-Kanji-Code)
62
63 A coding system to encode character sets: ASCII, JISX0201, and
64 JISX0208. Widely used for PC's in Japan. Details are described in
f4dee582 65 section 4.
4ed46869
KH
66
67 3. BIG5
68
69 A coding system to encode character sets: ASCII and Big5. Widely
70 used by Chinese (mainly in Taiwan and Hong Kong). Details are
f4dee582
RS
71 described in section 4. In this file, when we write "BIG5"
72 (all uppercase), we mean the coding system, and when we write
73 "Big5" (capitalized), we mean the character set.
4ed46869 74
27901516
KH
75 4. Raw text
76
4608c386
KH
77 A coding system for a text containing random 8-bit code. Emacs does
78 no code conversion on such a text except for end-of-line format.
27901516
KH
79
80 5. Other
4ed46869 81
f4dee582 82 If a user wants to read/write a text encoded in a coding system not
4ed46869
KH
83 listed above, he can supply a decoder and an encoder for it in CCL
84 (Code Conversion Language) programs. Emacs executes the CCL program
85 while reading/writing.
86
d46c5b12
KH
87 Emacs represents a coding system by a Lisp symbol that has a property
88 `coding-system'. But, before actually using the coding system, the
4ed46869 89 information about it is set in a structure of type `struct
f4dee582 90 coding_system' for rapid processing. See section 6 for more details.
4ed46869
KH
91
92*/
93
94/*** GENERAL NOTES on END-OF-LINE FORMAT ***
95
96 How end-of-line of a text is encoded depends on a system. For
97 instance, Unix's format is just one byte of `line-feed' code,
f4dee582 98 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
99 `line-feed' codes. MacOS's format is usually one byte of
100 `carriage-return'.
4ed46869 101
f4dee582
RS
102 Since text characters encoding and end-of-line encoding are
103 independent, any coding system described above can take
4ed46869 104 any format of end-of-line. So, Emacs has information of format of
f4dee582 105 end-of-line in each coding-system. See section 6 for more details.
4ed46869
KH
106
107*/
108
109/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
110
111 These functions check if a text between SRC and SRC_END is encoded
112 in the coding system category XXX. Each returns an integer value in
113 which appropriate flag bits for the category XXX is set. The flag
114 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
115 template of these functions. */
116#if 0
117int
0ef69138 118detect_coding_emacs_mule (src, src_end)
4ed46869
KH
119 unsigned char *src, *src_end;
120{
121 ...
122}
123#endif
124
125/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
126
b73bfc1c
KH
127 These functions decode SRC_BYTES length of unibyte text at SOURCE
128 encoded in CODING to Emacs' internal format. The resulting
129 multibyte text goes to a place pointed to by DESTINATION, the length
130 of which should not exceed DST_BYTES.
d46c5b12 131
b73bfc1c
KH
132 These functions set the information of original and decoded texts in
133 the members produced, produced_char, consumed, and consumed_char of
134 the structure *CODING. They also set the member result to one of
135 CODING_FINISH_XXX indicating how the decoding finished.
d46c5b12
KH
136
137 DST_BYTES zero means that source area and destination area are
138 overlapped, which means that we can produce a decoded text until it
139 reaches at the head of not-yet-decoded source text.
140
141 Below is a template of these functions. */
4ed46869 142#if 0
b73bfc1c 143static void
d46c5b12 144decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
145 struct coding_system *coding;
146 unsigned char *source, *destination;
147 int src_bytes, dst_bytes;
4ed46869
KH
148{
149 ...
150}
151#endif
152
153/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
154
0ef69138 155 These functions encode SRC_BYTES length text at SOURCE of Emacs'
b73bfc1c
KH
156 internal multibyte format to CODING. The resulting unibyte text
157 goes to a place pointed to by DESTINATION, the length of which
158 should not exceed DST_BYTES.
d46c5b12 159
b73bfc1c
KH
160 These functions set the information of original and encoded texts in
161 the members produced, produced_char, consumed, and consumed_char of
162 the structure *CODING. They also set the member result to one of
163 CODING_FINISH_XXX indicating how the encoding finished.
d46c5b12
KH
164
165 DST_BYTES zero means that source area and destination area are
b73bfc1c
KH
166 overlapped, which means that we can produce an encoded text until it
167 reaches at the head of not-yet-encoded source text.
d46c5b12
KH
168
169 Below is a template of these functions. */
4ed46869 170#if 0
b73bfc1c 171static void
d46c5b12 172encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
173 struct coding_system *coding;
174 unsigned char *source, *destination;
175 int src_bytes, dst_bytes;
4ed46869
KH
176{
177 ...
178}
179#endif
180
181/*** COMMONLY USED MACROS ***/
182
b73bfc1c
KH
/* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
   get one and two bytes from the source text respectively.  If there
   are not enough bytes in the source, they set coding->result to
   CODING_FINISH_INSUFFICIENT_SRC and jump to `label_end_of_loop'.
   The caller should set variables `coding', `src' and `src_end' to
   appropriate pointers in advance.  These macros are called from
   decoding routines `decode_coding_XXX', thus it is assumed that the
   source text is unibyte.  */

#define ONE_MORE_BYTE(c1)                                       \
  do {                                                          \
    if (src < src_end)                                          \
      c1 = *src++;                                              \
    else                                                        \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
        goto label_end_of_loop;                                 \
      }                                                         \
  } while (0)
200
b73bfc1c
KH
/* Fetch the next two source bytes into C1 and C2, advancing `src'.
   If fewer than two bytes remain, consume nothing, set
   coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
   `label_end_of_loop'.  The remaining length is computed as a
   pointer difference so that no pointer beyond `src_end' is ever
   formed, even when `src' already equals `src_end'.  */

#define TWO_MORE_BYTES(c1, c2)                                  \
  do {                                                          \
    if (src_end - src < 2)                                      \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    (c1) = *src++;                                              \
    (c2) = *src++;                                              \
  } while (0)
211
4ed46869 212
b73bfc1c
KH
/* Set C to the next character of the source text at `src' and
   advance `src' past it.  If no source is left, set coding->result
   to CODING_FINISH_INSUFFICIENT_SRC and jump to `label_end_of_loop'.
   The caller should set variables `coding', `src', `src_end', and
   `translation_table' to appropriate values in advance.  This macro
   is used in encoding routines `encode_coding_XXX', thus it assumes
   that the source text is in multibyte form except for 8-bit
   characters.  8-bit characters are in multibyte form if
   coding->src_multibyte is nonzero, else they are represented by a
   single byte.  When `translation_table' is non-nil, C is passed
   through translate_char before `src' is advanced.  */

#define ONE_MORE_CHAR(c)                                                \
  do {                                                                  \
    int remaining = src_end - src;                                      \
    int char_len;                                                       \
    if (remaining < 1)                                                  \
      {                                                                 \
        coding->result = CODING_FINISH_INSUFFICIENT_SRC;                \
        goto label_end_of_loop;                                         \
      }                                                                 \
    if (! coding->src_multibyte                                         \
        && ! UNIBYTE_STR_AS_MULTIBYTE_P (src, remaining, char_len))     \
      {                                                                 \
        c = *src;                                                       \
        char_len = 1;                                                   \
      }                                                                 \
    else                                                                \
      c = STRING_CHAR_AND_LENGTH (src, remaining, char_len);            \
    if (! NILP (translation_table))                                     \
      c = translate_char (translation_table, c, 0, 0, 0);               \
    src += char_len;                                                    \
  } while (0)
241
4ed46869 242
b73bfc1c
KH
/* Produce a multibyte form of character C to `dst'.  If there's not
   enough space at `dst', set coding->result to
   CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.
   The limit is `dst_end' when `dst_bytes' is nonzero, else `src'.

   The multibyte form is written (and coding->produced_char is
   incremented) when no composition is in progress, or when the
   current composition is COMPOSITION_RELATIVE or
   COMPOSITION_WITH_RULE.

   If we are in the middle of a composition sequence whose method is
   anything other than COMPOSITION_RELATIVE, C is also recorded with
   CODING_ADD_COMPOSITION_COMPONENT (presumably into
   coding->cmp_data->data; that macro is defined elsewhere), and
   coding->composition_rule_follows is set unless the method is
   COMPOSITION_WITH_ALTCHARS.  For the remaining methods the
   character may be an ALTCHAR, which is recorded but not written to
   `dst'.

   The free space is measured as a pointer difference so that no
   pointer beyond the destination limit is formed.

   This macro is used in decoding routines.  */

#define EMIT_CHAR(c)                                                    \
  do {                                                                  \
    if (! COMPOSING_P (coding)                                          \
        || coding->composing == COMPOSITION_RELATIVE                    \
        || coding->composing == COMPOSITION_WITH_RULE)                  \
      {                                                                 \
        int bytes = CHAR_BYTES (c);                                     \
        if ((dst_bytes ? dst_end : src) - dst < bytes)                  \
          {                                                             \
            coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
            goto label_end_of_loop;                                     \
          }                                                             \
        dst += CHAR_STRING (c, dst);                                    \
        coding->produced_char++;                                        \
      }                                                                 \
                                                                        \
    if (COMPOSING_P (coding)                                            \
        && coding->composing != COMPOSITION_RELATIVE)                   \
      {                                                                 \
        CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
        coding->composition_rule_follows                                \
          = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
      }                                                                 \
  } while (0)
277
4ed46869 278
b73bfc1c
KH
/* Store the single byte C at `dst' and advance `dst'.  If `dst' has
   already reached its limit (`dst_end' when `dst_bytes' is nonzero,
   else `src'), set coding->result to CODING_FINISH_INSUFFICIENT_DST
   and jump to `label_end_of_loop' without writing.  */

#define EMIT_ONE_BYTE(c)                                        \
  do {                                                          \
    if (dst < (dst_bytes ? dst_end : src))                      \
      *dst++ = c;                                               \
    else                                                        \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
        goto label_end_of_loop;                                 \
      }                                                         \
  } while (0)
288
/* Store the bytes C1 and C2 at `dst' and advance `dst' by two.  If
   fewer than two bytes are free before the limit (`dst_end' when
   `dst_bytes' is nonzero, else `src'), write nothing, set
   coding->result to CODING_FINISH_INSUFFICIENT_DST and jump to
   `label_end_of_loop'.  The free space is measured as a pointer
   difference so that no pointer beyond the limit is formed.  */

#define EMIT_TWO_BYTES(c1, c2)                                  \
  do {                                                          \
    if ((dst_bytes ? dst_end : src) - dst < 2)                  \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    *dst++ = (c1);                                              \
    *dst++ = (c2);                                              \
  } while (0)
298
/* Copy the bytes in the range FROM (inclusive) to TO (exclusive) to
   `dst', advancing both `dst' and FROM.  FROM must be a modifiable
   pointer lvalue; it equals TO afterwards.  If the range does not
   fit before the limit (`dst_end' when `dst_bytes' is nonzero, else
   `src'), copy nothing, set coding->result to
   CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.
   The free space is measured as a pointer difference so that no
   pointer beyond the limit is formed.  */

#define EMIT_BYTES(from, to)                                    \
  do {                                                          \
    if ((dst_bytes ? dst_end : src) - dst < (to) - (from))      \
      {                                                         \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
        goto label_end_of_loop;                                 \
      }                                                         \
    while ((from) < (to))                                       \
      *dst++ = *(from)++;                                       \
  } while (0)
309
310\f
311/*** 1. Preamble ***/
312
68c45bf0
PE
313#ifdef emacs
314#include <config.h>
315#endif
316
4ed46869
KH
317#include <stdio.h>
318
319#ifdef emacs
320
4ed46869
KH
321#include "lisp.h"
322#include "buffer.h"
323#include "charset.h"
ec6d2bb8 324#include "composite.h"
4ed46869
KH
325#include "ccl.h"
326#include "coding.h"
327#include "window.h"
328
329#else /* not emacs */
330
331#include "mulelib.h"
332
333#endif /* not emacs */
334
335Lisp_Object Qcoding_system, Qeol_type;
336Lisp_Object Qbuffer_file_coding_system;
337Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
27901516 338Lisp_Object Qno_conversion, Qundecided;
bb0115a2 339Lisp_Object Qcoding_system_history;
70c22245 340Lisp_Object Qsafe_charsets;
1397dc18 341Lisp_Object Qvalid_codes;
4ed46869
KH
342
343extern Lisp_Object Qinsert_file_contents, Qwrite_region;
344Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
345Lisp_Object Qstart_process, Qopen_network_stream;
346Lisp_Object Qtarget_idx;
347
d46c5b12
KH
348Lisp_Object Vselect_safe_coding_system_function;
349
7722baf9
EZ
350/* Mnemonic string for each format of end-of-line. */
351Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
352/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 353 decided. */
7722baf9 354Lisp_Object eol_mnemonic_undecided;
4ed46869 355
9ce27fde
KH
356/* Format of end-of-line decided by system. This is CODING_EOL_LF on
357 Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
358int system_eol_type;
359
4ed46869
KH
360#ifdef emacs
361
4608c386
KH
362Lisp_Object Vcoding_system_list, Vcoding_system_alist;
363
364Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 365
d46c5b12
KH
366/* Coding system emacs-mule and raw-text are for converting only
367 end-of-line format. */
368Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 369
4ed46869
KH
370/* Coding-systems are handed between Emacs Lisp programs and C internal
371 routines by the following three variables. */
372/* Coding-system for reading files and receiving data from process. */
373Lisp_Object Vcoding_system_for_read;
374/* Coding-system for writing files and sending data to process. */
375Lisp_Object Vcoding_system_for_write;
376/* Coding-system actually used in the latest I/O. */
377Lisp_Object Vlast_coding_system_used;
378
c4825358 379/* A vector of length 256 which contains information about special
94487c4e 380 Latin codes (especially for dealing with Microsoft codes). */
3f003981 381Lisp_Object Vlatin_extra_code_table;
c4825358 382
9ce27fde
KH
383/* Flag to inhibit code conversion of end-of-line format. */
384int inhibit_eol_conversion;
385
ed29121d
EZ
386/* Flag to make buffer-file-coding-system inherit from process-coding. */
387int inherit_process_coding_system;
388
c4825358 389/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
390struct coding_system terminal_coding;
391
c4825358
KH
392/* Coding system to be used to encode text for terminal display when
393 terminal coding system is nil. */
394struct coding_system safe_terminal_coding;
395
396/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
397struct coding_system keyboard_coding;
398
6bc51348
KH
399/* Default coding system to be used to write a file. */
400struct coding_system default_buffer_file_coding;
401
02ba4723
KH
402Lisp_Object Vfile_coding_system_alist;
403Lisp_Object Vprocess_coding_system_alist;
404Lisp_Object Vnetwork_coding_system_alist;
4ed46869 405
68c45bf0
PE
406Lisp_Object Vlocale_coding_system;
407
4ed46869
KH
408#endif /* emacs */
409
d46c5b12 410Lisp_Object Qcoding_category, Qcoding_category_index;
4ed46869
KH
411
412/* List of symbols `coding-category-xxx' ordered by priority. */
413Lisp_Object Vcoding_category_list;
414
d46c5b12
KH
415/* Table of coding categories (Lisp symbols). */
416Lisp_Object Vcoding_category_table;
4ed46869
KH
417
418/* Table of names of symbol for each coding-category. */
419char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
0ef69138 420 "coding-category-emacs-mule",
4ed46869
KH
421 "coding-category-sjis",
422 "coding-category-iso-7",
d46c5b12 423 "coding-category-iso-7-tight",
4ed46869
KH
424 "coding-category-iso-8-1",
425 "coding-category-iso-8-2",
7717c392
KH
426 "coding-category-iso-7-else",
427 "coding-category-iso-8-else",
89fa8b36 428 "coding-category-ccl",
4ed46869 429 "coding-category-big5",
fa42c37f
KH
430 "coding-category-utf-8",
431 "coding-category-utf-16-be",
432 "coding-category-utf-16-le",
27901516 433 "coding-category-raw-text",
89fa8b36 434 "coding-category-binary"
4ed46869
KH
435};
436
66cfb530 437/* Table of pointers to coding systems corresponding to each coding
d46c5b12
KH
438 categories. */
439struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
440
66cfb530
KH
441/* Table of coding category masks. Nth element is a mask for a coding
442 category whose priority is Nth. */
443static
444int coding_priorities[CODING_CATEGORY_IDX_MAX];
445
f967223b
KH
446/* Flag to tell if we look up translation table on character code
447 conversion. */
84fbb8a0 448Lisp_Object Venable_character_translation;
f967223b
KH
449/* Standard translation table to look up on decoding (reading). */
450Lisp_Object Vstandard_translation_table_for_decode;
451/* Standard translation table to look up on encoding (writing). */
452Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 453
f967223b
KH
454Lisp_Object Qtranslation_table;
455Lisp_Object Qtranslation_table_id;
456Lisp_Object Qtranslation_table_for_decode;
457Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
458
459/* Alist of charsets vs revision number. */
460Lisp_Object Vcharset_revision_alist;
461
02ba4723
KH
462/* Default coding systems used for process I/O. */
463Lisp_Object Vdefault_process_coding_system;
464
b843d1ae
KH
465/* Global flag to tell that we can't call post-read-conversion and
466 pre-write-conversion functions. Usually the value is zero, but it
467 is set to 1 temporarily while such functions are running. This is
468 to avoid infinite recursive call. */
469static int inhibit_pre_post_conversion;
470
4ed46869 471\f
0ef69138 472/*** 2. Emacs internal format (emacs-mule) handlers ***/
4ed46869
KH
473
474/* Emacs' internal format for encoding multiple character sets is a
f4dee582 475 kind of multi-byte encoding, i.e. characters are encoded by
b73bfc1c
KH
476 variable-length sequences of one-byte codes.
477
478 ASCII characters and control characters (e.g. `tab', `newline') are
479 represented by one-byte sequences which are their ASCII codes, in
480 the range 0x00 through 0x7F.
481
482 8-bit characters of the range 0x80..0x9F are represented by
483 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
484 code + 0x20).
485
486 8-bit characters of the range 0xA0..0xFF are represented by
487 one-byte sequences which are their 8-bit code.
488
489 The other characters are represented by a sequence of `base
490 leading-code', optional `extended leading-code', and one or two
491 `position-code's. The length of the sequence is determined by the
492 base leading-code. Leading-code takes the range 0x80 through 0x9F,
493 whereas extended leading-code and position-code take the range 0xA0
494 through 0xFF. See `charset.h' for more details about leading-code
495 and position-code.
f4dee582 496
4ed46869 497 --- CODE RANGE of Emacs' internal format ---
b73bfc1c
KH
498 character set range
499 ------------- -----
500 ascii 0x00..0x7F
501 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
502 eight-bit-graphic 0xA0..0xFF
503 ELSE 0x81..0x9F + [0xA0..0xFF]+
4ed46869
KH
504 ---------------------------------------------
505
506 */
507
508enum emacs_code_class_type emacs_code_class[256];
509
4ed46869
KH
510/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
511 Check if a text is encoded in Emacs' internal format. If it is,
d46c5b12 512 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
4ed46869
KH
513
514int
0ef69138 515detect_coding_emacs_mule (src, src_end)
b73bfc1c 516 unsigned char *src, *src_end;
4ed46869
KH
517{
518 unsigned char c;
519 int composing = 0;
b73bfc1c
KH
520 /* Dummy for ONE_MORE_BYTE. */
521 struct coding_system dummy_coding;
522 struct coding_system *coding = &dummy_coding;
4ed46869 523
b73bfc1c 524 while (1)
4ed46869 525 {
b73bfc1c 526 ONE_MORE_BYTE (c);
4ed46869
KH
527
528 if (composing)
529 {
530 if (c < 0xA0)
531 composing = 0;
b73bfc1c
KH
532 else if (c == 0xA0)
533 {
534 ONE_MORE_BYTE (c);
535 c &= 0x7F;
536 }
4ed46869
KH
537 else
538 c -= 0x20;
539 }
540
b73bfc1c 541 if (c < 0x20)
4ed46869 542 {
4ed46869
KH
543 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
544 return 0;
b73bfc1c
KH
545 }
546 else if (c >= 0x80 && c < 0xA0)
547 {
548 if (c == 0x80)
549 /* Old leading code for a composite character. */
550 composing = 1;
551 else
552 {
553 unsigned char *src_base = src - 1;
554 int bytes;
4ed46869 555
b73bfc1c
KH
556 if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
557 bytes))
558 return 0;
559 src = src_base + bytes;
560 }
561 }
562 }
563 label_end_of_loop:
564 return CODING_CATEGORY_MASK_EMACS_MULE;
565}
4ed46869 566
4ed46869 567
b73bfc1c 568/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 569
b73bfc1c
KH
570static void
571decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
572 struct coding_system *coding;
573 unsigned char *source, *destination;
574 int src_bytes, dst_bytes;
575{
576 unsigned char *src = source;
577 unsigned char *src_end = source + src_bytes;
578 unsigned char *dst = destination;
579 unsigned char *dst_end = destination + dst_bytes;
580 /* SRC_BASE remembers the start position in source in each loop.
581 The loop will be exited when there's not enough source code, or
582 when there's not enough destination area to produce a
583 character. */
584 unsigned char *src_base;
4ed46869 585
b73bfc1c
KH
586 coding->produced_char = 0;
587 while (src < src_end)
588 {
589 unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
590 int bytes;
ec6d2bb8 591
b73bfc1c
KH
592 src_base = src;
593 if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
594 {
595 p = src;
596 src += bytes;
597 }
598 else
599 {
600 bytes = CHAR_STRING (*src, tmp);
601 p = tmp;
602 src++;
603 }
604 if (dst + bytes >= (dst_bytes ? dst_end : src))
605 {
606 coding->result = CODING_FINISH_INSUFFICIENT_DST;
4ed46869
KH
607 break;
608 }
b73bfc1c
KH
609 while (bytes--) *dst++ = *p++;
610 coding->produced_char++;
4ed46869 611 }
b73bfc1c
KH
612 coding->consumed = coding->consumed_char = src_base - source;
613 coding->produced = dst - destination;
4ed46869
KH
614}
615
b73bfc1c
KH
/* Encoding to emacs-mule is handed over unchanged to encode_eol,
   with the same five arguments in the same order.  */
#define encode_coding_emacs_mule(coding, src, dst, n_src, n_dst) \
  encode_eol (coding, src, dst, n_src, n_dst)
618
619
4ed46869
KH
620\f
621/*** 3. ISO2022 handlers ***/
622
623/* The following note describes the coding system ISO2022 briefly.
39787efd
KH
624 Since the intention of this note is to help understand the
625 functions in this file, some parts are NOT ACCURATE or OVERLY
626 SIMPLIFIED. For thorough understanding, please refer to the
4ed46869
KH
627 original document of ISO2022.
628
629 ISO2022 provides many mechanisms to encode several character sets
39787efd
KH
630 in 7-bit and 8-bit environments. For 7-bit environments, all text
631 is encoded using bytes less than 128. This may make the encoded
632 text a little bit longer, but the text passes more easily through
633 several gateways, some of which strip off the MSB (Most Significant Bit).
b73bfc1c 634
39787efd 635 There are two kinds of character sets: control character set and
4ed46869
KH
636 graphic character set. The former contains control characters such
637 as `newline' and `escape' to provide control functions (control
39787efd
KH
638 functions are also provided by escape sequences). The latter
639 contains graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
640 two control character sets and many graphic character sets.
641
642 Graphic character sets are classified into one of the following
39787efd
KH
643 four classes, according to the number of bytes (DIMENSION) and
644 number of characters in one dimension (CHARS) of the set:
645 - DIMENSION1_CHARS94
646 - DIMENSION1_CHARS96
647 - DIMENSION2_CHARS94
648 - DIMENSION2_CHARS96
649
650 In addition, each character set is assigned an identification tag,
651 unique for each set, called "final character" (denoted as <F>
652 hereafter). The <F> of each character set is decided by ECMA(*)
653 when it is registered in ISO. The code range of <F> is 0x30..0x7F
654 (0x30..0x3F are for private use only).
4ed46869
KH
655
656 Note (*): ECMA = European Computer Manufacturers Association
657
658 Here are examples of graphic character set [NAME(<F>)]:
659 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
660 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
661 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
662 o DIMENSION2_CHARS96 -- none for the moment
663
39787efd 664 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
665 C0 [0x00..0x1F] -- control character plane 0
666 GL [0x20..0x7F] -- graphic character plane 0
667 C1 [0x80..0x9F] -- control character plane 1
668 GR [0xA0..0xFF] -- graphic character plane 1
669
670 A control character set is directly designated and invoked to C0 or
39787efd
KH
671 C1 by an escape sequence. The most common case is that:
672 - ISO646's control character set is designated/invoked to C0, and
673 - ISO6429's control character set is designated/invoked to C1,
674 and usually these designations/invocations are omitted in encoded
675 text. In a 7-bit environment, only C0 can be used, and a control
676 character for C1 is encoded by an appropriate escape sequence to
677 fit into the environment. All control characters for C1 are
678 defined to have corresponding escape sequences.
4ed46869
KH
679
680 A graphic character set is at first designated to one of four
681 graphic registers (G0 through G3), then these graphic registers are
682 invoked to GL or GR. These designations and invocations can be
683 done independently. The most common case is that G0 is invoked to
39787efd
KH
684 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
685 these invocations and designations are omitted in encoded text.
686 In a 7-bit environment, only GL can be used.
4ed46869 687
39787efd
KH
688 When a graphic character set of CHARS94 is invoked to GL, codes
689 0x20 and 0x7F of the GL area work as control characters SPACE and
690 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
691 be used.
4ed46869
KH
692
693 There are two ways of invocation: locking-shift and single-shift.
694 With locking-shift, the invocation lasts until the next different
39787efd
KH
695 invocation, whereas with single-shift, the invocation affects the
696 following character only and doesn't affect the locking-shift
697 state. Invocations are done by the following control characters or
698 escape sequences:
4ed46869
KH
699
700 ----------------------------------------------------------------------
39787efd 701 abbrev function cntrl escape seq description
4ed46869 702 ----------------------------------------------------------------------
39787efd
KH
703 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
704 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
705 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
706 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
707 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
708 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
709 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
710 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
711 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 712 ----------------------------------------------------------------------
39787efd
KH
713 (*) These are not used by any known coding system.
714
715 Control characters for these functions are defined by macros
716 ISO_CODE_XXX in `coding.h'.
4ed46869 717
39787efd 718 Designations are done by the following escape sequences:
4ed46869
KH
719 ----------------------------------------------------------------------
720 escape sequence description
721 ----------------------------------------------------------------------
722 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
723 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
724 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
725 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
726 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
727 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
728 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
729 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
730 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
731 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
732 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
733 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
734 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
735 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
736 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
737 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
738 ----------------------------------------------------------------------
739
740 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 741 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
742
743 Note (*): Although these designations are not allowed in ISO2022,
744 Emacs accepts them on decoding, and produces them on encoding
39787efd 745 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
746 7-bit environment, non-locking-shift, and non-single-shift.
747
748 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
39787efd 749 '(' can be omitted. We refer to this as "short-form" hereafter.
4ed46869
KH
750
751 Now you may notice that there are a lot of ways for encoding the
39787efd
KH
752 same multilingual text in ISO2022. Actually, there exist many
753 coding systems such as Compound Text (used in X11's inter client
754 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
755 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
756 localized platforms), and all of these are variants of ISO2022.
757
758 In addition to the above, Emacs handles two more kinds of escape
759 sequences: ISO6429's direction specification and Emacs' private
760 sequence for specifying character composition.
761
39787efd 762 ISO6429's direction specification takes the following form:
4ed46869
KH
763 o CSI ']' -- end of the current direction
764 o CSI '0' ']' -- end of the current direction
765 o CSI '1' ']' -- start of left-to-right text
766 o CSI '2' ']' -- start of right-to-left text
767 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
768 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
769
770 Character composition specification takes the following form:
ec6d2bb8
KH
771 o ESC '0' -- start relative composition
772 o ESC '1' -- end composition
773 o ESC '2' -- start rule-base composition (*)
774 o ESC '3' -- start relative composition with alternate chars (**)
775 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c
KH
776 Since these are not standard escape sequences of any ISO standard,
777 their use with these meanings is restricted to Emacs only.
ec6d2bb8 778
b73bfc1c
KH
779 (*) This form is used only in Emacs 20.5 and the older versions,
780 but the newer versions can safely decode it.
781 (**) This form is used only in Emacs 21.1 and the newer versions,
782 and the older versions can't decode it.
ec6d2bb8 783
b73bfc1c
KH
784 Here's a list of example usages of these composition escape
785 sequences (categorized by `enum composition_method').
ec6d2bb8 786
b73bfc1c 787 COMPOSITION_RELATIVE:
ec6d2bb8 788 ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 789 COMPOSITION_WITH_RULE:
ec6d2bb8 790 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 791 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 792 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 793 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 794 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
795
/* Table with one entry per byte value (256 entries).
   decode_coding_iso2022 indexes it with the byte just read and
   switches on the resulting class to decide how to handle the byte. */
796enum iso_code_class_type iso_code_class[256];
797
f024b6aa
RS
/* Non-zero if coding_system_table[IDX] is non-null and that coding
   system accepts CHARSET: either CHARSET is marked in its
   safe_charsets array, or CODING_SPEC_ISO_REQUESTED_DESIGNATION for
   CHARSET differs from CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
   IDX appears several times in the expansion, so it must be free of
   side effects. */
798#define CHARSET_OK(idx, charset) \
799 (coding_system_table[idx] \
800 && (coding_system_table[idx]->safe_charsets[charset] \
801 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \
802 (coding_system_table[idx], charset) \
803 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
d46c5b12
KH
804
/* Non-zero if the initial designation for register 1 of the coding
   system coding_system_table[IDX] is >= 0.  Unlike CHARSET_OK, this
   macro does not itself test coding_system_table[IDX] for null. */
805#define SHIFT_OUT_OK(idx) \
806 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
807
4ed46869
KH
808/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
809 Check if a text is encoded in ISO2022. If it is, returns an
810 integer in which appropriate flag bits any of:
811 CODING_CATEGORY_MASK_ISO_7
d46c5b12 812 CODING_CATEGORY_MASK_ISO_7_TIGHT
4ed46869
KH
813 CODING_CATEGORY_MASK_ISO_8_1
814 CODING_CATEGORY_MASK_ISO_8_2
7717c392
KH
815 CODING_CATEGORY_MASK_ISO_7_ELSE
816 CODING_CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
817 are set. If a code which should never appear in ISO2022 is found,
818 returns 0. */
819
820int
821detect_coding_iso2022 (src, src_end)
822 unsigned char *src, *src_end;
823{
d46c5b12
KH
824 int mask = CODING_CATEGORY_MASK_ISO;
825 int mask_found = 0;
f46869e4 826 int reg[4], shift_out = 0, single_shifting = 0;
d46c5b12 827 int c, c1, i, charset;
b73bfc1c
KH
828 /* Dummy for ONE_MORE_BYTE. */
829 struct coding_system dummy_coding;
830 struct coding_system *coding = &dummy_coding;
3f003981 831
d46c5b12 832 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
3f003981 833 while (mask && src < src_end)
4ed46869 834 {
b73bfc1c 835 ONE_MORE_BYTE (c);
4ed46869
KH
836 switch (c)
837 {
838 case ISO_CODE_ESC:
f46869e4 839 single_shifting = 0;
b73bfc1c 840 ONE_MORE_BYTE (c);
d46c5b12 841 if (c >= '(' && c <= '/')
4ed46869 842 {
bf9cdd4e 843 /* Designation sequence for a charset of dimension 1. */
b73bfc1c 844 ONE_MORE_BYTE (c1);
d46c5b12
KH
845 if (c1 < ' ' || c1 >= 0x80
846 || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
847 /* Invalid designation sequence. Just ignore. */
848 break;
849 reg[(c - '(') % 4] = charset;
bf9cdd4e
KH
850 }
851 else if (c == '$')
852 {
853 /* Designation sequence for a charset of dimension 2. */
b73bfc1c 854 ONE_MORE_BYTE (c);
bf9cdd4e
KH
855 if (c >= '@' && c <= 'B')
856 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
d46c5b12 857 reg[0] = charset = iso_charset_table[1][0][c];
bf9cdd4e 858 else if (c >= '(' && c <= '/')
bcf26d6a 859 {
b73bfc1c 860 ONE_MORE_BYTE (c1);
d46c5b12
KH
861 if (c1 < ' ' || c1 >= 0x80
862 || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
863 /* Invalid designation sequence. Just ignore. */
864 break;
865 reg[(c - '(') % 4] = charset;
bcf26d6a 866 }
bf9cdd4e 867 else
d46c5b12
KH
868 /* Invalid designation sequence. Just ignore. */
869 break;
870 }
ae9ff118 871 else if (c == 'N' || c == 'O')
d46c5b12 872 {
ae9ff118
KH
873 /* ESC <Fe> for SS2 or SS3. */
874 mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 875 break;
4ed46869 876 }
ec6d2bb8
KH
877 else if (c >= '0' && c <= '4')
878 {
879 /* ESC <Fp> for start/end composition. */
880 mask_found |= CODING_CATEGORY_MASK_ISO;
881 break;
882 }
bf9cdd4e 883 else
d46c5b12
KH
884 /* Invalid escape sequence. Just ignore. */
885 break;
886
887 /* We found a valid designation sequence for CHARSET. */
888 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
889 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
890 mask_found |= CODING_CATEGORY_MASK_ISO_7;
891 else
892 mask &= ~CODING_CATEGORY_MASK_ISO_7;
893 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
894 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
895 else
896 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
ae9ff118
KH
897 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
898 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
899 else
d46c5b12 900 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
ae9ff118
KH
901 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
902 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
903 else
d46c5b12 904 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
905 break;
906
4ed46869 907 case ISO_CODE_SO:
f46869e4 908 single_shifting = 0;
d46c5b12
KH
909 if (shift_out == 0
910 && (reg[1] >= 0
911 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
912 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
913 {
914 /* Locking shift out. */
915 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
916 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
917 }
e0e989f6
KH
918 break;
919
d46c5b12 920 case ISO_CODE_SI:
f46869e4 921 single_shifting = 0;
d46c5b12
KH
922 if (shift_out == 1)
923 {
924 /* Locking shift in. */
925 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
926 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
927 }
928 break;
929
4ed46869 930 case ISO_CODE_CSI:
f46869e4 931 single_shifting = 0;
4ed46869
KH
932 case ISO_CODE_SS2:
933 case ISO_CODE_SS3:
3f003981
KH
934 {
935 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
936
70c22245
KH
937 if (c != ISO_CODE_CSI)
938 {
d46c5b12
KH
939 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
940 & CODING_FLAG_ISO_SINGLE_SHIFT)
70c22245 941 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
942 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
943 & CODING_FLAG_ISO_SINGLE_SHIFT)
70c22245 944 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
f46869e4 945 single_shifting = 1;
70c22245 946 }
3f003981
KH
947 if (VECTORP (Vlatin_extra_code_table)
948 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
949 {
d46c5b12
KH
950 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
951 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981 952 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
953 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
954 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981
KH
955 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
956 }
957 mask &= newmask;
d46c5b12 958 mask_found |= newmask;
3f003981
KH
959 }
960 break;
4ed46869
KH
961
962 default:
963 if (c < 0x80)
f46869e4
KH
964 {
965 single_shifting = 0;
966 break;
967 }
4ed46869 968 else if (c < 0xA0)
c4825358 969 {
f46869e4 970 single_shifting = 0;
3f003981
KH
971 if (VECTORP (Vlatin_extra_code_table)
972 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 973 {
3f003981
KH
974 int newmask = 0;
975
d46c5b12
KH
976 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
977 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981 978 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
979 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
980 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981
KH
981 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
982 mask &= newmask;
d46c5b12 983 mask_found |= newmask;
c4825358 984 }
3f003981
KH
985 else
986 return 0;
c4825358 987 }
4ed46869
KH
988 else
989 {
7717c392 990 unsigned char *src_begin = src;
4ed46869 991
d46c5b12 992 mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
7717c392 993 | CODING_CATEGORY_MASK_ISO_7_ELSE);
d46c5b12 994 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
f46869e4
KH
995 /* Check the length of succeeding codes of the range
996 0xA0..0FF. If the byte length is odd, we exclude
997 CODING_CATEGORY_MASK_ISO_8_2. We can check this only
998 when we are not single shifting. */
b73bfc1c
KH
999 if (!single_shifting
1000 && mask & CODING_CATEGORY_MASK_ISO_8_2)
f46869e4 1001 {
b73bfc1c
KH
1002 int i = 0;
1003 while (src < src_end)
1004 {
1005 ONE_MORE_BYTE (c);
1006 if (c < 0xA0)
1007 break;
1008 i++;
1009 }
1010
1011 if (i & 1 && src < src_end)
f46869e4
KH
1012 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1013 else
1014 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1015 }
4ed46869
KH
1016 }
1017 break;
1018 }
1019 }
b73bfc1c 1020 label_end_of_loop:
d46c5b12 1021 return (mask & mask_found);
4ed46869
KH
1022}
1023
b73bfc1c
KH
1024/* Decode a character of which charset is CHARSET, the 1st position
1025 code is C1, the 2nd position code is C2, and return the decoded
1026 character code. If the variable `translation_table' is non-nil,
1027 return the translated code instead. */
ec6d2bb8 1028
b73bfc1c
KH
/* Note: the expansion refers to `translation_table' by name, so a
   variable of that name must be in scope wherever this macro is
   used.  When it is nil the character is built with MAKE_CHAR;
   otherwise the result comes from translate_char. */
1029#define DECODE_ISO_CHARACTER(charset, c1, c2) \
1030 (NILP (translation_table) \
1031 ? MAKE_CHAR (charset, c1, c2) \
1032 : translate_char (translation_table, -1, charset, c1, c2))
4ed46869
KH
1033
1034/* Set designation state into CODING. */
d46c5b12
KH
/* Handle a designation of the charset identified by DIMENSION, CHARS
   and FINAL_CHAR to register REG.  The expansion uses the variable
   `coding' and jumps to the label `label_invalid_code', so both must
   exist at the point of use.

   - FINAL_CHAR outside '0'..127 is rejected (goto label_invalid_code).
   - The charset is looked up with ISO_CHARSET_TABLE.  It is accepted
     only if the lookup result is >= 0 and either the coding system's
     requested designation for it equals REG or it is marked in
     coding->safe_charsets.
   - On acceptance, last_invalid_designation_register is reset to -1,
     the charset is replaced by its reverse charset when
     CODING_MODE_DIRECTION is set and a reverse charset exists, and the
     result is stored as the designation of REG.  Exception: when the
     previous invalid designation was to register 0 and this one
     designates CHARSET_ASCII to register 0, the sequence is treated as
     invalid so that it is passed through as is.
   - On rejection, REG is remembered in
     last_invalid_designation_register and control goes to
     label_invalid_code.

   REG and FINAL_CHAR are expanded more than once and without
   parentheses; pass simple expressions. */
1035#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
1036 do { \
944bd420
KH
1037 int charset; \
1038 \
1039 if (final_char < '0' || final_char >= 128) \
1040 goto label_invalid_code; \
1041 charset = ISO_CHARSET_TABLE (make_number (dimension), \
1042 make_number (chars), \
1043 make_number (final_char)); \
d46c5b12 1044 if (charset >= 0 \
704c5781
KH
1045 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1046 || coding->safe_charsets[charset])) \
d46c5b12
KH
1047 { \
1048 if (coding->spec.iso2022.last_invalid_designation_register == 0 \
1049 && reg == 0 \
1050 && charset == CHARSET_ASCII) \
1051 { \
1052 /* We should insert this designation sequence as is so \
1053 that it is surely written back to a file. */ \
1054 coding->spec.iso2022.last_invalid_designation_register = -1; \
1055 goto label_invalid_code; \
1056 } \
1057 coding->spec.iso2022.last_invalid_designation_register = -1; \
1058 if ((coding->mode & CODING_MODE_DIRECTION) \
1059 && CHARSET_REVERSE_CHARSET (charset) >= 0) \
1060 charset = CHARSET_REVERSE_CHARSET (charset); \
1061 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1062 } \
1063 else \
1064 { \
1065 coding->spec.iso2022.last_invalid_designation_register = reg; \
1066 goto label_invalid_code; \
1067 } \
4ed46869
KH
1068 } while (0)
1069
ec6d2bb8
KH
1070/* Allocate a memory block for storing information about compositions.
1071 The block is chained to the already allocated blocks. */
d46c5b12 1072
ec6d2bb8
KH
1073static void
1074coding_allocate_composition_data (coding, char_offset)
d46c5b12 1075 struct coding_system *coding;
ec6d2bb8 1076 int char_offset;
d46c5b12 1077{
ec6d2bb8
KH
1078 struct composition_data *cmp_data
1079 = (struct composition_data *) xmalloc (sizeof *cmp_data);
1080
1081 cmp_data->char_offset = char_offset;
1082 cmp_data->used = 0;
1083 cmp_data->prev = coding->cmp_data;
1084 cmp_data->next = NULL;
1085 if (coding->cmp_data)
1086 coding->cmp_data->next = cmp_data;
1087 coding->cmp_data = cmp_data;
1088 coding->cmp_data_start = 0;
1089}
d46c5b12 1090
ec6d2bb8
KH
/* Record the starting position START and METHOD of one composition.

   A new four-slot entry is opened at index CODING->cmp_data->used of
   the data array, and that index is remembered in
   CODING->cmp_data_start:
     data[0] = -1 (CODING_ADD_COMPOSITION_END overwrites it),
     data[1] = the block's char_offset plus START,
     data[2] = not written here (CODING_ADD_COMPOSITION_END sets it),
     data[3] = METHOD converted to int.
   `used' is then advanced by 4.  No bounds check is done here.

   All arguments are parenthesized in the expansion so that
   expressions such as `a ? b : c' can be passed safely.  CODING is
   evaluated more than once.  */

#define CODING_ADD_COMPOSITION_START(coding, start, method) \
  do { \
    struct composition_data *cmp_data = (coding)->cmp_data; \
    int *data = cmp_data->data + cmp_data->used; \
    (coding)->cmp_data_start = cmp_data->used; \
    data[0] = -1; \
    data[1] = cmp_data->char_offset + (start); \
    data[3] = (int) (method); \
    cmp_data->used += 4; \
  } while (0)
1103
/* Record the ending position END of the current composition.

   The entry being completed starts at index CODING->cmp_data_start
   of CODING->cmp_data->data:
     data[0] = number of slots used by the entry
               (`used' minus cmp_data_start),
     data[2] = the block's char_offset plus END.

   END is parenthesized in the expansion so that expressions such as
   `a ? b : c' can be passed safely.  CODING is evaluated more than
   once.  */

#define CODING_ADD_COMPOSITION_END(coding, end) \
  do { \
    struct composition_data *cmp_data = (coding)->cmp_data; \
    int *data = cmp_data->data + (coding)->cmp_data_start; \
    data[0] = cmp_data->used - (coding)->cmp_data_start; \
    data[2] = cmp_data->char_offset + (end); \
  } while (0)
1113
/* Record one COMPONENT (alternate character or composition rule).

   COMPONENT is stored in the next free slot of
   CODING->cmp_data->data and `used' is incremented.  No bounds check
   is done here.  The arguments are parenthesized so that any
   expression (e.g. `&obj') may be passed; CODING is evaluated
   twice.  */

#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
  ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
1118
1119/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */
1120
/* C1 is the byte that followed ESC ('0', '2', '3' or '4').  The
   expansion uses the variables `coding' and `dst' and the label
   `label_end_of_loop' from the enclosing function.

   - If composition is disabled, ESC and C1 (masked to 7 bits) are
     copied to DST and produced_char grows by 2.
   - If no composition is in progress, a new one is started: the
     method is chosen from C1 and recorded with
     CODING_ADD_COMPOSITION_START.  If the composition data block
     lacks room, coding->result is set to
     CODING_FINISH_INSUFFICIENT_CMP and control leaves the loop.
   - If a composition is already in progress with one of the two
     ALTCHARS methods, the method is switched to
     COMPOSITION_RELATIVE; otherwise nothing is done. */
1121#define DECODE_COMPOSITION_START(c1) \
1122 do { \
1123 if (coding->composing == COMPOSITION_DISABLED) \
1124 { \
1125 *dst++ = ISO_CODE_ESC; \
1126 *dst++ = c1 & 0x7f; \
1127 coding->produced_char += 2; \
1128 } \
1129 else if (!COMPOSING_P (coding)) \
1130 { \
1131 /* This is surely the start of a composition. We must be sure \
1132 that coding->cmp_data has enough space to store the \
1133 information about the composition. If not, terminate the \
1134 current decoding loop, allocate one more memory block for \
1135 coding->cmp_data in the caller, then start the decoding \
1136 loop again. We can't allocate memory here directly because \
1137 it may cause buffer/string relocation. */ \
1138 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1139 >= COMPOSITION_DATA_SIZE) \
1140 { \
b73bfc1c
KH
1141 coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
1142 goto label_end_of_loop; \
ec6d2bb8
KH
1143 } \
1144 coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
1145 : c1 == '2' ? COMPOSITION_WITH_RULE \
1146 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
1147 : COMPOSITION_WITH_RULE_ALTCHARS); \
1148 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
1149 coding->composing); \
1150 coding->composition_rule_follows = 0; \
1151 } \
1152 else \
1153 { \
1154 /* We are already handling a composition. If the method is \
1155 the following two, the codes following the current escape \
1156 sequence are actual characters stored in a buffer. */ \
1157 if (coding->composing == COMPOSITION_WITH_ALTCHARS \
1158 || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
1159 { \
1160 coding->composing = COMPOSITION_RELATIVE; \
1161 coding->composition_rule_follows = 0; \
1162 } \
1163 } \
1164 } while (0)
1165
1166/* Handle composition end sequence ESC 1. */
1167
/* C1 is the byte that followed ESC.  The expansion uses the
   variables `coding' and `dst' from the enclosing function.

   - If composition is disabled, ESC and C1 are copied to DST
     unchanged and produced_char grows by 2.
   - Otherwise the current composition is closed at position
     coding->produced_char via CODING_ADD_COMPOSITION_END and the
     composing state becomes COMPOSITION_NO; C1 is not used. */
1168#define DECODE_COMPOSITION_END(c1) \
1169 do { \
1170 if (coding->composing == COMPOSITION_DISABLED) \
1171 { \
1172 *dst++ = ISO_CODE_ESC; \
1173 *dst++ = c1; \
1174 coding->produced_char += 2; \
1175 } \
1176 else \
1177 { \
1178 CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
1179 coding->composing = COMPOSITION_NO; \
1180 } \
1181 } while (0)
1182
1183/* Decode a composition rule from the byte C1 (and maybe one more byte
1184 from SRC) and store one encoded composition rule in
1185 coding->cmp_data. */
1186
/* C1 must be a modifiable variable: 32 is subtracted from it in
   place.  The expansion also uses `coding' and `c2' from the
   enclosing function, and ONE_MORE_BYTE (which reads the next source
   byte into c2).

   - Adjusted C1 below 81: old one-byte format; C1 / 9 and C1 % 9 give
     the two reference points, with the value 4 remapped to 10.
   - Adjusted C1 in 81..92: new two-byte format; the second byte is
     read into c2 and the rule is built from C1 - 81 and c2 - 32.
   - Any other value leaves the rule at 0.
   The rule is appended with CODING_ADD_COMPOSITION_COMPONENT and
   coding->composition_rule_follows is cleared. */
1187#define DECODE_COMPOSITION_RULE(c1) \
1188 do { \
1189 int rule = 0; \
1190 (c1) -= 32; \
1191 if (c1 < 81) /* old format (before ver.21) */ \
1192 { \
1193 int gref = (c1) / 9; \
1194 int nref = (c1) % 9; \
1195 if (gref == 4) gref = 10; \
1196 if (nref == 4) nref = 10; \
1197 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
1198 } \
b73bfc1c 1199 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
1200 { \
1201 ONE_MORE_BYTE (c2); \
1202 rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
1203 } \
1204 CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \
1205 coding->composition_rule_follows = 0; \
1206 } while (0)
88993dfd 1207
d46c5b12 1208
4ed46869
KH
1209/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1210
b73bfc1c 1211static void
d46c5b12 1212decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
1213 struct coding_system *coding;
1214 unsigned char *source, *destination;
1215 int src_bytes, dst_bytes;
4ed46869
KH
1216{
1217 unsigned char *src = source;
1218 unsigned char *src_end = source + src_bytes;
1219 unsigned char *dst = destination;
1220 unsigned char *dst_end = destination + dst_bytes;
4ed46869
KH
1221 /* Charsets invoked to graphic plane 0 and 1 respectively. */
1222 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1223 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
b73bfc1c
KH
1224 /* SRC_BASE remembers the start position in source in each loop.
1225 The loop will be exited when there's not enough source code
1226 (within macro ONE_MORE_BYTE), or when there's not enough
1227 destination area to produce a character (within macro
1228 EMIT_CHAR). */
1229 unsigned char *src_base;
1230 int c, charset;
1231 Lisp_Object translation_table;
bdd9fb48 1232
b73bfc1c
KH
1233 if (NILP (Venable_character_translation))
1234 translation_table = Qnil;
1235 else
1236 {
1237 translation_table = coding->translation_table_for_decode;
1238 if (NILP (translation_table))
1239 translation_table = Vstandard_translation_table_for_decode;
1240 }
4ed46869 1241
b73bfc1c
KH
1242 coding->result = CODING_FINISH_NORMAL;
1243
1244 while (1)
4ed46869 1245 {
b73bfc1c
KH
1246 int c1, c2;
1247
1248 src_base = src;
1249 ONE_MORE_BYTE (c1);
4ed46869 1250
ec6d2bb8 1251 /* We produce no character or one character. */
4ed46869
KH
1252 switch (iso_code_class [c1])
1253 {
1254 case ISO_0x20_or_0x7F:
ec6d2bb8
KH
1255 if (COMPOSING_P (coding) && coding->composition_rule_follows)
1256 {
1257 DECODE_COMPOSITION_RULE (c1);
b73bfc1c 1258 continue;
ec6d2bb8
KH
1259 }
1260 if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
4ed46869
KH
1261 {
1262 /* This is SPACE or DEL. */
b73bfc1c 1263 charset = CHARSET_ASCII;
4ed46869
KH
1264 break;
1265 }
1266 /* This is a graphic character, we fall down ... */
1267
1268 case ISO_graphic_plane_0:
ec6d2bb8 1269 if (COMPOSING_P (coding) && coding->composition_rule_follows)
b73bfc1c
KH
1270 {
1271 DECODE_COMPOSITION_RULE (c1);
1272 continue;
1273 }
1274 charset = charset0;
4ed46869
KH
1275 break;
1276
1277 case ISO_0xA0_or_0xFF:
d46c5b12
KH
1278 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1279 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
fb88bf2d 1280 goto label_invalid_code;
4ed46869
KH
1281 /* This is a graphic character, we fall down ... */
1282
1283 case ISO_graphic_plane_1:
b73bfc1c 1284 if (charset1 < 0)
fb88bf2d 1285 goto label_invalid_code;
b73bfc1c 1286 charset = charset1;
4ed46869
KH
1287 break;
1288
b73bfc1c 1289 case ISO_control_0:
ec6d2bb8
KH
1290 if (COMPOSING_P (coding))
1291 DECODE_COMPOSITION_END ('1');
1292
4ed46869
KH
1293 /* All ISO2022 control characters in this class have the
1294 same representation in Emacs internal format. */
d46c5b12
KH
1295 if (c1 == '\n'
1296 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1297 && (coding->eol_type == CODING_EOL_CR
1298 || coding->eol_type == CODING_EOL_CRLF))
1299 {
b73bfc1c
KH
1300 coding->result = CODING_FINISH_INCONSISTENT_EOL;
1301 goto label_end_of_loop;
d46c5b12 1302 }
b73bfc1c 1303 charset = CHARSET_ASCII;
4ed46869
KH
1304 break;
1305
b73bfc1c
KH
1306 case ISO_control_1:
1307 if (COMPOSING_P (coding))
1308 DECODE_COMPOSITION_END ('1');
1309 goto label_invalid_code;
1310
4ed46869 1311 case ISO_carriage_return:
ec6d2bb8
KH
1312 if (COMPOSING_P (coding))
1313 DECODE_COMPOSITION_END ('1');
1314
4ed46869 1315 if (coding->eol_type == CODING_EOL_CR)
b73bfc1c 1316 c1 = '\n';
4ed46869
KH
1317 else if (coding->eol_type == CODING_EOL_CRLF)
1318 {
1319 ONE_MORE_BYTE (c1);
b73bfc1c 1320 if (c1 != ISO_CODE_LF)
4ed46869 1321 {
d46c5b12
KH
1322 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1323 {
b73bfc1c
KH
1324 coding->result = CODING_FINISH_INCONSISTENT_EOL;
1325 goto label_end_of_loop;
d46c5b12 1326 }
4ed46869 1327 src--;
b73bfc1c 1328 c1 = '\r';
4ed46869
KH
1329 }
1330 }
b73bfc1c 1331 charset = CHARSET_ASCII;
4ed46869
KH
1332 break;
1333
1334 case ISO_shift_out:
d46c5b12
KH
1335 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1336 || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1337 goto label_invalid_code;
4ed46869
KH
1338 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1339 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
b73bfc1c 1340 continue;
4ed46869
KH
1341
1342 case ISO_shift_in:
d46c5b12
KH
1343 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1344 goto label_invalid_code;
4ed46869
KH
1345 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1346 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
b73bfc1c 1347 continue;
4ed46869
KH
1348
1349 case ISO_single_shift_2_7:
1350 case ISO_single_shift_2:
d46c5b12
KH
1351 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1352 goto label_invalid_code;
4ed46869
KH
1353 /* SS2 is handled as an escape sequence of ESC 'N' */
1354 c1 = 'N';
1355 goto label_escape_sequence;
1356
1357 case ISO_single_shift_3:
d46c5b12
KH
1358 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1359 goto label_invalid_code;
4ed46869
KH
1360 /* SS2 is handled as an escape sequence of ESC 'O' */
1361 c1 = 'O';
1362 goto label_escape_sequence;
1363
1364 case ISO_control_sequence_introducer:
1365 /* CSI is handled as an escape sequence of ESC '[' ... */
1366 c1 = '[';
1367 goto label_escape_sequence;
1368
1369 case ISO_escape:
1370 ONE_MORE_BYTE (c1);
1371 label_escape_sequence:
1372 /* Escape sequences handled by Emacs are invocation,
1373 designation, direction specification, and character
1374 composition specification. */
1375 switch (c1)
1376 {
1377 case '&': /* revision of following character set */
1378 ONE_MORE_BYTE (c1);
1379 if (!(c1 >= '@' && c1 <= '~'))
d46c5b12 1380 goto label_invalid_code;
4ed46869
KH
1381 ONE_MORE_BYTE (c1);
1382 if (c1 != ISO_CODE_ESC)
d46c5b12 1383 goto label_invalid_code;
4ed46869
KH
1384 ONE_MORE_BYTE (c1);
1385 goto label_escape_sequence;
1386
1387 case '$': /* designation of 2-byte character set */
d46c5b12
KH
1388 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1389 goto label_invalid_code;
4ed46869
KH
1390 ONE_MORE_BYTE (c1);
1391 if (c1 >= '@' && c1 <= 'B')
1392 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 1393 or JISX0208.1980 */
4ed46869
KH
1394 DECODE_DESIGNATION (0, 2, 94, c1);
1395 }
1396 else if (c1 >= 0x28 && c1 <= 0x2B)
1397 { /* designation of DIMENSION2_CHARS94 character set */
1398 ONE_MORE_BYTE (c2);
1399 DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1400 }
1401 else if (c1 >= 0x2C && c1 <= 0x2F)
1402 { /* designation of DIMENSION2_CHARS96 character set */
1403 ONE_MORE_BYTE (c2);
1404 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1405 }
1406 else
d46c5b12 1407 goto label_invalid_code;
b73bfc1c
KH
1408 /* We must update these variables now. */
1409 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1410 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1411 continue;
4ed46869
KH
1412
1413 case 'n': /* invocation of locking-shift-2 */
d46c5b12
KH
1414 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1415 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1416 goto label_invalid_code;
4ed46869 1417 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
e0e989f6 1418 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
b73bfc1c 1419 continue;
4ed46869
KH
1420
1421 case 'o': /* invocation of locking-shift-3 */
d46c5b12
KH
1422 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1423 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1424 goto label_invalid_code;
4ed46869 1425 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
e0e989f6 1426 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
b73bfc1c 1427 continue;
4ed46869
KH
1428
1429 case 'N': /* invocation of single-shift-2 */
d46c5b12
KH
1430 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1431 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1432 goto label_invalid_code;
4ed46869 1433 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
b73bfc1c 1434 ONE_MORE_BYTE (c1);
4ed46869
KH
1435 break;
1436
1437 case 'O': /* invocation of single-shift-3 */
d46c5b12
KH
1438 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1439 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1440 goto label_invalid_code;
4ed46869 1441 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
b73bfc1c 1442 ONE_MORE_BYTE (c1);
4ed46869
KH
1443 break;
1444
ec6d2bb8
KH
1445 case '0': case '2': case '3': case '4': /* start composition */
1446 DECODE_COMPOSITION_START (c1);
b73bfc1c 1447 continue;
4ed46869 1448
ec6d2bb8
KH
1449 case '1': /* end composition */
1450 DECODE_COMPOSITION_END (c1);
b73bfc1c 1451 continue;
4ed46869
KH
1452
1453 case '[': /* specification of direction */
d46c5b12
KH
1454 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1455 goto label_invalid_code;
4ed46869 1456 /* For the moment, nested direction is not supported.
d46c5b12
KH
1457 So, `coding->mode & CODING_MODE_DIRECTION' zero means
1458 left-to-right, and nozero means right-to-left. */
4ed46869
KH
1459 ONE_MORE_BYTE (c1);
1460 switch (c1)
1461 {
1462 case ']': /* end of the current direction */
d46c5b12 1463 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
1464
1465 case '0': /* end of the current direction */
1466 case '1': /* start of left-to-right direction */
1467 ONE_MORE_BYTE (c1);
1468 if (c1 == ']')
d46c5b12 1469 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 1470 else
d46c5b12 1471 goto label_invalid_code;
4ed46869
KH
1472 break;
1473
1474 case '2': /* start of right-to-left direction */
1475 ONE_MORE_BYTE (c1);
1476 if (c1 == ']')
d46c5b12 1477 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 1478 else
d46c5b12 1479 goto label_invalid_code;
4ed46869
KH
1480 break;
1481
1482 default:
d46c5b12 1483 goto label_invalid_code;
4ed46869 1484 }
b73bfc1c 1485 continue;
4ed46869
KH
1486
1487 default:
d46c5b12
KH
1488 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1489 goto label_invalid_code;
4ed46869
KH
1490 if (c1 >= 0x28 && c1 <= 0x2B)
1491 { /* designation of DIMENSION1_CHARS94 character set */
1492 ONE_MORE_BYTE (c2);
1493 DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1494 }
1495 else if (c1 >= 0x2C && c1 <= 0x2F)
1496 { /* designation of DIMENSION1_CHARS96 character set */
1497 ONE_MORE_BYTE (c2);
1498 DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1499 }
1500 else
b73bfc1c
KH
1501 goto label_invalid_code;
1502 /* We must update these variables now. */
1503 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1504 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1505 continue;
4ed46869 1506 }
b73bfc1c 1507 }
4ed46869 1508
b73bfc1c
KH
1509 /* Now we know CHARSET and 1st position code C1 of a character.
1510 Produce a multibyte sequence for that character while getting
1511 2nd position code C2 if necessary. */
1512 if (CHARSET_DIMENSION (charset) == 2)
1513 {
1514 ONE_MORE_BYTE (c2);
1515 if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
1516 /* C2 is not in a valid range. */
1517 goto label_invalid_code;
4ed46869 1518 }
b73bfc1c
KH
1519 c = DECODE_ISO_CHARACTER (charset, c1, c2);
1520 EMIT_CHAR (c);
4ed46869
KH
1521 continue;
1522
b73bfc1c
KH
1523 label_invalid_code:
1524 coding->errors++;
1525 if (COMPOSING_P (coding))
1526 DECODE_COMPOSITION_END ('1');
4ed46869 1527 src = src_base;
b73bfc1c
KH
1528 c = *src++;
1529 EMIT_CHAR (c);
4ed46869 1530 }
fb88bf2d 1531
b73bfc1c
KH
1532 label_end_of_loop:
1533 coding->consumed = coding->consumed_char = src_base - source;
d46c5b12 1534 coding->produced = dst - destination;
b73bfc1c 1535 return;
4ed46869
KH
1536}
1537
b73bfc1c 1538
f4dee582 1539/* ISO2022 encoding stuff. */
4ed46869
KH
1540
1541/*
f4dee582 1542 It is not enough to say just "ISO2022" on encoding, we have to
d46c5b12 1543 specify more details. In Emacs, each coding system of ISO2022
4ed46869
KH
1544 variant has the following specifications:
1545 1. Initial designation to G0 thru G3.
1546 2. Allows short-form designation?
1547 3. ASCII should be designated to G0 before control characters?
1548 4. ASCII should be designated to G0 at end of line?
1549 5. 7-bit environment or 8-bit environment?
1550 6. Use locking-shift?
1551 7. Use Single-shift?
1552 And the following two are only for Japanese:
1553 8. Use ASCII in place of JIS0201-1976-Roman?
1554 9. Use JISX0208-1983 in place of JISX0208-1978?
1555 These specifications are encoded in `coding->flags' as flag bits
1556 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
f4dee582 1557 details.
4ed46869
KH
1558*/
1559
1560/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
1561 register REG at DST, and increment DST. If <final-char> of CHARSET is
1562 '@', 'A', or 'B' and the coding system CODING allows, produce
1563 designation sequence of short-form. */
4ed46869
KH
1564
/* Bytes are written through the variable `dst', which must be in
   scope; no check against the end of the destination is made here.
   Output, in order:
     - ESC '&' ('@' + revision), only when the revision number
       obtained from CODING for CHARSET is below 255;
     - ESC;
     - for a dimension-1 charset, the intermediate character for REG
       taken from "()*+" (94 chars) or ",-./" (otherwise);
     - for other charsets, '$' followed by the intermediate
       character, which is omitted only for a 94-char set when
       CODING_FLAG_ISO_SHORT_FORM is set, REG is 0 and the final
       character is within '@'..'B';
     - the final character of CHARSET.
   Finally CHARSET is recorded as the designation of REG in CODING. */
1565#define ENCODE_DESIGNATION(charset, reg, coding) \
1566 do { \
1567 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
1568 char *intermediate_char_94 = "()*+"; \
1569 char *intermediate_char_96 = ",-./"; \
70c22245 1570 int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
b73bfc1c 1571 \
70c22245
KH
1572 if (revision < 255) \
1573 { \
4ed46869
KH
1574 *dst++ = ISO_CODE_ESC; \
1575 *dst++ = '&'; \
70c22245 1576 *dst++ = '@' + revision; \
4ed46869 1577 } \
b73bfc1c 1578 *dst++ = ISO_CODE_ESC; \
4ed46869
KH
1579 if (CHARSET_DIMENSION (charset) == 1) \
1580 { \
1581 if (CHARSET_CHARS (charset) == 94) \
1582 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1583 else \
1584 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1585 } \
1586 else \
1587 { \
1588 *dst++ = '$'; \
1589 if (CHARSET_CHARS (charset) == 94) \
1590 { \
b73bfc1c
KH
1591 if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
1592 || reg != 0 \
1593 || final_char < '@' || final_char > 'B') \
4ed46869
KH
1594 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1595 } \
1596 else \
b73bfc1c 1597 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
4ed46869 1598 } \
b73bfc1c 1599 *dst++ = final_char; \
4ed46869
KH
1600 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1601 } while (0)
1602
1603/* The following two macros produce codes (control character or escape
1604 sequence) for ISO2022 single-shift functions (single-shift-2 and
1605 single-shift-3). */
1606
/* Emit single-shift-2 at `dst': ESC 'N' when CODING_FLAG_ISO_SEVEN_BITS
   is set in coding->flags, otherwise the single byte ISO_CODE_SS2.
   Then mark CODING as single-shifting.  Uses `coding' and `dst' from
   the enclosing scope. */
1607#define ENCODE_SINGLE_SHIFT_2 \
1608 do { \
1609 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1610 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
1611 else \
b73bfc1c 1612 *dst++ = ISO_CODE_SS2; \
4ed46869
KH
1613 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1614 } while (0)
1615
fb88bf2d
KH
/* Emit single-shift-3 at `dst': ESC 'O' when CODING_FLAG_ISO_SEVEN_BITS
   is set in coding->flags, otherwise the single byte ISO_CODE_SS3.
   Then mark CODING as single-shifting.  Uses `coding' and `dst' from
   the enclosing scope. */
1616#define ENCODE_SINGLE_SHIFT_3 \
1617 do { \
4ed46869 1618 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
fb88bf2d
KH
1619 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
1620 else \
b73bfc1c 1621 *dst++ = ISO_CODE_SS3; \
4ed46869
KH
1622 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1623 } while (0)
1624
1625/* The following four macros produce codes (control character or
1626 escape sequence) for ISO2022 locking-shift functions (shift-in,
1627 shift-out, locking-shift-2, and locking-shift-3). */
1628
b73bfc1c
KH
/* Emit ISO_CODE_SI at `dst' and record invocation 0 for plane 0 in
   CODING.  Uses `coding' and `dst' from the enclosing scope. */
1629#define ENCODE_SHIFT_IN \
1630 do { \
1631 *dst++ = ISO_CODE_SI; \
4ed46869
KH
1632 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1633 } while (0)
1634
b73bfc1c
KH
/* Emit ISO_CODE_SO at `dst' and record invocation 1 for plane 0 in
   CODING.  Uses `coding' and `dst' from the enclosing scope. */
1635#define ENCODE_SHIFT_OUT \
1636 do { \
1637 *dst++ = ISO_CODE_SO; \
4ed46869
KH
1638 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1639 } while (0)
1640
/* Emit ESC 'n' at `dst' and record invocation 2 for plane 0 in
   CODING.  Uses `coding' and `dst' from the enclosing scope. */
1641#define ENCODE_LOCKING_SHIFT_2 \
1642 do { \
1643 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
1644 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1645 } while (0)
1646
b73bfc1c
KH
/* Emit ESC 'o' at `dst' and record invocation 3 for plane 0 in
   CODING.  Uses `coding' and `dst' from the enclosing scope. */
1647#define ENCODE_LOCKING_SHIFT_3 \
1648 do { \
1649 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
4ed46869
KH
1650 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1651 } while (0)
1652
f4dee582
RS
1653/* Produce codes for a DIMENSION1 character whose character set is
1654 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
1655 sequences are also produced in advance if necessary. */
1656
6e85d753
KH
/* Uses `coding' and `dst' from the enclosing scope.  The body is a
   `do ... while (1)' loop that is left by `break' once something has
   been written; the cases are tried in this order:
     1. A single shift is pending: write C1 with the high bit cleared
        (7-bit coding) or set (otherwise), and clear the pending flag.
     2. CHARSET is invoked to plane 0: write C1 & 0x7F.
     3. CHARSET is invoked to plane 1: write C1 | 0x80.
     4. CODING_FLAG_ISO_SAFE is set and CHARSET is not in
        coding->safe_charsets: write the substitution byte once, or
        twice when CHARSET_WIDTH is 2.
     5. Otherwise call encode_invocation_designation to emit the
        needed designation/invocation and go round the loop again. */
1657#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
1658 do { \
1659 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1660 { \
1661 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1662 *dst++ = c1 & 0x7F; \
1663 else \
1664 *dst++ = c1 | 0x80; \
1665 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1666 break; \
1667 } \
1668 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1669 { \
1670 *dst++ = c1 & 0x7F; \
1671 break; \
1672 } \
1673 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1674 { \
1675 *dst++ = c1 | 0x80; \
1676 break; \
1677 } \
1678 else if (coding->flags & CODING_FLAG_ISO_SAFE \
70c22245 1679 && !coding->safe_charsets[charset]) \
6e85d753
KH
1680 { \
1681 /* We should not encode this character, instead produce one or \
1682 two `?'s. */ \
1683 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1684 if (CHARSET_WIDTH (charset) == 2) \
1685 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1686 break; \
1687 } \
1688 else \
1689 /* Since CHARSET is not yet invoked to any graphic planes, we \
1690 must invoke it, or, at first, designate it to some graphic \
1691 register. Then repeat the loop to actually produce the \
1692 character. */ \
1693 dst = encode_invocation_designation (charset, coding, dst); \
4ed46869
KH
1694 } while (1)
1695
f4dee582
RS
/* Produce the codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are produced first when they are needed.  `dst'
   and `coding' are taken from the expanding scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* The single-shifting flag is set: emit both bytes with        \
           their high bit chosen by the seven-bit flag, then clear      \
           the flag.  */                                                \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
        && !coding->safe_charsets[charset])                             \
      {                                                                 \
        /* This character should not be encoded; produce one or         \
           two substitution bytes instead, matching its width.  */      \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Invoke      \
       it (designating it to a register first if necessary) and go      \
       round the loop again to produce the character itself.  */        \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1738
6f551029
KH
/* Produce the codes for one character of CHARSET whose position-codes
   are C1 and C2.  A charset that is not defined has C1 (and C2, when
   it is non-negative) copied out unchanged.  Otherwise the work is
   handed to the DIMENSION1 or DIMENSION2 macro, after substituting
   latin-jisx0201 for ASCII or jisx0208-1978 for jisx0208 when the
   corresponding coding flag is set.  `dst' and `coding' are taken
   from the expanding scope.  */
#define ENCODE_ISO_CHARACTER(charset, c1, c2)                           \
  do {                                                                  \
    int charset_to_encode = charset;                                    \
                                                                        \
    if (! (CHARSET_DEFINED_P (charset)))                                \
      {                                                                 \
        *dst++ = c1;                                                    \
        if (c2 >= 0)                                                    \
          *dst++ = c2;                                                  \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset) == 1)                          \
      {                                                                 \
        if (charset == CHARSET_ASCII                                    \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          charset_to_encode = charset_latin_jisx0201;                   \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_to_encode, c1);        \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (charset == charset_jisx0208                                 \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          charset_to_encode = charset_jisx0208_1978;                    \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_to_encode, c1, c2);    \
      }                                                                 \
  } while (0)
bdd9fb48 1767
4ed46869
KH
1768/* Produce designation and invocation codes at a place pointed by DST
1769 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1770 Return new DST. */
1771
1772unsigned char *
1773encode_invocation_designation (charset, coding, dst)
1774 int charset;
1775 struct coding_system *coding;
1776 unsigned char *dst;
1777{
1778 int reg; /* graphic register number */
1779
1780 /* At first, check designations. */
1781 for (reg = 0; reg < 4; reg++)
1782 if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1783 break;
1784
1785 if (reg >= 4)
1786 {
1787 /* CHARSET is not yet designated to any graphic registers. */
1788 /* At first check the requested designation. */
1789 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1ba9e4ab
KH
1790 if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1791 /* Since CHARSET requests no special designation, designate it
1792 to graphic register 0. */
4ed46869
KH
1793 reg = 0;
1794
1795 ENCODE_DESIGNATION (charset, reg, coding);
1796 }
1797
1798 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1799 && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1800 {
1801 /* Since the graphic register REG is not invoked to any graphic
1802 planes, invoke it to graphic plane 0. */
1803 switch (reg)
1804 {
1805 case 0: /* graphic register 0 */
1806 ENCODE_SHIFT_IN;
1807 break;
1808
1809 case 1: /* graphic register 1 */
1810 ENCODE_SHIFT_OUT;
1811 break;
1812
1813 case 2: /* graphic register 2 */
1814 if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1815 ENCODE_SINGLE_SHIFT_2;
1816 else
1817 ENCODE_LOCKING_SHIFT_2;
1818 break;
1819
1820 case 3: /* graphic register 3 */
1821 if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1822 ENCODE_SINGLE_SHIFT_3;
1823 else
1824 ENCODE_LOCKING_SHIFT_3;
1825 break;
1826 }
1827 }
b73bfc1c 1828
4ed46869
KH
1829 return dst;
1830}
1831
ec6d2bb8
KH
/* Produce the 2-byte code for the encoded composition rule RULE.
   RULE is split by COMPOSITION_DECODE_RULE into two values; the first
   is emitted offset by 32 + 81 and the second offset by 32.  `dst' is
   taken from the expanding scope.  */

#define ENCODE_COMPOSITION_RULE(rule)                   \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    COMPOSITION_DECODE_RULE (rule, gref, nref);         \
    dst[0] = 32 + 81 + gref;                            \
    dst[1] = 32 + nref;                                 \
    dst += 2;                                           \
  } while (0)
1841
/* Produce the codes that indicate the start of a composition
   sequence (ESC 0, ESC 3, or ESC 4).  DATA points to an array of
   integers describing the composition; DATA[3] is stored as the
   composition method.  See the comment in coding.h for the format of
   DATA.  For the ESC 3 and ESC 4 cases the component index is
   positioned four entries past the start of the composition data and
   the rule-follows flag is cleared.  `dst' is taken from the
   expanding scope.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
        coding->composition_rule_follows = 0;                           \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
1861
/* Produce the codes that indicate the end of the current composition
   (ESC 1).  The start index into the composition data is advanced by
   DATA[0], and when that reaches the end of the current data block
   and another block follows, processing switches to the next block.
   `dst' is taken from the expanding scope.  */

#define ENCODE_COMPOSITION_END(coding, data)                            \
  do {                                                                  \
    *dst++ = ISO_CODE_ESC;                                              \
    *dst++ = '1';                                                       \
    coding->cmp_data_start += data[0];                                  \
    coding->composing = COMPOSITION_NO;                                 \
    if (coding->cmp_data_start == coding->cmp_data->used)               \
      {                                                                 \
        if (coding->cmp_data->next)                                     \
          {                                                             \
            coding->cmp_data = coding->cmp_data->next;                  \
            coding->cmp_data_start = 0;                                 \
          }                                                             \
      }                                                                 \
  } while (0)
1877
/* Produce the composition start sequence ESC 0.  Here the sequence
   does not start a new composition; it marks that the components
   (alternate chars and composition rules) of the composition have
   just been produced and that the actual text follows in SRC.  The
   composition method is set to COMPOSITION_RELATIVE.  `dst' is taken
   from the expanding scope.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)           \
  do {                                                  \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = '0';                                       \
    dst += 2;                                           \
    coding->composing = COMPOSITION_RELATIVE;           \
  } while (0)
4ed46869
KH
1889
/* The following three macros produce codes for indicating direction
   of text.  Each expands to a single statement and uses `dst' and
   `coding' from the expanding scope.  */

/* Emit the control sequence introducer: the two bytes ESC [ when the
   seven-bit flag is set in CODING->flags, otherwise the single byte
   ISO_CODE_CSI.  The flag is tested as a bit, so other flags being
   set at the same time do not affect the choice.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = '[';                                   \
      }                                                 \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* Emit the introducer followed by `2' and `]'.  */
#define ENCODE_DIRECTION_R2L                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '2';                                       \
    *dst++ = ']';                                       \
  } while (0)

/* Emit the introducer followed by `0' and `]'.  */
#define ENCODE_DIRECTION_L2R                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '0';                                       \
    *dst++ = ']';                                       \
  } while (0)
4ed46869
KH
1905
/* Produce the designation and invocation codes that bring the graphic
   planes and registers back to their initial state: shift in when
   plane 0 does not hold register 0, then re-designate every register
   whose initial designation is non-negative and differs from its
   current one.  `dst' and `coding' are taken from the expanding
   scope.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                 \
  do {                                                                  \
    int reg = 0;                                                        \
                                                                        \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                    \
      ENCODE_SHIFT_IN;                                                  \
    while (reg < 4)                                                     \
      {                                                                 \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0      \
            && (CODING_SPEC_ISO_DESIGNATION (coding, reg)               \
                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))  \
          {                                                             \
            ENCODE_DESIGNATION                                          \
              (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg),       \
               reg, coding);                                            \
          }                                                             \
        reg++;                                                          \
      }                                                                 \
  } while (0)
1920
bdd9fb48 1921/* Produce designation sequences of charsets in the line started from
b73bfc1c 1922 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
1923
1924 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
1925 find all the necessary designations. */
1926
b73bfc1c
KH
1927static unsigned char *
1928encode_designation_at_bol (coding, translation_table, src, src_end, dst)
e0e989f6 1929 struct coding_system *coding;
b73bfc1c
KH
1930 Lisp_Object translation_table;
1931 unsigned char *src, *src_end, *dst;
e0e989f6 1932{
bdd9fb48
KH
1933 int charset, c, found = 0, reg;
1934 /* Table of charsets to be designated to each graphic register. */
1935 int r[4];
bdd9fb48
KH
1936
1937 for (reg = 0; reg < 4; reg++)
1938 r[reg] = -1;
1939
b73bfc1c 1940 while (found < 4)
e0e989f6 1941 {
b73bfc1c
KH
1942 ONE_MORE_CHAR (c);
1943 if (c == '\n')
1944 break;
bdd9fb48 1945
b73bfc1c 1946 charset = CHAR_CHARSET (c);
e0e989f6 1947 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
d46c5b12 1948 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
bdd9fb48
KH
1949 {
1950 found++;
1951 r[reg] = charset;
1952 }
bdd9fb48
KH
1953 }
1954
b73bfc1c 1955 label_end_of_loop:
bdd9fb48
KH
1956 if (found)
1957 {
1958 for (reg = 0; reg < 4; reg++)
1959 if (r[reg] >= 0
1960 && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1961 ENCODE_DESIGNATION (r[reg], reg, coding);
e0e989f6 1962 }
b73bfc1c
KH
1963
1964 return dst;
e0e989f6
KH
1965}
1966
4ed46869
KH
1967/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
1968
b73bfc1c 1969static void
d46c5b12 1970encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
1971 struct coding_system *coding;
1972 unsigned char *source, *destination;
1973 int src_bytes, dst_bytes;
4ed46869
KH
1974{
1975 unsigned char *src = source;
1976 unsigned char *src_end = source + src_bytes;
1977 unsigned char *dst = destination;
1978 unsigned char *dst_end = destination + dst_bytes;
b73bfc1c 1979 /* Since the maximum bytes produced by each loop is 20, we subtract 19
4ed46869
KH
1980 from DST_END to assure overflow checking is necessary only at the
1981 head of loop. */
b73bfc1c
KH
1982 unsigned char *adjusted_dst_end = dst_end - 19;
1983 /* SRC_BASE remembers the start position in source in each loop.
1984 The loop will be exited when there's not enough source text to
1985 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
1986 there's not enough destination area to produce encoded codes
1987 (within macro EMIT_BYTES). */
1988 unsigned char *src_base;
1989 int c;
1990 Lisp_Object translation_table;
bdd9fb48 1991
b73bfc1c
KH
1992 if (NILP (Venable_character_translation))
1993 translation_table = Qnil;
1994 else
1995 {
1996 translation_table = coding->translation_table_for_encode;
1997 if (NILP (translation_table))
1998 translation_table = Vstandard_translation_table_for_encode;
1999 }
4ed46869 2000
d46c5b12 2001 coding->consumed_char = 0;
b73bfc1c
KH
2002 coding->errors = 0;
2003 while (1)
4ed46869 2004 {
b73bfc1c
KH
2005 int charset, c1, c2;
2006
2007 src_base = src;
2008
2009 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2010 {
2011 coding->result = CODING_FINISH_INSUFFICIENT_DST;
2012 break;
2013 }
4ed46869 2014
e0e989f6
KH
2015 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2016 && CODING_SPEC_ISO_BOL (coding))
2017 {
bdd9fb48 2018 /* We have to produce designation sequences if any now. */
b73bfc1c
KH
2019 dst = encode_designation_at_bol (coding, translation_table,
2020 src, src_end, dst);
e0e989f6
KH
2021 CODING_SPEC_ISO_BOL (coding) = 0;
2022 }
2023
ec6d2bb8
KH
2024 /* Check composition start and end. */
2025 if (coding->composing != COMPOSITION_DISABLED
2026 && coding->cmp_data_start < coding->cmp_data->used)
4ed46869 2027 {
ec6d2bb8
KH
2028 struct composition_data *cmp_data = coding->cmp_data;
2029 int *data = cmp_data->data + coding->cmp_data_start;
2030 int this_pos = cmp_data->char_offset + coding->consumed_char;
2031
2032 if (coding->composing == COMPOSITION_RELATIVE)
4ed46869 2033 {
ec6d2bb8
KH
2034 if (this_pos == data[2])
2035 {
2036 ENCODE_COMPOSITION_END (coding, data);
2037 cmp_data = coding->cmp_data;
2038 data = cmp_data->data + coding->cmp_data_start;
2039 }
4ed46869 2040 }
ec6d2bb8 2041 else if (COMPOSING_P (coding))
4ed46869 2042 {
ec6d2bb8
KH
2043 /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR */
2044 if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2045 /* We have consumed components of the composition.
2046 What follows in SRC is the compositions's base
2047 text. */
2048 ENCODE_COMPOSITION_FAKE_START (coding);
2049 else
4ed46869 2050 {
ec6d2bb8
KH
2051 int c = cmp_data->data[coding->cmp_data_index++];
2052 if (coding->composition_rule_follows)
2053 {
2054 ENCODE_COMPOSITION_RULE (c);
2055 coding->composition_rule_follows = 0;
2056 }
2057 else
2058 {
2059 SPLIT_CHAR (c, charset, c1, c2);
2060 ENCODE_ISO_CHARACTER (charset, c1, c2);
ec6d2bb8
KH
2061 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2062 coding->composition_rule_follows = 1;
2063 }
4ed46869
KH
2064 continue;
2065 }
ec6d2bb8
KH
2066 }
2067 if (!COMPOSING_P (coding))
2068 {
2069 if (this_pos == data[1])
4ed46869 2070 {
ec6d2bb8
KH
2071 ENCODE_COMPOSITION_START (coding, data);
2072 continue;
4ed46869 2073 }
4ed46869
KH
2074 }
2075 }
ec6d2bb8 2076
b73bfc1c 2077 ONE_MORE_CHAR (c);
4ed46869 2078
b73bfc1c
KH
2079 /* Now encode the character C. */
2080 if (c < 0x20 || c == 0x7F)
2081 {
2082 if (c == '\r')
19a8d9e0 2083 {
b73bfc1c
KH
2084 if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2085 {
2086 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2087 ENCODE_RESET_PLANE_AND_REGISTER;
2088 *dst++ = c;
2089 continue;
2090 }
2091 /* fall down to treat '\r' as '\n' ... */
2092 c = '\n';
19a8d9e0 2093 }
b73bfc1c 2094 if (c == '\n')
19a8d9e0 2095 {
b73bfc1c
KH
2096 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2097 ENCODE_RESET_PLANE_AND_REGISTER;
2098 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2099 bcopy (coding->spec.iso2022.initial_designation,
2100 coding->spec.iso2022.current_designation,
2101 sizeof coding->spec.iso2022.initial_designation);
2102 if (coding->eol_type == CODING_EOL_LF
2103 || coding->eol_type == CODING_EOL_UNDECIDED)
2104 *dst++ = ISO_CODE_LF;
2105 else if (coding->eol_type == CODING_EOL_CRLF)
2106 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2107 else
2108 *dst++ = ISO_CODE_CR;
2109 CODING_SPEC_ISO_BOL (coding) = 1;
19a8d9e0 2110 }
b73bfc1c 2111 else
19a8d9e0 2112 {
b73bfc1c
KH
2113 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2114 ENCODE_RESET_PLANE_AND_REGISTER;
2115 *dst++ = c;
19a8d9e0 2116 }
4ed46869 2117 }
b73bfc1c
KH
2118 else if (ASCII_BYTE_P (c))
2119 ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1);
2120 else if (SINGLE_BYTE_CHAR_P (c))
88993dfd 2121 {
b73bfc1c
KH
2122 *dst++ = c;
2123 coding->errors++;
88993dfd 2124 }
b73bfc1c
KH
2125 else
2126 {
2127 SPLIT_CHAR (c, charset, c1, c2);
2128 ENCODE_ISO_CHARACTER (charset, c1, c2);
2129 }
2130
2131 coding->consumed_char++;
84fbb8a0 2132 }
b73bfc1c
KH
2133
2134 label_end_of_loop:
2135 coding->consumed = src_base - source;
d46c5b12 2136 coding->produced = coding->produced_char = dst - destination;
4ed46869
KH
2137}
2138
2139\f
2140/*** 4. SJIS and BIG5 handlers ***/
2141
f4dee582 2142/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
2143 quite widely. So, for the moment, Emacs supports them in the bare
2144 C code. But, in the future, they may be supported only by CCL. */
2145
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, with the two position-codes divided and
   shifted so that they fit in the ranges below.
2152
2153 --- CODE RANGE of SJIS ---
2154 (character set) (range)
2155 ASCII 0x00 .. 0x7F
2156 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 2157 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 2158 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
2159 -------------------------------
2160
2161*/
2162
2163/* BIG5 is a coding system encoding two character sets: ASCII and
2164 Big5. An ASCII character is encoded as is. Big5 is a two-byte
2165 character set and is encoded in two-byte.
2166
2167 --- CODE RANGE of BIG5 ---
2168 (character set) (range)
2169 ASCII 0x00 .. 0x7F
2170 Big5 (1st byte) 0xA1 .. 0xFE
2171 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
2172 --------------------------
2173
2174 Since the number of characters in Big5 is larger than maximum
2175 characters in Emacs' charset (96x96), it can't be handled as one
2176 charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
2177 and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
2178 contains frequently used characters and the latter contains less
2179 frequently used characters. */
2180
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions, so expressions
   such as `x | 0xA0' may be passed.  The inputs are read into TEMP
   before any output is assigned, so an output may name the same
   variable as an input.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the Big5 byte pair B1, B2 into CHARSET and the
   position-codes C1, C2.  A first byte below 0xC9 selects
   charset_big5_1, anything else charset_big5_2.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Convert CHARSET and the position-codes C1, C2 into the Big5 byte
   pair B1, B2; the inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2213
2214/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2215 Check if a text is encoded in SJIS. If it is, return
2216 CODING_CATEGORY_MASK_SJIS, else return 0. */
2217
2218int
2219detect_coding_sjis (src, src_end)
2220 unsigned char *src, *src_end;
2221{
b73bfc1c
KH
2222 int c;
2223 /* Dummy for ONE_MORE_BYTE. */
2224 struct coding_system dummy_coding;
2225 struct coding_system *coding = &dummy_coding;
4ed46869 2226
b73bfc1c 2227 while (1)
4ed46869 2228 {
b73bfc1c 2229 ONE_MORE_BYTE (c);
4ed46869
KH
2230 if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2231 {
b73bfc1c
KH
2232 ONE_MORE_BYTE (c);
2233 if (c < 0x40)
4ed46869
KH
2234 return 0;
2235 }
2236 }
b73bfc1c 2237 label_end_of_loop:
4ed46869
KH
2238 return CODING_CATEGORY_MASK_SJIS;
2239}
2240
2241/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2242 Check if a text is encoded in BIG5. If it is, return
2243 CODING_CATEGORY_MASK_BIG5, else return 0. */
2244
2245int
2246detect_coding_big5 (src, src_end)
2247 unsigned char *src, *src_end;
2248{
b73bfc1c
KH
2249 int c;
2250 /* Dummy for ONE_MORE_BYTE. */
2251 struct coding_system dummy_coding;
2252 struct coding_system *coding = &dummy_coding;
4ed46869 2253
b73bfc1c 2254 while (1)
4ed46869 2255 {
b73bfc1c 2256 ONE_MORE_BYTE (c);
4ed46869
KH
2257 if (c >= 0xA1)
2258 {
b73bfc1c 2259 ONE_MORE_BYTE (c);
4ed46869
KH
2260 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2261 return 0;
2262 }
2263 }
b73bfc1c 2264 label_end_of_loop:
4ed46869
KH
2265 return CODING_CATEGORY_MASK_BIG5;
2266}
2267
fa42c37f
KH
2268/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2269 Check if a text is encoded in UTF-8. If it is, return
2270 CODING_CATEGORY_MASK_UTF_8, else return 0. */
2271
2272#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
2273#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
2274#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2275#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2276#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2277#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2278#define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2279
2280int
2281detect_coding_utf_8 (src, src_end)
2282 unsigned char *src, *src_end;
2283{
2284 unsigned char c;
2285 int seq_maybe_bytes;
b73bfc1c
KH
2286 /* Dummy for ONE_MORE_BYTE. */
2287 struct coding_system dummy_coding;
2288 struct coding_system *coding = &dummy_coding;
fa42c37f 2289
b73bfc1c 2290 while (1)
fa42c37f 2291 {
b73bfc1c 2292 ONE_MORE_BYTE (c);
fa42c37f
KH
2293 if (UTF_8_1_OCTET_P (c))
2294 continue;
2295 else if (UTF_8_2_OCTET_LEADING_P (c))
2296 seq_maybe_bytes = 1;
2297 else if (UTF_8_3_OCTET_LEADING_P (c))
2298 seq_maybe_bytes = 2;
2299 else if (UTF_8_4_OCTET_LEADING_P (c))
2300 seq_maybe_bytes = 3;
2301 else if (UTF_8_5_OCTET_LEADING_P (c))
2302 seq_maybe_bytes = 4;
2303 else if (UTF_8_6_OCTET_LEADING_P (c))
2304 seq_maybe_bytes = 5;
2305 else
2306 return 0;
2307
2308 do
2309 {
b73bfc1c 2310 ONE_MORE_BYTE (c);
fa42c37f
KH
2311 if (!UTF_8_EXTRA_OCTET_P (c))
2312 return 0;
2313 seq_maybe_bytes--;
2314 }
2315 while (seq_maybe_bytes > 0);
2316 }
2317
b73bfc1c 2318 label_end_of_loop:
fa42c37f
KH
2319 return CODING_CATEGORY_MASK_UTF_8;
2320}
2321
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 Big Endian or Little Endian
   by looking at its first two bytes.  If they are FE FF, return
   CODING_CATEGORY_MASK_UTF_16_BE; if they are FF FE, return
   CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2327
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL lies in 0xD800..0xDBFF.  The top six bits select
   the range, hence the 0xFC00 mask.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL lies in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2337
2338int
2339detect_coding_utf_16 (src, src_end)
2340 unsigned char *src, *src_end;
2341{
b73bfc1c
KH
2342 unsigned char c1, c2;
2343 /* Dummy for TWO_MORE_BYTES. */
2344 struct coding_system dummy_coding;
2345 struct coding_system *coding = &dummy_coding;
fa42c37f 2346
b73bfc1c
KH
2347 TWO_MORE_BYTES (c1, c2);
2348
2349 if ((c1 == 0xFF) && (c2 == 0xFE))
fa42c37f 2350 return CODING_CATEGORY_MASK_UTF_16_LE;
b73bfc1c 2351 else if ((c1 == 0xFE) && (c2 == 0xFF))
fa42c37f
KH
2352 return CODING_CATEGORY_MASK_UTF_16_BE;
2353
b73bfc1c 2354 label_end_of_loop:
fa42c37f
KH
2355 return 0;
2356}
2357
4ed46869
KH
2358/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2359 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
2360
b73bfc1c 2361static void
4ed46869 2362decode_coding_sjis_big5 (coding, source, destination,
d46c5b12 2363 src_bytes, dst_bytes, sjis_p)
4ed46869
KH
2364 struct coding_system *coding;
2365 unsigned char *source, *destination;
2366 int src_bytes, dst_bytes;
4ed46869
KH
2367 int sjis_p;
2368{
2369 unsigned char *src = source;
2370 unsigned char *src_end = source + src_bytes;
2371 unsigned char *dst = destination;
2372 unsigned char *dst_end = destination + dst_bytes;
b73bfc1c
KH
2373 /* SRC_BASE remembers the start position in source in each loop.
2374 The loop will be exited when there's not enough source code
2375 (within macro ONE_MORE_BYTE), or when there's not enough
2376 destination area to produce a character (within macro
2377 EMIT_CHAR). */
2378 unsigned char *src_base;
2379 Lisp_Object translation_table;
a5d301df 2380
b73bfc1c
KH
2381 if (NILP (Venable_character_translation))
2382 translation_table = Qnil;
2383 else
2384 {
2385 translation_table = coding->translation_table_for_decode;
2386 if (NILP (translation_table))
2387 translation_table = Vstandard_translation_table_for_decode;
2388 }
4ed46869 2389
d46c5b12 2390 coding->produced_char = 0;
b73bfc1c 2391 while (1)
4ed46869 2392 {
b73bfc1c
KH
2393 int c, charset, c1, c2;
2394
2395 src_base = src;
2396 ONE_MORE_BYTE (c1);
2397
2398 if (c1 < 0x80)
4ed46869 2399 {
b73bfc1c
KH
2400 charset = CHARSET_ASCII;
2401 if (c1 < 0x20)
4ed46869 2402 {
b73bfc1c 2403 if (c1 == '\r')
d46c5b12 2404 {
b73bfc1c 2405 if (coding->eol_type == CODING_EOL_CRLF)
d46c5b12 2406 {
b73bfc1c
KH
2407 ONE_MORE_BYTE (c2);
2408 if (c2 == '\n')
2409 c1 = c2;
2410 else if (coding->mode
2411 & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2412 {
2413 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2414 goto label_end_of_loop;
2415 }
2416 else
2417 /* To process C2 again, SRC is subtracted by 1. */
2418 src--;
d46c5b12 2419 }
b73bfc1c
KH
2420 else if (coding->eol_type == CODING_EOL_CR)
2421 c1 = '\n';
2422 }
2423 else if (c1 == '\n'
2424 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2425 && (coding->eol_type == CODING_EOL_CR
2426 || coding->eol_type == CODING_EOL_CRLF))
2427 {
2428 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2429 goto label_end_of_loop;
d46c5b12 2430 }
4ed46869 2431 }
4ed46869 2432 }
54f78171 2433 else
b73bfc1c 2434 {
4ed46869
KH
2435 if (sjis_p)
2436 {
b73bfc1c
KH
2437 if (c1 >= 0xF0)
2438 goto label_invalid_code;
2439 if (c1 < 0xA0 || c1 >= 0xE0)
fb88bf2d 2440 {
54f78171
KH
2441 /* SJIS -> JISX0208 */
2442 ONE_MORE_BYTE (c2);
b73bfc1c
KH
2443 if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2444 goto label_invalid_code;
2445 DECODE_SJIS (c1, c2, c1, c2);
2446 charset = charset_jisx0208;
5e34de15 2447 }
fb88bf2d 2448 else
b73bfc1c
KH
2449 /* SJIS -> JISX0201-Kana */
2450 charset = charset_katakana_jisx0201;
4ed46869 2451 }
fb88bf2d 2452 else
fb88bf2d 2453 {
54f78171 2454 /* BIG5 -> Big5 */
b73bfc1c
KH
2455 if (c1 < 0xA1 || c1 > 0xFE)
2456 goto label_invalid_code;
2457 ONE_MORE_BYTE (c2);
2458 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2459 goto label_invalid_code;
2460 DECODE_BIG5 (c1, c2, charset, c1, c2);
4ed46869
KH
2461 }
2462 }
4ed46869 2463
b73bfc1c
KH
2464 c = DECODE_ISO_CHARACTER (charset, c1, c2);
2465 EMIT_CHAR (c);
fb88bf2d
KH
2466 continue;
2467
b73bfc1c
KH
2468 label_invalid_code:
2469 coding->errors++;
4ed46869 2470 src = src_base;
b73bfc1c
KH
2471 c = *src++;
2472 EMIT_CHAR (c);
fb88bf2d 2473 }
d46c5b12 2474
b73bfc1c
KH
2475 label_end_of_loop:
2476 coding->consumed = coding->consumed_char = src_base - source;
d46c5b12 2477 coding->produced = dst - destination;
b73bfc1c 2478 return;
4ed46869
KH
2479}
2480
2481/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
2482 This function can encode charsets `ascii', `katakana-jisx0201',
2483 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
2484 are sure that all these charsets are registered as official charset
4ed46869
KH
2485 (i.e. do not have extended leading-codes). Characters of other
2486 charsets are produced without any encoding. If SJIS_P is 1, encode
2487 SJIS text, else encode BIG5 text. */
2488
b73bfc1c 2489static void
4ed46869 2490encode_coding_sjis_big5 (coding, source, destination,
d46c5b12 2491 src_bytes, dst_bytes, sjis_p)
4ed46869
KH
2492 struct coding_system *coding;
2493 unsigned char *source, *destination;
2494 int src_bytes, dst_bytes;
4ed46869
KH
2495 int sjis_p;
2496{
2497 unsigned char *src = source;
2498 unsigned char *src_end = source + src_bytes;
2499 unsigned char *dst = destination;
2500 unsigned char *dst_end = destination + dst_bytes;
b73bfc1c
KH
2501 /* SRC_BASE remembers the start position in source in each loop.
2502 The loop will be exited when there's not enough source text to
2503 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2504 there's not enough destination area to produce encoded codes
2505 (within macro EMIT_BYTES). */
2506 unsigned char *src_base;
2507 Lisp_Object translation_table;
4ed46869 2508
b73bfc1c
KH
2509 if (NILP (Venable_character_translation))
2510 translation_table = Qnil;
2511 else
4ed46869 2512 {
b73bfc1c
KH
2513 translation_table = coding->translation_table_for_decode;
2514 if (NILP (translation_table))
2515 translation_table = Vstandard_translation_table_for_decode;
2516 }
a5d301df 2517
b73bfc1c
KH
2518 while (1)
2519 {
2520 int c, charset, c1, c2;
4ed46869 2521
b73bfc1c
KH
2522 src_base = src;
2523 ONE_MORE_CHAR (c);
2524
2525 /* Now encode the character C. */
2526 if (SINGLE_BYTE_CHAR_P (c))
2527 {
2528 switch (c)
4ed46869 2529 {
b73bfc1c
KH
2530 case '\r':
2531 if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2532 {
2533 EMIT_ONE_BYTE (c);
2534 break;
2535 }
2536 c = '\n';
2537 case '\n':
2538 if (coding->eol_type == CODING_EOL_CRLF)
2539 {
2540 EMIT_TWO_BYTES ('\r', c);
2541 break;
2542 }
2543 else if (coding->eol_type == CODING_EOL_CR)
2544 c = '\r';
2545 default:
2546 EMIT_ONE_BYTE (c);
2547 }
2548 }
2549 else
2550 {
2551 SPLIT_CHAR (c, charset, c1, c2);
2552 if (sjis_p)
2553 {
2554 if (charset == charset_jisx0208
2555 || charset == charset_jisx0208_1978)
2556 {
2557 ENCODE_SJIS (c1, c2, c1, c2);
2558 EMIT_TWO_BYTES (c1, c2);
2559 }
2560 else if (charset == charset_latin_jisx0201)
2561 EMIT_ONE_BYTE (c1);
2562 else
2563 /* There's no way other than producing the internal
2564 codes as is. */
2565 EMIT_BYTES (src_base, src);
4ed46869 2566 }
4ed46869 2567 else
b73bfc1c
KH
2568 {
2569 if (charset == charset_big5_1 || charset == charset_big5_2)
2570 {
2571 ENCODE_BIG5 (charset, c1, c2, c1, c2);
2572 EMIT_TWO_BYTES (c1, c2);
2573 }
2574 else
2575 /* There's no way other than producing the internal
2576 codes as is. */
2577 EMIT_BYTES (src_base, src);
2578 }
4ed46869 2579 }
b73bfc1c 2580 coding->consumed_char++;
4ed46869
KH
2581 }
2582
b73bfc1c
KH
2583 label_end_of_loop:
2584 coding->consumed = src_base - source;
d46c5b12 2585 coding->produced = coding->produced_char = dst - destination;
4ed46869
KH
2586}
2587
2588\f
1397dc18
KH
2589/*** 5. CCL handlers ***/
2590
2591/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2592 Check if a text is encoded in a coding system of which
2593 encoder/decoder are written in CCL program. If it is, return
2594 CODING_CATEGORY_MASK_CCL, else return 0. */
2595
2596int
2597detect_coding_ccl (src, src_end)
2598 unsigned char *src, *src_end;
2599{
2600 unsigned char *valid;
b73bfc1c
KH
2601 int c;
2602 /* Dummy for ONE_MORE_BYTE. */
2603 struct coding_system dummy_coding;
2604 struct coding_system *coding = &dummy_coding;
1397dc18
KH
2605
2606 /* No coding system is assigned to coding-category-ccl. */
2607 if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2608 return 0;
2609
2610 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
b73bfc1c 2611 while (1)
1397dc18 2612 {
b73bfc1c
KH
2613 ONE_MORE_BYTE (c);
2614 if (! valid[c])
2615 return 0;
1397dc18 2616 }
b73bfc1c 2617 label_end_of_loop:
1397dc18
KH
2618 return CODING_CATEGORY_MASK_CCL;
2619}
2620
2621\f
2622/*** 6. End-of-line handlers ***/
4ed46869 2623
b73bfc1c 2624/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 2625
b73bfc1c 2626static void
d46c5b12 2627decode_eol (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
2628 struct coding_system *coding;
2629 unsigned char *source, *destination;
2630 int src_bytes, dst_bytes;
4ed46869
KH
2631{
2632 unsigned char *src = source;
4ed46869 2633 unsigned char *dst = destination;
b73bfc1c
KH
2634 unsigned char *src_end = src + src_bytes;
2635 unsigned char *dst_end = dst + dst_bytes;
2636 Lisp_Object translation_table;
2637 /* SRC_BASE remembers the start position in source in each loop.
2638 The loop will be exited when there's not enough source code
2639 (within macro ONE_MORE_BYTE), or when there's not enough
2640 destination area to produce a character (within macro
2641 EMIT_CHAR). */
2642 unsigned char *src_base;
2643 int c;
2644
2645 translation_table = Qnil;
4ed46869
KH
2646 switch (coding->eol_type)
2647 {
2648 case CODING_EOL_CRLF:
b73bfc1c 2649 while (1)
d46c5b12 2650 {
b73bfc1c
KH
2651 src_base = src;
2652 ONE_MORE_BYTE (c);
2653 if (c == '\r')
fb88bf2d 2654 {
b73bfc1c
KH
2655 ONE_MORE_BYTE (c);
2656 if (c != '\n')
2657 {
2658 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2659 {
2660 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2661 goto label_end_of_loop;
2662 }
2663 src--;
2664 c = '\r';
2665 }
fb88bf2d 2666 }
b73bfc1c
KH
2667 else if (c == '\n'
2668 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
d46c5b12 2669 {
b73bfc1c
KH
2670 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2671 goto label_end_of_loop;
d46c5b12 2672 }
b73bfc1c 2673 EMIT_CHAR (c);
d46c5b12 2674 }
b73bfc1c
KH
2675 break;
2676
2677 case CODING_EOL_CR:
2678 while (1)
d46c5b12 2679 {
b73bfc1c
KH
2680 src_base = src;
2681 ONE_MORE_BYTE (c);
2682 if (c == '\n')
2683 {
2684 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2685 {
2686 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2687 goto label_end_of_loop;
2688 }
2689 }
2690 else if (c == '\r')
2691 c = '\n';
2692 EMIT_CHAR (c);
d46c5b12 2693 }
4ed46869
KH
2694 break;
2695
b73bfc1c
KH
2696 default: /* no need for EOL handling */
2697 while (1)
d46c5b12 2698 {
b73bfc1c
KH
2699 src_base = src;
2700 ONE_MORE_BYTE (c);
2701 EMIT_CHAR (c);
d46c5b12 2702 }
4ed46869
KH
2703 }
2704
b73bfc1c
KH
2705 label_end_of_loop:
2706 coding->consumed = coding->consumed_char = src_base - source;
2707 coding->produced = dst - destination;
2708 return;
4ed46869
KH
2709}
2710
2711/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
b73bfc1c
KH
2712 format of end-of-line according to `coding->eol_type'. It also
2713 convert multibyte form 8-bit characers to unibyte if
2714 CODING->src_multibyte is nonzero. If `coding->mode &
2715 CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2716 also means end-of-line. */
4ed46869 2717
b73bfc1c 2718static void
d46c5b12 2719encode_eol (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
2720 struct coding_system *coding;
2721 unsigned char *source, *destination;
2722 int src_bytes, dst_bytes;
4ed46869
KH
2723{
2724 unsigned char *src = source;
2725 unsigned char *dst = destination;
b73bfc1c
KH
2726 unsigned char *src_end = src + src_bytes;
2727 unsigned char *dst_end = dst + dst_bytes;
2728 Lisp_Object translation_table;
2729 /* SRC_BASE remembers the start position in source in each loop.
2730 The loop will be exited when there's not enough source text to
2731 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2732 there's not enough destination area to produce encoded codes
2733 (within macro EMIT_BYTES). */
2734 unsigned char *src_base;
2735 int c;
2736 int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
2737
2738 translation_table = Qnil;
2739 if (coding->src_multibyte
2740 && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
2741 {
2742 src_end--;
2743 src_bytes--;
2744 coding->result = CODING_FINISH_INSUFFICIENT_SRC;
2745 }
fb88bf2d 2746
d46c5b12
KH
2747 if (coding->eol_type == CODING_EOL_CRLF)
2748 {
b73bfc1c 2749 while (src < src_end)
d46c5b12 2750 {
b73bfc1c 2751 src_base = src;
d46c5b12 2752 c = *src++;
b73bfc1c
KH
2753 if (c >= 0x20)
2754 EMIT_ONE_BYTE (c);
2755 else if (c == '\n' || (c == '\r' && selective_display))
2756 EMIT_TWO_BYTES ('\r', '\n');
d46c5b12 2757 else
b73bfc1c 2758 EMIT_ONE_BYTE (c);
d46c5b12 2759 }
ff2b1ea9 2760 src_base = src;
b73bfc1c 2761 label_end_of_loop:
005f0d35 2762 ;
d46c5b12
KH
2763 }
2764 else
4ed46869 2765 {
b73bfc1c 2766 if (src_bytes <= dst_bytes)
4ed46869 2767 {
b73bfc1c
KH
2768 safe_bcopy (src, dst, src_bytes);
2769 src_base = src_end;
2770 dst += src_bytes;
d46c5b12 2771 }
d46c5b12 2772 else
b73bfc1c
KH
2773 {
2774 if (coding->src_multibyte
2775 && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
2776 dst_bytes--;
2777 safe_bcopy (src, dst, dst_bytes);
2778 src_base = src + dst_bytes;
2779 dst = destination + dst_bytes;
2780 coding->result = CODING_FINISH_INSUFFICIENT_DST;
2781 }
993824c9 2782 if (coding->eol_type == CODING_EOL_CR)
d46c5b12 2783 {
b73bfc1c
KH
2784 for (src = destination; src < dst; src++)
2785 if (*src == '\n') *src = '\r';
d46c5b12 2786 }
b73bfc1c 2787 else if (selective_display)
d46c5b12 2788 {
b73bfc1c
KH
2789 for (src = destination; src < dst; src++)
2790 if (*src == '\r') *src = '\n';
4ed46869 2791 }
4ed46869 2792 }
b73bfc1c
KH
2793 if (coding->src_multibyte)
2794 dst = destination + str_as_unibyte (destination, dst - destination);
4ed46869 2795
b73bfc1c
KH
2796 coding->consumed = src_base - source;
2797 coding->produced = dst - destination;
4ed46869
KH
2798}
2799
2800\f
1397dc18 2801/*** 7. C library functions ***/
4ed46869
KH
2802
2803/* In Emacs Lisp, coding system is represented by a Lisp symbol which
2804 has a property `coding-system'. The value of this property is a
2805 vector of length 5 (called as coding-vector). Among elements of
2806 this vector, the first (element[0]) and the fifth (element[4])
2807 carry important information for decoding/encoding. Before
2808 decoding/encoding, this information should be set in fields of a
2809 structure of type `coding_system'.
2810
2811 A value of property `coding-system' can be a symbol of another
2812 subsidiary coding-system. In that case, Emacs gets coding-vector
2813 from that symbol.
2814
2815 `element[0]' contains information to be set in `coding->type'. The
2816 value and its meaning is as follows:
2817
0ef69138
KH
2818 0 -- coding_type_emacs_mule
2819 1 -- coding_type_sjis
2820 2 -- coding_type_iso2022
2821 3 -- coding_type_big5
2822 4 -- coding_type_ccl encoder/decoder written in CCL
2823 nil -- coding_type_no_conversion
2824 t -- coding_type_undecided (automatic conversion on decoding,
2825 no-conversion on encoding)
4ed46869
KH
2826
2827 `element[4]' contains information to be set in `coding->flags' and
2828 `coding->spec'. The meaning varies by `coding->type'.
2829
2830 If `coding->type' is `coding_type_iso2022', element[4] is a vector
2831 of length 32 (of which the first 17 sub-elements are used now).
2832 Meanings of these sub-elements are:
2833
2834 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2835 If the value is an integer of valid charset, the charset is
2836 assumed to be designated to graphic register N initially.
2837
2838 If the value is minus, it is a minus value of charset which
2839 reserves graphic register N, which means that the charset is
2840 not designated initially but should be designated to graphic
2841 register N just before encoding a character in that charset.
2842
2843 If the value is nil, graphic register N is never used on
2844 encoding.
2845
2846 sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2847 Each value takes t or nil. See the section ISO2022 of
2848 `coding.h' for more information.
2849
2850 If `coding->type' is `coding_type_big5', element[4] is t to denote
2851 BIG5-ETen or nil to denote BIG5-HKU.
2852
2853 If `coding->type' takes the other value, element[4] is ignored.
2854
2855 Emacs Lisp's coding system also carries information about format of
2856 end-of-line in a value of property `eol-type'. If the value is
2857 integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2858 means CODING_EOL_CR. If it is not integer, it should be a vector
2859 of subsidiary coding systems of which property `eol-type' has one
2860 of above values.
2861
2862*/
2863
2864/* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2865 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
2866 is setup so that no conversion is necessary and return -1, else
2867 return 0. */
2868
2869int
e0e989f6
KH
2870setup_coding_system (coding_system, coding)
2871 Lisp_Object coding_system;
4ed46869
KH
2872 struct coding_system *coding;
2873{
d46c5b12 2874 Lisp_Object coding_spec, coding_type, eol_type, plist;
4608c386 2875 Lisp_Object val;
70c22245 2876 int i;
4ed46869 2877
d46c5b12 2878 /* Initialize some fields required for all kinds of coding systems. */
774324d6 2879 coding->symbol = coding_system;
d46c5b12
KH
2880 coding->common_flags = 0;
2881 coding->mode = 0;
2882 coding->heading_ascii = -1;
2883 coding->post_read_conversion = coding->pre_write_conversion = Qnil;
ec6d2bb8
KH
2884 coding->composing = COMPOSITION_DISABLED;
2885 coding->cmp_data = NULL;
1f5dbf34
KH
2886
2887 if (NILP (coding_system))
2888 goto label_invalid_coding_system;
2889
4608c386 2890 coding_spec = Fget (coding_system, Qcoding_system);
1f5dbf34 2891
4608c386
KH
2892 if (!VECTORP (coding_spec)
2893 || XVECTOR (coding_spec)->size != 5
2894 || !CONSP (XVECTOR (coding_spec)->contents[3]))
4ed46869 2895 goto label_invalid_coding_system;
4608c386 2896
d46c5b12
KH
2897 eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2898 if (VECTORP (eol_type))
2899 {
2900 coding->eol_type = CODING_EOL_UNDECIDED;
2901 coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2902 }
2903 else if (XFASTINT (eol_type) == 1)
2904 {
2905 coding->eol_type = CODING_EOL_CRLF;
2906 coding->common_flags
2907 = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2908 }
2909 else if (XFASTINT (eol_type) == 2)
2910 {
2911 coding->eol_type = CODING_EOL_CR;
2912 coding->common_flags
2913 = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2914 }
2915 else
2916 coding->eol_type = CODING_EOL_LF;
2917
2918 coding_type = XVECTOR (coding_spec)->contents[0];
2919 /* Try short cut. */
2920 if (SYMBOLP (coding_type))
2921 {
2922 if (EQ (coding_type, Qt))
2923 {
2924 coding->type = coding_type_undecided;
2925 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2926 }
2927 else
2928 coding->type = coding_type_no_conversion;
2929 return 0;
2930 }
2931
d46c5b12
KH
2932 /* Get values of coding system properties:
2933 `post-read-conversion', `pre-write-conversion',
f967223b 2934 `translation-table-for-decode', `translation-table-for-encode'. */
4608c386 2935 plist = XVECTOR (coding_spec)->contents[3];
b843d1ae
KH
2936 /* Pre & post conversion functions should be disabled if
2937 inhibit_eol_conversion is nozero. This is the case that a code
2938 conversion function is called while those functions are running. */
2939 if (! inhibit_pre_post_conversion)
2940 {
2941 coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2942 coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2943 }
f967223b 2944 val = Fplist_get (plist, Qtranslation_table_for_decode);
4608c386 2945 if (SYMBOLP (val))
f967223b
KH
2946 val = Fget (val, Qtranslation_table_for_decode);
2947 coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2948 val = Fplist_get (plist, Qtranslation_table_for_encode);
4608c386 2949 if (SYMBOLP (val))
f967223b
KH
2950 val = Fget (val, Qtranslation_table_for_encode);
2951 coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
d46c5b12
KH
2952 val = Fplist_get (plist, Qcoding_category);
2953 if (!NILP (val))
2954 {
2955 val = Fget (val, Qcoding_category_index);
2956 if (INTEGERP (val))
2957 coding->category_idx = XINT (val);
2958 else
2959 goto label_invalid_coding_system;
2960 }
2961 else
2962 goto label_invalid_coding_system;
4608c386 2963
70c22245
KH
2964 val = Fplist_get (plist, Qsafe_charsets);
2965 if (EQ (val, Qt))
2966 {
2967 for (i = 0; i <= MAX_CHARSET; i++)
2968 coding->safe_charsets[i] = 1;
2969 }
2970 else
2971 {
2972 bzero (coding->safe_charsets, MAX_CHARSET + 1);
2973 while (CONSP (val))
2974 {
03699b14 2975 if ((i = get_charset_id (XCAR (val))) >= 0)
70c22245 2976 coding->safe_charsets[i] = 1;
03699b14 2977 val = XCDR (val);
70c22245
KH
2978 }
2979 }
2980
ec6d2bb8
KH
2981 /* If the coding system has non-nil `composition' property, enable
2982 composition handling. */
2983 val = Fplist_get (plist, Qcomposition);
2984 if (!NILP (val))
2985 coding->composing = COMPOSITION_NO;
2986
d46c5b12 2987 switch (XFASTINT (coding_type))
4ed46869
KH
2988 {
2989 case 0:
0ef69138 2990 coding->type = coding_type_emacs_mule;
c952af22
KH
2991 if (!NILP (coding->post_read_conversion))
2992 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2993 if (!NILP (coding->pre_write_conversion))
2994 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4ed46869
KH
2995 break;
2996
2997 case 1:
2998 coding->type = coding_type_sjis;
c952af22
KH
2999 coding->common_flags
3000 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869
KH
3001 break;
3002
3003 case 2:
3004 coding->type = coding_type_iso2022;
c952af22
KH
3005 coding->common_flags
3006 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3007 {
70c22245 3008 Lisp_Object val, temp;
4ed46869 3009 Lisp_Object *flags;
d46c5b12 3010 int i, charset, reg_bits = 0;
4ed46869 3011
4608c386 3012 val = XVECTOR (coding_spec)->contents[4];
f44d27ce 3013
4ed46869
KH
3014 if (!VECTORP (val) || XVECTOR (val)->size != 32)
3015 goto label_invalid_coding_system;
3016
3017 flags = XVECTOR (val)->contents;
3018 coding->flags
3019 = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3020 | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3021 | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3022 | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3023 | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3024 | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3025 | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3026 | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
e0e989f6
KH
3027 | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3028 | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
c4825358
KH
3029 | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3030 | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3f003981 3031 | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
c4825358 3032 );
4ed46869
KH
3033
3034 /* Invoke graphic register 0 to plane 0. */
3035 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3036 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
3037 CODING_SPEC_ISO_INVOCATION (coding, 1)
3038 = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3039 /* Not single shifting at first. */
6e85d753 3040 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
e0e989f6 3041 /* Beginning of buffer should also be regarded as bol. */
6e85d753 3042 CODING_SPEC_ISO_BOL (coding) = 1;
4ed46869 3043
70c22245
KH
3044 for (charset = 0; charset <= MAX_CHARSET; charset++)
3045 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3046 val = Vcharset_revision_alist;
3047 while (CONSP (val))
3048 {
03699b14 3049 charset = get_charset_id (Fcar_safe (XCAR (val)));
70c22245 3050 if (charset >= 0
03699b14 3051 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
70c22245
KH
3052 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3053 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
03699b14 3054 val = XCDR (val);
70c22245
KH
3055 }
3056
4ed46869
KH
3057 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3058 FLAGS[REG] can be one of below:
3059 integer CHARSET: CHARSET occupies register I,
3060 t: designate nothing to REG initially, but can be used
3061 by any charsets,
3062 list of integer, nil, or t: designate the first
3063 element (if integer) to REG initially, the remaining
3064 elements (if integer) is designated to REG on request,
d46c5b12 3065 if an element is t, REG can be used by any charsets,
4ed46869 3066 nil: REG is never used. */
467e7675 3067 for (charset = 0; charset <= MAX_CHARSET; charset++)
1ba9e4ab
KH
3068 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3069 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
4ed46869
KH
3070 for (i = 0; i < 4; i++)
3071 {
3072 if (INTEGERP (flags[i])
e0e989f6
KH
3073 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3074 || (charset = get_charset_id (flags[i])) >= 0)
4ed46869
KH
3075 {
3076 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3077 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3078 }
3079 else if (EQ (flags[i], Qt))
3080 {
3081 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
d46c5b12
KH
3082 reg_bits |= 1 << i;
3083 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
4ed46869
KH
3084 }
3085 else if (CONSP (flags[i]))
3086 {
84d60297
RS
3087 Lisp_Object tail;
3088 tail = flags[i];
4ed46869 3089
d46c5b12 3090 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
03699b14
KR
3091 if (INTEGERP (XCAR (tail))
3092 && (charset = XINT (XCAR (tail)),
e0e989f6 3093 CHARSET_VALID_P (charset))
03699b14 3094 || (charset = get_charset_id (XCAR (tail))) >= 0)
4ed46869
KH
3095 {
3096 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3097 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3098 }
3099 else
3100 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
03699b14 3101 tail = XCDR (tail);
4ed46869
KH
3102 while (CONSP (tail))
3103 {
03699b14
KR
3104 if (INTEGERP (XCAR (tail))
3105 && (charset = XINT (XCAR (tail)),
e0e989f6 3106 CHARSET_VALID_P (charset))
03699b14 3107 || (charset = get_charset_id (XCAR (tail))) >= 0)
70c22245
KH
3108 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3109 = i;
03699b14 3110 else if (EQ (XCAR (tail), Qt))
d46c5b12 3111 reg_bits |= 1 << i;
03699b14 3112 tail = XCDR (tail);
4ed46869
KH
3113 }
3114 }
3115 else
3116 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3117
3118 CODING_SPEC_ISO_DESIGNATION (coding, i)
3119 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3120 }
3121
d46c5b12 3122 if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
4ed46869
KH
3123 {
3124 /* REG 1 can be used only by locking shift in 7-bit env. */
3125 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
d46c5b12 3126 reg_bits &= ~2;
4ed46869
KH
3127 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3128 /* Without any shifting, only REG 0 and 1 can be used. */
d46c5b12 3129 reg_bits &= 3;
4ed46869
KH
3130 }
3131
d46c5b12
KH
3132 if (reg_bits)
3133 for (charset = 0; charset <= MAX_CHARSET; charset++)
6e85d753 3134 {
d46c5b12
KH
3135 if (CHARSET_VALID_P (charset))
3136 {
3137 /* There exist some default graphic registers to be
3138 used CHARSET. */
3139
3140 /* We had better avoid designating a charset of
3141 CHARS96 to REG 0 as far as possible. */
3142 if (CHARSET_CHARS (charset) == 96)
3143 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3144 = (reg_bits & 2
3145 ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3146 else
3147 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3148 = (reg_bits & 1
3149 ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3150 }
6e85d753 3151 }
4ed46869 3152 }
c952af22 3153 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
d46c5b12 3154 coding->spec.iso2022.last_invalid_designation_register = -1;
4ed46869
KH
3155 break;
3156
3157 case 3:
3158 coding->type = coding_type_big5;
c952af22
KH
3159 coding->common_flags
3160 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3161 coding->flags
4608c386 3162 = (NILP (XVECTOR (coding_spec)->contents[4])
4ed46869
KH
3163 ? CODING_FLAG_BIG5_HKU
3164 : CODING_FLAG_BIG5_ETEN);
3165 break;
3166
3167 case 4:
3168 coding->type = coding_type_ccl;
c952af22
KH
3169 coding->common_flags
3170 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3171 {
84d60297 3172 val = XVECTOR (coding_spec)->contents[4];
ef4ced28
KH
3173 if (! CONSP (val)
3174 || setup_ccl_program (&(coding->spec.ccl.decoder),
03699b14 3175 XCAR (val)) < 0
ef4ced28 3176 || setup_ccl_program (&(coding->spec.ccl.encoder),
03699b14 3177 XCDR (val)) < 0)
4ed46869 3178 goto label_invalid_coding_system;
1397dc18
KH
3179
3180 bzero (coding->spec.ccl.valid_codes, 256);
3181 val = Fplist_get (plist, Qvalid_codes);
3182 if (CONSP (val))
3183 {
3184 Lisp_Object this;
3185
03699b14 3186 for (; CONSP (val); val = XCDR (val))
1397dc18 3187 {
03699b14 3188 this = XCAR (val);
1397dc18
KH
3189 if (INTEGERP (this)
3190 && XINT (this) >= 0 && XINT (this) < 256)
3191 coding->spec.ccl.valid_codes[XINT (this)] = 1;
3192 else if (CONSP (this)
03699b14
KR
3193 && INTEGERP (XCAR (this))
3194 && INTEGERP (XCDR (this)))
1397dc18 3195 {
03699b14
KR
3196 int start = XINT (XCAR (this));
3197 int end = XINT (XCDR (this));
1397dc18
KH
3198
3199 if (start >= 0 && start <= end && end < 256)
e133c8fa 3200 while (start <= end)
1397dc18
KH
3201 coding->spec.ccl.valid_codes[start++] = 1;
3202 }
3203 }
3204 }
4ed46869 3205 }
c952af22 3206 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
4ed46869
KH
3207 break;
3208
27901516
KH
3209 case 5:
3210 coding->type = coding_type_raw_text;
3211 break;
3212
4ed46869 3213 default:
d46c5b12 3214 goto label_invalid_coding_system;
4ed46869
KH
3215 }
3216 return 0;
3217
3218 label_invalid_coding_system:
3219 coding->type = coding_type_no_conversion;
d46c5b12 3220 coding->category_idx = CODING_CATEGORY_IDX_BINARY;
c952af22 3221 coding->common_flags = 0;
dec137e5 3222 coding->eol_type = CODING_EOL_LF;
d46c5b12 3223 coding->pre_write_conversion = coding->post_read_conversion = Qnil;
4ed46869
KH
3224 return -1;
3225}
3226
ec6d2bb8
KH
3227/* Free memory blocks allocated for storing composition information. */
3228
3229void
3230coding_free_composition_data (coding)
3231 struct coding_system *coding;
3232{
3233 struct composition_data *cmp_data = coding->cmp_data, *next;
3234
3235 if (!cmp_data)
3236 return;
3237 /* Memory blocks are chained. At first, rewind to the first, then,
3238 free blocks one by one. */
3239 while (cmp_data->prev)
3240 cmp_data = cmp_data->prev;
3241 while (cmp_data)
3242 {
3243 next = cmp_data->next;
3244 xfree (cmp_data);
3245 cmp_data = next;
3246 }
3247 coding->cmp_data = NULL;
3248}
3249
3250/* Set `char_offset' member of all memory blocks pointed by
3251 coding->cmp_data to POS. */
3252
3253void
3254coding_adjust_composition_offset (coding, pos)
3255 struct coding_system *coding;
3256 int pos;
3257{
3258 struct composition_data *cmp_data;
3259
3260 for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3261 cmp_data->char_offset = pos;
3262}
3263
54f78171
KH
3264/* Setup raw-text or one of its subsidiaries in the structure
3265 coding_system CODING according to the already setup value eol_type
3266 in CODING. CODING should be setup for some coding system in
3267 advance. */
3268
3269void
3270setup_raw_text_coding_system (coding)
3271 struct coding_system *coding;
3272{
3273 if (coding->type != coding_type_raw_text)
3274 {
3275 coding->symbol = Qraw_text;
3276 coding->type = coding_type_raw_text;
3277 if (coding->eol_type != CODING_EOL_UNDECIDED)
3278 {
84d60297
RS
3279 Lisp_Object subsidiaries;
3280 subsidiaries = Fget (Qraw_text, Qeol_type);
54f78171
KH
3281
3282 if (VECTORP (subsidiaries)
3283 && XVECTOR (subsidiaries)->size == 3)
3284 coding->symbol
3285 = XVECTOR (subsidiaries)->contents[coding->eol_type];
3286 }
716e0b0a 3287 setup_coding_system (coding->symbol, coding);
54f78171
KH
3288 }
3289 return;
3290}
3291
4ed46869
KH
3292/* Emacs has a mechanism to automatically detect a coding system if it
3293 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
3294 it's impossible to distinguish some coding systems accurately
3295 because they use the same range of codes. So, at first, coding
3296 systems are categorized as follows:
3297
0ef69138 3298 o coding-category-emacs-mule
4ed46869
KH
3299
3300 The category for a coding system which has the same code range
3301 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 3302 symbol) `emacs-mule' by default.
4ed46869
KH
3303
3304 o coding-category-sjis
3305
3306 The category for a coding system which has the same code range
3307 as SJIS. Assigned the coding-system (Lisp
7717c392 3308 symbol) `japanese-shift-jis' by default.
4ed46869
KH
3309
3310 o coding-category-iso-7
3311
3312 The category for a coding system which has the same code range
7717c392 3313 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
3314 shift and single shift functions. This can encode/decode all
3315 charsets. Assigned the coding-system (Lisp symbol)
3316 `iso-2022-7bit' by default.
3317
3318 o coding-category-iso-7-tight
3319
3320 Same as coding-category-iso-7 except that this can
3321 encode/decode only the specified charsets.
4ed46869
KH
3322
3323 o coding-category-iso-8-1
3324
3325 The category for a coding system which has the same code range
3326 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
3327 for DIMENSION1 charset. This doesn't use any locking shift
3328 and single shift functions. Assigned the coding-system (Lisp
3329 symbol) `iso-latin-1' by default.
4ed46869
KH
3330
3331 o coding-category-iso-8-2
3332
3333 The category for a coding system which has the same code range
3334 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
3335 for DIMENSION2 charset. This doesn't use any locking shift
3336 and single shift functions. Assigned the coding-system (Lisp
3337 symbol) `japanese-iso-8bit' by default.
4ed46869 3338
7717c392 3339 o coding-category-iso-7-else
4ed46869
KH
3340
3341 The category for a coding system which has the same code range
7717c392
KH
3342 as ISO2022 of 7-bit environment but uses locking shift or
3343 single shift functions. Assigned the coding-system (Lisp
3344 symbol) `iso-2022-7bit-lock' by default.
3345
3346 o coding-category-iso-8-else
3347
3348 The category for a coding system which has the same code range
3349 as ISO2022 of 8-bit environment but uses locking shift or
3350 single shift functions. Assigned the coding-system (Lisp
3351 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
3352
3353 o coding-category-big5
3354
3355 The category for a coding system which has the same code range
3356 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 3357 `cn-big5' by default.
4ed46869 3358
fa42c37f
KH
3359 o coding-category-utf-8
3360
3361 The category for a coding system which has the same code range
3362 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
3363 symbol) `utf-8' by default.
3364
3365 o coding-category-utf-16-be
3366
3367 The category for a coding system in which a text has an
3368 Unicode signature (cf. Unicode Standard) in the order of BIG
3369 endian at the head. Assigned the coding-system (Lisp symbol)
3370 `utf-16-be' by default.
3371
3372 o coding-category-utf-16-le
3373
3374 The category for a coding system in which a text has an
3375 Unicode signature (cf. Unicode Standard) in the order of
3376 LITTLE endian at the head. Assigned the coding-system (Lisp
3377 symbol) `utf-16-le' by default.
3378
1397dc18
KH
3379 o coding-category-ccl
3380
3381 The category for a coding system of which encoder/decoder is
3382 written in CCL programs. The default value is nil, i.e., no
3383 coding system is assigned.
3384
4ed46869
KH
3385 o coding-category-binary
3386
3387 The category for a coding system not categorized in any of the
3388 above. Assigned the coding-system (Lisp symbol)
e0e989f6 3389 `no-conversion' by default.
4ed46869
KH
3390
3391 Each of them is a Lisp symbol and the value is an actual
3392 `coding-system's (this is also a Lisp symbol) assigned by a user.
3393 What Emacs does actually is to detect a category of coding system.
3394 Then, it uses a `coding-system' assigned to it. If Emacs can't
3395 decide only one possible category, it selects a category of the
3396 highest priority. Priorities of categories are also specified by a
3397 user in a Lisp variable `coding-category-list'.
3398
3399*/
3400
66cfb530
KH
3401static
3402int ascii_skip_code[256];
3403
d46c5b12 3404/* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4ed46869
KH
3405 If it detects possible coding systems, return an integer in which
3406 appropriate flag bits are set. Flag bits are defined by macros
fa42c37f
KH
3407 CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
3408 it should point the table `coding_priorities'. In that case, only
3409 the flag bit for a coding system of the highest priority is set in
3410 the returned value.
4ed46869 3411
d46c5b12
KH
3412 How many ASCII characters are at the head is returned as *SKIP. */
3413
3414static int
3415detect_coding_mask (source, src_bytes, priorities, skip)
3416 unsigned char *source;
3417 int src_bytes, *priorities, *skip;
4ed46869
KH
3418{
3419 register unsigned char c;
d46c5b12 3420 unsigned char *src = source, *src_end = source + src_bytes;
fa42c37f
KH
3421 unsigned int mask, utf16_examined_p, iso2022_examined_p;
3422 int i, idx;
4ed46869
KH
3423
3424 /* At first, skip all ASCII characters and control characters except
3425 for three ISO2022 specific control characters. */
66cfb530
KH
3426 ascii_skip_code[ISO_CODE_SO] = 0;
3427 ascii_skip_code[ISO_CODE_SI] = 0;
3428 ascii_skip_code[ISO_CODE_ESC] = 0;
3429
bcf26d6a 3430 label_loop_detect_coding:
66cfb530 3431 while (src < src_end && ascii_skip_code[*src]) src++;
d46c5b12 3432 *skip = src - source;
4ed46869
KH
3433
3434 if (src >= src_end)
3435 /* We found nothing other than ASCII. There's nothing to do. */
d46c5b12 3436 return 0;
4ed46869 3437
8a8147d6 3438 c = *src;
4ed46869
KH
3439 /* The text seems to be encoded in some multilingual coding system.
3440 Now, try to find in which coding system the text is encoded. */
3441 if (c < 0x80)
bcf26d6a
KH
3442 {
3443 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3444 /* C is an ISO2022 specific control code of C0. */
3445 mask = detect_coding_iso2022 (src, src_end);
1b2af4b0 3446 if (mask == 0)
d46c5b12
KH
3447 {
3448 /* No valid ISO2022 code follows C. Try again. */
3449 src++;
66cfb530
KH
3450 if (c == ISO_CODE_ESC)
3451 ascii_skip_code[ISO_CODE_ESC] = 1;
3452 else
3453 ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
d46c5b12
KH
3454 goto label_loop_detect_coding;
3455 }
3456 if (priorities)
fa42c37f
KH
3457 {
3458 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3459 {
3460 if (mask & priorities[i])
3461 return priorities[i];
3462 }
3463 return CODING_CATEGORY_MASK_RAW_TEXT;
3464 }
bcf26d6a 3465 }
d46c5b12 3466 else
c4825358 3467 {
d46c5b12 3468 int try;
4ed46869 3469
d46c5b12
KH
3470 if (c < 0xA0)
3471 {
3472 /* C is the first byte of SJIS character code,
fa42c37f
KH
3473 or a leading-code of Emacs' internal format (emacs-mule),
3474 or the first byte of UTF-16. */
3475 try = (CODING_CATEGORY_MASK_SJIS
3476 | CODING_CATEGORY_MASK_EMACS_MULE
3477 | CODING_CATEGORY_MASK_UTF_16_BE
3478 | CODING_CATEGORY_MASK_UTF_16_LE);
d46c5b12
KH
3479
3480 /* Or, if C is a special latin extra code,
3481 or is an ISO2022 specific control code of C1 (SS2 or SS3),
3482 or is an ISO2022 control-sequence-introducer (CSI),
3483 we should also consider the possibility of ISO2022 codings. */
3484 if ((VECTORP (Vlatin_extra_code_table)
3485 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3486 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3487 || (c == ISO_CODE_CSI
3488 && (src < src_end
3489 && (*src == ']'
3490 || ((*src == '0' || *src == '1' || *src == '2')
3491 && src + 1 < src_end
3492 && src[1] == ']')))))
3493 try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3494 | CODING_CATEGORY_MASK_ISO_8BIT);
3495 }
c4825358 3496 else
d46c5b12
KH
3497 /* C is a character of ISO2022 in graphic plane right,
3498 or a SJIS's 1-byte character code (i.e. JISX0201),
fa42c37f
KH
3499 or the first byte of BIG5's 2-byte code,
3500 or the first byte of UTF-8/16. */
d46c5b12
KH
3501 try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3502 | CODING_CATEGORY_MASK_ISO_8BIT
3503 | CODING_CATEGORY_MASK_SJIS
fa42c37f
KH
3504 | CODING_CATEGORY_MASK_BIG5
3505 | CODING_CATEGORY_MASK_UTF_8
3506 | CODING_CATEGORY_MASK_UTF_16_BE
3507 | CODING_CATEGORY_MASK_UTF_16_LE);
d46c5b12 3508
1397dc18
KH
3509 /* Or, we may have to consider the possibility of CCL. */
3510 if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3511 && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3512 ->spec.ccl.valid_codes)[c])
3513 try |= CODING_CATEGORY_MASK_CCL;
3514
d46c5b12 3515 mask = 0;
fa42c37f 3516 utf16_examined_p = iso2022_examined_p = 0;
d46c5b12
KH
3517 if (priorities)
3518 {
3519 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3520 {
fa42c37f
KH
3521 if (!iso2022_examined_p
3522 && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
3523 {
3524 mask |= detect_coding_iso2022 (src, src_end);
3525 iso2022_examined_p = 1;
3526 }
5ab13dd0 3527 else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
fa42c37f
KH
3528 mask |= detect_coding_sjis (src, src_end);
3529 else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3530 mask |= detect_coding_utf_8 (src, src_end);
3531 else if (!utf16_examined_p
3532 && (priorities[i] & try &
3533 CODING_CATEGORY_MASK_UTF_16_BE_LE))
3534 {
3535 mask |= detect_coding_utf_16 (src, src_end);
3536 utf16_examined_p = 1;
3537 }
5ab13dd0 3538 else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
fa42c37f 3539 mask |= detect_coding_big5 (src, src_end);
5ab13dd0 3540 else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
fa42c37f 3541 mask |= detect_coding_emacs_mule (src, src_end);
89fa8b36 3542 else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
fa42c37f 3543 mask |= detect_coding_ccl (src, src_end);
5ab13dd0 3544 else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
fa42c37f 3545 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
5ab13dd0 3546 else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
fa42c37f
KH
3547 mask |= CODING_CATEGORY_MASK_BINARY;
3548 if (mask & priorities[i])
3549 return priorities[i];
d46c5b12
KH
3550 }
3551 return CODING_CATEGORY_MASK_RAW_TEXT;
3552 }
3553 if (try & CODING_CATEGORY_MASK_ISO)
3554 mask |= detect_coding_iso2022 (src, src_end);
3555 if (try & CODING_CATEGORY_MASK_SJIS)
3556 mask |= detect_coding_sjis (src, src_end);
3557 if (try & CODING_CATEGORY_MASK_BIG5)
3558 mask |= detect_coding_big5 (src, src_end);
fa42c37f
KH
3559 if (try & CODING_CATEGORY_MASK_UTF_8)
3560 mask |= detect_coding_utf_8 (src, src_end);
3561 if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3562 mask |= detect_coding_utf_16 (src, src_end);
d46c5b12 3563 if (try & CODING_CATEGORY_MASK_EMACS_MULE)
1397dc18
KH
3564 mask |= detect_coding_emacs_mule (src, src_end);
3565 if (try & CODING_CATEGORY_MASK_CCL)
3566 mask |= detect_coding_ccl (src, src_end);
c4825358 3567 }
5ab13dd0 3568 return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4ed46869
KH
3569}
3570
3571/* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3572 The information of the detected coding system is set in CODING. */
3573
3574void
3575detect_coding (coding, src, src_bytes)
3576 struct coding_system *coding;
3577 unsigned char *src;
3578 int src_bytes;
3579{
d46c5b12
KH
3580 unsigned int idx;
3581 int skip, mask, i;
84d60297 3582 Lisp_Object val;
4ed46869 3583
84d60297 3584 val = Vcoding_category_list;
66cfb530 3585 mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
d46c5b12 3586 coding->heading_ascii = skip;
4ed46869 3587
d46c5b12
KH
3588 if (!mask) return;
3589
3590 /* We found a single coding system of the highest priority in MASK. */
3591 idx = 0;
3592 while (mask && ! (mask & 1)) mask >>= 1, idx++;
3593 if (! mask)
3594 idx = CODING_CATEGORY_IDX_RAW_TEXT;
4ed46869 3595
d46c5b12
KH
3596 val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3597
3598 if (coding->eol_type != CODING_EOL_UNDECIDED)
27901516 3599 {
84d60297 3600 Lisp_Object tmp;
d46c5b12 3601
84d60297 3602 tmp = Fget (val, Qeol_type);
d46c5b12
KH
3603 if (VECTORP (tmp))
3604 val = XVECTOR (tmp)->contents[coding->eol_type];
4ed46869 3605 }
b73bfc1c
KH
3606
3607 /* Setup this new coding system while preserving some slots. */
3608 {
3609 int src_multibyte = coding->src_multibyte;
3610 int dst_multibyte = coding->dst_multibyte;
3611
3612 setup_coding_system (val, coding);
3613 coding->src_multibyte = src_multibyte;
3614 coding->dst_multibyte = dst_multibyte;
3615 coding->heading_ascii = skip;
3616 }
4ed46869
KH
3617}
3618
d46c5b12
KH
3619/* Detect how end-of-line of a text of length SRC_BYTES pointed by
3620 SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3621 CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3622
3623 How many non-eol characters are at the head is returned as *SKIP. */
4ed46869 3624
bc4bc72a
RS
3625#define MAX_EOL_CHECK_COUNT 3
3626
d46c5b12
KH
3627static int
3628detect_eol_type (source, src_bytes, skip)
3629 unsigned char *source;
3630 int src_bytes, *skip;
4ed46869 3631{
d46c5b12 3632 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 3633 unsigned char c;
bc4bc72a
RS
3634 int total = 0; /* How many end-of-lines are found so far. */
3635 int eol_type = CODING_EOL_UNDECIDED;
3636 int this_eol_type;
4ed46869 3637
d46c5b12
KH
3638 *skip = 0;
3639
bc4bc72a 3640 while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4ed46869
KH
3641 {
3642 c = *src++;
bc4bc72a 3643 if (c == '\n' || c == '\r')
4ed46869 3644 {
d46c5b12
KH
3645 if (*skip == 0)
3646 *skip = src - 1 - source;
bc4bc72a
RS
3647 total++;
3648 if (c == '\n')
3649 this_eol_type = CODING_EOL_LF;
3650 else if (src >= src_end || *src != '\n')
3651 this_eol_type = CODING_EOL_CR;
4ed46869 3652 else
bc4bc72a
RS
3653 this_eol_type = CODING_EOL_CRLF, src++;
3654
3655 if (eol_type == CODING_EOL_UNDECIDED)
3656 /* This is the first end-of-line. */
3657 eol_type = this_eol_type;
3658 else if (eol_type != this_eol_type)
d46c5b12
KH
3659 {
3660 /* The found type is different from what found before. */
3661 eol_type = CODING_EOL_INCONSISTENT;
3662 break;
3663 }
4ed46869
KH
3664 }
3665 }
bc4bc72a 3666
d46c5b12
KH
3667 if (*skip == 0)
3668 *skip = src_end - source;
85a02ca4 3669 return eol_type;
4ed46869
KH
3670}
3671
fa42c37f
KH
3672/* Like detect_eol_type, but detect EOL type in 2-octet
3673 big-endian/little-endian format for coding systems utf-16-be and
3674 utf-16-le. */
3675
3676static int
3677detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
3678 unsigned char *source;
3679 int src_bytes, *skip;
3680{
3681 unsigned char *src = source, *src_end = src + src_bytes;
3682 unsigned int c1, c2;
3683 int total = 0; /* How many end-of-lines are found so far. */
3684 int eol_type = CODING_EOL_UNDECIDED;
3685 int this_eol_type;
3686 int msb, lsb;
3687
3688 if (big_endian_p)
3689 msb = 0, lsb = 1;
3690 else
3691 msb = 1, lsb = 0;
3692
3693 *skip = 0;
3694
3695 while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
3696 {
3697 c1 = (src[msb] << 8) | (src[lsb]);
3698 src += 2;
3699
3700 if (c1 == '\n' || c1 == '\r')
3701 {
3702 if (*skip == 0)
3703 *skip = src - 2 - source;
3704 total++;
3705 if (c1 == '\n')
3706 {
3707 this_eol_type = CODING_EOL_LF;
3708 }
3709 else
3710 {
3711 if ((src + 1) >= src_end)
3712 {
3713 this_eol_type = CODING_EOL_CR;
3714 }
3715 else
3716 {
3717 c2 = (src[msb] << 8) | (src[lsb]);
3718 if (c2 == '\n')
3719 this_eol_type = CODING_EOL_CRLF, src += 2;
3720 else
3721 this_eol_type = CODING_EOL_CR;
3722 }
3723 }
3724
3725 if (eol_type == CODING_EOL_UNDECIDED)
3726 /* This is the first end-of-line. */
3727 eol_type = this_eol_type;
3728 else if (eol_type != this_eol_type)
3729 {
3730 /* The found type is different from what found before. */
3731 eol_type = CODING_EOL_INCONSISTENT;
3732 break;
3733 }
3734 }
3735 }
3736
3737 if (*skip == 0)
3738 *skip = src_end - source;
3739 return eol_type;
3740}
3741
4ed46869
KH
3742/* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3743 is encoded. If it detects an appropriate format of end-of-line, it
3744 sets the information in *CODING. */
3745
3746void
3747detect_eol (coding, src, src_bytes)
3748 struct coding_system *coding;
3749 unsigned char *src;
3750 int src_bytes;
3751{
4608c386 3752 Lisp_Object val;
d46c5b12 3753 int skip;
fa42c37f
KH
3754 int eol_type;
3755
3756 switch (coding->category_idx)
3757 {
3758 case CODING_CATEGORY_IDX_UTF_16_BE:
3759 eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
3760 break;
3761 case CODING_CATEGORY_IDX_UTF_16_LE:
3762 eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
3763 break;
3764 default:
3765 eol_type = detect_eol_type (src, src_bytes, &skip);
3766 break;
3767 }
d46c5b12
KH
3768
3769 if (coding->heading_ascii > skip)
3770 coding->heading_ascii = skip;
3771 else
3772 skip = coding->heading_ascii;
4ed46869 3773
0ef69138 3774 if (eol_type == CODING_EOL_UNDECIDED)
4ed46869 3775 return;
27901516
KH
3776 if (eol_type == CODING_EOL_INCONSISTENT)
3777 {
3778#if 0
3779 /* This code is suppressed until we find a better way to
992f23f2 3780 distinguish raw text file and binary file. */
27901516
KH
3781
3782 /* If we have already detected that the coding is raw-text, the
3783 coding should actually be no-conversion. */
3784 if (coding->type == coding_type_raw_text)
3785 {
3786 setup_coding_system (Qno_conversion, coding);
3787 return;
3788 }
3789 /* Else, let's decode only text code anyway. */
3790#endif /* 0 */
1b2af4b0 3791 eol_type = CODING_EOL_LF;
27901516
KH
3792 }
3793
4608c386 3794 val = Fget (coding->symbol, Qeol_type);
4ed46869 3795 if (VECTORP (val) && XVECTOR (val)->size == 3)
d46c5b12 3796 {
b73bfc1c
KH
3797 int src_multibyte = coding->src_multibyte;
3798 int dst_multibyte = coding->dst_multibyte;
3799
d46c5b12 3800 setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
b73bfc1c
KH
3801 coding->src_multibyte = src_multibyte;
3802 coding->dst_multibyte = dst_multibyte;
d46c5b12
KH
3803 coding->heading_ascii = skip;
3804 }
3805}
3806
3807#define CONVERSION_BUFFER_EXTRA_ROOM 256
3808
b73bfc1c
KH
3809#define DECODING_BUFFER_MAG(coding) \
3810 (coding->type == coding_type_iso2022 \
3811 ? 3 \
3812 : (coding->type == coding_type_ccl \
3813 ? coding->spec.ccl.decoder.buf_magnification \
3814 : 2))
d46c5b12
KH
3815
3816/* Return maximum size (bytes) of a buffer enough for decoding
3817 SRC_BYTES of text encoded in CODING. */
3818
3819int
3820decoding_buffer_size (coding, src_bytes)
3821 struct coding_system *coding;
3822 int src_bytes;
3823{
3824 return (src_bytes * DECODING_BUFFER_MAG (coding)
3825 + CONVERSION_BUFFER_EXTRA_ROOM);
3826}
3827
3828/* Return maximum size (bytes) of a buffer enough for encoding
3829 SRC_BYTES of text to CODING. */
3830
3831int
3832encoding_buffer_size (coding, src_bytes)
3833 struct coding_system *coding;
3834 int src_bytes;
3835{
3836 int magnification;
3837
3838 if (coding->type == coding_type_ccl)
3839 magnification = coding->spec.ccl.encoder.buf_magnification;
b73bfc1c 3840 else if (CODING_REQUIRE_ENCODING (coding))
d46c5b12 3841 magnification = 3;
b73bfc1c
KH
3842 else
3843 magnification = 1;
d46c5b12
KH
3844
3845 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3846}
3847
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The shared conversion buffer and its current size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  The shared buffer is reused when it is already large
   enough; otherwise it is replaced by a new one whose size is found
   by repeated doubling.  The old contents are not copied.

   NOTE(review): the value returned by xmalloc is not checked here;
   presumably xmalloc does not return on memory exhaustion -- confirm
   against its definition.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Start doubling from a positive size.  Without this, a
         conversion_buffer_size of 0 would make the loop below spin
         forever, since 0 * 2 is still 0.  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;

      while (real_size < size)
        real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3876
3877int
3878ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3879 struct coding_system *coding;
3880 unsigned char *source, *destination;
3881 int src_bytes, dst_bytes, encodep;
3882{
3883 struct ccl_program *ccl
3884 = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3885 int result;
3886
ae9ff118 3887 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
7b179c2d 3888
d46c5b12
KH
3889 coding->produced = ccl_driver (ccl, source, destination,
3890 src_bytes, dst_bytes, &(coding->consumed));
b73bfc1c
KH
3891 if (encodep)
3892 coding->produced_char = coding->produced;
3893 else
3894 {
3895 int bytes
3896 = dst_bytes ? dst_bytes : source + coding->consumed - destination;
3897 coding->produced = str_as_multibyte (destination, bytes,
3898 coding->produced,
3899 &(coding->produced_char));
3900 }
69f76525 3901
d46c5b12
KH
3902 switch (ccl->status)
3903 {
3904 case CCL_STAT_SUSPEND_BY_SRC:
3905 result = CODING_FINISH_INSUFFICIENT_SRC;
3906 break;
3907 case CCL_STAT_SUSPEND_BY_DST:
3908 result = CODING_FINISH_INSUFFICIENT_DST;
3909 break;
9864ebce
KH
3910 case CCL_STAT_QUIT:
3911 case CCL_STAT_INVALID_CMD:
3912 result = CODING_FINISH_INTERRUPT;
3913 break;
d46c5b12
KH
3914 default:
3915 result = CODING_FINISH_NORMAL;
3916 break;
3917 }
3918 return result;
4ed46869
KH
3919}
3920
3921/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
3922 decoding, it may detect coding system and format of end-of-line if
b73bfc1c
KH
3923 those are not yet decided. The source should be unibyte, the
3924 result is multibyte if CODING->dst_multibyte is nonzero, else
3925 unibyte. */
4ed46869
KH
3926
3927int
d46c5b12 3928decode_coding (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
3929 struct coding_system *coding;
3930 unsigned char *source, *destination;
3931 int src_bytes, dst_bytes;
4ed46869 3932{
0ef69138 3933 if (coding->type == coding_type_undecided)
4ed46869
KH
3934 detect_coding (coding, source, src_bytes);
3935
0ef69138 3936 if (coding->eol_type == CODING_EOL_UNDECIDED)
4ed46869
KH
3937 detect_eol (coding, source, src_bytes);
3938
b73bfc1c
KH
3939 coding->produced = coding->produced_char = 0;
3940 coding->consumed = coding->consumed_char = 0;
3941 coding->errors = 0;
3942 coding->result = CODING_FINISH_NORMAL;
3943
4ed46869
KH
3944 switch (coding->type)
3945 {
4ed46869 3946 case coding_type_sjis:
b73bfc1c
KH
3947 decode_coding_sjis_big5 (coding, source, destination,
3948 src_bytes, dst_bytes, 1);
4ed46869
KH
3949 break;
3950
3951 case coding_type_iso2022:
b73bfc1c
KH
3952 decode_coding_iso2022 (coding, source, destination,
3953 src_bytes, dst_bytes);
4ed46869
KH
3954 break;
3955
3956 case coding_type_big5:
b73bfc1c
KH
3957 decode_coding_sjis_big5 (coding, source, destination,
3958 src_bytes, dst_bytes, 0);
3959 break;
3960
3961 case coding_type_emacs_mule:
3962 decode_coding_emacs_mule (coding, source, destination,
3963 src_bytes, dst_bytes);
4ed46869
KH
3964 break;
3965
3966 case coding_type_ccl:
b73bfc1c
KH
3967 ccl_coding_driver (coding, source, destination,
3968 src_bytes, dst_bytes, 0);
d46c5b12
KH
3969 break;
3970
b73bfc1c
KH
3971 default:
3972 decode_eol (coding, source, destination, src_bytes, dst_bytes);
3973 }
3974
3975 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
3976 && coding->consumed == src_bytes)
3977 coding->result = CODING_FINISH_NORMAL;
3978
3979 if (coding->mode & CODING_MODE_LAST_BLOCK
3980 && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
3981 {
3982 unsigned char *src = source + coding->consumed;
3983 unsigned char *dst = destination + coding->produced;
3984
3985 src_bytes -= coding->consumed;
3986 coding->errors++;
3987 if (COMPOSING_P (coding))
3988 DECODE_COMPOSITION_END ('1');
3989 while (src_bytes--)
d46c5b12 3990 {
b73bfc1c
KH
3991 int c = *src++;
3992 dst += CHAR_STRING (c, dst);
3993 coding->produced_char++;
d46c5b12 3994 }
b73bfc1c
KH
3995 coding->consumed = coding->consumed_char = src - source;
3996 coding->produced = dst - destination;
4ed46869
KH
3997 }
3998
b73bfc1c
KH
3999 if (!coding->dst_multibyte)
4000 {
4001 coding->produced = str_as_unibyte (destination, coding->produced);
4002 coding->produced_char = coding->produced;
4003 }
4ed46869 4004
b73bfc1c
KH
4005 return coding->result;
4006}
52d41803 4007
b73bfc1c
KH
4008/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The
4009 multibyteness of the source is CODING->src_multibyte, the
4010 multibyteness of the result is always unibyte. */
4ed46869
KH
4011
4012int
d46c5b12 4013encode_coding (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
4014 struct coding_system *coding;
4015 unsigned char *source, *destination;
4016 int src_bytes, dst_bytes;
4ed46869 4017{
b73bfc1c
KH
4018 coding->produced = coding->produced_char = 0;
4019 coding->consumed = coding->consumed_char = 0;
4020 coding->errors = 0;
4021 coding->result = CODING_FINISH_NORMAL;
4ed46869 4022
d46c5b12
KH
4023 switch (coding->type)
4024 {
4ed46869 4025 case coding_type_sjis:
b73bfc1c
KH
4026 encode_coding_sjis_big5 (coding, source, destination,
4027 src_bytes, dst_bytes, 1);
4ed46869
KH
4028 break;
4029
4030 case coding_type_iso2022:
b73bfc1c
KH
4031 encode_coding_iso2022 (coding, source, destination,
4032 src_bytes, dst_bytes);
4ed46869
KH
4033 break;
4034
4035 case coding_type_big5:
b73bfc1c
KH
4036 encode_coding_sjis_big5 (coding, source, destination,
4037 src_bytes, dst_bytes, 0);
4038 break;
4039
4040 case coding_type_emacs_mule:
4041 encode_coding_emacs_mule (coding, source, destination,
4042 src_bytes, dst_bytes);
4ed46869
KH
4043 break;
4044
4045 case coding_type_ccl:
b73bfc1c
KH
4046 ccl_coding_driver (coding, source, destination,
4047 src_bytes, dst_bytes, 1);
d46c5b12
KH
4048 break;
4049
b73bfc1c
KH
4050 default:
4051 encode_eol (coding, source, destination, src_bytes, dst_bytes);
4052 }
4053
4054 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4055 && coding->consumed == src_bytes)
4056 coding->result = CODING_FINISH_NORMAL;
4057
4058 if (coding->mode & CODING_MODE_LAST_BLOCK)
4059 {
4060 unsigned char *src = source + coding->consumed;
4061 unsigned char *src_end = src + src_bytes;
4062 unsigned char *dst = destination + coding->produced;
4063
4064 if (coding->type == coding_type_iso2022)
4065 ENCODE_RESET_PLANE_AND_REGISTER;
4066 if (COMPOSING_P (coding))
4067 *dst++ = ISO_CODE_ESC, *dst++ = '1';
4068 if (coding->consumed < src_bytes)
d46c5b12 4069 {
b73bfc1c
KH
4070 int len = src_bytes - coding->consumed;
4071
4072 BCOPY_SHORT (source + coding->consumed, dst, len);
4073 if (coding->src_multibyte)
4074 len = str_as_unibyte (dst, len);
4075 dst += len;
4076 coding->consumed = src_bytes;
d46c5b12 4077 }
b73bfc1c 4078 coding->produced = coding->produced_char = dst - destination;
4ed46869
KH
4079 }
4080
b73bfc1c 4081 return coding->result;
4ed46869
KH
4082}
4083
fb88bf2d
KH
4084/* Scan text in the region between *BEG and *END (byte positions),
4085 skip characters which we don't have to decode by coding system
4086 CODING at the head and tail, then set *BEG and *END to the region
4087 of the text we actually have to convert. The caller should move
b73bfc1c
KH
4088 the gap out of the region in advance if the region is from a
4089 buffer.
4ed46869 4090
d46c5b12
KH
4091 If STR is not NULL, *BEG and *END are indices into STR. */
4092
4093static void
4094shrink_decoding_region (beg, end, coding, str)
4095 int *beg, *end;
4096 struct coding_system *coding;
4097 unsigned char *str;
4098{
fb88bf2d 4099 unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
d46c5b12 4100 int eol_conversion;
88993dfd 4101 Lisp_Object translation_table;
d46c5b12
KH
4102
4103 if (coding->type == coding_type_ccl
4104 || coding->type == coding_type_undecided
b73bfc1c
KH
4105 || coding->eol_type != CODING_EOL_LF
4106 || !NILP (coding->post_read_conversion)
4107 || coding->composing != COMPOSITION_DISABLED)
d46c5b12
KH
4108 {
4109 /* We can't skip any data. */
4110 return;
4111 }
b73bfc1c
KH
4112 if (coding->type == coding_type_no_conversion
4113 || coding->type == coding_type_raw_text
4114 || coding->type == coding_type_emacs_mule)
d46c5b12 4115 {
fb88bf2d
KH
4116 /* We need no conversion, but don't have to skip any data here.
4117 Decoding routine handles them effectively anyway. */
d46c5b12
KH
4118 return;
4119 }
4120
88993dfd
KH
4121 translation_table = coding->translation_table_for_decode;
4122 if (NILP (translation_table) && !NILP (Venable_character_translation))
4123 translation_table = Vstandard_translation_table_for_decode;
4124 if (CHAR_TABLE_P (translation_table))
4125 {
4126 int i;
4127 for (i = 0; i < 128; i++)
4128 if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4129 break;
4130 if (i < 128)
4131 /* Some ASCII character should be tranlsated. We give up
4132 shrinking. */
4133 return;
4134 }
4135
b73bfc1c 4136 if (coding->heading_ascii >= 0)
d46c5b12
KH
4137 /* Detection routine has already found how much we can skip at the
4138 head. */
4139 *beg += coding->heading_ascii;
4140
4141 if (str)
4142 {
4143 begp_orig = begp = str + *beg;
4144 endp_orig = endp = str + *end;
4145 }
4146 else
4147 {
fb88bf2d 4148 begp_orig = begp = BYTE_POS_ADDR (*beg);
d46c5b12
KH
4149 endp_orig = endp = begp + *end - *beg;
4150 }
4151
d46c5b12
KH
4152 switch (coding->type)
4153 {
d46c5b12
KH
4154 case coding_type_sjis:
4155 case coding_type_big5:
4156 /* We can skip all ASCII characters at the head. */
4157 if (coding->heading_ascii < 0)
4158 {
4159 if (eol_conversion)
de9d083c 4160 while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
d46c5b12
KH
4161 else
4162 while (begp < endp && *begp < 0x80) begp++;
4163 }
4164 /* We can skip all ASCII characters at the tail except for the
4165 second byte of SJIS or BIG5 code. */
4166 if (eol_conversion)
de9d083c 4167 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
d46c5b12
KH
4168 else
4169 while (begp < endp && endp[-1] < 0x80) endp--;
ee59c65f
RS
4170 /* Do not consider LF as ascii if preceded by CR, since that
4171 confuses eol decoding. */
4172 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4173 endp++;
d46c5b12
KH
4174 if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4175 endp++;
4176 break;
4177
b73bfc1c 4178 case coding_type_iso2022:
622fece5
KH
4179 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4180 /* We can't skip any data. */
4181 break;
d46c5b12
KH
4182 if (coding->heading_ascii < 0)
4183 {
d46c5b12
KH
4184 /* We can skip all ASCII characters at the head except for a
4185 few control codes. */
4186 while (begp < endp && (c = *begp) < 0x80
4187 && c != ISO_CODE_CR && c != ISO_CODE_SO
4188 && c != ISO_CODE_SI && c != ISO_CODE_ESC
4189 && (!eol_conversion || c != ISO_CODE_LF))
4190 begp++;
4191 }
4192 switch (coding->category_idx)
4193 {
4194 case CODING_CATEGORY_IDX_ISO_8_1:
4195 case CODING_CATEGORY_IDX_ISO_8_2:
4196 /* We can skip all ASCII characters at the tail. */
4197 if (eol_conversion)
de9d083c 4198 while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
d46c5b12
KH
4199 else
4200 while (begp < endp && endp[-1] < 0x80) endp--;
ee59c65f
RS
4201 /* Do not consider LF as ascii if preceded by CR, since that
4202 confuses eol decoding. */
4203 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4204 endp++;
d46c5b12
KH
4205 break;
4206
4207 case CODING_CATEGORY_IDX_ISO_7:
4208 case CODING_CATEGORY_IDX_ISO_7_TIGHT:
de79a6a5
KH
4209 {
4210 /* We can skip all charactes at the tail except for 8-bit
4211 codes and ESC and the following 2-byte at the tail. */
4212 unsigned char *eight_bit = NULL;
4213
4214 if (eol_conversion)
4215 while (begp < endp
4216 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4217 {
4218 if (!eight_bit && c & 0x80) eight_bit = endp;
4219 endp--;
4220 }
4221 else
4222 while (begp < endp
4223 && (c = endp[-1]) != ISO_CODE_ESC)
4224 {
4225 if (!eight_bit && c & 0x80) eight_bit = endp;
4226 endp--;
4227 }
4228 /* Do not consider LF as ascii if preceded by CR, since that
4229 confuses eol decoding. */
4230 if (begp < endp && endp < endp_orig
4231 && endp[-1] == '\r' && endp[0] == '\n')
4232 endp++;
4233 if (begp < endp && endp[-1] == ISO_CODE_ESC)
4234 {
4235 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4236 /* This is an ASCII designation sequence. We can
4237 surely skip the tail. But, if we have
4238 encountered an 8-bit code, skip only the codes
4239 after that. */
4240 endp = eight_bit ? eight_bit : endp + 2;
4241 else
4242 /* Hmmm, we can't skip the tail. */
4243 endp = endp_orig;
4244 }
4245 else if (eight_bit)
4246 endp = eight_bit;
4247 }
d46c5b12 4248 }
b73bfc1c
KH
4249 break;
4250
4251 default:
4252 abort ();
d46c5b12
KH
4253 }
4254 *beg += begp - begp_orig;
4255 *end += endp - endp_orig;
4256 return;
4257}
4258
4259/* Like shrink_decoding_region but for encoding. */
4260
4261static void
4262shrink_encoding_region (beg, end, coding, str)
4263 int *beg, *end;
4264 struct coding_system *coding;
4265 unsigned char *str;
4266{
4267 unsigned char *begp_orig, *begp, *endp_orig, *endp;
4268 int eol_conversion;
88993dfd 4269 Lisp_Object translation_table;
d46c5b12 4270
b73bfc1c
KH
4271 if (coding->type == coding_type_ccl
4272 || coding->eol_type == CODING_EOL_CRLF
4273 || coding->eol_type == CODING_EOL_CR
4274 || coding->cmp_data && coding->cmp_data->used > 0)
d46c5b12 4275 {
b73bfc1c
KH
4276 /* We can't skip any data. */
4277 return;
4278 }
4279 if (coding->type == coding_type_no_conversion
4280 || coding->type == coding_type_raw_text
4281 || coding->type == coding_type_emacs_mule
4282 || coding->type == coding_type_undecided)
4283 {
4284 /* We need no conversion, but don't have to skip any data here.
4285 Encoding routine handles them effectively anyway. */
d46c5b12
KH
4286 return;
4287 }
4288
88993dfd
KH
4289 translation_table = coding->translation_table_for_encode;
4290 if (NILP (translation_table) && !NILP (Venable_character_translation))
4291 translation_table = Vstandard_translation_table_for_encode;
4292 if (CHAR_TABLE_P (translation_table))
4293 {
4294 int i;
4295 for (i = 0; i < 128; i++)
4296 if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4297 break;
4298 if (i < 128)
4299 /* Some ASCII character should be tranlsated. We give up
4300 shrinking. */
4301 return;
4302 }
4303
d46c5b12
KH
4304 if (str)
4305 {
4306 begp_orig = begp = str + *beg;
4307 endp_orig = endp = str + *end;
4308 }
4309 else
4310 {
fb88bf2d 4311 begp_orig = begp = BYTE_POS_ADDR (*beg);
d46c5b12
KH
4312 endp_orig = endp = begp + *end - *beg;
4313 }
4314
4315 eol_conversion = (coding->eol_type == CODING_EOL_CR
4316 || coding->eol_type == CODING_EOL_CRLF);
4317
4318 /* Here, we don't have to check coding->pre_write_conversion because
4319 the caller is expected to have handled it already. */
4320 switch (coding->type)
4321 {
d46c5b12 4322 case coding_type_iso2022:
622fece5
KH
4323 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4324 /* We can't skip any data. */
4325 break;
d46c5b12
KH
4326 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4327 {
4328 unsigned char *bol = begp;
4329 while (begp < endp && *begp < 0x80)
4330 {
4331 begp++;
4332 if (begp[-1] == '\n')
4333 bol = begp;
4334 }
4335 begp = bol;
4336 goto label_skip_tail;
4337 }
4338 /* fall down ... */
4339
b73bfc1c
KH
4340 case coding_type_sjis:
4341 case coding_type_big5:
d46c5b12
KH
4342 /* We can skip all ASCII characters at the head and tail. */
4343 if (eol_conversion)
4344 while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4345 else
4346 while (begp < endp && *begp < 0x80) begp++;
4347 label_skip_tail:
4348 if (eol_conversion)
4349 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4350 else
4351 while (begp < endp && *(endp - 1) < 0x80) endp--;
4352 break;
b73bfc1c
KH
4353
4354 default:
4355 abort ();
d46c5b12
KH
4356 }
4357
4358 *beg += begp - begp_orig;
4359 *end += endp - endp_orig;
4360 return;
4361}
4362
88993dfd
KH
/* Shrinking the conversion region has a cost of its own, so it is
   attempted only when the region is longer than this many bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region *BEG .. *END for CODING with
   shrink_encoding_region (if ENCODEP is nonzero) or
   shrink_decoding_region (otherwise), provided the region is longer
   than shrink_conversion_region_threshhold.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
      (encodep ? shrink_encoding_region : shrink_decoding_region)       \
        (beg, end, coding, str);                                        \
  } while (0)
4376
b843d1ae
KH
4377static Lisp_Object
4378code_convert_region_unwind (dummy)
4379 Lisp_Object dummy;
4380{
4381 inhibit_pre_post_conversion = 0;
4382 return Qnil;
4383}
4384
ec6d2bb8
KH
4385/* Store information about all compositions in the range FROM and TO
4386 of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
4387 buffer or a string, defaults to the current buffer. */
4388
4389void
4390coding_save_composition (coding, from, to, obj)
4391 struct coding_system *coding;
4392 int from, to;
4393 Lisp_Object obj;
4394{
4395 Lisp_Object prop;
4396 int start, end;
4397
91bee881
KH
4398 if (coding->composing == COMPOSITION_DISABLED)
4399 return;
4400 if (!coding->cmp_data)
4401 coding_allocate_composition_data (coding, from);
ec6d2bb8
KH
4402 if (!find_composition (from, to, &start, &end, &prop, obj)
4403 || end > to)
4404 return;
4405 if (start < from
4406 && (!find_composition (end, to, &start, &end, &prop, obj)
4407 || end > to))
4408 return;
4409 coding->composing = COMPOSITION_NO;
ec6d2bb8
KH
4410 do
4411 {
4412 if (COMPOSITION_VALID_P (start, end, prop))
4413 {
4414 enum composition_method method = COMPOSITION_METHOD (prop);
4415 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4416 >= COMPOSITION_DATA_SIZE)
4417 coding_allocate_composition_data (coding, from);
4418 /* For relative composition, we remember start and end
4419 positions, for the other compositions, we also remember
4420 components. */
4421 CODING_ADD_COMPOSITION_START (coding, start - from, method);
4422 if (method != COMPOSITION_RELATIVE)
4423 {
4424 /* We must store a*/
4425 Lisp_Object val, ch;
4426
4427 val = COMPOSITION_COMPONENTS (prop);
4428 if (CONSP (val))
4429 while (CONSP (val))
4430 {
4431 ch = XCAR (val), val = XCDR (val);
4432 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4433 }
4434 else if (VECTORP (val) || STRINGP (val))
4435 {
4436 int len = (VECTORP (val)
4437 ? XVECTOR (val)->size : XSTRING (val)->size);
4438 int i;
4439 for (i = 0; i < len; i++)
4440 {
4441 ch = (STRINGP (val)
4442 ? Faref (val, make_number (i))
4443 : XVECTOR (val)->contents[i]);
4444 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4445 }
4446 }
4447 else /* INTEGERP (val) */
4448 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
4449 }
4450 CODING_ADD_COMPOSITION_END (coding, end - from);
4451 }
4452 start = end;
4453 }
4454 while (start < to
4455 && find_composition (start, to, &start, &end, &prop, obj)
4456 && end <= to);
4457
4458 /* Make coding->cmp_data point to the first memory block. */
4459 while (coding->cmp_data->prev)
4460 coding->cmp_data = coding->cmp_data->prev;
4461 coding->cmp_data_start = 0;
4462}
4463
4464/* Reflect the saved information about compositions to OBJ.
4465 CODING->cmp_data points to a memory block for the informaiton. OBJ
4466 is a buffer or a string, defaults to the current buffer. */
4467
4468static void
4469coding_restore_composition (coding, obj)
4470 struct coding_system *coding;
4471 Lisp_Object obj;
4472{
4473 struct composition_data *cmp_data = coding->cmp_data;
4474
4475 if (!cmp_data)
4476 return;
4477
4478 while (cmp_data->prev)
4479 cmp_data = cmp_data->prev;
4480
4481 while (cmp_data)
4482 {
4483 int i;
4484
4485 for (i = 0; i < cmp_data->used; i += cmp_data->data[i])
4486 {
4487 int *data = cmp_data->data + i;
4488 enum composition_method method = (enum composition_method) data[3];
4489 Lisp_Object components;
4490
4491 if (method == COMPOSITION_RELATIVE)
4492 components = Qnil;
4493 else
4494 {
4495 int len = data[0] - 4, j;
4496 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4497
4498 for (j = 0; j < len; j++)
4499 args[j] = make_number (data[4 + j]);
4500 components = (method == COMPOSITION_WITH_ALTCHARS
4501 ? Fstring (len, args) : Fvector (len, args));
4502 }
4503 compose_text (data[1], data[2], components, Qnil, obj);
4504 }
4505 cmp_data = cmp_data->next;
4506 }
4507}
4508
d46c5b12 4509/* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
fb88bf2d
KH
4510 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4511 coding system CODING, and return the status code of code conversion
4512 (currently, this value has no meaning).
4513
4514 How many characters (and bytes) are converted to how many
4515 characters (and bytes) are recorded in members of the structure
4516 CODING.
d46c5b12 4517
6e44253b 4518 If REPLACE is nonzero, we do various things as if the original text
d46c5b12 4519 is deleted and a new text is inserted. See the comments in
b73bfc1c
KH
4520 replace_range (insdel.c) to know what we are doing.
4521
4522 If REPLACE is zero, it is assumed that the source text is unibyte.
4523 Otherwise, it is assumed that the source text is multibyte. */
4ed46869
KH
4524
4525int
6e44253b
KH
4526code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4527 int from, from_byte, to, to_byte, encodep, replace;
4ed46869 4528 struct coding_system *coding;
4ed46869 4529{
fb88bf2d
KH
4530 int len = to - from, len_byte = to_byte - from_byte;
4531 int require, inserted, inserted_byte;
4b39528c 4532 int head_skip, tail_skip, total_skip = 0;
84d60297 4533 Lisp_Object saved_coding_symbol;
fb88bf2d 4534 int first = 1;
fb88bf2d 4535 unsigned char *src, *dst;
84d60297 4536 Lisp_Object deletion;
e133c8fa 4537 int orig_point = PT, orig_len = len;
6abb9bd9 4538 int prev_Z;
b73bfc1c
KH
4539 int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
4540
4541 coding->src_multibyte = replace && multibyte_p;
4542 coding->dst_multibyte = multibyte_p;
84d60297
RS
4543
4544 deletion = Qnil;
4545 saved_coding_symbol = Qnil;
d46c5b12 4546
83fa074f 4547 if (from < PT && PT < to)
e133c8fa
KH
4548 {
4549 TEMP_SET_PT_BOTH (from, from_byte);
4550 orig_point = from;
4551 }
83fa074f 4552
6e44253b 4553 if (replace)
d46c5b12 4554 {
fb88bf2d
KH
4555 int saved_from = from;
4556
d46c5b12 4557 prepare_to_modify_buffer (from, to, &from);
fb88bf2d
KH
4558 if (saved_from != from)
4559 {
4560 to = from + len;
b73bfc1c 4561 from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
fb88bf2d
KH
4562 len_byte = to_byte - from_byte;
4563 }
d46c5b12 4564 }
d46c5b12
KH
4565
4566 if (! encodep && CODING_REQUIRE_DETECTION (coding))
4567 {
12410ef1 4568 /* We must detect encoding of text and eol format. */
d46c5b12
KH
4569
4570 if (from < GPT && to > GPT)
4571 move_gap_both (from, from_byte);
4572 if (coding->type == coding_type_undecided)
4573 {
fb88bf2d 4574 detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
d46c5b12 4575 if (coding->type == coding_type_undecided)
12410ef1
KH
4576 /* It seems that the text contains only ASCII, but we
4577 should not left it undecided because the deeper
4578 decoding routine (decode_coding) tries to detect the
4579 encodings again in vain. */
d46c5b12
KH
4580 coding->type = coding_type_emacs_mule;
4581 }
4582 if (coding->eol_type == CODING_EOL_UNDECIDED)
4583 {
4584 saved_coding_symbol = coding->symbol;
4585 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4586 if (coding->eol_type == CODING_EOL_UNDECIDED)
4587 coding->eol_type = CODING_EOL_LF;
4588 /* We had better recover the original eol format if we
4589 encounter an inconsitent eol format while decoding. */
4590 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4591 }
4592 }
4593
d46c5b12
KH
4594 /* Now we convert the text. */
4595
4596 /* For encoding, we must process pre-write-conversion in advance. */
b73bfc1c
KH
4597 if (! inhibit_pre_post_conversion
4598 && encodep
d46c5b12
KH
4599 && SYMBOLP (coding->pre_write_conversion)
4600 && ! NILP (Ffboundp (coding->pre_write_conversion)))
4601 {
2b4f9037
KH
4602 /* The function in pre-write-conversion may put a new text in a
4603 new buffer. */
0007bdd0
KH
4604 struct buffer *prev = current_buffer;
4605 Lisp_Object new;
b843d1ae 4606 int count = specpdl_ptr - specpdl;
d46c5b12 4607
b843d1ae
KH
4608 record_unwind_protect (code_convert_region_unwind, Qnil);
4609 /* We should not call any more pre-write/post-read-conversion
4610 functions while this pre-write-conversion is running. */
4611 inhibit_pre_post_conversion = 1;
b39f748c
AS
4612 call2 (coding->pre_write_conversion,
4613 make_number (from), make_number (to));
b843d1ae
KH
4614 inhibit_pre_post_conversion = 0;
4615 /* Discard the unwind protect. */
4616 specpdl_ptr--;
4617
d46c5b12
KH
4618 if (current_buffer != prev)
4619 {
4620 len = ZV - BEGV;
0007bdd0 4621 new = Fcurrent_buffer ();
d46c5b12 4622 set_buffer_internal_1 (prev);
7dae4502 4623 del_range_2 (from, from_byte, to, to_byte, 0);
e133c8fa 4624 TEMP_SET_PT_BOTH (from, from_byte);
0007bdd0
KH
4625 insert_from_buffer (XBUFFER (new), 1, len, 0);
4626 Fkill_buffer (new);
e133c8fa
KH
4627 if (orig_point >= to)
4628 orig_point += len - orig_len;
4629 else if (orig_point > from)
4630 orig_point = from;
4631 orig_len = len;
d46c5b12 4632 to = from + len;
b73bfc1c
KH
4633 from_byte = CHAR_TO_BYTE (from);
4634 to_byte = CHAR_TO_BYTE (to);
d46c5b12 4635 len_byte = to_byte - from_byte;
e133c8fa 4636 TEMP_SET_PT_BOTH (from, from_byte);
d46c5b12
KH
4637 }
4638 }
4639
12410ef1
KH
4640 if (replace)
4641 deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4642
ec6d2bb8
KH
4643 if (coding->composing != COMPOSITION_DISABLED)
4644 {
4645 if (encodep)
4646 coding_save_composition (coding, from, to, Fcurrent_buffer ());
4647 else
4648 coding_allocate_composition_data (coding, from);
4649 }
fb88bf2d 4650
b73bfc1c
KH
4651 /* Try to skip the heading and tailing ASCIIs. */
4652 {
4653 int from_byte_orig = from_byte, to_byte_orig = to_byte;
4654
4655 if (from < GPT && GPT < to)
4656 move_gap_both (from, from_byte);
4657 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4658 if (from_byte == to_byte
4659 && (encodep || NILP (coding->post_read_conversion))
4660 && ! CODING_REQUIRE_FLUSHING (coding))
4661 {
4662 coding->produced = len_byte;
4663 coding->produced_char = len;
4664 if (!replace)
4665 /* We must record and adjust for this new text now. */
4666 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4667 return 0;
4668 }
ec6d2bb8 4669
b73bfc1c
KH
4670 head_skip = from_byte - from_byte_orig;
4671 tail_skip = to_byte_orig - to_byte;
4672 total_skip = head_skip + tail_skip;
4673 from += head_skip;
4674 to -= tail_skip;
4675 len -= total_skip; len_byte -= total_skip;
4676 }
d46c5b12 4677
88993dfd 4678 /* The code conversion routine can not preserve text properties for
55d8d769
KH
4679 now. So, we must remove all text properties in the region.
4680 Here, we must suppress all modification hooks. */
88993dfd 4681 if (replace)
55d8d769
KH
4682 {
4683 int saved_inhibit_modification_hooks = inhibit_modification_hooks;
4684 inhibit_modification_hooks = 1;
4685 Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4686 inhibit_modification_hooks = saved_inhibit_modification_hooks;
4687 }
88993dfd 4688
fb88bf2d
KH
4689 /* For converion, we must put the gap before the text in addition to
4690 making the gap larger for efficient decoding. The required gap
4691 size starts from 2000 which is the magic number used in make_gap.
4692 But, after one batch of conversion, it will be incremented if we
4693 find that it is not enough . */
d46c5b12
KH
4694 require = 2000;
4695
4696 if (GAP_SIZE < require)
4697 make_gap (require - GAP_SIZE);
4698 move_gap_both (from, from_byte);
4699
d46c5b12 4700 inserted = inserted_byte = 0;
fb88bf2d
KH
4701
4702 GAP_SIZE += len_byte;
4703 ZV -= len;
4704 Z -= len;
4705 ZV_BYTE -= len_byte;
4706 Z_BYTE -= len_byte;
4707
d9f9a1bc
GM
4708 if (GPT - BEG < BEG_UNCHANGED)
4709 BEG_UNCHANGED = GPT - BEG;
4710 if (Z - GPT < END_UNCHANGED)
4711 END_UNCHANGED = Z - GPT;
f2558efd 4712
b73bfc1c
KH
4713 if (!encodep && coding->src_multibyte)
4714 {
4715 /* Decoding routines expects that the source text is unibyte.
4716 We must convert 8-bit characters of multibyte form to
4717 unibyte. */
4718 int len_byte_orig = len_byte;
4719 len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
4720 if (len_byte < len_byte_orig)
4721 safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
4722 len_byte);
4723 coding->src_multibyte = 0;
4724 }
4725
d46c5b12
KH
4726 for (;;)
4727 {
fb88bf2d 4728 int result;
d46c5b12 4729
ec6d2bb8 4730 /* The buffer memory is now:
b73bfc1c
KH
4731 +--------+converted-text+---------+-------original-text-------+---+
4732 |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4733 |<---------------------- GAP ----------------------->| */
ec6d2bb8
KH
4734 src = GAP_END_ADDR - len_byte;
4735 dst = GPT_ADDR + inserted_byte;
4736
d46c5b12 4737 if (encodep)
fb88bf2d 4738 result = encode_coding (coding, src, dst, len_byte, 0);
d46c5b12 4739 else
fb88bf2d 4740 result = decode_coding (coding, src, dst, len_byte, 0);
ec6d2bb8
KH
4741
4742 /* The buffer memory is now:
b73bfc1c
KH
4743 +--------+-------converted-text----+--+------original-text----+---+
4744 |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
4745 |<---------------------- GAP ----------------------->| */
ec6d2bb8 4746
d46c5b12
KH
4747 inserted += coding->produced_char;
4748 inserted_byte += coding->produced;
d46c5b12 4749 len_byte -= coding->consumed;
ec6d2bb8
KH
4750
4751 if (result == CODING_FINISH_INSUFFICIENT_CMP)
4752 {
4753 coding_allocate_composition_data (coding, from + inserted);
4754 continue;
4755 }
4756
fb88bf2d 4757 src += coding->consumed;
3636f7a3 4758 dst += coding->produced;
d46c5b12 4759
9864ebce
KH
4760 if (result == CODING_FINISH_NORMAL)
4761 {
4762 src += len_byte;
4763 break;
4764 }
d46c5b12
KH
4765 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4766 {
fb88bf2d 4767 unsigned char *pend = dst, *p = pend - inserted_byte;
38edf7d4 4768 Lisp_Object eol_type;
d46c5b12
KH
4769
4770 /* Encode LFs back to the original eol format (CR or CRLF). */
4771 if (coding->eol_type == CODING_EOL_CR)
4772 {
4773 while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4774 }
4775 else
4776 {
d46c5b12
KH
4777 int count = 0;
4778
fb88bf2d
KH
4779 while (p < pend) if (*p++ == '\n') count++;
4780 if (src - dst < count)
d46c5b12 4781 {
38edf7d4 4782 /* We don't have sufficient room for encoding LFs
fb88bf2d
KH
4783 back to CRLF. We must record converted and
4784 not-yet-converted text back to the buffer
4785 content, enlarge the gap, then record them out of
4786 the buffer contents again. */
4787 int add = len_byte + inserted_byte;
4788
4789 GAP_SIZE -= add;
4790 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4791 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4792 make_gap (count - GAP_SIZE);
4793 GAP_SIZE += add;
4794 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4795 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4796 /* Don't forget to update SRC, DST, and PEND. */
4797 src = GAP_END_ADDR - len_byte;
4798 dst = GPT_ADDR + inserted_byte;
4799 pend = dst;
d46c5b12 4800 }
d46c5b12
KH
4801 inserted += count;
4802 inserted_byte += count;
fb88bf2d
KH
4803 coding->produced += count;
4804 p = dst = pend + count;
4805 while (count)
4806 {
4807 *--p = *--pend;
4808 if (*p == '\n') count--, *--p = '\r';
4809 }
d46c5b12
KH
4810 }
4811
4812 /* Suppress eol-format conversion in the further conversion. */
4813 coding->eol_type = CODING_EOL_LF;
4814
38edf7d4
KH
4815 /* Set the coding system symbol to that for Unix-like EOL. */
4816 eol_type = Fget (saved_coding_symbol, Qeol_type);
4817 if (VECTORP (eol_type)
4818 && XVECTOR (eol_type)->size == 3
4819 && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
4820 coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
4821 else
4822 coding->symbol = saved_coding_symbol;
fb88bf2d
KH
4823
4824 continue;
d46c5b12
KH
4825 }
4826 if (len_byte <= 0)
944bd420
KH
4827 {
4828 if (coding->type != coding_type_ccl
4829 || coding->mode & CODING_MODE_LAST_BLOCK)
4830 break;
4831 coding->mode |= CODING_MODE_LAST_BLOCK;
4832 continue;
4833 }
d46c5b12
KH
4834 if (result == CODING_FINISH_INSUFFICIENT_SRC)
4835 {
4836 /* The source text ends in invalid codes. Let's just
4837 make them valid buffer contents, and finish conversion. */
fb88bf2d 4838 inserted += len_byte;
d46c5b12 4839 inserted_byte += len_byte;
fb88bf2d 4840 while (len_byte--)
ee59c65f 4841 *dst++ = *src++;
d46c5b12
KH
4842 break;
4843 }
9864ebce
KH
4844 if (result == CODING_FINISH_INTERRUPT)
4845 {
4846 /* The conversion procedure was interrupted by a user. */
9864ebce
KH
4847 break;
4848 }
4849 /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
4850 if (coding->consumed < 1)
4851 {
4852 /* It's quite strange to require more memory without
4853 consuming any bytes. Perhaps CCL program bug. */
9864ebce
KH
4854 break;
4855 }
fb88bf2d
KH
4856 if (first)
4857 {
4858 /* We have just done the first batch of conversion which was
4859 stoped because of insufficient gap. Let's reconsider the
4860 required gap size (i.e. SRT - DST) now.
4861
4862 We have converted ORIG bytes (== coding->consumed) into
4863 NEW bytes (coding->produced). To convert the remaining
4864 LEN bytes, we may need REQUIRE bytes of gap, where:
4865 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4866 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4867 Here, we are sure that NEW >= ORIG. */
6e44253b
KH
4868 float ratio = coding->produced - coding->consumed;
4869 ratio /= coding->consumed;
4870 require = len_byte * ratio;
fb88bf2d
KH
4871 first = 0;
4872 }
4873 if ((src - dst) < (require + 2000))
4874 {
4875 /* See the comment above the previous call of make_gap. */
4876 int add = len_byte + inserted_byte;
4877
4878 GAP_SIZE -= add;
4879 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4880 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4881 make_gap (require + 2000);
4882 GAP_SIZE += add;
4883 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4884 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
fb88bf2d 4885 }
d46c5b12 4886 }
fb88bf2d
KH
4887 if (src - dst > 0) *dst = 0; /* Put an anchor. */
4888
b73bfc1c
KH
4889 if (encodep && coding->dst_multibyte)
4890 {
4891 /* The output is unibyte. We must convert 8-bit characters to
4892 multibyte form. */
4893 if (inserted_byte * 2 > GAP_SIZE)
4894 {
4895 GAP_SIZE -= inserted_byte;
4896 ZV += inserted_byte; Z += inserted_byte;
4897 ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
4898 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4899 make_gap (inserted_byte - GAP_SIZE);
4900 GAP_SIZE += inserted_byte;
4901 ZV -= inserted_byte; Z -= inserted_byte;
4902 ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
4903 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4904 }
4905 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
4906 }
7553d0e1 4907
12410ef1
KH
4908 /* If we have shrinked the conversion area, adjust it now. */
4909 if (total_skip > 0)
4910 {
4911 if (tail_skip > 0)
4912 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4913 inserted += total_skip; inserted_byte += total_skip;
4914 GAP_SIZE += total_skip;
4915 GPT -= head_skip; GPT_BYTE -= head_skip;
4916 ZV -= total_skip; ZV_BYTE -= total_skip;
4917 Z -= total_skip; Z_BYTE -= total_skip;
4918 from -= head_skip; from_byte -= head_skip;
4919 to += tail_skip; to_byte += tail_skip;
4920 }
4921
6abb9bd9 4922 prev_Z = Z;
12410ef1 4923 adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
6abb9bd9 4924 inserted = Z - prev_Z;
4ed46869 4925
ec6d2bb8
KH
4926 if (!encodep && coding->cmp_data && coding->cmp_data->used)
4927 coding_restore_composition (coding, Fcurrent_buffer ());
4928 coding_free_composition_data (coding);
4929
b73bfc1c
KH
4930 if (! inhibit_pre_post_conversion
4931 && ! encodep && ! NILP (coding->post_read_conversion))
d46c5b12 4932 {
2b4f9037 4933 Lisp_Object val;
b843d1ae 4934 int count = specpdl_ptr - specpdl;
4ed46869 4935
e133c8fa
KH
4936 if (from != PT)
4937 TEMP_SET_PT_BOTH (from, from_byte);
6abb9bd9 4938 prev_Z = Z;
b843d1ae
KH
4939 record_unwind_protect (code_convert_region_unwind, Qnil);
4940 /* We should not call any more pre-write/post-read-conversion
4941 functions while this post-read-conversion is running. */
4942 inhibit_pre_post_conversion = 1;
2b4f9037 4943 val = call1 (coding->post_read_conversion, make_number (inserted));
b843d1ae
KH
4944 inhibit_pre_post_conversion = 0;
4945 /* Discard the unwind protect. */
4946 specpdl_ptr--;
6abb9bd9 4947 CHECK_NUMBER (val, 0);
944bd420 4948 inserted += Z - prev_Z;
e133c8fa
KH
4949 }
4950
4951 if (orig_point >= from)
4952 {
4953 if (orig_point >= from + orig_len)
4954 orig_point += inserted - orig_len;
4955 else
4956 orig_point = from;
4957 TEMP_SET_PT (orig_point);
d46c5b12 4958 }
4ed46869 4959
ec6d2bb8
KH
4960 if (replace)
4961 {
4962 signal_after_change (from, to - from, inserted);
e19539f1 4963 update_compositions (from, from + inserted, CHECK_BORDER);
ec6d2bb8 4964 }
2b4f9037 4965
fb88bf2d 4966 {
12410ef1
KH
4967 coding->consumed = to_byte - from_byte;
4968 coding->consumed_char = to - from;
4969 coding->produced = inserted_byte;
4970 coding->produced_char = inserted;
fb88bf2d 4971 }
7553d0e1 4972
fb88bf2d 4973 return 0;
d46c5b12
KH
4974}
4975
4976Lisp_Object
b73bfc1c
KH
4977run_pre_post_conversion_on_str (str, coding, encodep)
4978 Lisp_Object str;
4979 struct coding_system *coding;
4980 int encodep;
4981{
4982 int count = specpdl_ptr - specpdl;
4983 struct gcpro gcpro1;
4984 struct buffer *prev = current_buffer;
4985 int multibyte = STRING_MULTIBYTE (str);
4986
4987 record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4988 record_unwind_protect (code_convert_region_unwind, Qnil);
4989 GCPRO1 (str);
4990 temp_output_buffer_setup (" *code-converting-work*");
4991 set_buffer_internal (XBUFFER (Vstandard_output));
4992 /* We must insert the contents of STR as is without
4993 unibyte<->multibyte conversion. For that, we adjust the
4994 multibyteness of the working buffer to that of STR. */
4995 Ferase_buffer ();
4996 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
4997 insert_from_string (str, 0, 0,
4998 XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
4999 UNGCPRO;
5000 inhibit_pre_post_conversion = 1;
5001 if (encodep)
5002 call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5003 else
6bac5b12
KH
5004 {
5005 TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5006 call1 (coding->post_read_conversion, make_number (Z - BEG));
5007 }
b73bfc1c
KH
5008 inhibit_pre_post_conversion = 0;
5009 str = make_buffer_string (BEG, Z, 0);
5010 return unbind_to (count, str);
5011}
5012
5013Lisp_Object
5014decode_coding_string (str, coding, nocopy)
d46c5b12 5015 Lisp_Object str;
4ed46869 5016 struct coding_system *coding;
b73bfc1c 5017 int nocopy;
4ed46869 5018{
d46c5b12
KH
5019 int len;
5020 char *buf;
b73bfc1c 5021 int from, to, to_byte;
d46c5b12 5022 struct gcpro gcpro1;
84d60297 5023 Lisp_Object saved_coding_symbol;
d46c5b12 5024 int result;
4ed46869 5025
b73bfc1c
KH
5026 from = 0;
5027 to = XSTRING (str)->size;
5028 to_byte = STRING_BYTES (XSTRING (str));
4ed46869 5029
b73bfc1c
KH
5030 saved_coding_symbol = Qnil;
5031 if (CODING_REQUIRE_DETECTION (coding))
d46c5b12
KH
5032 {
5033 /* See the comments in code_convert_region. */
5034 if (coding->type == coding_type_undecided)
5035 {
5036 detect_coding (coding, XSTRING (str)->data, to_byte);
5037 if (coding->type == coding_type_undecided)
5038 coding->type = coding_type_emacs_mule;
5039 }
5040 if (coding->eol_type == CODING_EOL_UNDECIDED)
5041 {
5042 saved_coding_symbol = coding->symbol;
5043 detect_eol (coding, XSTRING (str)->data, to_byte);
5044 if (coding->eol_type == CODING_EOL_UNDECIDED)
5045 coding->eol_type = CODING_EOL_LF;
5046 /* We had better recover the original eol format if we
5047 encounter an inconsitent eol format while decoding. */
5048 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5049 }
5050 }
4ed46869 5051
b73bfc1c 5052 if (! CODING_REQUIRE_DECODING (coding))
ec6d2bb8 5053 {
b73bfc1c
KH
5054 if (!STRING_MULTIBYTE (str))
5055 {
5056 str = Fstring_as_multibyte (str);
5057 nocopy = 1;
5058 }
5059 return (nocopy ? str : Fcopy_sequence (str));
ec6d2bb8
KH
5060 }
5061
b73bfc1c 5062 if (STRING_MULTIBYTE (str))
d46c5b12 5063 {
b73bfc1c
KH
5064 /* Decoding routines expect the source text to be unibyte. */
5065 str = Fstring_as_unibyte (str);
5066 nocopy = 1;
5067 coding->src_multibyte = 0;
5068 }
5069 coding->dst_multibyte = 1;
ec6d2bb8 5070
b73bfc1c
KH
5071 if (coding->composing != COMPOSITION_DISABLED)
5072 coding_allocate_composition_data (coding, from);
ec6d2bb8 5073
b73bfc1c
KH
5074 /* Try to skip the heading and tailing ASCIIs. */
5075 {
5076 int from_orig = from;
4ed46869 5077
b73bfc1c
KH
5078 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5079 0);
5080 if (from == to_byte)
5081 return (nocopy ? str : Fcopy_sequence (str));
5082 }
5083
5084 len = decoding_buffer_size (coding, to_byte - from);
fc932ac6 5085 len += from + STRING_BYTES (XSTRING (str)) - to_byte;
d46c5b12
KH
5086 GCPRO1 (str);
5087 buf = get_conversion_buffer (len);
5088 UNGCPRO;
4ed46869 5089
d46c5b12
KH
5090 if (from > 0)
5091 bcopy (XSTRING (str)->data, buf, from);
b73bfc1c
KH
5092 result = decode_coding (coding, XSTRING (str)->data + from,
5093 buf + from, to_byte - from, len);
5094 if (result == CODING_FINISH_INCONSISTENT_EOL)
4ed46869 5095 {
ec6d2bb8 5096 /* We simply try to decode the whole string again but without
d46c5b12
KH
5097 eol-conversion this time. */
5098 coding->eol_type = CODING_EOL_LF;
5099 coding->symbol = saved_coding_symbol;
ec6d2bb8 5100 coding_free_composition_data (coding);
b73bfc1c 5101 return decode_coding_string (str, coding, nocopy);
4ed46869 5102 }
d46c5b12
KH
5103
5104 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
fc932ac6 5105 STRING_BYTES (XSTRING (str)) - to_byte);
d46c5b12 5106
fc932ac6 5107 len = from + STRING_BYTES (XSTRING (str)) - to_byte;
b73bfc1c
KH
5108 str = make_multibyte_string (buf, len + coding->produced_char,
5109 len + coding->produced);
5110
5111 if (coding->cmp_data && coding->cmp_data->used)
5112 coding_restore_composition (coding, str);
5113 coding_free_composition_data (coding);
5114
5115 if (SYMBOLP (coding->post_read_conversion)
5116 && !NILP (Ffboundp (coding->post_read_conversion)))
6bac5b12 5117 str = run_pre_post_conversion_on_str (str, coding, 0);
b73bfc1c
KH
5118
5119 return str;
5120}
5121
5122Lisp_Object
5123encode_coding_string (str, coding, nocopy)
5124 Lisp_Object str;
5125 struct coding_system *coding;
5126 int nocopy;
5127{
5128 int len;
5129 char *buf;
5130 int from, to, to_byte;
5131 struct gcpro gcpro1;
5132 Lisp_Object saved_coding_symbol;
5133 int result;
5134
5135 if (SYMBOLP (coding->pre_write_conversion)
5136 && !NILP (Ffboundp (coding->pre_write_conversion)))
6bac5b12 5137 str = run_pre_post_conversion_on_str (str, coding, 1);
b73bfc1c
KH
5138
5139 from = 0;
5140 to = XSTRING (str)->size;
5141 to_byte = STRING_BYTES (XSTRING (str));
5142
5143 saved_coding_symbol = Qnil;
5144 if (! CODING_REQUIRE_ENCODING (coding))
826bfb8b 5145 {
b73bfc1c
KH
5146 if (STRING_MULTIBYTE (str))
5147 {
5148 str = Fstring_as_unibyte (str);
5149 nocopy = 1;
5150 }
5151 return (nocopy ? str : Fcopy_sequence (str));
826bfb8b
KH
5152 }
5153
b73bfc1c
KH
5154 /* Encoding routines determine the multibyteness of the source text
5155 by coding->src_multibyte. */
5156 coding->src_multibyte = STRING_MULTIBYTE (str);
5157 coding->dst_multibyte = 0;
5158
5159 if (coding->composing != COMPOSITION_DISABLED)
5160 coding_save_composition (coding, from, to, str);
ec6d2bb8 5161
b73bfc1c
KH
5162 /* Try to skip the heading and tailing ASCIIs. */
5163 {
5164 int from_orig = from;
5165
5166 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5167 1);
5168 if (from == to_byte)
5169 return (nocopy ? str : Fcopy_sequence (str));
5170 }
5171
5172 len = encoding_buffer_size (coding, to_byte - from);
5173 len += from + STRING_BYTES (XSTRING (str)) - to_byte;
5174 GCPRO1 (str);
5175 buf = get_conversion_buffer (len);
5176 UNGCPRO;
5177
5178 if (from > 0)
5179 bcopy (XSTRING (str)->data, buf, from);
5180 result = encode_coding (coding, XSTRING (str)->data + from,
5181 buf + from, to_byte - from, len);
5182 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
5183 STRING_BYTES (XSTRING (str)) - to_byte);
5184
5185 len = from + STRING_BYTES (XSTRING (str)) - to_byte;
5186 str = make_unibyte_string (buf, len + coding->produced);
ec6d2bb8 5187 coding_free_composition_data (coding);
b73bfc1c 5188
d46c5b12 5189 return str;
4ed46869
KH
5190}
5191
5192\f
5193#ifdef emacs
1397dc18 5194/*** 8. Emacs Lisp library functions ***/
4ed46869 5195
4ed46869
KH
5196DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
5197 "Return t if OBJECT is nil or a coding-system.\n\
3a73fa5d
RS
5198See the documentation of `make-coding-system' for information\n\
5199about coding-system objects.")
4ed46869
KH
5200 (obj)
5201 Lisp_Object obj;
5202{
4608c386
KH
5203 if (NILP (obj))
5204 return Qt;
5205 if (!SYMBOLP (obj))
5206 return Qnil;
5207 /* Get coding-spec vector for OBJ. */
5208 obj = Fget (obj, Qcoding_system);
5209 return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
5210 ? Qt : Qnil);
4ed46869
KH
5211}
5212
9d991de8
RS
5213DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
5214 Sread_non_nil_coding_system, 1, 1, 0,
e0e989f6 5215 "Read a coding system from the minibuffer, prompting with string PROMPT.")
4ed46869
KH
5216 (prompt)
5217 Lisp_Object prompt;
5218{
e0e989f6 5219 Lisp_Object val;
9d991de8
RS
5220 do
5221 {
4608c386
KH
5222 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5223 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
5224 }
5225 while (XSTRING (val)->size == 0);
e0e989f6 5226 return (Fintern (val, Qnil));
4ed46869
KH
5227}
5228
9b787f3e
RS
5229DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
5230 "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5231If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5232 (prompt, default_coding_system)
5233 Lisp_Object prompt, default_coding_system;
4ed46869 5234{
f44d27ce 5235 Lisp_Object val;
9b787f3e
RS
5236 if (SYMBOLP (default_coding_system))
5237 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 5238 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
5239 Qt, Qnil, Qcoding_system_history,
5240 default_coding_system, Qnil);
e0e989f6 5241 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
5242}
5243
5244DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
5245 1, 1, 0,
5246 "Check validity of CODING-SYSTEM.\n\
3a73fa5d
RS
5247If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5248It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4ed46869
KH
5249The value of property should be a vector of length 5.")
5250 (coding_system)
5251 Lisp_Object coding_system;
5252{
5253 CHECK_SYMBOL (coding_system, 0);
5254 if (!NILP (Fcoding_system_p (coding_system)))
5255 return coding_system;
5256 while (1)
02ba4723 5257 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 5258}
3a73fa5d 5259\f
d46c5b12
KH
5260Lisp_Object
5261detect_coding_system (src, src_bytes, highest)
5262 unsigned char *src;
5263 int src_bytes, highest;
4ed46869
KH
5264{
5265 int coding_mask, eol_type;
d46c5b12
KH
5266 Lisp_Object val, tmp;
5267 int dummy;
4ed46869 5268
d46c5b12
KH
5269 coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
5270 eol_type = detect_eol_type (src, src_bytes, &dummy);
5271 if (eol_type == CODING_EOL_INCONSISTENT)
25b02698 5272 eol_type = CODING_EOL_UNDECIDED;
4ed46869 5273
d46c5b12 5274 if (!coding_mask)
4ed46869 5275 {
27901516 5276 val = Qundecided;
d46c5b12 5277 if (eol_type != CODING_EOL_UNDECIDED)
4ed46869 5278 {
f44d27ce
RS
5279 Lisp_Object val2;
5280 val2 = Fget (Qundecided, Qeol_type);
4ed46869
KH
5281 if (VECTORP (val2))
5282 val = XVECTOR (val2)->contents[eol_type];
5283 }
80e803b4 5284 return (highest ? val : Fcons (val, Qnil));
4ed46869 5285 }
4ed46869 5286
d46c5b12
KH
5287 /* At first, gather possible coding systems in VAL. */
5288 val = Qnil;
fa42c37f 5289 for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
4ed46869 5290 {
fa42c37f
KH
5291 Lisp_Object category_val, category_index;
5292
5293 category_index = Fget (XCAR (tmp), Qcoding_category_index);
5294 category_val = Fsymbol_value (XCAR (tmp));
5295 if (!NILP (category_val)
5296 && NATNUMP (category_index)
5297 && (coding_mask & (1 << XFASTINT (category_index))))
4ed46869 5298 {
fa42c37f 5299 val = Fcons (category_val, val);
d46c5b12
KH
5300 if (highest)
5301 break;
4ed46869
KH
5302 }
5303 }
d46c5b12
KH
5304 if (!highest)
5305 val = Fnreverse (val);
4ed46869 5306
65059037 5307 /* Then, replace the elements with subsidiary coding systems. */
fa42c37f 5308 for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
4ed46869 5309 {
65059037
RS
5310 if (eol_type != CODING_EOL_UNDECIDED
5311 && eol_type != CODING_EOL_INCONSISTENT)
4ed46869 5312 {
d46c5b12 5313 Lisp_Object eol;
03699b14 5314 eol = Fget (XCAR (tmp), Qeol_type);
d46c5b12 5315 if (VECTORP (eol))
03699b14 5316 XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
4ed46869
KH
5317 }
5318 }
03699b14 5319 return (highest ? XCAR (val) : val);
d46c5b12 5320}
4ed46869 5321
d46c5b12
KH
5322DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
5323 2, 3, 0,
5324 "Detect coding system of the text in the region between START and END.\n\
5325Return a list of possible coding systems ordered by priority.\n\
5326\n\
80e803b4
KH
5327If only ASCII characters are found, it returns a list of single element\n\
5328`undecided' or its subsidiary coding system according to a detected\n\
5329end-of-line format.\n\
d46c5b12
KH
5330\n\
5331If optional argument HIGHEST is non-nil, return the coding system of\n\
5332highest priority.")
5333 (start, end, highest)
5334 Lisp_Object start, end, highest;
5335{
5336 int from, to;
5337 int from_byte, to_byte;
6289dd10 5338
d46c5b12
KH
5339 CHECK_NUMBER_COERCE_MARKER (start, 0);
5340 CHECK_NUMBER_COERCE_MARKER (end, 1);
4ed46869 5341
d46c5b12
KH
5342 validate_region (&start, &end);
5343 from = XINT (start), to = XINT (end);
5344 from_byte = CHAR_TO_BYTE (from);
5345 to_byte = CHAR_TO_BYTE (to);
6289dd10 5346
d46c5b12
KH
5347 if (from < GPT && to >= GPT)
5348 move_gap_both (to, to_byte);
4ed46869 5349
d46c5b12
KH
5350 return detect_coding_system (BYTE_POS_ADDR (from_byte),
5351 to_byte - from_byte,
5352 !NILP (highest));
5353}
6289dd10 5354
d46c5b12
KH
5355DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5356 1, 2, 0,
5357 "Detect coding system of the text in STRING.\n\
5358Return a list of possible coding systems ordered by priority.\n\
5359\n\
80e803b4
KH
5360If only ASCII characters are found, it returns a list of single element\n\
5361`undecided' or its subsidiary coding system according to a detected\n\
5362end-of-line format.\n\
d46c5b12
KH
5363\n\
5364If optional argument HIGHEST is non-nil, return the coding system of\n\
5365highest priority.")
5366 (string, highest)
5367 Lisp_Object string, highest;
5368{
5369 CHECK_STRING (string, 0);
4ed46869 5370
d46c5b12 5371 return detect_coding_system (XSTRING (string)->data,
fc932ac6 5372 STRING_BYTES (XSTRING (string)),
d46c5b12 5373 !NILP (highest));
4ed46869
KH
5374}
5375
4031e2bf
KH
5376Lisp_Object
5377code_convert_region1 (start, end, coding_system, encodep)
d46c5b12 5378 Lisp_Object start, end, coding_system;
4031e2bf 5379 int encodep;
3a73fa5d
RS
5380{
5381 struct coding_system coding;
4031e2bf 5382 int from, to, len;
3a73fa5d 5383
d46c5b12
KH
5384 CHECK_NUMBER_COERCE_MARKER (start, 0);
5385 CHECK_NUMBER_COERCE_MARKER (end, 1);
3a73fa5d
RS
5386 CHECK_SYMBOL (coding_system, 2);
5387
d46c5b12
KH
5388 validate_region (&start, &end);
5389 from = XFASTINT (start);
5390 to = XFASTINT (end);
5391
3a73fa5d 5392 if (NILP (coding_system))
d46c5b12
KH
5393 return make_number (to - from);
5394
3a73fa5d 5395 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
d46c5b12 5396 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
3a73fa5d 5397
d46c5b12 5398 coding.mode |= CODING_MODE_LAST_BLOCK;
b73bfc1c
KH
5399 coding.src_multibyte = coding.dst_multibyte
5400 = !NILP (current_buffer->enable_multibyte_characters);
fb88bf2d
KH
5401 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5402 &coding, encodep, 1);
f072a3e8 5403 Vlast_coding_system_used = coding.symbol;
fb88bf2d 5404 return make_number (coding.produced_char);
4031e2bf
KH
5405}
5406
5407DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5408 3, 3, "r\nzCoding system: ",
5409 "Decode the current region by specified coding system.\n\
5410When called from a program, takes three arguments:\n\
5411START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
f072a3e8
RS
5412This function sets `last-coding-system-used' to the precise coding system\n\
5413used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5414not fully specified.)\n\
5415It returns the length of the decoded text.")
4031e2bf
KH
5416 (start, end, coding_system)
5417 Lisp_Object start, end, coding_system;
5418{
5419 return code_convert_region1 (start, end, coding_system, 0);
3a73fa5d
RS
5420}
5421
5422DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5423 3, 3, "r\nzCoding system: ",
d46c5b12 5424 "Encode the current region by specified coding system.\n\
3a73fa5d 5425When called from a program, takes three arguments:\n\
d46c5b12 5426START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
f072a3e8
RS
5427This function sets `last-coding-system-used' to the precise coding system\n\
5428used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5429not fully specified.)\n\
5430It returns the length of the encoded text.")
d46c5b12
KH
5431 (start, end, coding_system)
5432 Lisp_Object start, end, coding_system;
3a73fa5d 5433{
4031e2bf
KH
5434 return code_convert_region1 (start, end, coding_system, 1);
5435}
3a73fa5d 5436
4031e2bf
KH
5437Lisp_Object
5438code_convert_string1 (string, coding_system, nocopy, encodep)
5439 Lisp_Object string, coding_system, nocopy;
5440 int encodep;
5441{
5442 struct coding_system coding;
3a73fa5d 5443
4031e2bf
KH
5444 CHECK_STRING (string, 0);
5445 CHECK_SYMBOL (coding_system, 1);
4ed46869 5446
d46c5b12 5447 if (NILP (coding_system))
4031e2bf 5448 return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4ed46869 5449
d46c5b12
KH
5450 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5451 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5f1cd180 5452
d46c5b12 5453 coding.mode |= CODING_MODE_LAST_BLOCK;
b73bfc1c
KH
5454 string = (encodep
5455 ? encode_coding_string (string, &coding, !NILP (nocopy))
5456 : decode_coding_string (string, &coding, !NILP (nocopy)));
f072a3e8 5457 Vlast_coding_system_used = coding.symbol;
ec6d2bb8
KH
5458
5459 return string;
4ed46869
KH
5460}
5461
4ed46869 5462DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
e0e989f6
KH
5463 2, 3, 0,
5464 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
fe487a71 5465Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
f072a3e8
RS
5466if the decoding operation is trivial.\n\
5467This function sets `last-coding-system-used' to the precise coding system\n\
5468used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5469not fully specified.)")
e0e989f6
KH
5470 (string, coding_system, nocopy)
5471 Lisp_Object string, coding_system, nocopy;
4ed46869 5472{
f072a3e8 5473 return code_convert_string1 (string, coding_system, nocopy, 0);
4ed46869
KH
5474}
5475
5476DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
e0e989f6
KH
5477 2, 3, 0,
5478 "Encode STRING to CODING-SYSTEM, and return the result.\n\
fe487a71 5479Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
f072a3e8
RS
5480if the encoding operation is trivial.\n\
5481This function sets `last-coding-system-used' to the precise coding system\n\
5482used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5483not fully specified.)")
e0e989f6
KH
5484 (string, coding_system, nocopy)
5485 Lisp_Object string, coding_system, nocopy;
4ed46869 5486{
f072a3e8 5487 return code_convert_string1 (string, coding_system, nocopy, 1);
4ed46869 5488}
4031e2bf 5489
ecec61c1 5490/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
5491 Do not set Vlast_coding_system_used.
5492
5493 This function is called only from macros DECODE_FILE and
5494 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
5495
5496Lisp_Object
5497code_convert_string_norecord (string, coding_system, encodep)
5498 Lisp_Object string, coding_system;
5499 int encodep;
5500{
5501 struct coding_system coding;
5502
5503 CHECK_STRING (string, 0);
5504 CHECK_SYMBOL (coding_system, 1);
5505
5506 if (NILP (coding_system))
5507 return string;
5508
5509 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5510 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5511
ec6d2bb8 5512 coding.composing = COMPOSITION_DISABLED;
ecec61c1 5513 coding.mode |= CODING_MODE_LAST_BLOCK;
b73bfc1c
KH
5514 return (encodep
5515 ? encode_coding_string (string, &coding, 1)
5516 : decode_coding_string (string, &coding, 1));
ecec61c1 5517}
3a73fa5d 5518\f
4ed46869 5519DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
55ab7be3 5520 "Decode a Japanese character which has CODE in shift_jis encoding.\n\
4ed46869
KH
5521Return the corresponding character.")
5522 (code)
5523 Lisp_Object code;
5524{
5525 unsigned char c1, c2, s1, s2;
5526 Lisp_Object val;
5527
5528 CHECK_NUMBER (code, 0);
5529 s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
55ab7be3
KH
5530 if (s1 == 0)
5531 {
c28a9453
KH
5532 if (s2 < 0x80)
5533 XSETFASTINT (val, s2);
5534 else if (s2 >= 0xA0 || s2 <= 0xDF)
b73bfc1c 5535 XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
c28a9453 5536 else
9da8350f 5537 error ("Invalid Shift JIS code: %x", XFASTINT (code));
55ab7be3
KH
5538 }
5539 else
5540 {
5541 if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
5542 || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
9da8350f 5543 error ("Invalid Shift JIS code: %x", XFASTINT (code));
55ab7be3 5544 DECODE_SJIS (s1, s2, c1, c2);
b73bfc1c 5545 XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
55ab7be3 5546 }
4ed46869
KH
5547 return val;
5548}
5549
5550DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
55ab7be3
KH
5551 "Encode a Japanese character CHAR to shift_jis encoding.\n\
5552Return the corresponding code in SJIS.")
4ed46869
KH
5553 (ch)
5554 Lisp_Object ch;
5555{
bcf26d6a 5556 int charset, c1, c2, s1, s2;
4ed46869
KH
5557 Lisp_Object val;
5558
5559 CHECK_NUMBER (ch, 0);
5560 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
c28a9453
KH
5561 if (charset == CHARSET_ASCII)
5562 {
5563 val = ch;
5564 }
5565 else if (charset == charset_jisx0208
5566 && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
4ed46869
KH
5567 {
5568 ENCODE_SJIS (c1, c2, s1, s2);
bcf26d6a 5569 XSETFASTINT (val, (s1 << 8) | s2);
4ed46869 5570 }
55ab7be3
KH
5571 else if (charset == charset_katakana_jisx0201
5572 && c1 > 0x20 && c2 < 0xE0)
5573 {
5574 XSETFASTINT (val, c1 | 0x80);
5575 }
4ed46869 5576 else
55ab7be3 5577 error ("Can't encode to shift_jis: %d", XFASTINT (ch));
4ed46869
KH
5578 return val;
5579}
5580
5581DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
c28a9453 5582 "Decode a Big5 character which has CODE in BIG5 coding system.\n\
4ed46869
KH
5583Return the corresponding character.")
5584 (code)
5585 Lisp_Object code;
5586{
5587 int charset;
5588 unsigned char b1, b2, c1, c2;
5589 Lisp_Object val;
5590
5591 CHECK_NUMBER (code, 0);
5592 b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
c28a9453
KH
5593 if (b1 == 0)
5594 {
5595 if (b2 >= 0x80)
9da8350f 5596 error ("Invalid BIG5 code: %x", XFASTINT (code));
c28a9453
KH
5597 val = code;
5598 }
5599 else
5600 {
5601 if ((b1 < 0xA1 || b1 > 0xFE)
5602 || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
9da8350f 5603 error ("Invalid BIG5 code: %x", XFASTINT (code));
c28a9453 5604 DECODE_BIG5 (b1, b2, charset, c1, c2);
b73bfc1c 5605 XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
c28a9453 5606 }
4ed46869
KH
5607 return val;
5608}
5609
5610DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
d46c5b12 5611 "Encode the Big5 character CHAR to BIG5 coding system.\n\
4ed46869
KH
5612Return the corresponding character code in Big5.")
5613 (ch)
5614 Lisp_Object ch;
5615{
bcf26d6a 5616 int charset, c1, c2, b1, b2;
4ed46869
KH
5617 Lisp_Object val;
5618
5619 CHECK_NUMBER (ch, 0);
5620 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
c28a9453
KH
5621 if (charset == CHARSET_ASCII)
5622 {
5623 val = ch;
5624 }
5625 else if ((charset == charset_big5_1
5626 && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
5627 || (charset == charset_big5_2
5628 && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
4ed46869
KH
5629 {
5630 ENCODE_BIG5 (charset, c1, c2, b1, b2);
bcf26d6a 5631 XSETFASTINT (val, (b1 << 8) | b2);
4ed46869
KH
5632 }
5633 else
c28a9453 5634 error ("Can't encode to Big5: %d", XFASTINT (ch));
4ed46869
KH
5635 return val;
5636}
3a73fa5d 5637\f
1ba9e4ab
KH
5638DEFUN ("set-terminal-coding-system-internal",
5639 Fset_terminal_coding_system_internal,
5640 Sset_terminal_coding_system_internal, 1, 1, 0, "")
4ed46869
KH
5641 (coding_system)
5642 Lisp_Object coding_system;
5643{
5644 CHECK_SYMBOL (coding_system, 0);
5645 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
70c22245 5646 /* We had better not send unsafe characters to terminal. */
6e85d753 5647 terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
ec6d2bb8
KH
5648 /* Characer composition should be disabled. */
5649 terminal_coding.composing = COMPOSITION_DISABLED;
b73bfc1c
KH
5650 terminal_coding.src_multibyte = 1;
5651 terminal_coding.dst_multibyte = 0;
4ed46869
KH
5652 return Qnil;
5653}
5654
c4825358
KH
5655DEFUN ("set-safe-terminal-coding-system-internal",
5656 Fset_safe_terminal_coding_system_internal,
5657 Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
5658 (coding_system)
5659 Lisp_Object coding_system;
5660{
5661 CHECK_SYMBOL (coding_system, 0);
5662 setup_coding_system (Fcheck_coding_system (coding_system),
5663 &safe_terminal_coding);
ec6d2bb8
KH
5664 /* Characer composition should be disabled. */
5665 safe_terminal_coding.composing = COMPOSITION_DISABLED;
b73bfc1c
KH
5666 safe_terminal_coding.src_multibyte = 1;
5667 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
5668 return Qnil;
5669}
5670
4ed46869
KH
5671DEFUN ("terminal-coding-system",
5672 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
3a73fa5d 5673 "Return coding system specified for terminal output.")
4ed46869
KH
5674 ()
5675{
5676 return terminal_coding.symbol;
5677}
5678
1ba9e4ab
KH
5679DEFUN ("set-keyboard-coding-system-internal",
5680 Fset_keyboard_coding_system_internal,
5681 Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4ed46869
KH
5682 (coding_system)
5683 Lisp_Object coding_system;
5684{
5685 CHECK_SYMBOL (coding_system, 0);
5686 setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
ec6d2bb8
KH
5687 /* Characer composition should be disabled. */
5688 keyboard_coding.composing = COMPOSITION_DISABLED;
4ed46869
KH
5689 return Qnil;
5690}
5691
5692DEFUN ("keyboard-coding-system",
5693 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
3a73fa5d 5694 "Return coding system specified for decoding keyboard input.")
4ed46869
KH
5695 ()
5696{
5697 return keyboard_coding.symbol;
5698}
5699
5700\f
a5d301df
KH
5701DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
5702 Sfind_operation_coding_system, 1, MANY, 0,
5703 "Choose a coding system for an operation based on the target name.\n\
69f76525 5704The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
9ce27fde
KH
5705DECODING-SYSTEM is the coding system to use for decoding\n\
5706\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
5707for encoding (in case OPERATION does encoding).\n\
ccdb79f5
RS
5708\n\
5709The first argument OPERATION specifies an I/O primitive:\n\
5710 For file I/O, `insert-file-contents' or `write-region'.\n\
5711 For process I/O, `call-process', `call-process-region', or `start-process'.\n\
5712 For network I/O, `open-network-stream'.\n\
5713\n\
5714The remaining arguments should be the same arguments that were passed\n\
5715to the primitive. Depending on which primitive, one of those arguments\n\
5716is selected as the TARGET. For example, if OPERATION does file I/O,\n\
5717whichever argument specifies the file name is TARGET.\n\
5718\n\
5719TARGET has a meaning which depends on OPERATION:\n\
4ed46869
KH
5720 For file I/O, TARGET is a file name.\n\
5721 For process I/O, TARGET is a process name.\n\
5722 For network I/O, TARGET is a service name or a port number\n\
5723\n\
02ba4723
KH
5724This function looks up what specified for TARGET in,\n\
5725`file-coding-system-alist', `process-coding-system-alist',\n\
5726or `network-coding-system-alist' depending on OPERATION.\n\
5727They may specify a coding system, a cons of coding systems,\n\
5728or a function symbol to call.\n\
5729In the last case, we call the function with one argument,\n\
9ce27fde 5730which is a list of all the arguments given to this function.")
4ed46869
KH
5731 (nargs, args)
5732 int nargs;
5733 Lisp_Object *args;
5734{
5735 Lisp_Object operation, target_idx, target, val;
5736 register Lisp_Object chain;
5737
5738 if (nargs < 2)
5739 error ("Too few arguments");
5740 operation = args[0];
5741 if (!SYMBOLP (operation)
5742 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
5743 error ("Invalid first arguement");
5744 if (nargs < 1 + XINT (target_idx))
5745 error ("Too few arguments for operation: %s",
5746 XSYMBOL (operation)->name->data);
5747 target = args[XINT (target_idx) + 1];
5748 if (!(STRINGP (target)
5749 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
5750 error ("Invalid %dth argument", XINT (target_idx) + 1);
5751
2e34157c
RS
5752 chain = ((EQ (operation, Qinsert_file_contents)
5753 || EQ (operation, Qwrite_region))
02ba4723 5754 ? Vfile_coding_system_alist
2e34157c 5755 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
5756 ? Vnetwork_coding_system_alist
5757 : Vprocess_coding_system_alist));
4ed46869
KH
5758 if (NILP (chain))
5759 return Qnil;
5760
03699b14 5761 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 5762 {
f44d27ce 5763 Lisp_Object elt;
03699b14 5764 elt = XCAR (chain);
4ed46869
KH
5765
5766 if (CONSP (elt)
5767 && ((STRINGP (target)
03699b14
KR
5768 && STRINGP (XCAR (elt))
5769 && fast_string_match (XCAR (elt), target) >= 0)
5770 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 5771 {
03699b14 5772 val = XCDR (elt);
b19fd4c5
KH
5773 /* Here, if VAL is both a valid coding system and a valid
5774 function symbol, we return VAL as a coding system. */
02ba4723
KH
5775 if (CONSP (val))
5776 return val;
5777 if (! SYMBOLP (val))
5778 return Qnil;
5779 if (! NILP (Fcoding_system_p (val)))
5780 return Fcons (val, val);
b19fd4c5
KH
5781 if (! NILP (Ffboundp (val)))
5782 {
5783 val = call1 (val, Flist (nargs, args));
5784 if (CONSP (val))
5785 return val;
5786 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
5787 return Fcons (val, val);
5788 }
02ba4723
KH
5789 return Qnil;
5790 }
4ed46869
KH
5791 }
5792 return Qnil;
5793}
5794
1397dc18
KH
5795DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal,
5796 Supdate_coding_systems_internal, 0, 0, 0,
5797 "Update internal database for ISO2022 and CCL based coding systems.\n\
fa42c37f
KH
5798When values of any coding categories are changed, you must\n\
5799call this function")
d46c5b12
KH
5800 ()
5801{
5802 int i;
5803
fa42c37f 5804 for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
d46c5b12 5805 {
1397dc18
KH
5806 Lisp_Object val;
5807
5808 val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5809 if (!NILP (val))
5810 {
5811 if (! coding_system_table[i])
5812 coding_system_table[i] = ((struct coding_system *)
5813 xmalloc (sizeof (struct coding_system)));
5814 setup_coding_system (val, coding_system_table[i]);
5815 }
5816 else if (coding_system_table[i])
5817 {
5818 xfree (coding_system_table[i]);
5819 coding_system_table[i] = NULL;
5820 }
d46c5b12 5821 }
1397dc18 5822
d46c5b12
KH
5823 return Qnil;
5824}
5825
66cfb530
KH
5826DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5827 Sset_coding_priority_internal, 0, 0, 0,
5828 "Update internal database for the current value of `coding-category-list'.\n\
5829This function is internal use only.")
5830 ()
5831{
5832 int i = 0, idx;
84d60297
RS
5833 Lisp_Object val;
5834
5835 val = Vcoding_category_list;
66cfb530
KH
5836
5837 while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5838 {
03699b14 5839 if (! SYMBOLP (XCAR (val)))
66cfb530 5840 break;
03699b14 5841 idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
66cfb530
KH
5842 if (idx >= CODING_CATEGORY_IDX_MAX)
5843 break;
5844 coding_priorities[i++] = (1 << idx);
03699b14 5845 val = XCDR (val);
66cfb530
KH
5846 }
5847 /* If coding-category-list is valid and contains all coding
5848 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
fa42c37f 5849 the following code saves Emacs from crashing. */
66cfb530
KH
5850 while (i < CODING_CATEGORY_IDX_MAX)
5851 coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5852
5853 return Qnil;
5854}
5855
4ed46869
KH
5856#endif /* emacs */
5857
5858\f
1397dc18 5859/*** 9. Post-amble ***/
4ed46869 5860
6d74c3aa
KH
5861void
5862init_coding ()
5863{
5864 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5865}
5866
dfcf069d 5867void
4ed46869
KH
5868init_coding_once ()
5869{
5870 int i;
5871
0ef69138 5872 /* Emacs' internal format specific initialize routine. */
4ed46869
KH
5873 for (i = 0; i <= 0x20; i++)
5874 emacs_code_class[i] = EMACS_control_code;
5875 emacs_code_class[0x0A] = EMACS_linefeed_code;
5876 emacs_code_class[0x0D] = EMACS_carriage_return_code;
5877 for (i = 0x21 ; i < 0x7F; i++)
5878 emacs_code_class[i] = EMACS_ascii_code;
5879 emacs_code_class[0x7F] = EMACS_control_code;
ec6d2bb8 5880 for (i = 0x80; i < 0xFF; i++)
4ed46869
KH
5881 emacs_code_class[i] = EMACS_invalid_code;
5882 emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5883 emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5884 emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5885 emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5886
5887 /* ISO2022 specific initialize routine. */
5888 for (i = 0; i < 0x20; i++)
b73bfc1c 5889 iso_code_class[i] = ISO_control_0;
4ed46869
KH
5890 for (i = 0x21; i < 0x7F; i++)
5891 iso_code_class[i] = ISO_graphic_plane_0;
5892 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 5893 iso_code_class[i] = ISO_control_1;
4ed46869
KH
5894 for (i = 0xA1; i < 0xFF; i++)
5895 iso_code_class[i] = ISO_graphic_plane_1;
5896 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5897 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5898 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5899 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5900 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5901 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5902 iso_code_class[ISO_CODE_ESC] = ISO_escape;
5903 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5904 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5905 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5906
e0e989f6 5907 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
e0e989f6
KH
5908
5909 setup_coding_system (Qnil, &keyboard_coding);
5910 setup_coding_system (Qnil, &terminal_coding);
c4825358 5911 setup_coding_system (Qnil, &safe_terminal_coding);
6bc51348 5912 setup_coding_system (Qnil, &default_buffer_file_coding);
9ce27fde 5913
d46c5b12
KH
5914 bzero (coding_system_table, sizeof coding_system_table);
5915
66cfb530
KH
5916 bzero (ascii_skip_code, sizeof ascii_skip_code);
5917 for (i = 0; i < 128; i++)
5918 ascii_skip_code[i] = 1;
5919
9ce27fde
KH
5920#if defined (MSDOS) || defined (WINDOWSNT)
5921 system_eol_type = CODING_EOL_CRLF;
5922#else
5923 system_eol_type = CODING_EOL_LF;
5924#endif
b843d1ae
KH
5925
5926 inhibit_pre_post_conversion = 0;
e0e989f6
KH
5927}
5928
5929#ifdef emacs
5930
dfcf069d 5931void
e0e989f6
KH
5932syms_of_coding ()
5933{
5934 Qtarget_idx = intern ("target-idx");
5935 staticpro (&Qtarget_idx);
5936
bb0115a2
RS
5937 Qcoding_system_history = intern ("coding-system-history");
5938 staticpro (&Qcoding_system_history);
5939 Fset (Qcoding_system_history, Qnil);
5940
9ce27fde 5941 /* Target FILENAME is the first argument. */
e0e989f6 5942 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 5943 /* Target FILENAME is the third argument. */
e0e989f6
KH
5944 Fput (Qwrite_region, Qtarget_idx, make_number (2));
5945
5946 Qcall_process = intern ("call-process");
5947 staticpro (&Qcall_process);
9ce27fde 5948 /* Target PROGRAM is the first argument. */
e0e989f6
KH
5949 Fput (Qcall_process, Qtarget_idx, make_number (0));
5950
5951 Qcall_process_region = intern ("call-process-region");
5952 staticpro (&Qcall_process_region);
9ce27fde 5953 /* Target PROGRAM is the third argument. */
e0e989f6
KH
5954 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5955
5956 Qstart_process = intern ("start-process");
5957 staticpro (&Qstart_process);
9ce27fde 5958 /* Target PROGRAM is the third argument. */
e0e989f6
KH
5959 Fput (Qstart_process, Qtarget_idx, make_number (2));
5960
5961 Qopen_network_stream = intern ("open-network-stream");
5962 staticpro (&Qopen_network_stream);
9ce27fde 5963 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
5964 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5965
4ed46869
KH
5966 Qcoding_system = intern ("coding-system");
5967 staticpro (&Qcoding_system);
5968
5969 Qeol_type = intern ("eol-type");
5970 staticpro (&Qeol_type);
5971
5972 Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5973 staticpro (&Qbuffer_file_coding_system);
5974
5975 Qpost_read_conversion = intern ("post-read-conversion");
5976 staticpro (&Qpost_read_conversion);
5977
5978 Qpre_write_conversion = intern ("pre-write-conversion");
5979 staticpro (&Qpre_write_conversion);
5980
27901516
KH
5981 Qno_conversion = intern ("no-conversion");
5982 staticpro (&Qno_conversion);
5983
5984 Qundecided = intern ("undecided");
5985 staticpro (&Qundecided);
5986
4ed46869
KH
5987 Qcoding_system_p = intern ("coding-system-p");
5988 staticpro (&Qcoding_system_p);
5989
5990 Qcoding_system_error = intern ("coding-system-error");
5991 staticpro (&Qcoding_system_error);
5992
5993 Fput (Qcoding_system_error, Qerror_conditions,
5994 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5995 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 5996 build_string ("Invalid coding system"));
4ed46869 5997
d46c5b12
KH
5998 Qcoding_category = intern ("coding-category");
5999 staticpro (&Qcoding_category);
4ed46869
KH
6000 Qcoding_category_index = intern ("coding-category-index");
6001 staticpro (&Qcoding_category_index);
6002
d46c5b12
KH
6003 Vcoding_category_table
6004 = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
6005 staticpro (&Vcoding_category_table);
4ed46869
KH
6006 {
6007 int i;
6008 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
6009 {
d46c5b12
KH
6010 XVECTOR (Vcoding_category_table)->contents[i]
6011 = intern (coding_category_name[i]);
6012 Fput (XVECTOR (Vcoding_category_table)->contents[i],
6013 Qcoding_category_index, make_number (i));
4ed46869
KH
6014 }
6015 }
6016
f967223b
KH
6017 Qtranslation_table = intern ("translation-table");
6018 staticpro (&Qtranslation_table);
1397dc18 6019 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
bdd9fb48 6020
f967223b
KH
6021 Qtranslation_table_id = intern ("translation-table-id");
6022 staticpro (&Qtranslation_table_id);
84fbb8a0 6023
f967223b
KH
6024 Qtranslation_table_for_decode = intern ("translation-table-for-decode");
6025 staticpro (&Qtranslation_table_for_decode);
a5d301df 6026
f967223b
KH
6027 Qtranslation_table_for_encode = intern ("translation-table-for-encode");
6028 staticpro (&Qtranslation_table_for_encode);
a5d301df 6029
70c22245
KH
6030 Qsafe_charsets = intern ("safe-charsets");
6031 staticpro (&Qsafe_charsets);
6032
1397dc18
KH
6033 Qvalid_codes = intern ("valid-codes");
6034 staticpro (&Qvalid_codes);
6035
9ce27fde
KH
6036 Qemacs_mule = intern ("emacs-mule");
6037 staticpro (&Qemacs_mule);
6038
d46c5b12
KH
6039 Qraw_text = intern ("raw-text");
6040 staticpro (&Qraw_text);
6041
4ed46869
KH
6042 defsubr (&Scoding_system_p);
6043 defsubr (&Sread_coding_system);
6044 defsubr (&Sread_non_nil_coding_system);
6045 defsubr (&Scheck_coding_system);
6046 defsubr (&Sdetect_coding_region);
d46c5b12 6047 defsubr (&Sdetect_coding_string);
4ed46869
KH
6048 defsubr (&Sdecode_coding_region);
6049 defsubr (&Sencode_coding_region);
6050 defsubr (&Sdecode_coding_string);
6051 defsubr (&Sencode_coding_string);
6052 defsubr (&Sdecode_sjis_char);
6053 defsubr (&Sencode_sjis_char);
6054 defsubr (&Sdecode_big5_char);
6055 defsubr (&Sencode_big5_char);
1ba9e4ab 6056 defsubr (&Sset_terminal_coding_system_internal);
c4825358 6057 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 6058 defsubr (&Sterminal_coding_system);
1ba9e4ab 6059 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 6060 defsubr (&Skeyboard_coding_system);
a5d301df 6061 defsubr (&Sfind_operation_coding_system);
1397dc18 6062 defsubr (&Supdate_coding_systems_internal);
66cfb530 6063 defsubr (&Sset_coding_priority_internal);
4ed46869 6064
4608c386
KH
6065 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
6066 "List of coding systems.\n\
6067\n\
6068Do not alter the value of this variable manually. This variable should be\n\
6069updated by the functions `make-coding-system' and\n\
6070`define-coding-system-alias'.");
6071 Vcoding_system_list = Qnil;
6072
6073 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
6074 "Alist of coding system names.\n\
6075Each element is one element list of coding system name.\n\
6076This variable is given to `completing-read' as TABLE argument.\n\
6077\n\
6078Do not alter the value of this variable manually. This variable should be\n\
6079updated by the functions `make-coding-system' and\n\
6080`define-coding-system-alias'.");
6081 Vcoding_system_alist = Qnil;
6082
4ed46869
KH
6083 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
6084 "List of coding-categories (symbols) ordered by priority.");
6085 {
6086 int i;
6087
6088 Vcoding_category_list = Qnil;
6089 for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
6090 Vcoding_category_list
d46c5b12
KH
6091 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
6092 Vcoding_category_list);
4ed46869
KH
6093 }
6094
6095 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10bff6f1 6096 "Specify the coding system for read operations.\n\
2ebb362d 6097It is useful to bind this variable with `let', but do not set it globally.\n\
4ed46869 6098If the value is a coding system, it is used for decoding on read operation.\n\
a67a9c66 6099If not, an appropriate element is used from one of the coding system alists:\n\
10bff6f1 6100There are three such tables, `file-coding-system-alist',\n\
a67a9c66 6101`process-coding-system-alist', and `network-coding-system-alist'.");
4ed46869
KH
6102 Vcoding_system_for_read = Qnil;
6103
6104 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10bff6f1 6105 "Specify the coding system for write operations.\n\
928aedd8
RS
6106Programs bind this variable with `let', but you should not set it globally.\n\
6107If the value is a coding system, it is used for encoding of output,\n\
6108when writing it to a file and when sending it to a file or subprocess.\n\
6109\n\
6110If this does not specify a coding system, an appropriate element\n\
6111is used from one of the coding system alists:\n\
10bff6f1 6112There are three such tables, `file-coding-system-alist',\n\
928aedd8
RS
6113`process-coding-system-alist', and `network-coding-system-alist'.\n\
6114For output to files, if the above procedure does not specify a coding system,\n\
6115the value of `buffer-file-coding-system' is used.");
4ed46869
KH
6116 Vcoding_system_for_write = Qnil;
6117
6118 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
a67a9c66 6119 "Coding system used in the latest file or process I/O.");
4ed46869
KH
6120 Vlast_coding_system_used = Qnil;
6121
9ce27fde 6122 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
f07f4a24 6123 "*Non-nil means always inhibit code conversion of end-of-line format.\n\
94c7a214
DL
6124See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6125such conversion.");
9ce27fde
KH
6126 inhibit_eol_conversion = 0;
6127
ed29121d
EZ
6128 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
6129 "Non-nil means process buffer inherits coding system of process output.\n\
6130Bind it to t if the process output is to be treated as if it were a file\n\
6131read from some filesystem.");
6132 inherit_process_coding_system = 0;
6133
02ba4723
KH
6134 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
6135 "Alist to decide a coding system to use for a file I/O operation.\n\
6136The format is ((PATTERN . VAL) ...),\n\
6137where PATTERN is a regular expression matching a file name,\n\
6138VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6139If VAL is a coding system, it is used for both decoding and encoding\n\
6140the file contents.\n\
6141If VAL is a cons of coding systems, the car part is used for decoding,\n\
6142and the cdr part is used for encoding.\n\
6143If VAL is a function symbol, the function must return a coding system\n\
6144or a cons of coding systems which are used as above.\n\
e0e989f6 6145\n\
a85a871a 6146See also the function `find-operation-coding-system'\n\
eda284ac 6147and the variable `auto-coding-alist'.");
02ba4723
KH
6148 Vfile_coding_system_alist = Qnil;
6149
6150 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
6151 "Alist to decide a coding system to use for a process I/O operation.\n\
6152The format is ((PATTERN . VAL) ...),\n\
6153where PATTERN is a regular expression matching a program name,\n\
6154VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6155If VAL is a coding system, it is used for both decoding what received\n\
6156from the program and encoding what sent to the program.\n\
6157If VAL is a cons of coding systems, the car part is used for decoding,\n\
6158and the cdr part is used for encoding.\n\
6159If VAL is a function symbol, the function must return a coding system\n\
6160or a cons of coding systems which are used as above.\n\
4ed46869 6161\n\
9ce27fde 6162See also the function `find-operation-coding-system'.");
02ba4723
KH
6163 Vprocess_coding_system_alist = Qnil;
6164
6165 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
6166 "Alist to decide a coding system to use for a network I/O operation.\n\
6167The format is ((PATTERN . VAL) ...),\n\
6168where PATTERN is a regular expression matching a network service name\n\
6169or is a port number to connect to,\n\
6170VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6171If VAL is a coding system, it is used for both decoding what received\n\
6172from the network stream and encoding what sent to the network stream.\n\
6173If VAL is a cons of coding systems, the car part is used for decoding,\n\
6174and the cdr part is used for encoding.\n\
6175If VAL is a function symbol, the function must return a coding system\n\
6176or a cons of coding systems which are used as above.\n\
4ed46869 6177\n\
9ce27fde 6178See also the function `find-operation-coding-system'.");
02ba4723 6179 Vnetwork_coding_system_alist = Qnil;
4ed46869 6180
68c45bf0
PE
6181 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
6182 "Coding system to use with system messages.");
6183 Vlocale_coding_system = Qnil;
6184
005f0d35 6185 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9
EZ
6186 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
6187 "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6188 eol_mnemonic_unix = build_string (":");
4ed46869 6189
7722baf9
EZ
6190 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
6191 "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6192 eol_mnemonic_dos = build_string ("\\");
4ed46869 6193
7722baf9
EZ
6194 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
6195 "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6196 eol_mnemonic_mac = build_string ("/");
4ed46869 6197
7722baf9
EZ
6198 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
6199 "*String displayed in mode line when end-of-line format is not yet determined.");
6200 eol_mnemonic_undecided = build_string (":");
4ed46869 6201
84fbb8a0 6202 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
f967223b 6203 "*Non-nil enables character translation while encoding and decoding.");
84fbb8a0 6204 Venable_character_translation = Qt;
bdd9fb48 6205
f967223b
KH
6206 DEFVAR_LISP ("standard-translation-table-for-decode",
6207 &Vstandard_translation_table_for_decode,
84fbb8a0 6208 "Table for translating characters while decoding.");
f967223b 6209 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 6210
f967223b
KH
6211 DEFVAR_LISP ("standard-translation-table-for-encode",
6212 &Vstandard_translation_table_for_encode,
84fbb8a0 6213 "Table for translationg characters while encoding.");
f967223b 6214 Vstandard_translation_table_for_encode = Qnil;
4ed46869
KH
6215
6216 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
6217 "Alist of charsets vs revision numbers.\n\
6218While encoding, if a charset (car part of an element) is found,\n\
6219designate it with the escape sequence identifing revision (cdr part of the element).");
6220 Vcharset_revision_alist = Qnil;
02ba4723
KH
6221
6222 DEFVAR_LISP ("default-process-coding-system",
6223 &Vdefault_process_coding_system,
6224 "Cons of coding systems used for process I/O by default.\n\
6225The car part is used for decoding a process output,\n\
6226the cdr part is used for encoding a text to be sent to a process.");
6227 Vdefault_process_coding_system = Qnil;
c4825358 6228
3f003981
KH
6229 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
6230 "Table of extra Latin codes in the range 128..159 (inclusive).\n\
c4825358
KH
6231This is a vector of length 256.\n\
6232If Nth element is non-nil, the existence of code N in a file\n\
bb0115a2 6233\(or output of subprocess) doesn't prevent it to be detected as\n\
3f003981
KH
6234a coding system of ISO 2022 variant which has a flag\n\
6235`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
c4825358
KH
6236or reading output of a subprocess.\n\
6237Only 128th through 159th elements has a meaning.");
3f003981 6238 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
6239
6240 DEFVAR_LISP ("select-safe-coding-system-function",
6241 &Vselect_safe_coding_system_function,
6242 "Function to call to select safe coding system for encoding a text.\n\
6243\n\
6244If set, this function is called to force a user to select a proper\n\
6245coding system which can encode the text in the case that a default\n\
6246coding system used in each operation can't encode the text.\n\
6247\n\
a85a871a 6248The default value is `select-safe-coding-system' (which see).");
d46c5b12
KH
6249 Vselect_safe_coding_system_function = Qnil;
6250
4ed46869
KH
6251}
6252
68c45bf0
PE
6253char *
6254emacs_strerror (error_number)
6255 int error_number;
6256{
6257 char *str;
6258
ca9c0567 6259 synchronize_system_messages_locale ();
68c45bf0
PE
6260 str = strerror (error_number);
6261
6262 if (! NILP (Vlocale_coding_system))
6263 {
6264 Lisp_Object dec = code_convert_string_norecord (build_string (str),
6265 Vlocale_coding_system,
6266 0);
6267 str = (char *) XSTRING (dec)->data;
6268 }
6269
6270 return str;
6271}
6272
4ed46869 6273#endif /* emacs */
005f0d35 6274(encode_eol):