Patch from rms.
[bpt/emacs.git] / src / coding.c
CommitLineData
4ed46869 1/* Coding system handler (conversion, detection, and etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
4ed46869 4
369314dc
KH
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
4ed46869 11
369314dc
KH
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
4ed46869 16
369314dc
KH
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
4ed46869
KH
21
22/*** TABLE OF CONTENTS ***
23
24 1. Preamble
0ef69138 25 2. Emacs' internal format (emacs-mule) handlers
4ed46869
KH
26 3. ISO2022 handlers
27 4. Shift-JIS and BIG5 handlers
1397dc18
KH
28 5. CCL handlers
29 6. End-of-line handlers
30 7. C library functions
31 8. Emacs Lisp library functions
32 9. Post-amble
4ed46869
KH
33
34*/
35
36/*** GENERAL NOTE on CODING SYSTEM ***
37
38 Coding system is an encoding mechanism of one or more character
39 sets. Here's a list of coding systems which Emacs can handle. When
40 we say "decode", it means converting some other coding system to
0ef69138
KH
41 Emacs' internal format (emacs-mule), and when we say "encode",
42 it means converting the coding system emacs-mule to some other
43 coding system.
4ed46869 44
0ef69138 45 0. Emacs' internal format (emacs-mule)
4ed46869
KH
46
47 Emacs itself holds a multi-lingual character in a buffer and a string
f4dee582 48 in a special format. Details are described in section 2.
4ed46869
KH
49
50 1. ISO2022
51
52 The most famous coding system for multiple character sets. X's
f4dee582
RS
53 Compound Text, various EUCs (Extended Unix Code), and coding
54 systems used in Internet communication such as ISO-2022-JP are
55 all variants of ISO2022. Details are described in section 3.
4ed46869
KH
56
57 2. SJIS (or Shift-JIS or MS-Kanji-Code)
58
59 A coding system to encode character sets: ASCII, JISX0201, and
60 JISX0208. Widely used for PC's in Japan. Details are described in
f4dee582 61 section 4.
4ed46869
KH
62
63 3. BIG5
64
65 A coding system to encode character sets: ASCII and Big5. Widely
66 used by Chinese (mainly in Taiwan and Hong Kong). Details are
f4dee582
RS
67 described in section 4. In this file, when we write "BIG5"
68 (all uppercase), we mean the coding system, and when we write
69 "Big5" (capitalized), we mean the character set.
4ed46869 70
27901516
KH
71 4. Raw text
72
4608c386
KH
73 A coding system for a text containing random 8-bit code. Emacs does
74 no code conversion on such a text except for end-of-line format.
27901516
KH
75
76 5. Other
4ed46869 77
f4dee582 78 If a user wants to read/write a text encoded in a coding system not
4ed46869
KH
79 listed above, he can supply a decoder and an encoder for it in CCL
80 (Code Conversion Language) programs. Emacs executes the CCL program
81 while reading/writing.
82
d46c5b12
KH
83 Emacs represents a coding system by a Lisp symbol that has a property
84 `coding-system'. But, before actually using the coding system, the
4ed46869 85 information about it is set in a structure of type `struct
f4dee582 86 coding_system' for rapid processing. See section 6 for more details.
4ed46869
KH
87
88*/
89
90/*** GENERAL NOTES on END-OF-LINE FORMAT ***
91
92 How end-of-line of a text is encoded depends on a system. For
93 instance, Unix's format is just one byte of `line-feed' code,
f4dee582 94 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
95 `line-feed' codes. MacOS's format is usually one byte of
96 `carriage-return'.
4ed46869 97
f4dee582
RS
98 Since text characters encoding and end-of-line encoding are
99 independent, any coding system described above can take
4ed46869 100 any format of end-of-line. So, Emacs has information of format of
f4dee582 101 end-of-line in each coding-system. See section 6 for more details.
4ed46869
KH
102
103*/
104
105/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
106
107 These functions check if a text between SRC and SRC_END is encoded
108 in the coding system category XXX. Each returns an integer value in
109 which appropriate flag bits for the category XXX are set. The flag
110 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
111 template of these functions. */
112#if 0
113int
0ef69138 114detect_coding_emacs_mule (src, src_end)
4ed46869
KH
115 unsigned char *src, *src_end;
116{
117 ...
118}
119#endif
120
121/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
122
123 These functions decode SRC_BYTES length text at SOURCE encoded in
0ef69138 124 CODING to Emacs' internal format (emacs-mule). The resulting text
d46c5b12
KH
125 goes to a place pointed to by DESTINATION, the length of which
126 should not exceed DST_BYTES. These functions set the information of
127 original and decoded texts in the members produced, produced_char,
128 consumed, and consumed_char of the structure *CODING.
129
130 The return value is an integer (CODING_FINISH_XXX) indicating how
131 the decoding finished.
132
133 DST_BYTES zero means that source area and destination area are
134 overlapped, which means that we can produce a decoded text until it
135 reaches at the head of not-yet-decoded source text.
136
137 Below is a template of these functions. */
4ed46869 138#if 0
d46c5b12 139decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
140 struct coding_system *coding;
141 unsigned char *source, *destination;
142 int src_bytes, dst_bytes;
4ed46869
KH
143{
144 ...
145}
146#endif
147
148/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
149
0ef69138
KH
150 These functions encode SRC_BYTES length text at SOURCE of Emacs'
151 internal format (emacs-mule) to CODING. The resulting text goes to
f4dee582 152 a place pointed to by DESTINATION, the length of which should not
d46c5b12
KH
153 exceed DST_BYTES. These functions set the information of
154 original and encoded texts in the members produced, produced_char,
155 consumed, and consumed_char of the structure *CODING.
156
157 The return value is an integer (CODING_FINISH_XXX) indicating how
158 the encoding finished.
159
160 DST_BYTES zero means that source area and destination area are
161 overlapped, which means that we can produce an encoded text until it
162 reaches the head of the not-yet-encoded source text.
163
164 Below is a template of these functions. */
4ed46869 165#if 0
d46c5b12 166encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
167 struct coding_system *coding;
168 unsigned char *source, *destination;
169 int src_bytes, dst_bytes;
4ed46869
KH
170{
171 ...
172}
173#endif
174
175/*** COMMONLY USED MACROS ***/
176
177/* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
178 THREE_MORE_BYTES safely get one, two, and three bytes from the
179 source text respectively. If there are not enough bytes in the
180 source, they jump to `label_end_of_loop'. The caller should set
181 variables `src' and `src_end' to appropriate areas in advance. */
182
/* Fetch the next source byte into C1 and advance `src'.  When `src'
   has already reached `src_end', jump to `label_end_of_loop' instead.  */
#define ONE_MORE_BYTE(c1)               \
  do {                                  \
    if (src >= src_end)                 \
      goto label_end_of_loop;           \
    c1 = *src++;                        \
  } while (0)
190
/* Fetch the next two source bytes into C1 and C2, advancing `src' past
   them.  If fewer than two bytes remain before `src_end', jump to
   `label_end_of_loop' without consuming anything.  The remaining
   length is taken as a pointer difference so that no pointer beyond
   `src_end' is ever formed.  */
#define TWO_MORE_BYTES(c1, c2)          \
  do {                                  \
    if (src_end - src >= 2)             \
      c1 = *src++, c2 = *src++;         \
    else                                \
      goto label_end_of_loop;           \
  } while (0)
198
/* Fetch the next three source bytes into C1, C2 and C3, advancing
   `src' past them.  If fewer than three bytes remain before `src_end',
   jump to `label_end_of_loop' without consuming anything.  The
   remaining length is taken as a pointer difference so that no pointer
   beyond `src_end' is ever formed.  */
#define THREE_MORE_BYTES(c1, c2, c3)            \
  do {                                          \
    if (src_end - src >= 3)                     \
      c1 = *src++, c2 = *src++, c3 = *src++;    \
    else                                        \
      goto label_end_of_loop;                   \
  } while (0)
206
207/* The following three macros DECODE_CHARACTER_ASCII,
208 DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
209 the multi-byte form of a character of each class at the place
210 pointed by `dst'. The caller should set the variable `dst' to
211 point to an appropriate area and the variable `coding' to point to
212 the coding-system of the currently decoding text in advance. */
213
214/* Decode one ASCII character C. */
215
de79a6a5
KH
/* Store the decoded form of ASCII character C at `dst'.
   Outside a composition the code is written as a single byte with its
   MSB cleared and `coding->produced_char' is incremented.  Within a
   composition it is written as the two bytes 0xA0 and C | 0x80,
   `coding->composed_chars' is incremented, and
   `coding->fake_multibyte' is set when the second byte is below 0xA0.  */
#define DECODE_CHARACTER_ASCII(c)                               \
  do {                                                          \
    if (! COMPOSING_P (coding->composing))                      \
      {                                                         \
        /* ASCII may have been invoked to GR, so the MSB        \
           has to be cleared here.  */                          \
        *dst++ = (c) & 0x7F;                                    \
        coding->produced_char++;                                \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = 0xA0;                                          \
        *dst++ = (c) | 0x80;                                    \
        coding->composed_chars++;                               \
        if (((c) | 0x80) < 0xA0)                                \
          coding->fake_multibyte = 1;                           \
      }                                                         \
  } while (0)
233
f4dee582 234/* Decode one DIMENSION1 character whose charset is CHARSET and whose
4ed46869
KH
235 position-code is C. */
236
/* Store the multi-byte form of a one-dimensional character at `dst'.
   CHARSET selects the leading codes and C is the position-code.
   The base leading-code is written first (raised by 0x20 while
   composing), then the extended leading-code if it is nonzero, then
   C | 0x80.  `coding->composed_chars' or `coding->produced_char' is
   incremented depending on whether a composition is in progress, and
   `coding->fake_multibyte' is set when C | 0x80 is below 0xA0.  */
#define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
  do {                                                                  \
    unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
    if (COMPOSING_P (coding->composing))                                \
      {                                                                 \
        *dst++ = leading_code + 0x20;                                   \
        coding->composed_chars++;                                       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = leading_code;                                          \
        coding->produced_char++;                                        \
      }                                                                 \
    /* Assign first, then test explicitly, so the assignment is not     \
       mistaken for a comparison.  */                                   \
    leading_code = CHARSET_LEADING_CODE_EXT (charset);                  \
    if (leading_code != 0)                                              \
      *dst++ = leading_code;                                            \
    *dst++ = (c) | 0x80;                                                \
    if (((c) | 0x80) < 0xA0)                                            \
      coding->fake_multibyte = 1;                                       \
  } while (0)
256
f4dee582 257/* Decode one DIMENSION2 character whose charset is CHARSET and whose
4ed46869
KH
258 position-codes are C1 and C2. */
259
/* Store the multi-byte form of a two-dimensional character at `dst'.
   The leading code(s) and first position-code C1 are emitted through
   DECODE_CHARACTER_DIMENSION1; the second position-code C2 is then
   appended as C2 | 0x80, and `coding->fake_multibyte' is set when that
   byte is below 0xA0.  */
#define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
  do {                                                  \
    DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
    *dst = (c2) | 0x80;                                 \
    dst++;                                              \
    if (0xA0 > ((c2) | 0x80))                           \
      coding->fake_multibyte = 1;                       \
  } while (0)
267
268\f
269/*** 1. Preamble ***/
270
68c45bf0
PE
271#ifdef emacs
272#include <config.h>
273#endif
274
4ed46869
KH
275#include <stdio.h>
276
277#ifdef emacs
278
4ed46869
KH
279#include "lisp.h"
280#include "buffer.h"
281#include "charset.h"
282#include "ccl.h"
283#include "coding.h"
284#include "window.h"
285
286#else /* not emacs */
287
288#include "mulelib.h"
289
290#endif /* not emacs */
291
292Lisp_Object Qcoding_system, Qeol_type;
293Lisp_Object Qbuffer_file_coding_system;
294Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
27901516 295Lisp_Object Qno_conversion, Qundecided;
bb0115a2 296Lisp_Object Qcoding_system_history;
70c22245 297Lisp_Object Qsafe_charsets;
1397dc18 298Lisp_Object Qvalid_codes;
4ed46869
KH
299
300extern Lisp_Object Qinsert_file_contents, Qwrite_region;
301Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
302Lisp_Object Qstart_process, Qopen_network_stream;
303Lisp_Object Qtarget_idx;
304
d46c5b12
KH
305Lisp_Object Vselect_safe_coding_system_function;
306
7722baf9
EZ
307/* Mnemonic string for each format of end-of-line. */
308Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
309/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 310 decided. */
7722baf9 311Lisp_Object eol_mnemonic_undecided;
4ed46869 312
9ce27fde
KH
313/* Format of end-of-line decided by system. This is CODING_EOL_LF on
314 Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
315int system_eol_type;
316
4ed46869
KH
317#ifdef emacs
318
4608c386
KH
319Lisp_Object Vcoding_system_list, Vcoding_system_alist;
320
321Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 322
d46c5b12
KH
323/* Coding system emacs-mule and raw-text are for converting only
324 end-of-line format. */
325Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 326
4ed46869
KH
327/* Coding-systems are handed between Emacs Lisp programs and C internal
328 routines by the following three variables. */
329/* Coding-system for reading files and receiving data from process. */
330Lisp_Object Vcoding_system_for_read;
331/* Coding-system for writing files and sending data to process. */
332Lisp_Object Vcoding_system_for_write;
333/* Coding-system actually used in the latest I/O. */
334Lisp_Object Vlast_coding_system_used;
335
c4825358 336/* A vector of length 256 which contains information about special
94487c4e 337 Latin codes (especially for dealing with Microsoft codes). */
3f003981 338Lisp_Object Vlatin_extra_code_table;
c4825358 339
9ce27fde
KH
340/* Flag to inhibit code conversion of end-of-line format. */
341int inhibit_eol_conversion;
342
ed29121d
EZ
343/* Flag to make buffer-file-coding-system inherit from process-coding. */
344int inherit_process_coding_system;
345
c4825358 346/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
347struct coding_system terminal_coding;
348
c4825358
KH
349/* Coding system to be used to encode text for terminal display when
350 terminal coding system is nil. */
351struct coding_system safe_terminal_coding;
352
353/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
354struct coding_system keyboard_coding;
355
6bc51348
KH
356/* Default coding system to be used to write a file. */
357struct coding_system default_buffer_file_coding;
358
02ba4723
KH
359Lisp_Object Vfile_coding_system_alist;
360Lisp_Object Vprocess_coding_system_alist;
361Lisp_Object Vnetwork_coding_system_alist;
4ed46869 362
68c45bf0
PE
363Lisp_Object Vlocale_coding_system;
364
4ed46869
KH
365#endif /* emacs */
366
d46c5b12 367Lisp_Object Qcoding_category, Qcoding_category_index;
4ed46869
KH
368
369/* List of symbols `coding-category-xxx' ordered by priority. */
370Lisp_Object Vcoding_category_list;
371
d46c5b12
KH
372/* Table of coding categories (Lisp symbols). */
373Lisp_Object Vcoding_category_table;
4ed46869
KH
374
375/* Table of names of symbol for each coding-category. */
/* Twelve names are listed; the array itself is sized by
   CODING_CATEGORY_IDX_MAX.
   NOTE(review): the entries are presumably indexed by the
   CODING_CATEGORY_IDX_XXX constants, so their order would have to
   match those definitions -- confirm before reordering.  */
376char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
0ef69138 377 "coding-category-emacs-mule",
4ed46869
KH
378 "coding-category-sjis",
379 "coding-category-iso-7",
d46c5b12 380 "coding-category-iso-7-tight",
4ed46869
KH
381 "coding-category-iso-8-1",
382 "coding-category-iso-8-2",
7717c392
KH
383 "coding-category-iso-7-else",
384 "coding-category-iso-8-else",
89fa8b36 385 "coding-category-ccl",
4ed46869 386 "coding-category-big5",
27901516 387 "coding-category-raw-text",
89fa8b36 388 "coding-category-binary"
4ed46869
KH
389};
390
66cfb530 391/* Table of pointers to coding systems corresponding to each coding
d46c5b12
KH
392 categories. */
393struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
394
66cfb530
KH
395/* Table of coding category masks. Nth element is a mask for a coding
396 category whose priority is Nth. */
397static
398int coding_priorities[CODING_CATEGORY_IDX_MAX];
399
f967223b
KH
400/* Flag to tell if we look up translation table on character code
401 conversion. */
84fbb8a0 402Lisp_Object Venable_character_translation;
f967223b
KH
403/* Standard translation table to look up on decoding (reading). */
404Lisp_Object Vstandard_translation_table_for_decode;
405/* Standard translation table to look up on encoding (writing). */
406Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 407
f967223b
KH
408Lisp_Object Qtranslation_table;
409Lisp_Object Qtranslation_table_id;
410Lisp_Object Qtranslation_table_for_decode;
411Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
412
413/* Alist of charsets vs revision number. */
414Lisp_Object Vcharset_revision_alist;
415
02ba4723
KH
416/* Default coding systems used for process I/O. */
417Lisp_Object Vdefault_process_coding_system;
418
b843d1ae
KH
419/* Global flag to tell that we can't call post-read-conversion and
420 pre-write-conversion functions. Usually the value is zero, but it
421 is set to 1 temporarily while such functions are running. This is
422 to avoid infinite recursive call. */
423static int inhibit_pre_post_conversion;
424
4ed46869 425\f
0ef69138 426/*** 2. Emacs internal format (emacs-mule) handlers ***/
4ed46869
KH
427
428/* Emacs' internal format for encoding multiple character sets is a
f4dee582
RS
429 kind of multi-byte encoding, i.e. characters are encoded by
430 variable-length sequences of one-byte codes. ASCII characters
431 and control characters (e.g. `tab', `newline') are represented by
432 one-byte sequences which are their ASCII codes, in the range 0x00
433 through 0x7F. The other characters are represented by a sequence
434 of `base leading-code', optional `extended leading-code', and one
435 or two `position-code's. The length of the sequence is determined
436 by the base leading-code. Leading-code takes the range 0x80
437 through 0x9F, whereas extended leading-code and position-code take
438 the range 0xA0 through 0xFF. See `charset.h' for more details
439 about leading-code and position-code.
440
441 There's one exception to this rule. Special leading-code
4ed46869
KH
442 `leading-code-composition' denotes that the following several
443 characters should be composed into one character. Leading-codes of
444 components (except for ASCII) are added 0x20. An ASCII character
445 component is represented by a 2-byte sequence of `0xA0' and
f4dee582
RS
446 `ASCII-code + 0x80'. See also the comments in `charset.h' for the
447 details of composite character. Hence, we can summarize the code
4ed46869
KH
448 range as follows:
449
450 --- CODE RANGE of Emacs' internal format ---
451 (character set) (range)
452 ASCII 0x00 .. 0x7F
453 ELSE (1st byte) 0x80 .. 0x9F
454 (rest bytes) 0xA0 .. 0xFF
455 ---------------------------------------------
456
457 */
458
459enum emacs_code_class_type emacs_code_class[256];
460
/* Consume one byte from `src' and continue with the next statement
   only when that byte is 0xA0 or greater.  When no byte is left before
   `src_end', jump to `label_end_of_switch'; when the byte is below
   0xA0, make the enclosing function return 0.  */
#define CHECK_CODE_RANGE_A0_FF                  \
  do {                                          \
    if (src < src_end)                          \
      {                                         \
        if (*src++ < 0xA0)                      \
          return 0;                             \
      }                                         \
    else                                        \
      goto label_end_of_switch;                 \
  } while (0)
470
471/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
472 Check if a text is encoded in Emacs' internal format. If it is,
d46c5b12 473 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
4ed46869
KH
474
475int
0ef69138 476detect_coding_emacs_mule (src, src_end)
4ed46869
KH
477 unsigned char *src, *src_end;
478{
479 unsigned char c;
480 int composing = 0;
481
482 while (src < src_end)
483 {
484 c = *src++;
485
486 if (composing)
487 {
488 if (c < 0xA0)
489 composing = 0;
490 else
491 c -= 0x20;
492 }
493
494 switch (emacs_code_class[c])
495 {
496 case EMACS_ascii_code:
497 case EMACS_linefeed_code:
498 break;
499
500 case EMACS_control_code:
501 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
502 return 0;
503 break;
504
505 case EMACS_invalid_code:
506 return 0;
507
508 case EMACS_leading_code_composition: /* c == 0x80 */
509 if (composing)
510 CHECK_CODE_RANGE_A0_FF;
511 else
512 composing = 1;
513 break;
514
515 case EMACS_leading_code_4:
516 CHECK_CODE_RANGE_A0_FF;
517 /* fall down to check it two more times ... */
518
519 case EMACS_leading_code_3:
520 CHECK_CODE_RANGE_A0_FF;
521 /* fall down to check it one more time ... */
522
523 case EMACS_leading_code_2:
524 CHECK_CODE_RANGE_A0_FF;
525 break;
526
527 default:
528 label_end_of_switch:
529 break;
530 }
531 }
0ef69138 532 return CODING_CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
533}
534
535\f
536/*** 3. ISO2022 handlers ***/
537
538/* The following note describes the coding system ISO2022 briefly.
39787efd
KH
539 Since the intention of this note is to help understand the
540 functions in this file, some parts are NOT ACCURATE or OVERLY
541 SIMPLIFIED. For thorough understanding, please refer to the
4ed46869
KH
542 original document of ISO2022.
543
544 ISO2022 provides many mechanisms to encode several character sets
39787efd
KH
545 in 7-bit and 8-bit environments. For 7-bite environments, all text
546 is encoded using bytes less than 128. This may make the encoded
547 text a little bit longer, but the text passes more easily through
548 several gateways, some of which strip off MSB (Most Significant Bit).
549
550 There are two kinds of character sets: control character set and
4ed46869
KH
551 graphic character set. The former contains control characters such
552 as `newline' and `escape' to provide control functions (control
39787efd
KH
553 functions are also provided by escape sequences). The latter
554 contains graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
555 two control character sets and many graphic character sets.
556
557 Graphic character sets are classified into one of the following
39787efd
KH
558 four classes, according to the number of bytes (DIMENSION) and
559 number of characters in one dimension (CHARS) of the set:
560 - DIMENSION1_CHARS94
561 - DIMENSION1_CHARS96
562 - DIMENSION2_CHARS94
563 - DIMENSION2_CHARS96
564
565 In addition, each character set is assigned an identification tag,
566 unique for each set, called "final character" (denoted as <F>
567 hereafter). The <F> of each character set is decided by ECMA(*)
568 when it is registered in ISO. The code range of <F> is 0x30..0x7F
569 (0x30..0x3F are for private use only).
4ed46869
KH
570
571 Note (*): ECMA = European Computer Manufacturers Association
572
573 Here are examples of graphic character set [NAME(<F>)]:
574 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
575 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
576 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
577 o DIMENSION2_CHARS96 -- none for the moment
578
39787efd 579 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
580 C0 [0x00..0x1F] -- control character plane 0
581 GL [0x20..0x7F] -- graphic character plane 0
582 C1 [0x80..0x9F] -- control character plane 1
583 GR [0xA0..0xFF] -- graphic character plane 1
584
585 A control character set is directly designated and invoked to C0 or
39787efd
KH
586 C1 by an escape sequence. The most common case is that:
587 - ISO646's control character set is designated/invoked to C0, and
588 - ISO6429's control character set is designated/invoked to C1,
589 and usually these designations/invocations are omitted in encoded
590 text. In a 7-bit environment, only C0 can be used, and a control
591 character for C1 is encoded by an appropriate escape sequence to
592 fit into the environment. All control characters for C1 are
593 defined to have corresponding escape sequences.
4ed46869
KH
594
595 A graphic character set is at first designated to one of four
596 graphic registers (G0 through G3), then these graphic registers are
597 invoked to GL or GR. These designations and invocations can be
598 done independently. The most common case is that G0 is invoked to
39787efd
KH
599 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
600 these invocations and designations are omitted in encoded text.
601 In a 7-bit environment, only GL can be used.
4ed46869 602
39787efd
KH
603 When a graphic character set of CHARS94 is invoked to GL, codes
604 0x20 and 0x7F of the GL area work as control characters SPACE and
605 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
606 be used.
4ed46869
KH
607
608 There are two ways of invocation: locking-shift and single-shift.
609 With locking-shift, the invocation lasts until the next different
39787efd
KH
610 invocation, whereas with single-shift, the invocation affects the
611 following character only and doesn't affect the locking-shift
612 state. Invocations are done by the following control characters or
613 escape sequences:
4ed46869
KH
614
615 ----------------------------------------------------------------------
39787efd 616 abbrev function cntrl escape seq description
4ed46869 617 ----------------------------------------------------------------------
39787efd
KH
618 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
619 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
620 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
621 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
622 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
623 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
624 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
625 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
626 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 627 ----------------------------------------------------------------------
39787efd
KH
628 (*) These are not used by any known coding system.
629
630 Control characters for these functions are defined by macros
631 ISO_CODE_XXX in `coding.h'.
4ed46869 632
39787efd 633 Designations are done by the following escape sequences:
4ed46869
KH
634 ----------------------------------------------------------------------
635 escape sequence description
636 ----------------------------------------------------------------------
637 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
638 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
639 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
640 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
641 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
642 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
643 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
644 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
645 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
646 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
647 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
648 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
649 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
650 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
651 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
652 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
653 ----------------------------------------------------------------------
654
655 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 656 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
657
658 Note (*): Although these designations are not allowed in ISO2022,
659 Emacs accepts them on decoding, and produces them on encoding
39787efd 660 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
661 7-bit environment, non-locking-shift, and non-single-shift.
662
663 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
39787efd 664 '(' can be omitted. We refer to this as "short-form" hereafter.
4ed46869
KH
665
666 Now you may notice that there are a lot of ways for encoding the
39787efd
KH
667 same multilingual text in ISO2022. Actually, there exist many
668 coding systems such as Compound Text (used in X11's inter client
669 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
670 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
671 localized platforms), and all of these are variants of ISO2022.
672
673 In addition to the above, Emacs handles two more kinds of escape
674 sequences: ISO6429's direction specification and Emacs' private
675 sequence for specifying character composition.
676
39787efd 677 ISO6429's direction specification takes the following form:
4ed46869
KH
678 o CSI ']' -- end of the current direction
679 o CSI '0' ']' -- end of the current direction
680 o CSI '1' ']' -- start of left-to-right text
681 o CSI '2' ']' -- start of right-to-left text
682 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
683 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
684
685 Character composition specification takes the following form:
4ed46869
KH
686 o ESC '0' -- start character composition
687 o ESC '1' -- end character composition
39787efd
KH
688 Since these are not standard escape sequences of any ISO standard,
689 the use of them for these meanings is restricted to Emacs only. */
4ed46869
KH
690
691enum iso_code_class_type iso_code_class[256];
692
f024b6aa
RS
/* Nonzero if entry IDX of `coding_system_table' is set and that coding
   system either marks CHARSET in its `safe_charsets' or has a
   requested designation for CHARSET (i.e. one other than
   CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION).  Zero otherwise.  */
#define CHARSET_OK(idx, charset)                                \
  (coding_system_table[idx]                                     \
   ? (coding_system_table[idx]->safe_charsets[charset]          \
      || (CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION              \
          != CODING_SPEC_ISO_REQUESTED_DESIGNATION              \
               (coding_system_table[idx], charset)))            \
   : 0)
d46c5b12
KH
699
/* Nonzero if entry IDX of `coding_system_table' is set and that coding
   system has an initial designation (a value >= 0) for graphic
   register 1.  The table entry is tested first, in the same way as
   CHARSET_OK does, so that an unset entry yields 0 rather than being
   handed to CODING_SPEC_ISO_INITIAL_DESIGNATION (which presumably
   dereferences it).  */
#define SHIFT_OUT_OK(idx)                                               \
  (coding_system_table[idx]                                             \
   && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) \
       >= 0))
702
4ed46869
KH
703/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
704 Check if a text is encoded in ISO2022. If it is, returns an
705 integer in which appropriate flag bits any of:
706 CODING_CATEGORY_MASK_ISO_7
d46c5b12 707 CODING_CATEGORY_MASK_ISO_7_TIGHT
4ed46869
KH
708 CODING_CATEGORY_MASK_ISO_8_1
709 CODING_CATEGORY_MASK_ISO_8_2
7717c392
KH
710 CODING_CATEGORY_MASK_ISO_7_ELSE
711 CODING_CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
712 are set. If a code which should never appear in ISO2022 is found,
713 returns 0. */
714
715int
716detect_coding_iso2022 (src, src_end)
717 unsigned char *src, *src_end;
718{
d46c5b12
KH
719 int mask = CODING_CATEGORY_MASK_ISO;
720 int mask_found = 0;
f46869e4 721 int reg[4], shift_out = 0, single_shifting = 0;
d46c5b12 722 int c, c1, i, charset;
3f003981 723
d46c5b12 724 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
3f003981 725 while (mask && src < src_end)
4ed46869
KH
726 {
727 c = *src++;
728 switch (c)
729 {
730 case ISO_CODE_ESC:
f46869e4 731 single_shifting = 0;
e0e989f6 732 if (src >= src_end)
4ed46869
KH
733 break;
734 c = *src++;
d46c5b12 735 if (c >= '(' && c <= '/')
4ed46869 736 {
bf9cdd4e
KH
737 /* Designation sequence for a charset of dimension 1. */
738 if (src >= src_end)
739 break;
d46c5b12
KH
740 c1 = *src++;
741 if (c1 < ' ' || c1 >= 0x80
742 || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
743 /* Invalid designation sequence. Just ignore. */
744 break;
745 reg[(c - '(') % 4] = charset;
bf9cdd4e
KH
746 }
747 else if (c == '$')
748 {
749 /* Designation sequence for a charset of dimension 2. */
750 if (src >= src_end)
751 break;
752 c = *src++;
753 if (c >= '@' && c <= 'B')
754 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
d46c5b12 755 reg[0] = charset = iso_charset_table[1][0][c];
bf9cdd4e 756 else if (c >= '(' && c <= '/')
bcf26d6a 757 {
bf9cdd4e
KH
758 if (src >= src_end)
759 break;
d46c5b12
KH
760 c1 = *src++;
761 if (c1 < ' ' || c1 >= 0x80
762 || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
763 /* Invalid designation sequence. Just ignore. */
764 break;
765 reg[(c - '(') % 4] = charset;
bcf26d6a 766 }
bf9cdd4e 767 else
d46c5b12
KH
768 /* Invalid designation sequence. Just ignore. */
769 break;
770 }
ae9ff118 771 else if (c == 'N' || c == 'O')
d46c5b12 772 {
ae9ff118
KH
773 /* ESC <Fe> for SS2 or SS3. */
774 mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 775 break;
4ed46869 776 }
bf9cdd4e 777 else if (c == '0' || c == '1' || c == '2')
ae9ff118 778 /* ESC <Fp> for start/end composition. Just ignore. */
d46c5b12 779 break;
bf9cdd4e 780 else
d46c5b12
KH
781 /* Invalid escape sequence. Just ignore. */
782 break;
783
784 /* We found a valid designation sequence for CHARSET. */
785 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
786 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
787 mask_found |= CODING_CATEGORY_MASK_ISO_7;
788 else
789 mask &= ~CODING_CATEGORY_MASK_ISO_7;
790 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
791 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
792 else
793 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
ae9ff118
KH
794 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
795 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
796 else
d46c5b12 797 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
ae9ff118
KH
798 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
799 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
800 else
d46c5b12 801 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
802 break;
803
4ed46869 804 case ISO_CODE_SO:
f46869e4 805 single_shifting = 0;
d46c5b12
KH
806 if (shift_out == 0
807 && (reg[1] >= 0
808 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
809 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
810 {
811 /* Locking shift out. */
812 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
813 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
814 }
e0e989f6
KH
815 break;
816
d46c5b12 817 case ISO_CODE_SI:
f46869e4 818 single_shifting = 0;
d46c5b12
KH
819 if (shift_out == 1)
820 {
821 /* Locking shift in. */
822 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
823 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
824 }
825 break;
826
4ed46869 827 case ISO_CODE_CSI:
f46869e4 828 single_shifting = 0;
4ed46869
KH
829 case ISO_CODE_SS2:
830 case ISO_CODE_SS3:
3f003981
KH
831 {
832 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
833
70c22245
KH
834 if (c != ISO_CODE_CSI)
835 {
d46c5b12
KH
836 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
837 & CODING_FLAG_ISO_SINGLE_SHIFT)
70c22245 838 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
839 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
840 & CODING_FLAG_ISO_SINGLE_SHIFT)
70c22245 841 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
f46869e4 842 single_shifting = 1;
70c22245 843 }
3f003981
KH
844 if (VECTORP (Vlatin_extra_code_table)
845 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
846 {
d46c5b12
KH
847 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
848 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981 849 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
850 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
851 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981
KH
852 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
853 }
854 mask &= newmask;
d46c5b12 855 mask_found |= newmask;
3f003981
KH
856 }
857 break;
4ed46869
KH
858
859 default:
860 if (c < 0x80)
f46869e4
KH
861 {
862 single_shifting = 0;
863 break;
864 }
4ed46869 865 else if (c < 0xA0)
c4825358 866 {
f46869e4 867 single_shifting = 0;
3f003981
KH
868 if (VECTORP (Vlatin_extra_code_table)
869 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 870 {
3f003981
KH
871 int newmask = 0;
872
d46c5b12
KH
873 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
874 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981 875 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
d46c5b12
KH
876 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
877 & CODING_FLAG_ISO_LATIN_EXTRA)
3f003981
KH
878 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
879 mask &= newmask;
d46c5b12 880 mask_found |= newmask;
c4825358 881 }
3f003981
KH
882 else
883 return 0;
c4825358 884 }
4ed46869
KH
885 else
886 {
7717c392 887 unsigned char *src_begin = src;
4ed46869 888
d46c5b12 889 mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
7717c392 890 | CODING_CATEGORY_MASK_ISO_7_ELSE);
d46c5b12 891 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
f46869e4
KH
892 /* Check the length of succeeding codes of the range
893 0xA0..0FF. If the byte length is odd, we exclude
894 CODING_CATEGORY_MASK_ISO_8_2. We can check this only
895 when we are not single shifting. */
896 if (!single_shifting)
897 {
898 while (src < src_end && *src >= 0xA0)
899 src++;
900 if ((src - src_begin - 1) & 1 && src < src_end)
901 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
902 else
903 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
904 }
4ed46869
KH
905 }
906 break;
907 }
908 }
909
d46c5b12 910 return (mask & mask_found);
4ed46869
KH
911}
912
/* Decode one character whose charset is CHARSET and whose first
   position code is C1, storing the result at DST.  When CHARSET has
   dimension 2, the second position code is fetched from SRC into C2.
   A negative CHARSET means we are decoding ill-formed text, and all
   we can do is read C1 as is.

   The expansion uses the variables `coding', `src', `dst', `c2' and
   `translation_table' of the enclosing function, and C1 must be an
   lvalue because SPLIT_CHAR may store into it.  */

#define DECODE_ISO_CHARACTER(charset, c1)                               \
  do {                                                                  \
    int iso_charset_ = (charset);                                       \
    int iso_translated_;                                                \
                                                                        \
    if (COMPOSING_HEAD_P (coding->composing))                           \
      {                                                                 \
        *dst++ = LEADING_CODE_COMPOSITION;                              \
        if (COMPOSING_WITH_RULE_P (coding->composing))                  \
          {                                                             \
            /* To tell composition rules are embedded.  */              \
            *dst++ = 0xFF;                                              \
          }                                                             \
        coding->composing += 2;                                         \
      }                                                                 \
    if (iso_charset_ >= 0)                                              \
      {                                                                 \
        if (CHARSET_DIMENSION (iso_charset_) == 2)                      \
          {                                                             \
            int iso_class_;                                             \
                                                                        \
            ONE_MORE_BYTE (c2);                                         \
            iso_class_ = iso_code_class[(c2) & 0x7F];                   \
            if (!(iso_class_ == ISO_0x20_or_0x7F                        \
                  || iso_class_ == ISO_graphic_plane_0))                \
              {                                                         \
                /* C2 is not in a class accepted here; un-read it       \
                   and decode C1 as ASCII.  */                          \
                src--;                                                  \
                iso_charset_ = CHARSET_ASCII;                           \
              }                                                         \
          }                                                             \
        if (!NILP (translation_table))                                  \
          {                                                             \
            iso_translated_ = translate_char (translation_table, -1,    \
                                              iso_charset_, c1, c2);    \
            if (iso_translated_ >= 0)                                   \
              SPLIT_CHAR (iso_translated_, iso_charset_, c1, c2);       \
          }                                                             \
      }                                                                 \
    if (iso_charset_ < 0 || iso_charset_ == CHARSET_ASCII)              \
      {                                                                 \
        DECODE_CHARACTER_ASCII (c1);                                    \
      }                                                                 \
    else if (CHARSET_DIMENSION (iso_charset_) != 1)                     \
      {                                                                 \
        DECODE_CHARACTER_DIMENSION2 (iso_charset_, c1, c2);             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        DECODE_CHARACTER_DIMENSION1 (iso_charset_, c1);                 \
      }                                                                 \
    if (COMPOSING_WITH_RULE_P (coding->composing))                      \
      {                                                                 \
        /* To tell a composition rule follows.  */                      \
        coding->composing = COMPOSING_WITH_RULE_RULE;                   \
      }                                                                 \
  } while (0)
957
/* Record in CODING that the charset identified by DIMENSION, CHARS
   and FINAL_CHAR is now designated to graphic register REG.

   Control jumps to `label_invalid_code' of the enclosing function
   when FINAL_CHAR is out of range, when no acceptable charset is
   found, or when the sequence must be passed through as is.  The
   expansion uses the variable `coding' of the enclosing function.  */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char)           \
  do {                                                                  \
    int charset;                                                        \
                                                                        \
    if ((final_char) < '0' || (final_char) >= 128)                      \
      goto label_invalid_code;                                          \
    charset = ISO_CHARSET_TABLE (make_number (dimension),               \
                                 make_number (chars),                   \
                                 make_number (final_char));             \
    if (charset < 0                                                     \
        || ((CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)    \
             != (reg))                                                  \
            && !coding->safe_charsets[charset]))                        \
      {                                                                 \
        /* Not a charset we accept for REG; remember the register.  */  \
        coding->spec.iso2022.last_invalid_designation_register = (reg); \
        goto label_invalid_code;                                        \
      }                                                                 \
    if (coding->spec.iso2022.last_invalid_designation_register == 0     \
        && (reg) == 0                                                   \
        && charset == CHARSET_ASCII)                                    \
      {                                                                 \
        /* We should insert this designation sequence as is so          \
           that it is surely written back to a file.  */                \
        coding->spec.iso2022.last_invalid_designation_register = -1;    \
        goto label_invalid_code;                                        \
      }                                                                 \
    coding->spec.iso2022.last_invalid_designation_register = -1;        \
    if ((coding->mode & CODING_MODE_DIRECTION)                          \
        && CHARSET_REVERSE_CHARSET (charset) >= 0)                      \
      charset = CHARSET_REVERSE_CHARSET (charset);                      \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
993
88993dfd
KH
994/* Return 0 if there's a valid composing sequence starting at SRC and
995 ending before SRC_END, else return -1. */
d46c5b12 996
84fbb8a0
KH
997int
998check_composing_code (coding, src, src_end)
d46c5b12
KH
999 struct coding_system *coding;
1000 unsigned char *src, *src_end;
1001{
d46c5b12
KH
1002 int charset, c, c1, dim;
1003
1004 while (src < src_end)
1005 {
88993dfd
KH
1006 c = *src++;
1007 if (c >= 0x20)
1008 continue;
1009 if (c != ISO_CODE_ESC || src >= src_end)
1010 return -1;
1011 c = *src++;
1012 if (c == '1') /* end of compsition */
1013 return 0;
1014 if (src + 2 >= src_end
1015 || !coding->flags & CODING_FLAG_ISO_DESIGNATION)
1016 return -1;
1017
1018 dim = (c == '$');
1019 if (dim == 1)
1020 c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
1021 if (c >= '(' && c <= '/')
d46c5b12 1022 {
88993dfd
KH
1023 c1 = *src++;
1024 if ((c1 < ' ' || c1 >= 0x80)
1025 || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
1026 || ! coding->safe_charsets[charset]
1027 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
1028 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1029 return -1;
d46c5b12 1030 }
88993dfd
KH
1031 else
1032 return -1;
d46c5b12 1033 }
88993dfd
KH
1034
1035 /* We have not found the sequence "ESC 1". */
1036 return -1;
d46c5b12
KH
1037}
1038
4ed46869
KH
1039/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1040
1041int
d46c5b12 1042decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
1043 struct coding_system *coding;
1044 unsigned char *source, *destination;
1045 int src_bytes, dst_bytes;
4ed46869
KH
1046{
1047 unsigned char *src = source;
1048 unsigned char *src_end = source + src_bytes;
1049 unsigned char *dst = destination;
1050 unsigned char *dst_end = destination + dst_bytes;
1051 /* Since the maximum bytes produced by each loop is 7, we subtract 6
1052 from DST_END to assure that overflow checking is necessary only
1053 at the head of loop. */
1054 unsigned char *adjusted_dst_end = dst_end - 6;
1055 int charset;
1056 /* Charsets invoked to graphic plane 0 and 1 respectively. */
1057 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1058 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
84fbb8a0 1059 Lisp_Object translation_table
f967223b 1060 = coding->translation_table_for_decode;
d46c5b12 1061 int result = CODING_FINISH_NORMAL;
bdd9fb48 1062
84fbb8a0 1063 if (!NILP (Venable_character_translation) && NILP (translation_table))
f967223b 1064 translation_table = Vstandard_translation_table_for_decode;
4ed46869 1065
d46c5b12 1066 coding->produced_char = 0;
fb88bf2d 1067 coding->fake_multibyte = 0;
d46c5b12
KH
1068 while (src < src_end && (dst_bytes
1069 ? (dst < adjusted_dst_end)
1070 : (dst < src - 6)))
4ed46869
KH
1071 {
1072 /* SRC_BASE remembers the start position in source in each loop.
1073 The loop will be exited when there's not enough source text
1074 to analyze long escape sequence or 2-byte code (within macros
1075 ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset
1076 to SRC_BASE before exiting. */
1077 unsigned char *src_base = src;
bdd9fb48 1078 int c1 = *src++, c2;
4ed46869
KH
1079
1080 switch (iso_code_class [c1])
1081 {
1082 case ISO_0x20_or_0x7F:
1083 if (!coding->composing
1084 && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1085 {
1086 /* This is SPACE or DEL. */
1087 *dst++ = c1;
d46c5b12 1088 coding->produced_char++;
4ed46869
KH
1089 break;
1090 }
1091 /* This is a graphic character, we fall down ... */
1092
1093 case ISO_graphic_plane_0:
1094 if (coding->composing == COMPOSING_WITH_RULE_RULE)
1095 {
1096 /* This is a composition rule. */
1097 *dst++ = c1 | 0x80;
1098 coding->composing = COMPOSING_WITH_RULE_TAIL;
1099 }
1100 else
1101 DECODE_ISO_CHARACTER (charset0, c1);
1102 break;
1103
1104 case ISO_0xA0_or_0xFF:
d46c5b12
KH
1105 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1106 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
fb88bf2d 1107 goto label_invalid_code;
4ed46869
KH
1108 /* This is a graphic character, we fall down ... */
1109
1110 case ISO_graphic_plane_1:
d46c5b12 1111 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
fb88bf2d 1112 goto label_invalid_code;
d46c5b12
KH
1113 else
1114 DECODE_ISO_CHARACTER (charset1, c1);
4ed46869
KH
1115 break;
1116
1117 case ISO_control_code:
1118 /* All ISO2022 control characters in this class have the
1119 same representation in Emacs internal format. */
d46c5b12
KH
1120 if (c1 == '\n'
1121 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1122 && (coding->eol_type == CODING_EOL_CR
1123 || coding->eol_type == CODING_EOL_CRLF))
1124 {
1125 result = CODING_FINISH_INCONSISTENT_EOL;
1126 goto label_end_of_loop_2;
1127 }
4ed46869 1128 *dst++ = c1;
d46c5b12 1129 coding->produced_char++;
174a4cbe
KH
1130 if (c1 >= 0x80)
1131 coding->fake_multibyte = 1;
4ed46869
KH
1132 break;
1133
1134 case ISO_carriage_return:
1135 if (coding->eol_type == CODING_EOL_CR)
d46c5b12 1136 *dst++ = '\n';
4ed46869
KH
1137 else if (coding->eol_type == CODING_EOL_CRLF)
1138 {
1139 ONE_MORE_BYTE (c1);
1140 if (c1 == ISO_CODE_LF)
1141 *dst++ = '\n';
1142 else
1143 {
d46c5b12
KH
1144 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1145 {
1146 result = CODING_FINISH_INCONSISTENT_EOL;
1147 goto label_end_of_loop_2;
1148 }
4ed46869 1149 src--;
d46c5b12 1150 *dst++ = '\r';
4ed46869
KH
1151 }
1152 }
1153 else
d46c5b12
KH
1154 *dst++ = c1;
1155 coding->produced_char++;
4ed46869
KH
1156 break;
1157
1158 case ISO_shift_out:
d46c5b12
KH
1159 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1160 || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1161 goto label_invalid_code;
4ed46869
KH
1162 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1163 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1164 break;
1165
1166 case ISO_shift_in:
d46c5b12
KH
1167 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1168 goto label_invalid_code;
4ed46869
KH
1169 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1170 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1171 break;
1172
1173 case ISO_single_shift_2_7:
1174 case ISO_single_shift_2:
d46c5b12
KH
1175 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1176 goto label_invalid_code;
4ed46869
KH
1177 /* SS2 is handled as an escape sequence of ESC 'N' */
1178 c1 = 'N';
1179 goto label_escape_sequence;
1180
1181 case ISO_single_shift_3:
d46c5b12
KH
1182 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1183 goto label_invalid_code;
4ed46869
KH
1184 /* SS2 is handled as an escape sequence of ESC 'O' */
1185 c1 = 'O';
1186 goto label_escape_sequence;
1187
1188 case ISO_control_sequence_introducer:
1189 /* CSI is handled as an escape sequence of ESC '[' ... */
1190 c1 = '[';
1191 goto label_escape_sequence;
1192
1193 case ISO_escape:
1194 ONE_MORE_BYTE (c1);
1195 label_escape_sequence:
1196 /* Escape sequences handled by Emacs are invocation,
1197 designation, direction specification, and character
1198 composition specification. */
1199 switch (c1)
1200 {
1201 case '&': /* revision of following character set */
1202 ONE_MORE_BYTE (c1);
1203 if (!(c1 >= '@' && c1 <= '~'))
d46c5b12 1204 goto label_invalid_code;
4ed46869
KH
1205 ONE_MORE_BYTE (c1);
1206 if (c1 != ISO_CODE_ESC)
d46c5b12 1207 goto label_invalid_code;
4ed46869
KH
1208 ONE_MORE_BYTE (c1);
1209 goto label_escape_sequence;
1210
1211 case '$': /* designation of 2-byte character set */
d46c5b12
KH
1212 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1213 goto label_invalid_code;
4ed46869
KH
1214 ONE_MORE_BYTE (c1);
1215 if (c1 >= '@' && c1 <= 'B')
1216 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 1217 or JISX0208.1980 */
4ed46869
KH
1218 DECODE_DESIGNATION (0, 2, 94, c1);
1219 }
1220 else if (c1 >= 0x28 && c1 <= 0x2B)
1221 { /* designation of DIMENSION2_CHARS94 character set */
1222 ONE_MORE_BYTE (c2);
1223 DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1224 }
1225 else if (c1 >= 0x2C && c1 <= 0x2F)
1226 { /* designation of DIMENSION2_CHARS96 character set */
1227 ONE_MORE_BYTE (c2);
1228 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1229 }
1230 else
d46c5b12 1231 goto label_invalid_code;
4ed46869
KH
1232 break;
1233
1234 case 'n': /* invocation of locking-shift-2 */
d46c5b12
KH
1235 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1236 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1237 goto label_invalid_code;
4ed46869 1238 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
e0e989f6 1239 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
4ed46869
KH
1240 break;
1241
1242 case 'o': /* invocation of locking-shift-3 */
d46c5b12
KH
1243 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1244 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1245 goto label_invalid_code;
4ed46869 1246 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
e0e989f6 1247 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
4ed46869
KH
1248 break;
1249
1250 case 'N': /* invocation of single-shift-2 */
d46c5b12
KH
1251 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1252 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1253 goto label_invalid_code;
4ed46869
KH
1254 ONE_MORE_BYTE (c1);
1255 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1256 DECODE_ISO_CHARACTER (charset, c1);
1257 break;
1258
1259 case 'O': /* invocation of single-shift-3 */
d46c5b12
KH
1260 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1261 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1262 goto label_invalid_code;
4ed46869
KH
1263 ONE_MORE_BYTE (c1);
1264 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1265 DECODE_ISO_CHARACTER (charset, c1);
1266 break;
1267
d46c5b12
KH
1268 case '0': case '2': /* start composing */
1269 /* Before processing composing, we must be sure that all
1270 characters being composed are supported by CODING.
88993dfd
KH
1271 If not, we must give up composing. */
1272 if (check_composing_code (coding, src, src_end) == 0)
1273 {
1274 /* We are looking at a valid composition sequence. */
1275 coding->composing = (c1 == '0'
1276 ? COMPOSING_NO_RULE_HEAD
1277 : COMPOSING_WITH_RULE_HEAD);
1278 coding->composed_chars = 0;
1279 }
1280 else
1281 {
1282 *dst++ = ISO_CODE_ESC;
1283 *dst++ = c1;
1284 coding->produced_char += 2;
1285 }
4ed46869
KH
1286 break;
1287
1288 case '1': /* end composing */
88993dfd
KH
1289 if (!coding->composing)
1290 {
1291 *dst++ = ISO_CODE_ESC;
1292 *dst++ = c1;
1293 coding->produced_char += 2;
1294 break;
1295 }
1296
de79a6a5
KH
1297 if (coding->composed_chars > 0)
1298 {
1299 if (coding->composed_chars == 1)
1300 {
1301 unsigned char *this_char_start = dst;
1302 int this_bytes;
1303
1304 /* Only one character is in the composing
1305 sequence. Make it a normal character. */
1306 while (*--this_char_start != LEADING_CODE_COMPOSITION);
1307 dst = (this_char_start
1308 + (coding->composing == COMPOSING_NO_RULE_TAIL
1309 ? 1 : 2));
1310 *dst -= 0x20;
1311 if (*dst == 0x80)
1312 *++dst &= 0x7F;
1313 this_bytes = BYTES_BY_CHAR_HEAD (*dst);
1314 while (this_bytes--) *this_char_start++ = *dst++;
1315 dst = this_char_start;
1316 }
1317 coding->produced_char++;
1318 }
4ed46869 1319 coding->composing = COMPOSING_NO;
4ed46869
KH
1320 break;
1321
1322 case '[': /* specification of direction */
d46c5b12
KH
1323 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1324 goto label_invalid_code;
4ed46869 1325 /* For the moment, nested direction is not supported.
d46c5b12
KH
1326 So, `coding->mode & CODING_MODE_DIRECTION' zero means
1327 left-to-right, and nozero means right-to-left. */
4ed46869
KH
1328 ONE_MORE_BYTE (c1);
1329 switch (c1)
1330 {
1331 case ']': /* end of the current direction */
d46c5b12 1332 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
1333
1334 case '0': /* end of the current direction */
1335 case '1': /* start of left-to-right direction */
1336 ONE_MORE_BYTE (c1);
1337 if (c1 == ']')
d46c5b12 1338 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 1339 else
d46c5b12 1340 goto label_invalid_code;
4ed46869
KH
1341 break;
1342
1343 case '2': /* start of right-to-left direction */
1344 ONE_MORE_BYTE (c1);
1345 if (c1 == ']')
d46c5b12 1346 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 1347 else
d46c5b12 1348 goto label_invalid_code;
4ed46869
KH
1349 break;
1350
1351 default:
d46c5b12 1352 goto label_invalid_code;
4ed46869
KH
1353 }
1354 break;
1355
1356 default:
d46c5b12
KH
1357 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1358 goto label_invalid_code;
4ed46869
KH
1359 if (c1 >= 0x28 && c1 <= 0x2B)
1360 { /* designation of DIMENSION1_CHARS94 character set */
1361 ONE_MORE_BYTE (c2);
1362 DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1363 }
1364 else if (c1 >= 0x2C && c1 <= 0x2F)
1365 { /* designation of DIMENSION1_CHARS96 character set */
1366 ONE_MORE_BYTE (c2);
1367 DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1368 }
1369 else
1370 {
d46c5b12 1371 goto label_invalid_code;
4ed46869
KH
1372 }
1373 }
1374 /* We must update these variables now. */
1375 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1376 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1377 break;
1378
d46c5b12 1379 label_invalid_code:
d46c5b12
KH
1380 while (src_base < src)
1381 *dst++ = *src_base++;
fb88bf2d 1382 coding->fake_multibyte = 1;
4ed46869
KH
1383 }
1384 continue;
1385
1386 label_end_of_loop:
d46c5b12
KH
1387 result = CODING_FINISH_INSUFFICIENT_SRC;
1388 label_end_of_loop_2:
4ed46869
KH
1389 src = src_base;
1390 break;
1391 }
1392
fb88bf2d 1393 if (src < src_end)
4ed46869 1394 {
fb88bf2d
KH
1395 if (result == CODING_FINISH_NORMAL)
1396 result = CODING_FINISH_INSUFFICIENT_DST;
1397 else if (result != CODING_FINISH_INCONSISTENT_EOL
1398 && coding->mode & CODING_MODE_LAST_BLOCK)
1399 {
1400 /* This is the last block of the text to be decoded. We had
1401 better just flush out all remaining codes in the text
1402 although they are not valid characters. */
1403 src_bytes = src_end - src;
1404 if (dst_bytes && (dst_end - dst < src_bytes))
1405 src_bytes = dst_end - dst;
1406 bcopy (src, dst, src_bytes);
1407 dst += src_bytes;
1408 src += src_bytes;
1409 coding->fake_multibyte = 1;
1410 }
4ed46869 1411 }
fb88bf2d 1412
d46c5b12
KH
1413 coding->consumed = coding->consumed_char = src - source;
1414 coding->produced = dst - destination;
1415 return result;
4ed46869
KH
1416}
1417
f4dee582 1418/* ISO2022 encoding stuff. */
4ed46869
KH
1419
1420/*
f4dee582 1421 It is not enough to say just "ISO2022" on encoding, we have to
d46c5b12 1422 specify more details. In Emacs, each coding system of ISO2022
4ed46869
KH
1423 variant has the following specifications:
1424 1. Initial designation to G0 thru G3.
1425 2. Allows short-form designation?
1426 3. ASCII should be designated to G0 before control characters?
1427 4. ASCII should be designated to G0 at end of line?
1428 5. 7-bit environment or 8-bit environment?
1429 6. Use locking-shift?
1430 7. Use Single-shift?
1431 And the following two are only for Japanese:
1432 8. Use ASCII in place of JIS0201-1976-Roman?
1433 9. Use JISX0208-1983 in place of JISX0208-1978?
1434 These specifications are encoded in `coding->flags' as flag bits
1435 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
f4dee582 1436 details.
4ed46869
KH
1437*/
1438
/* Store at DST the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.  If CODING
   allows short-form designation, REG is 0, and the <final-char> of a
   2-dimensional 94-charset is '@', 'A', or 'B', the intermediate
   character is omitted.  A revision prefix "ESC & <rev>" is emitted
   first when CODING has a revision number below 255 for CHARSET.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);   \
    int has_94_chars = (CHARSET_CHARS (charset) == 94);                 \
    /* Intermediate character selecting REG for a 94- or 96-set.  */    \
    unsigned char intermediate                                          \
      = (unsigned char) ((has_94_chars ? "()*+" : ",-./")[reg]);        \
                                                                        \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
    /* Only the short form of a 2-dimensional 94-charset at register    \
       0 may drop the intermediate character.  */                       \
    if (CHARSET_DIMENSION (charset) == 1                                \
        || !has_94_chars                                                \
        || !((coding)->flags & CODING_FLAG_ISO_SHORT_FORM)              \
        || (reg) != 0                                                   \
        || final_char < '@' || final_char > 'B')                        \
      *dst++ = intermediate;                                            \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = (charset);              \
  } while (0)
1480
/* The following two macros produce codes (control character or escape
   sequence) for ISO2022 single-shift functions (single-shift-2 and
   single-shift-3).  Both use the variables `coding' and `dst' of the
   enclosing function and mark CODING as being in single-shift
   state.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (!(coding->flags & CODING_FLAG_ISO_SEVEN_BITS))          \
      {                                                         \
        /* The SS2 control code itself is stored.  */           \
        *dst++ = ISO_CODE_SS2;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        /* 7-bit form: ESC 'N'.  */                             \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1496
fb88bf2d
KH
#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (!(coding->flags & CODING_FLAG_ISO_SEVEN_BITS))          \
      {                                                         \
        /* The SS3 control code itself is stored.  */           \
        *dst++ = ISO_CODE_SS3;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        /* 7-bit form: ESC 'O'.  */                             \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1508
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING which graphic register is now invoked to graphic
   plane 0.  They use the variables `coding' and `dst' of the
   enclosing function.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
1536
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   The body is a loop: when CHARSET is not yet invoked to a graphic
   plane, encode_invocation_designation is called and the tests are
   repeated.  The macro arguments are parenthesized so that an
   expression such as `a | b' may be passed for C1.  The expansion
   uses the variables `coding' and `dst' of the enclosing function.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F;                                         \
        else                                                            \
          *dst++ = (c1) | 0x80;                                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    else if ((coding->flags & CODING_FLAG_ISO_SAFE)                     \
             && !coding->safe_charsets[(charset)])                      \
      {                                                                 \
        /* We should not encode this character, instead produce one or  \
           two `?'s.  */                                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
1580
f4dee582
RS
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   The body is a loop: when CHARSET is not yet invoked to a graphic
   plane, encode_invocation_designation is called and the tests are
   repeated.  The macro arguments are parenthesized so that
   expressions such as `a | b' may be passed for C1 and C2.  The
   expansion uses the variables `coding' and `dst' of the enclosing
   function.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                   \
        else                                                            \
          *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                   \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                     \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                     \
        break;                                                          \
      }                                                                 \
    else if ((coding->flags & CODING_FLAG_ISO_SAFE)                     \
             && !coding->safe_charsets[(charset)])                      \
      {                                                                 \
        /* We should not encode this character, instead produce one or  \
           two `?'s.  */                                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
1623
6f551029
KH
/* Produce codes for the character of CHARSET whose position codes
   are C1 and C2, after applying `translation_table' when it is
   non-nil.  A character whose (translated) charset is not defined is
   written out as the raw bytes CHARSET, C1 and, when nonzero, C2,
   each masked to 7 bits if CODING is a 7-bit environment.  C1 and C2
   must be lvalues because SPLIT_CHAR may store into them.  The
   expansion uses the variables `coding', `dst' and
   `translation_table' of the enclosing function.  */
#define ENCODE_ISO_CHARACTER(charset, c1, c2)                           \
  do {                                                                  \
    int iso_translated_ = -1;                                           \
    int iso_charset_;                                                   \
                                                                        \
    if (!NILP (translation_table))                                      \
      iso_translated_ = translate_char (translation_table, -1,          \
                                        charset, c1, c2);               \
    if (iso_translated_ >= 0)                                           \
      SPLIT_CHAR (iso_translated_, iso_charset_, c1, c2);               \
    else                                                                \
      iso_charset_ = charset;                                           \
    if (!CHARSET_DEFINED_P (iso_charset_))                              \
      {                                                                 \
        /* All bits are kept unless CODING is a 7-bit environment.  */  \
        int iso_mask_ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)   \
                         ? 0x7f : ~0);                                  \
                                                                        \
        *dst++ = charset & iso_mask_;                                   \
        *dst++ = c1 & iso_mask_;                                        \
        if (c2)                                                         \
          *dst++ = c2 & iso_mask_;                                      \
      }                                                                 \
    else if (CHARSET_DIMENSION (iso_charset_) == 1)                     \
      {                                                                 \
        if (charset == CHARSET_ASCII                                    \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          iso_charset_ = charset_latin_jisx0201;                        \
        ENCODE_ISO_CHARACTER_DIMENSION1 (iso_charset_, c1);             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (charset == charset_jisx0208                                 \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          iso_charset_ = charset_jisx0208_1978;                         \
        ENCODE_ISO_CHARACTER_DIMENSION2 (iso_charset_, c1, c2);         \
      }                                                                 \
    if (! COMPOSING_P (coding->composing))                              \
      coding->consumed_char++;                                          \
  } while (0)
bdd9fb48 1671
4ed46869
KH
1672/* Produce designation and invocation codes at a place pointed by DST
1673 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1674 Return new DST. */
1675
1676unsigned char *
1677encode_invocation_designation (charset, coding, dst)
1678 int charset;
1679 struct coding_system *coding;
1680 unsigned char *dst;
1681{
1682 int reg; /* graphic register number */
1683
1684 /* At first, check designations. */
1685 for (reg = 0; reg < 4; reg++)
1686 if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1687 break;
1688
1689 if (reg >= 4)
1690 {
1691 /* CHARSET is not yet designated to any graphic registers. */
1692 /* At first check the requested designation. */
1693 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1ba9e4ab
KH
1694 if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1695 /* Since CHARSET requests no special designation, designate it
1696 to graphic register 0. */
4ed46869
KH
1697 reg = 0;
1698
1699 ENCODE_DESIGNATION (charset, reg, coding);
1700 }
1701
1702 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1703 && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1704 {
1705 /* Since the graphic register REG is not invoked to any graphic
1706 planes, invoke it to graphic plane 0. */
1707 switch (reg)
1708 {
1709 case 0: /* graphic register 0 */
1710 ENCODE_SHIFT_IN;
1711 break;
1712
1713 case 1: /* graphic register 1 */
1714 ENCODE_SHIFT_OUT;
1715 break;
1716
1717 case 2: /* graphic register 2 */
1718 if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1719 ENCODE_SINGLE_SHIFT_2;
1720 else
1721 ENCODE_LOCKING_SHIFT_2;
1722 break;
1723
1724 case 3: /* graphic register 3 */
1725 if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1726 ENCODE_SINGLE_SHIFT_3;
1727 else
1728 ENCODE_LOCKING_SHIFT_3;
1729 break;
1730 }
1731 }
1732 return dst;
1733}
1734
/* The following three macros produce codes for indicating composition:
   ESC '0' starts a composition without rules, ESC '2' starts one with
   rules, and ESC '1' ends a composition.  Each stores two bytes at
   DST.  The expansions are parenthesized so that they behave as a
   single expression wherever they are used.  */
#define ENCODE_COMPOSITION_NO_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
#define ENCODE_COMPOSITION_WITH_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
#define ENCODE_COMPOSITION_END \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1739
/* The following three macros produce codes for indicating direction
   of text.  They use the variables `coding' and `dst' of the
   enclosing function.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER stores ESC '[' when CODING is a
   7-bit environment and the CSI control code otherwise.
   CODING->flags is a set of flag bits, so the 7-bit flag is tested
   with `&'.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* Start of right-to-left direction: CSI '2' ']'.  */
#define ENCODE_DIRECTION_R2L                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '2';                                       \
    *dst++ = ']';                                       \
  } while (0)

/* End of the current direction: CSI '0' ']'.  */
#define ENCODE_DIRECTION_L2R                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '0';                                       \
    *dst++ = ']';                                       \
  } while (0)
1755
/* Emit the invocation and designation codes needed to bring the
   graphic planes and registers back to their initial state: shift in
   when graphic plane 0 does not invoke register 0, then redesignate
   every register whose initial charset is non-negative and differs
   from the charset currently designated to it.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg;                                                                \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    reg = 0;                                                                \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0          \
            && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                   \
                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))      \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
        reg++;                                                              \
      }                                                                     \
  } while (0)
1770
bdd9fb48 1771/* Produce designation sequences of charsets in the line started from
d46c5b12 1772 SRC to a place pointed by *DSTP, and update DSTP.
bdd9fb48
KH
1773
1774 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
1775 find all the necessary designations. */
1776
dfcf069d 1777void
bdd9fb48 1778encode_designation_at_bol (coding, table, src, src_end, dstp)
e0e989f6 1779 struct coding_system *coding;
bdd9fb48 1780 Lisp_Object table;
e0e989f6
KH
1781 unsigned char *src, *src_end, **dstp;
1782{
bdd9fb48
KH
1783 int charset, c, found = 0, reg;
1784 /* Table of charsets to be designated to each graphic register. */
1785 int r[4];
1786 unsigned char *dst = *dstp;
1787
1788 for (reg = 0; reg < 4; reg++)
1789 r[reg] = -1;
1790
1791 while (src < src_end && *src != '\n' && found < 4)
e0e989f6 1792 {
bdd9fb48
KH
1793 int bytes = BYTES_BY_CHAR_HEAD (*src);
1794
1795 if (NILP (table))
1796 charset = CHARSET_AT (src);
1797 else
e0e989f6 1798 {
35cb8686
RS
1799 int c_alt;
1800 unsigned char c1, c2;
bdd9fb48
KH
1801
1802 SPLIT_STRING(src, bytes, charset, c1, c2);
84fbb8a0 1803 if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
bdd9fb48 1804 charset = CHAR_CHARSET (c_alt);
e0e989f6 1805 }
bdd9fb48 1806
e0e989f6 1807 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
d46c5b12 1808 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
bdd9fb48
KH
1809 {
1810 found++;
1811 r[reg] = charset;
1812 }
1813
1814 src += bytes;
1815 }
1816
1817 if (found)
1818 {
1819 for (reg = 0; reg < 4; reg++)
1820 if (r[reg] >= 0
1821 && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1822 ENCODE_DESIGNATION (r[reg], reg, coding);
1823 *dstp = dst;
e0e989f6 1824 }
e0e989f6
KH
1825}
1826
4ed46869
KH
1827/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
1828
1829int
d46c5b12 1830encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
1831 struct coding_system *coding;
1832 unsigned char *source, *destination;
1833 int src_bytes, dst_bytes;
4ed46869
KH
1834{
1835 unsigned char *src = source;
1836 unsigned char *src_end = source + src_bytes;
1837 unsigned char *dst = destination;
1838 unsigned char *dst_end = destination + dst_bytes;
e0e989f6 1839 /* Since the maximum bytes produced by each loop is 20, we subtract 19
4ed46869
KH
1840 from DST_END to assure overflow checking is necessary only at the
1841 head of loop. */
e0e989f6 1842 unsigned char *adjusted_dst_end = dst_end - 19;
84fbb8a0 1843 Lisp_Object translation_table
f967223b 1844 = coding->translation_table_for_encode;
d46c5b12 1845 int result = CODING_FINISH_NORMAL;
bdd9fb48 1846
84fbb8a0 1847 if (!NILP (Venable_character_translation) && NILP (translation_table))
f967223b 1848 translation_table = Vstandard_translation_table_for_encode;
4ed46869 1849
d46c5b12 1850 coding->consumed_char = 0;
fb88bf2d 1851 coding->fake_multibyte = 0;
d46c5b12
KH
1852 while (src < src_end && (dst_bytes
1853 ? (dst < adjusted_dst_end)
1854 : (dst < src - 19)))
4ed46869
KH
1855 {
1856 /* SRC_BASE remembers the start position in source in each loop.
1857 The loop will be exited when there's not enough source text
1858 to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1859 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
1860 reset to SRC_BASE before exiting. */
1861 unsigned char *src_base = src;
bdd9fb48 1862 int charset, c1, c2, c3, c4;
4ed46869 1863
e0e989f6
KH
1864 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1865 && CODING_SPEC_ISO_BOL (coding))
1866 {
bdd9fb48 1867 /* We have to produce designation sequences if any now. */
84fbb8a0 1868 encode_designation_at_bol (coding, translation_table,
bdd9fb48 1869 src, src_end, &dst);
e0e989f6
KH
1870 CODING_SPEC_ISO_BOL (coding) = 0;
1871 }
1872
1873 c1 = *src++;
4ed46869 1874 /* If we are seeing a component of a composite character, we are
d46c5b12
KH
1875 seeing a leading-code encoded irregularly for composition, or
1876 a composition rule if composing with rule. We must set C1 to
1877 a normal leading-code or an ASCII code. If we are not seeing
1878 a composite character, we must reset composition,
1879 designation, and invocation states. */
4ed46869
KH
1880 if (COMPOSING_P (coding->composing))
1881 {
1882 if (c1 < 0xA0)
1883 {
1884 /* We are not in a composite character any longer. */
1885 coding->composing = COMPOSING_NO;
d46c5b12 1886 ENCODE_RESET_PLANE_AND_REGISTER;
4ed46869
KH
1887 ENCODE_COMPOSITION_END;
1888 }
1889 else
1890 {
1891 if (coding->composing == COMPOSING_WITH_RULE_RULE)
1892 {
1893 *dst++ = c1 & 0x7F;
1894 coding->composing = COMPOSING_WITH_RULE_HEAD;
1895 continue;
1896 }
1897 else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1898 coding->composing = COMPOSING_WITH_RULE_RULE;
1899 if (c1 == 0xA0)
1900 {
1901 /* This is an ASCII component. */
1902 ONE_MORE_BYTE (c1);
1903 c1 &= 0x7F;
1904 }
1905 else
1906 /* This is a leading-code of non ASCII component. */
1907 c1 -= 0x20;
1908 }
1909 }
1910
1911 /* Now encode one character. C1 is a control character, an
1912 ASCII character, or a leading-code of multi-byte character. */
1913 switch (emacs_code_class[c1])
1914 {
1915 case EMACS_ascii_code:
8dbb769e 1916 c2 = 0;
bdd9fb48 1917 ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
4ed46869
KH
1918 break;
1919
1920 case EMACS_control_code:
1921 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
e0e989f6 1922 ENCODE_RESET_PLANE_AND_REGISTER;
4ed46869 1923 *dst++ = c1;
d46c5b12 1924 coding->consumed_char++;
4ed46869
KH
1925 break;
1926
1927 case EMACS_carriage_return_code:
d46c5b12 1928 if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
4ed46869
KH
1929 {
1930 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
e0e989f6 1931 ENCODE_RESET_PLANE_AND_REGISTER;
4ed46869 1932 *dst++ = c1;
d46c5b12 1933 coding->consumed_char++;
4ed46869
KH
1934 break;
1935 }
1936 /* fall down to treat '\r' as '\n' ... */
1937
1938 case EMACS_linefeed_code:
1939 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
e0e989f6
KH
1940 ENCODE_RESET_PLANE_AND_REGISTER;
1941 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1942 bcopy (coding->spec.iso2022.initial_designation,
1943 coding->spec.iso2022.current_designation,
1944 sizeof coding->spec.iso2022.initial_designation);
4ed46869 1945 if (coding->eol_type == CODING_EOL_LF
0ef69138 1946 || coding->eol_type == CODING_EOL_UNDECIDED)
4ed46869
KH
1947 *dst++ = ISO_CODE_LF;
1948 else if (coding->eol_type == CODING_EOL_CRLF)
1949 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1950 else
1951 *dst++ = ISO_CODE_CR;
e0e989f6 1952 CODING_SPEC_ISO_BOL (coding) = 1;
d46c5b12 1953 coding->consumed_char++;
4ed46869
KH
1954 break;
1955
1956 case EMACS_leading_code_2:
1957 ONE_MORE_BYTE (c2);
8dbb769e 1958 c3 = 0;
19a8d9e0
KH
1959 if (c2 < 0xA0)
1960 {
1961 /* invalid sequence */
1962 *dst++ = c1;
38cf95df
RS
1963 src--;
1964 coding->consumed_char++;
19a8d9e0
KH
1965 }
1966 else
1967 ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
4ed46869
KH
1968 break;
1969
1970 case EMACS_leading_code_3:
1971 TWO_MORE_BYTES (c2, c3);
8dbb769e 1972 c4 = 0;
19a8d9e0
KH
1973 if (c2 < 0xA0 || c3 < 0xA0)
1974 {
1975 /* invalid sequence */
1976 *dst++ = c1;
38cf95df
RS
1977 src -= 2;
1978 coding->consumed_char++;
19a8d9e0
KH
1979 }
1980 else if (c1 < LEADING_CODE_PRIVATE_11)
bdd9fb48 1981 ENCODE_ISO_CHARACTER (c1, c2, c3);
4ed46869 1982 else
bdd9fb48 1983 ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
4ed46869
KH
1984 break;
1985
1986 case EMACS_leading_code_4:
1987 THREE_MORE_BYTES (c2, c3, c4);
19a8d9e0
KH
1988 if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1989 {
1990 /* invalid sequence */
1991 *dst++ = c1;
38cf95df
RS
1992 src -= 3;
1993 coding->consumed_char++;
19a8d9e0
KH
1994 }
1995 else
1996 ENCODE_ISO_CHARACTER (c2, c3, c4);
4ed46869
KH
1997 break;
1998
1999 case EMACS_leading_code_composition:
19a8d9e0
KH
2000 ONE_MORE_BYTE (c2);
2001 if (c2 < 0xA0)
2002 {
2003 /* invalid sequence */
2004 *dst++ = c1;
38cf95df
RS
2005 src--;
2006 coding->consumed_char++;
19a8d9e0
KH
2007 }
2008 else if (c2 == 0xFF)
4ed46869 2009 {
d46c5b12 2010 ENCODE_RESET_PLANE_AND_REGISTER;
4ed46869
KH
2011 coding->composing = COMPOSING_WITH_RULE_HEAD;
2012 ENCODE_COMPOSITION_WITH_RULE_START;
d46c5b12 2013 coding->consumed_char++;
4ed46869
KH
2014 }
2015 else
2016 {
d46c5b12 2017 ENCODE_RESET_PLANE_AND_REGISTER;
4ed46869
KH
2018 /* Rewind one byte because it is a character code of
2019 composition elements. */
2020 src--;
2021 coding->composing = COMPOSING_NO_RULE_HEAD;
2022 ENCODE_COMPOSITION_NO_RULE_START;
d46c5b12 2023 coding->consumed_char++;
4ed46869
KH
2024 }
2025 break;
2026
2027 case EMACS_invalid_code:
3efbce95
KH
2028 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2029 ENCODE_RESET_PLANE_AND_REGISTER;
4ed46869 2030 *dst++ = c1;
d46c5b12 2031 coding->consumed_char++;
4ed46869
KH
2032 break;
2033 }
2034 continue;
2035 label_end_of_loop:
d46c5b12
KH
2036 result = CODING_FINISH_INSUFFICIENT_SRC;
2037 src = src_base;
4ed46869
KH
2038 break;
2039 }
2040
49cb52b4
KH
2041 if (src < src_end && result == CODING_FINISH_NORMAL)
2042 result = CODING_FINISH_INSUFFICIENT_DST;
2043
2044 /* If this is the last block of the text to be encoded, we must
2045 reset graphic planes and registers to the initial state, and
2046 flush out the carryover if any. */
2047 if (coding->mode & CODING_MODE_LAST_BLOCK)
84fbb8a0
KH
2048 {
2049 ENCODE_RESET_PLANE_AND_REGISTER;
2050 if (COMPOSING_P (coding->composing))
2051 ENCODE_COMPOSITION_END;
88993dfd
KH
2052 if (result == CODING_FINISH_INSUFFICIENT_SRC)
2053 {
2054 while (src < src_end && dst < dst_end)
2055 *dst++ = *src++;
2056 }
84fbb8a0 2057 }
d46c5b12
KH
2058 coding->consumed = src - source;
2059 coding->produced = coding->produced_char = dst - destination;
2060 return result;
4ed46869
KH
2061}
2062
2063\f
2064/*** 4. SJIS and BIG5 handlers ***/
2065
f4dee582 2066/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
2067 quite widely. So, for the moment, Emacs supports them in the bare
2068 C code. But, in the future, they may be supported only by CCL. */
2069
2070/* SJIS is a coding system encoding three character sets: ASCII, right
2071 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
2072 as is. A character of charset katakana-jisx0201 is encoded by
2073 "position-code + 0x80". A character of charset japanese-jisx0208
2074 is encoded in 2-byte but two position-codes are divided and shifted
2075 so that they fit in the ranges below.
2076
2077 --- CODE RANGE of SJIS ---
2078 (character set) (range)
2079 ASCII 0x00 .. 0x7F
2080 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 2081 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 2082 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
2083 -------------------------------
2084
2085*/
2086
2087/* BIG5 is a coding system encoding two character sets: ASCII and
2088 Big5. An ASCII character is encoded as is. Big5 is a two-byte
2089 character set and is encoded in two bytes.
2090
2091 --- CODE RANGE of BIG5 ---
2092 (character set) (range)
2093 ASCII 0x00 .. 0x7F
2094 Big5 (1st byte) 0xA1 .. 0xFE
2095 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
2096 --------------------------
2097
2098 Since the number of characters in Big5 is larger than maximum
2099 characters in Emacs' charset (96x96), it can't be handled as one
2100 charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
2101 and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
2102 contains frequently used characters and the latter contains less
2103 frequently used characters. */
2104
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every macro argument is parenthesized in the expansion so that an
   arbitrary expression can be passed.  Input arguments are still
   evaluated more than once, so they must be free of side effects.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Set CHARSET, C1 and C2 from the BIG5 byte pair B1, B2.  First bytes
   below 0xC9 go to charset_big5_1, the others to charset_big5_2.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Set the BIG5 byte pair B1, B2 from CHARSET, C1 and C2; the inverse
   of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2137
a5d301df
KH
/* Produce the character (CHARSET, C1, C2) decoded from SJIS or BIG5.
   When a variable `translation_table' in scope is non-nil and
   translate_char returns a non-negative character for it, that
   character is produced instead.  The result is handed to
   DECODE_CHARACTER_ASCII, DECODE_CHARACTER_DIMENSION1 or
   DECODE_CHARACTER_DIMENSION2 according to the resulting charset.  */
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
  do {                                                                  \
    int charset_alt = (charset);                                        \
    if (!NILP (translation_table))                                      \
      {                                                                 \
        int c_alt = translate_char (translation_table,                  \
                                    -1, (charset), c1, c2);             \
        if (c_alt >= 0)                                                 \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    if (charset_alt < 0 || charset_alt == CHARSET_ASCII)                \
      DECODE_CHARACTER_ASCII (c1);                                      \
    else if (CHARSET_DIMENSION (charset_alt) != 1)                      \
      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
    else                                                                \
      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
  } while (0)
2152
84fbb8a0
KH
/* Encode the character (CHARSET, C1, C2) into SJIS (when a variable
   `sjis_p' in scope is nonzero) or BIG5 (otherwise), appending the
   result at DST and incrementing CODING->consumed_char.  When
   `translation_table' is non-nil and translate_char returns a
   non-negative character, that character is encoded instead.
   Characters of charsets the target coding system cannot represent
   are copied in Emacs' internal form, and CODING->fake_multibyte is
   set.

   NOTE(review): the last branch appends three bytes, while the caller
   in this file reserves room for only two per loop -- confirm that
   the destination buffer always has spare room.

   The definition ends WITHOUT a semicolon so that an invocation
   followed by `;' is exactly one statement and can be used in
   `if ... else'.  */
#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                       \
  do {                                                                    \
    int c_alt, charset_alt;                                               \
    if (!NILP (translation_table)                                         \
        && ((c_alt = translate_char (translation_table, -1,               \
                                     charset, c1, c2))                    \
            >= 0))                                                        \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    else                                                                  \
      charset_alt = charset;                                              \
    if (charset_alt == charset_ascii)                                     \
      *dst++ = c1;                                                        \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                        \
      {                                                                   \
        if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
          *dst++ = c1;                                                    \
        else if (sjis_p && charset_alt == charset_latin_jisx0201)         \
          *dst++ = c1 & 0x7F;                                             \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1;                            \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        c1 &= 0x7F, c2 &= 0x7F;                                           \
        if (sjis_p && (charset_alt == charset_jisx0208                    \
                       || charset_alt == charset_jisx0208_1978))          \
          {                                                               \
            unsigned char s1, s2;                                         \
                                                                          \
            ENCODE_SJIS (c1, c2, s1, s2);                                 \
            *dst++ = s1, *dst++ = s2;                                     \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
        else if (!sjis_p                                                  \
                 && (charset_alt == charset_big5_1                        \
                     || charset_alt == charset_big5_2))                   \
          {                                                               \
            unsigned char b1, b2;                                         \
                                                                          \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
            *dst++ = b1, *dst++ = b2;                                     \
          }                                                               \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;               \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    coding->consumed_char++;                                              \
  } while (0)
2206
4ed46869
KH
2207/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2208 Check if a text is encoded in SJIS. If it is, return
2209 CODING_CATEGORY_MASK_SJIS, else return 0. */
2210
2211int
2212detect_coding_sjis (src, src_end)
2213 unsigned char *src, *src_end;
2214{
2215 unsigned char c;
2216
2217 while (src < src_end)
2218 {
2219 c = *src++;
4ed46869
KH
2220 if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2221 {
2222 if (src < src_end && *src++ < 0x40)
2223 return 0;
2224 }
2225 }
2226 return CODING_CATEGORY_MASK_SJIS;
2227}
2228
2229/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2230 Check if a text is encoded in BIG5. If it is, return
2231 CODING_CATEGORY_MASK_BIG5, else return 0. */
2232
2233int
2234detect_coding_big5 (src, src_end)
2235 unsigned char *src, *src_end;
2236{
2237 unsigned char c;
2238
2239 while (src < src_end)
2240 {
2241 c = *src++;
4ed46869
KH
2242 if (c >= 0xA1)
2243 {
2244 if (src >= src_end)
2245 break;
2246 c = *src++;
2247 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2248 return 0;
2249 }
2250 }
2251 return CODING_CATEGORY_MASK_BIG5;
2252}
2253
2254/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2255 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
2256
2257int
2258decode_coding_sjis_big5 (coding, source, destination,
d46c5b12 2259 src_bytes, dst_bytes, sjis_p)
4ed46869
KH
2260 struct coding_system *coding;
2261 unsigned char *source, *destination;
2262 int src_bytes, dst_bytes;
4ed46869
KH
2263 int sjis_p;
2264{
2265 unsigned char *src = source;
2266 unsigned char *src_end = source + src_bytes;
2267 unsigned char *dst = destination;
2268 unsigned char *dst_end = destination + dst_bytes;
2269 /* Since the maximum bytes produced by each loop is 4, we subtract 3
2270 from DST_END to assure overflow checking is necessary only at the
2271 head of loop. */
2272 unsigned char *adjusted_dst_end = dst_end - 3;
84fbb8a0 2273 Lisp_Object translation_table
f967223b 2274 = coding->translation_table_for_decode;
d46c5b12 2275 int result = CODING_FINISH_NORMAL;
a5d301df 2276
84fbb8a0 2277 if (!NILP (Venable_character_translation) && NILP (translation_table))
f967223b 2278 translation_table = Vstandard_translation_table_for_decode;
4ed46869 2279
d46c5b12 2280 coding->produced_char = 0;
fb88bf2d 2281 coding->fake_multibyte = 0;
d46c5b12
KH
2282 while (src < src_end && (dst_bytes
2283 ? (dst < adjusted_dst_end)
2284 : (dst < src - 3)))
4ed46869
KH
2285 {
2286 /* SRC_BASE remembers the start position in source in each loop.
2287 The loop will be exited when there's not enough source text
2288 to analyze two-byte character (within macro ONE_MORE_BYTE).
2289 In that case, SRC is reset to SRC_BASE before exiting. */
2290 unsigned char *src_base = src;
2291 unsigned char c1 = *src++, c2, c3, c4;
2292
d46c5b12 2293 if (c1 < 0x20)
4ed46869 2294 {
d46c5b12 2295 if (c1 == '\r')
4ed46869 2296 {
d46c5b12
KH
2297 if (coding->eol_type == CODING_EOL_CRLF)
2298 {
2299 ONE_MORE_BYTE (c2);
2300 if (c2 == '\n')
2301 *dst++ = c2;
2302 else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2303 {
2304 result = CODING_FINISH_INCONSISTENT_EOL;
2305 goto label_end_of_loop_2;
2306 }
2307 else
2308 /* To process C2 again, SRC is subtracted by 1. */
2309 *dst++ = c1, src--;
2310 }
2311 else if (coding->eol_type == CODING_EOL_CR)
2312 *dst++ = '\n';
4ed46869 2313 else
d46c5b12
KH
2314 *dst++ = c1;
2315 }
2316 else if (c1 == '\n'
2317 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2318 && (coding->eol_type == CODING_EOL_CR
2319 || coding->eol_type == CODING_EOL_CRLF))
2320 {
2321 result = CODING_FINISH_INCONSISTENT_EOL;
2322 goto label_end_of_loop_2;
4ed46869
KH
2323 }
2324 else
2325 *dst++ = c1;
d46c5b12 2326 coding->produced_char++;
4ed46869 2327 }
a5d301df 2328 else if (c1 < 0x80)
5e34de15
KH
2329 {
2330 c2 = 0; /* avoid warning */
2331 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2332 }
54f78171 2333 else
4ed46869 2334 {
4ed46869
KH
2335 if (sjis_p)
2336 {
54f78171 2337 if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
fb88bf2d 2338 {
54f78171
KH
2339 /* SJIS -> JISX0208 */
2340 ONE_MORE_BYTE (c2);
d14d03ac 2341 if (c2 >= 0x40 && c2 != 0x7F && c2 <= 0xFC)
54f78171
KH
2342 {
2343 DECODE_SJIS (c1, c2, c3, c4);
2344 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2345 }
2346 else
2347 goto label_invalid_code_2;
fb88bf2d 2348 }
54f78171
KH
2349 else if (c1 < 0xE0)
2350 /* SJIS -> JISX0201-Kana */
5e34de15
KH
2351 {
2352 c2 = 0; /* avoid warning */
2353 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2354 /* dummy */ c2);
2355 }
fb88bf2d 2356 else
54f78171 2357 goto label_invalid_code_1;
4ed46869 2358 }
fb88bf2d 2359 else
fb88bf2d 2360 {
54f78171
KH
2361 /* BIG5 -> Big5 */
2362 if (c1 >= 0xA1 && c1 <= 0xFE)
fb88bf2d 2363 {
54f78171
KH
2364 ONE_MORE_BYTE (c2);
2365 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2366 {
2367 int charset;
4ed46869 2368
54f78171
KH
2369 DECODE_BIG5 (c1, c2, charset, c3, c4);
2370 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2371 }
2372 else
2373 goto label_invalid_code_2;
fb88bf2d
KH
2374 }
2375 else
54f78171 2376 goto label_invalid_code_1;
4ed46869
KH
2377 }
2378 }
2379 continue;
2380
fb88bf2d
KH
2381 label_invalid_code_1:
2382 *dst++ = c1;
2383 coding->produced_char++;
2384 coding->fake_multibyte = 1;
2385 continue;
2386
2387 label_invalid_code_2:
2388 *dst++ = c1; *dst++= c2;
2389 coding->produced_char += 2;
2390 coding->fake_multibyte = 1;
2391 continue;
2392
4ed46869 2393 label_end_of_loop:
d46c5b12
KH
2394 result = CODING_FINISH_INSUFFICIENT_SRC;
2395 label_end_of_loop_2:
4ed46869
KH
2396 src = src_base;
2397 break;
2398 }
2399
fb88bf2d
KH
2400 if (src < src_end)
2401 {
2402 if (result == CODING_FINISH_NORMAL)
2403 result = CODING_FINISH_INSUFFICIENT_DST;
2404 else if (result != CODING_FINISH_INCONSISTENT_EOL
2405 && coding->mode & CODING_MODE_LAST_BLOCK)
2406 {
2407 src_bytes = src_end - src;
2408 if (dst_bytes && (dst_end - dst < src_bytes))
2409 src_bytes = dst_end - dst;
2410 bcopy (dst, src, src_bytes);
2411 src += src_bytes;
2412 dst += src_bytes;
2413 coding->fake_multibyte = 1;
2414 }
2415 }
d46c5b12
KH
2416
2417 coding->consumed = coding->consumed_char = src - source;
2418 coding->produced = dst - destination;
2419 return result;
4ed46869
KH
2420}
2421
2422/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2423 This function can encode `charset_ascii', `charset_katakana_jisx0201',
2424 `charset_jisx0208', `charset_big5_1', and `charset_big5-2'. We are
2425 sure that all these charsets are registered as official charset
2426 (i.e. do not have extended leading-codes). Characters of other
2427 charsets are produced without any encoding. If SJIS_P is 1, encode
2428 SJIS text, else encode BIG5 text. */
2429
2430int
2431encode_coding_sjis_big5 (coding, source, destination,
d46c5b12 2432 src_bytes, dst_bytes, sjis_p)
4ed46869
KH
2433 struct coding_system *coding;
2434 unsigned char *source, *destination;
2435 int src_bytes, dst_bytes;
4ed46869
KH
2436 int sjis_p;
2437{
2438 unsigned char *src = source;
2439 unsigned char *src_end = source + src_bytes;
2440 unsigned char *dst = destination;
2441 unsigned char *dst_end = destination + dst_bytes;
2442 /* Since the maximum bytes produced by each loop is 2, we subtract 1
2443 from DST_END to assure overflow checking is necessary only at the
2444 head of loop. */
2445 unsigned char *adjusted_dst_end = dst_end - 1;
84fbb8a0 2446 Lisp_Object translation_table
f967223b 2447 = coding->translation_table_for_encode;
d46c5b12 2448 int result = CODING_FINISH_NORMAL;
a5d301df 2449
84fbb8a0 2450 if (!NILP (Venable_character_translation) && NILP (translation_table))
f967223b 2451 translation_table = Vstandard_translation_table_for_encode;
4ed46869 2452
d46c5b12 2453 coding->consumed_char = 0;
fb88bf2d 2454 coding->fake_multibyte = 0;
d46c5b12
KH
2455 while (src < src_end && (dst_bytes
2456 ? (dst < adjusted_dst_end)
2457 : (dst < src - 1)))
4ed46869
KH
2458 {
2459 /* SRC_BASE remembers the start position in source in each loop.
2460 The loop will be exited when there's not enough source text
2461 to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2462 TWO_MORE_BYTES). In that case, SRC is reset to SRC_BASE
2463 before exiting. */
2464 unsigned char *src_base = src;
2465 unsigned char c1 = *src++, c2, c3, c4;
2466
2467 if (coding->composing)
2468 {
2469 if (c1 == 0xA0)
2470 {
2471 ONE_MORE_BYTE (c1);
2472 c1 &= 0x7F;
2473 }
2474 else if (c1 >= 0xA0)
2475 c1 -= 0x20;
2476 else
2477 coding->composing = 0;
2478 }
2479
2480 switch (emacs_code_class[c1])
2481 {
2482 case EMACS_ascii_code:
a5d301df
KH
2483 ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2484 break;
2485
4ed46869
KH
2486 case EMACS_control_code:
2487 *dst++ = c1;
d46c5b12 2488 coding->consumed_char++;
4ed46869
KH
2489 break;
2490
2491 case EMACS_carriage_return_code:
d46c5b12 2492 if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
4ed46869
KH
2493 {
2494 *dst++ = c1;
d46c5b12 2495 coding->consumed_char++;
4ed46869
KH
2496 break;
2497 }
2498 /* fall down to treat '\r' as '\n' ... */
2499
2500 case EMACS_linefeed_code:
2501 if (coding->eol_type == CODING_EOL_LF
0ef69138 2502 || coding->eol_type == CODING_EOL_UNDECIDED)
4ed46869
KH
2503 *dst++ = '\n';
2504 else if (coding->eol_type == CODING_EOL_CRLF)
2505 *dst++ = '\r', *dst++ = '\n';
2506 else
2507 *dst++ = '\r';
d46c5b12 2508 coding->consumed_char++;
4ed46869
KH
2509 break;
2510
2511 case EMACS_leading_code_2:
2512 ONE_MORE_BYTE (c2);
a5d301df 2513 ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
4ed46869
KH
2514 break;
2515
2516 case EMACS_leading_code_3:
2517 TWO_MORE_BYTES (c2, c3);
a5d301df 2518 ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
4ed46869
KH
2519 break;
2520
2521 case EMACS_leading_code_4:
2522 THREE_MORE_BYTES (c2, c3, c4);
a5d301df 2523 ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
4ed46869
KH
2524 break;
2525
2526 case EMACS_leading_code_composition:
2527 coding->composing = 1;
2528 break;
2529
2530 default: /* i.e. case EMACS_invalid_code: */
2531 *dst++ = c1;
d46c5b12 2532 coding->consumed_char++;
4ed46869
KH
2533 }
2534 continue;
2535
2536 label_end_of_loop:
d46c5b12
KH
2537 result = CODING_FINISH_INSUFFICIENT_SRC;
2538 src = src_base;
4ed46869
KH
2539 break;
2540 }
2541
d46c5b12
KH
2542 if (result == CODING_FINISH_NORMAL
2543 && src < src_end)
2544 result = CODING_FINISH_INSUFFICIENT_DST;
2545 coding->consumed = src - source;
2546 coding->produced = coding->produced_char = dst - destination;
2547 return result;
4ed46869
KH
2548}
2549
2550\f
1397dc18
KH
2551/*** 5. CCL handlers ***/
2552
2553/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2554 Check if a text is encoded in a coding system of which
2555 encoder/decoder are written in CCL program. If it is, return
2556 CODING_CATEGORY_MASK_CCL, else return 0. */
2557
2558int
2559detect_coding_ccl (src, src_end)
2560 unsigned char *src, *src_end;
2561{
2562 unsigned char *valid;
2563
2564 /* No coding system is assigned to coding-category-ccl. */
2565 if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2566 return 0;
2567
2568 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2569 while (src < src_end)
2570 {
2571 if (! valid[*src]) return 0;
2572 src++;
2573 }
2574 return CODING_CATEGORY_MASK_CCL;
2575}
2576
2577\f
2578/*** 6. End-of-line handlers ***/
4ed46869
KH
2579
2580/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2581 This function is called only when `coding->eol_type' is
2582 CODING_EOL_CRLF or CODING_EOL_CR. */
2583
dfcf069d 2584int
d46c5b12 2585decode_eol (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
2586 struct coding_system *coding;
2587 unsigned char *source, *destination;
2588 int src_bytes, dst_bytes;
4ed46869
KH
2589{
2590 unsigned char *src = source;
2591 unsigned char *src_end = source + src_bytes;
2592 unsigned char *dst = destination;
2593 unsigned char *dst_end = destination + dst_bytes;
fb88bf2d 2594 unsigned char c;
d46c5b12
KH
2595 int result = CODING_FINISH_NORMAL;
2596
fb88bf2d
KH
2597 coding->fake_multibyte = 0;
2598
d46c5b12 2599 if (src_bytes <= 0)
716e0b0a
AI
2600 {
2601 coding->produced = coding->produced_char = 0;
2602 coding->consumed = coding->consumed_char = 0;
2603 return result;
2604 }
4ed46869
KH
2605
2606 switch (coding->eol_type)
2607 {
2608 case CODING_EOL_CRLF:
2609 {
2610 /* Since the maximum bytes produced by each loop is 2, we
2611 subtract 1 from DST_END to assure overflow checking is
2612 necessary only at the head of loop. */
2613 unsigned char *adjusted_dst_end = dst_end - 1;
2614
d46c5b12
KH
2615 while (src < src_end && (dst_bytes
2616 ? (dst < adjusted_dst_end)
2617 : (dst < src - 1)))
4ed46869
KH
2618 {
2619 unsigned char *src_base = src;
fb88bf2d
KH
2620
2621 c = *src++;
4ed46869
KH
2622 if (c == '\r')
2623 {
2624 ONE_MORE_BYTE (c);
fdfcf19d
KH
2625 if (c == '\n')
2626 *dst++ = c;
2627 else
d46c5b12
KH
2628 {
2629 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2630 {
2631 result = CODING_FINISH_INCONSISTENT_EOL;
2632 goto label_end_of_loop_2;
2633 }
fdfcf19d 2634 src--;
d46c5b12 2635 *dst++ = '\r';
fb88bf2d
KH
2636 if (BASE_LEADING_CODE_P (c))
2637 coding->fake_multibyte = 1;
d46c5b12 2638 }
4ed46869 2639 }
d46c5b12
KH
2640 else if (c == '\n'
2641 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2642 {
2643 result = CODING_FINISH_INCONSISTENT_EOL;
2644 goto label_end_of_loop_2;
2645 }
4ed46869 2646 else
fb88bf2d
KH
2647 {
2648 *dst++ = c;
2649 if (BASE_LEADING_CODE_P (c))
2650 coding->fake_multibyte = 1;
2651 }
4ed46869
KH
2652 continue;
2653
2654 label_end_of_loop:
d46c5b12
KH
2655 result = CODING_FINISH_INSUFFICIENT_SRC;
2656 label_end_of_loop_2:
4ed46869
KH
2657 src = src_base;
2658 break;
2659 }
fdfcf19d
KH
2660 if (src < src_end)
2661 {
2662 if (result == CODING_FINISH_NORMAL)
2663 result = CODING_FINISH_INSUFFICIENT_DST;
2664 else if (result != CODING_FINISH_INCONSISTENT_EOL
2665 && coding->mode & CODING_MODE_LAST_BLOCK)
2666 {
2667 /* This is the last block of the text to be decoded.
2668 We flush out all remaining codes. */
2669 src_bytes = src_end - src;
2670 if (dst_bytes && (dst_end - dst < src_bytes))
2671 src_bytes = dst_end - dst;
2672 bcopy (src, dst, src_bytes);
2673 dst += src_bytes;
2674 src += src_bytes;
2675 }
2676 }
4ed46869 2677 }
d46c5b12 2678 break;
4ed46869
KH
2679
2680 case CODING_EOL_CR:
d46c5b12
KH
2681 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2682 {
fb88bf2d
KH
2683 while (src < src_end)
2684 {
2685 if ((c = *src++) == '\n')
2686 break;
2687 if (BASE_LEADING_CODE_P (c))
2688 coding->fake_multibyte = 1;
2689 }
d46c5b12
KH
2690 if (*--src == '\n')
2691 {
2692 src_bytes = src - source;
2693 result = CODING_FINISH_INCONSISTENT_EOL;
2694 }
2695 }
2696 if (dst_bytes && src_bytes > dst_bytes)
2697 {
2698 result = CODING_FINISH_INSUFFICIENT_DST;
2699 src_bytes = dst_bytes;
2700 }
2701 if (dst_bytes)
2702 bcopy (source, destination, src_bytes);
2703 else
2704 safe_bcopy (source, destination, src_bytes);
2705 src = source + src_bytes;
2706 while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
4ed46869
KH
2707 break;
2708
2709 default: /* i.e. case: CODING_EOL_LF */
d46c5b12
KH
2710 if (dst_bytes && src_bytes > dst_bytes)
2711 {
2712 result = CODING_FINISH_INSUFFICIENT_DST;
2713 src_bytes = dst_bytes;
2714 }
2715 if (dst_bytes)
2716 bcopy (source, destination, src_bytes);
2717 else
2718 safe_bcopy (source, destination, src_bytes);
2719 src += src_bytes;
993824c9 2720 dst += src_bytes;
fb88bf2d 2721 coding->fake_multibyte = 1;
4ed46869
KH
2722 break;
2723 }
2724
d46c5b12
KH
2725 coding->consumed = coding->consumed_char = src - source;
2726 coding->produced = coding->produced_char = dst - destination;
2727 return result;
4ed46869
KH
2728}
2729
2730/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
2731 format of end-of-line according to `coding->eol_type'. If
d46c5b12
KH
2732 `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2733 '\r' in source text also means end-of-line. */
4ed46869 2734
dfcf069d 2735int
d46c5b12 2736encode_eol (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
2737 struct coding_system *coding;
2738 unsigned char *source, *destination;
2739 int src_bytes, dst_bytes;
4ed46869
KH
2740{
2741 unsigned char *src = source;
2742 unsigned char *dst = destination;
d46c5b12 2743 int result = CODING_FINISH_NORMAL;
4ed46869 2744
fb88bf2d
KH
2745 coding->fake_multibyte = 0;
2746
d46c5b12
KH
2747 if (coding->eol_type == CODING_EOL_CRLF)
2748 {
2749 unsigned char c;
2750 unsigned char *src_end = source + src_bytes;
2751 unsigned char *dst_end = destination + dst_bytes;
2752 /* Since the maximum bytes produced by each loop is 2, we
2753 subtract 1 from DST_END to assure overflow checking is
2754 necessary only at the head of loop. */
2755 unsigned char *adjusted_dst_end = dst_end - 1;
2756
2757 while (src < src_end && (dst_bytes
2758 ? (dst < adjusted_dst_end)
2759 : (dst < src - 1)))
2760 {
2761 c = *src++;
2762 if (c == '\n'
2763 || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2764 *dst++ = '\r', *dst++ = '\n';
2765 else
fb88bf2d
KH
2766 {
2767 *dst++ = c;
2768 if (BASE_LEADING_CODE_P (c))
2769 coding->fake_multibyte = 1;
2770 }
d46c5b12
KH
2771 }
2772 if (src < src_end)
2773 result = CODING_FINISH_INSUFFICIENT_DST;
2774 }
2775 else
4ed46869 2776 {
fb88bf2d
KH
2777 unsigned char c;
2778
d46c5b12 2779 if (dst_bytes && src_bytes > dst_bytes)
4ed46869 2780 {
d46c5b12
KH
2781 src_bytes = dst_bytes;
2782 result = CODING_FINISH_INSUFFICIENT_DST;
2783 }
2784 if (dst_bytes)
2785 bcopy (source, destination, src_bytes);
2786 else
993824c9
RS
2787 safe_bcopy (source, destination, src_bytes);
2788 dst_bytes = src_bytes;
2789 if (coding->eol_type == CODING_EOL_CR)
d46c5b12
KH
2790 {
2791 while (src_bytes--)
fb88bf2d
KH
2792 {
2793 if ((c = *dst++) == '\n')
2794 dst[-1] = '\r';
2795 else if (BASE_LEADING_CODE_P (c))
993824c9 2796 coding->fake_multibyte = 1;
fb88bf2d 2797 }
d46c5b12 2798 }
fb88bf2d 2799 else
d46c5b12 2800 {
fb88bf2d
KH
2801 if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2802 {
2803 while (src_bytes--)
2804 if (*dst++ == '\r') dst[-1] = '\n';
2805 }
2806 coding->fake_multibyte = 1;
4ed46869 2807 }
fb88bf2d
KH
2808 src = source + dst_bytes;
2809 dst = destination + dst_bytes;
4ed46869
KH
2810 }
2811
d46c5b12
KH
2812 coding->consumed = coding->consumed_char = src - source;
2813 coding->produced = coding->produced_char = dst - destination;
2814 return result;
4ed46869
KH
2815}
2816
2817\f
1397dc18 2818/*** 7. C library functions ***/
4ed46869
KH
2819
2820/* In Emacs Lisp, coding system is represented by a Lisp symbol which
2821 has a property `coding-system'. The value of this property is a
2822 vector of length 5 (called as coding-vector). Among elements of
2823 this vector, the first (element[0]) and the fifth (element[4])
2824 carry important information for decoding/encoding. Before
2825 decoding/encoding, this information should be set in fields of a
2826 structure of type `coding_system'.
2827
2828 A value of property `coding-system' can be a symbol of another
2829 subsidiary coding-system. In that case, Emacs gets coding-vector
2830 from that symbol.
2831
2832 `element[0]' contains information to be set in `coding->type'. The
2833 value and its meaning is as follows:
2834
0ef69138
KH
2835 0 -- coding_type_emacs_mule
2836 1 -- coding_type_sjis
2837 2 -- coding_type_iso2022
2838 3 -- coding_type_big5
2839 4 -- coding_type_ccl encoder/decoder written in CCL
2840 nil -- coding_type_no_conversion
2841 t -- coding_type_undecided (automatic conversion on decoding,
2842 no-conversion on encoding)
4ed46869
KH
2843
2844 `element[4]' contains information to be set in `coding->flags' and
2845 `coding->spec'. The meaning varies by `coding->type'.
2846
2847 If `coding->type' is `coding_type_iso2022', element[4] is a vector
2848 of length 32 (of which the first 17 sub-elements are used now).
2849 Meanings of these sub-elements are:
2850
2851 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2852 If the value is an integer of valid charset, the charset is
2853 assumed to be designated to graphic register N initially.
2854
2855 If the value is minus, it is a minus value of charset which
2856 reserves graphic register N, which means that the charset is
2857 not designated initially but should be designated to graphic
2858 register N just before encoding a character in that charset.
2859
2860 If the value is nil, graphic register N is never used on
2861 encoding.
2862
2863 sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2864 Each value takes t or nil. See the section ISO2022 of
2865 `coding.h' for more information.
2866
2867 If `coding->type' is `coding_type_big5', element[4] is t to denote
2868 BIG5-ETen or nil to denote BIG5-HKU.
2869
2870 If `coding->type' takes the other value, element[4] is ignored.
2871
2872 Emacs Lisp's coding system also carries information about format of
2873 end-of-line in a value of property `eol-type'. If the value is
2874 integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2875 means CODING_EOL_CR. If it is not integer, it should be a vector
2876 of subsidiary coding systems of which property `eol-type' has one
2877 of above values.
2878
2879*/
2880
2881/* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2882 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
2883 is setup so that no conversion is necessary and return -1, else
2884 return 0. */
2885
2886int
e0e989f6
KH
2887setup_coding_system (coding_system, coding)
2888 Lisp_Object coding_system;
4ed46869
KH
2889 struct coding_system *coding;
2890{
d46c5b12 2891 Lisp_Object coding_spec, coding_type, eol_type, plist;
4608c386 2892 Lisp_Object val;
70c22245 2893 int i;
4ed46869 2894
d46c5b12 2895 /* Initialize some fields required for all kinds of coding systems. */
774324d6 2896 coding->symbol = coding_system;
d46c5b12
KH
2897 coding->common_flags = 0;
2898 coding->mode = 0;
2899 coding->heading_ascii = -1;
2900 coding->post_read_conversion = coding->pre_write_conversion = Qnil;
1f5dbf34
KH
2901
2902 if (NILP (coding_system))
2903 goto label_invalid_coding_system;
2904
4608c386 2905 coding_spec = Fget (coding_system, Qcoding_system);
1f5dbf34 2906
4608c386
KH
2907 if (!VECTORP (coding_spec)
2908 || XVECTOR (coding_spec)->size != 5
2909 || !CONSP (XVECTOR (coding_spec)->contents[3]))
4ed46869 2910 goto label_invalid_coding_system;
4608c386 2911
d46c5b12
KH
2912 eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2913 if (VECTORP (eol_type))
2914 {
2915 coding->eol_type = CODING_EOL_UNDECIDED;
2916 coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2917 }
2918 else if (XFASTINT (eol_type) == 1)
2919 {
2920 coding->eol_type = CODING_EOL_CRLF;
2921 coding->common_flags
2922 = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2923 }
2924 else if (XFASTINT (eol_type) == 2)
2925 {
2926 coding->eol_type = CODING_EOL_CR;
2927 coding->common_flags
2928 = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2929 }
2930 else
2931 coding->eol_type = CODING_EOL_LF;
2932
2933 coding_type = XVECTOR (coding_spec)->contents[0];
2934 /* Try short cut. */
2935 if (SYMBOLP (coding_type))
2936 {
2937 if (EQ (coding_type, Qt))
2938 {
2939 coding->type = coding_type_undecided;
2940 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2941 }
2942 else
2943 coding->type = coding_type_no_conversion;
2944 return 0;
2945 }
2946
2947 /* Initialize remaining fields. */
2948 coding->composing = 0;
a63063ae 2949 coding->composed_chars = 0;
d46c5b12
KH
2950
2951 /* Get values of coding system properties:
2952 `post-read-conversion', `pre-write-conversion',
f967223b 2953 `translation-table-for-decode', `translation-table-for-encode'. */
4608c386 2954 plist = XVECTOR (coding_spec)->contents[3];
b843d1ae
KH
2955 /* Pre & post conversion functions should be disabled if
2956 inhibit_eol_conversion is nozero. This is the case that a code
2957 conversion function is called while those functions are running. */
2958 if (! inhibit_pre_post_conversion)
2959 {
2960 coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2961 coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2962 }
f967223b 2963 val = Fplist_get (plist, Qtranslation_table_for_decode);
4608c386 2964 if (SYMBOLP (val))
f967223b
KH
2965 val = Fget (val, Qtranslation_table_for_decode);
2966 coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2967 val = Fplist_get (plist, Qtranslation_table_for_encode);
4608c386 2968 if (SYMBOLP (val))
f967223b
KH
2969 val = Fget (val, Qtranslation_table_for_encode);
2970 coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
d46c5b12
KH
2971 val = Fplist_get (plist, Qcoding_category);
2972 if (!NILP (val))
2973 {
2974 val = Fget (val, Qcoding_category_index);
2975 if (INTEGERP (val))
2976 coding->category_idx = XINT (val);
2977 else
2978 goto label_invalid_coding_system;
2979 }
2980 else
2981 goto label_invalid_coding_system;
4608c386 2982
70c22245
KH
2983 val = Fplist_get (plist, Qsafe_charsets);
2984 if (EQ (val, Qt))
2985 {
2986 for (i = 0; i <= MAX_CHARSET; i++)
2987 coding->safe_charsets[i] = 1;
2988 }
2989 else
2990 {
2991 bzero (coding->safe_charsets, MAX_CHARSET + 1);
2992 while (CONSP (val))
2993 {
03699b14 2994 if ((i = get_charset_id (XCAR (val))) >= 0)
70c22245 2995 coding->safe_charsets[i] = 1;
03699b14 2996 val = XCDR (val);
70c22245
KH
2997 }
2998 }
2999
d46c5b12 3000 switch (XFASTINT (coding_type))
4ed46869
KH
3001 {
3002 case 0:
0ef69138 3003 coding->type = coding_type_emacs_mule;
c952af22
KH
3004 if (!NILP (coding->post_read_conversion))
3005 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3006 if (!NILP (coding->pre_write_conversion))
3007 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4ed46869
KH
3008 break;
3009
3010 case 1:
3011 coding->type = coding_type_sjis;
c952af22
KH
3012 coding->common_flags
3013 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869
KH
3014 break;
3015
3016 case 2:
3017 coding->type = coding_type_iso2022;
c952af22
KH
3018 coding->common_flags
3019 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3020 {
70c22245 3021 Lisp_Object val, temp;
4ed46869 3022 Lisp_Object *flags;
d46c5b12 3023 int i, charset, reg_bits = 0;
4ed46869 3024
4608c386 3025 val = XVECTOR (coding_spec)->contents[4];
f44d27ce 3026
4ed46869
KH
3027 if (!VECTORP (val) || XVECTOR (val)->size != 32)
3028 goto label_invalid_coding_system;
3029
3030 flags = XVECTOR (val)->contents;
3031 coding->flags
3032 = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3033 | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3034 | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3035 | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3036 | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3037 | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3038 | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3039 | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
e0e989f6
KH
3040 | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3041 | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
c4825358
KH
3042 | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3043 | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3f003981 3044 | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
c4825358 3045 );
4ed46869
KH
3046
3047 /* Invoke graphic register 0 to plane 0. */
3048 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3049 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
3050 CODING_SPEC_ISO_INVOCATION (coding, 1)
3051 = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3052 /* Not single shifting at first. */
6e85d753 3053 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
e0e989f6 3054 /* Beginning of buffer should also be regarded as bol. */
6e85d753 3055 CODING_SPEC_ISO_BOL (coding) = 1;
4ed46869 3056
70c22245
KH
3057 for (charset = 0; charset <= MAX_CHARSET; charset++)
3058 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3059 val = Vcharset_revision_alist;
3060 while (CONSP (val))
3061 {
03699b14 3062 charset = get_charset_id (Fcar_safe (XCAR (val)));
70c22245 3063 if (charset >= 0
03699b14 3064 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
70c22245
KH
3065 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3066 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
03699b14 3067 val = XCDR (val);
70c22245
KH
3068 }
3069
4ed46869
KH
3070 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3071 FLAGS[REG] can be one of below:
3072 integer CHARSET: CHARSET occupies register I,
3073 t: designate nothing to REG initially, but can be used
3074 by any charsets,
3075 list of integer, nil, or t: designate the first
3076 element (if integer) to REG initially, the remaining
3077 elements (if integer) is designated to REG on request,
d46c5b12 3078 if an element is t, REG can be used by any charsets,
4ed46869 3079 nil: REG is never used. */
467e7675 3080 for (charset = 0; charset <= MAX_CHARSET; charset++)
1ba9e4ab
KH
3081 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3082 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
4ed46869
KH
3083 for (i = 0; i < 4; i++)
3084 {
3085 if (INTEGERP (flags[i])
e0e989f6
KH
3086 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3087 || (charset = get_charset_id (flags[i])) >= 0)
4ed46869
KH
3088 {
3089 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3090 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3091 }
3092 else if (EQ (flags[i], Qt))
3093 {
3094 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
d46c5b12
KH
3095 reg_bits |= 1 << i;
3096 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
4ed46869
KH
3097 }
3098 else if (CONSP (flags[i]))
3099 {
84d60297
RS
3100 Lisp_Object tail;
3101 tail = flags[i];
4ed46869 3102
d46c5b12 3103 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
03699b14
KR
3104 if (INTEGERP (XCAR (tail))
3105 && (charset = XINT (XCAR (tail)),
e0e989f6 3106 CHARSET_VALID_P (charset))
03699b14 3107 || (charset = get_charset_id (XCAR (tail))) >= 0)
4ed46869
KH
3108 {
3109 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3110 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3111 }
3112 else
3113 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
03699b14 3114 tail = XCDR (tail);
4ed46869
KH
3115 while (CONSP (tail))
3116 {
03699b14
KR
3117 if (INTEGERP (XCAR (tail))
3118 && (charset = XINT (XCAR (tail)),
e0e989f6 3119 CHARSET_VALID_P (charset))
03699b14 3120 || (charset = get_charset_id (XCAR (tail))) >= 0)
70c22245
KH
3121 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3122 = i;
03699b14 3123 else if (EQ (XCAR (tail), Qt))
d46c5b12 3124 reg_bits |= 1 << i;
03699b14 3125 tail = XCDR (tail);
4ed46869
KH
3126 }
3127 }
3128 else
3129 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3130
3131 CODING_SPEC_ISO_DESIGNATION (coding, i)
3132 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3133 }
3134
d46c5b12 3135 if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
4ed46869
KH
3136 {
3137 /* REG 1 can be used only by locking shift in 7-bit env. */
3138 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
d46c5b12 3139 reg_bits &= ~2;
4ed46869
KH
3140 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3141 /* Without any shifting, only REG 0 and 1 can be used. */
d46c5b12 3142 reg_bits &= 3;
4ed46869
KH
3143 }
3144
d46c5b12
KH
3145 if (reg_bits)
3146 for (charset = 0; charset <= MAX_CHARSET; charset++)
6e85d753 3147 {
d46c5b12
KH
3148 if (CHARSET_VALID_P (charset))
3149 {
3150 /* There exist some default graphic registers to be
3151 used CHARSET. */
3152
3153 /* We had better avoid designating a charset of
3154 CHARS96 to REG 0 as far as possible. */
3155 if (CHARSET_CHARS (charset) == 96)
3156 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3157 = (reg_bits & 2
3158 ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3159 else
3160 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3161 = (reg_bits & 1
3162 ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3163 }
6e85d753 3164 }
4ed46869 3165 }
c952af22 3166 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
d46c5b12 3167 coding->spec.iso2022.last_invalid_designation_register = -1;
4ed46869
KH
3168 break;
3169
3170 case 3:
3171 coding->type = coding_type_big5;
c952af22
KH
3172 coding->common_flags
3173 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3174 coding->flags
4608c386 3175 = (NILP (XVECTOR (coding_spec)->contents[4])
4ed46869
KH
3176 ? CODING_FLAG_BIG5_HKU
3177 : CODING_FLAG_BIG5_ETEN);
3178 break;
3179
3180 case 4:
3181 coding->type = coding_type_ccl;
c952af22
KH
3182 coding->common_flags
3183 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
4ed46869 3184 {
84d60297 3185 val = XVECTOR (coding_spec)->contents[4];
ef4ced28
KH
3186 if (! CONSP (val)
3187 || setup_ccl_program (&(coding->spec.ccl.decoder),
03699b14 3188 XCAR (val)) < 0
ef4ced28 3189 || setup_ccl_program (&(coding->spec.ccl.encoder),
03699b14 3190 XCDR (val)) < 0)
4ed46869 3191 goto label_invalid_coding_system;
1397dc18
KH
3192
3193 bzero (coding->spec.ccl.valid_codes, 256);
3194 val = Fplist_get (plist, Qvalid_codes);
3195 if (CONSP (val))
3196 {
3197 Lisp_Object this;
3198
03699b14 3199 for (; CONSP (val); val = XCDR (val))
1397dc18 3200 {
03699b14 3201 this = XCAR (val);
1397dc18
KH
3202 if (INTEGERP (this)
3203 && XINT (this) >= 0 && XINT (this) < 256)
3204 coding->spec.ccl.valid_codes[XINT (this)] = 1;
3205 else if (CONSP (this)
03699b14
KR
3206 && INTEGERP (XCAR (this))
3207 && INTEGERP (XCDR (this)))
1397dc18 3208 {
03699b14
KR
3209 int start = XINT (XCAR (this));
3210 int end = XINT (XCDR (this));
1397dc18
KH
3211
3212 if (start >= 0 && start <= end && end < 256)
e133c8fa 3213 while (start <= end)
1397dc18
KH
3214 coding->spec.ccl.valid_codes[start++] = 1;
3215 }
3216 }
3217 }
4ed46869 3218 }
c952af22 3219 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
4ed46869
KH
3220 break;
3221
27901516
KH
3222 case 5:
3223 coding->type = coding_type_raw_text;
3224 break;
3225
4ed46869 3226 default:
d46c5b12 3227 goto label_invalid_coding_system;
4ed46869
KH
3228 }
3229 return 0;
3230
3231 label_invalid_coding_system:
3232 coding->type = coding_type_no_conversion;
d46c5b12 3233 coding->category_idx = CODING_CATEGORY_IDX_BINARY;
c952af22 3234 coding->common_flags = 0;
dec137e5 3235 coding->eol_type = CODING_EOL_LF;
d46c5b12 3236 coding->pre_write_conversion = coding->post_read_conversion = Qnil;
4ed46869
KH
3237 return -1;
3238}
3239
54f78171
KH
3240/* Setup raw-text or one of its subsidiaries in the structure
3241 coding_system CODING according to the already setup value eol_type
3242 in CODING. CODING should be setup for some coding system in
3243 advance. */
3244
3245void
3246setup_raw_text_coding_system (coding)
3247 struct coding_system *coding;
3248{
3249 if (coding->type != coding_type_raw_text)
3250 {
3251 coding->symbol = Qraw_text;
3252 coding->type = coding_type_raw_text;
3253 if (coding->eol_type != CODING_EOL_UNDECIDED)
3254 {
84d60297
RS
3255 Lisp_Object subsidiaries;
3256 subsidiaries = Fget (Qraw_text, Qeol_type);
54f78171
KH
3257
3258 if (VECTORP (subsidiaries)
3259 && XVECTOR (subsidiaries)->size == 3)
3260 coding->symbol
3261 = XVECTOR (subsidiaries)->contents[coding->eol_type];
3262 }
716e0b0a 3263 setup_coding_system (coding->symbol, coding);
54f78171
KH
3264 }
3265 return;
3266}
3267
4ed46869
KH
3268/* Emacs has a mechanism to automatically detect a coding system if it
3269 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
3270 it's impossible to distinguish some coding systems accurately
3271 because they use the same range of codes. So, at first, coding
3272 systems are categorized into the following categories:
3273
0ef69138 3274 o coding-category-emacs-mule
4ed46869
KH
3275
3276 The category for a coding system which has the same code range
3277 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 3278 symbol) `emacs-mule' by default.
4ed46869
KH
3279
3280 o coding-category-sjis
3281
3282 The category for a coding system which has the same code range
3283 as SJIS. Assigned the coding-system (Lisp
7717c392 3284 symbol) `japanese-shift-jis' by default.
4ed46869
KH
3285
3286 o coding-category-iso-7
3287
3288 The category for a coding system which has the same code range
7717c392 3289 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
3290 shift and single shift functions. This can encode/decode all
3291 charsets. Assigned the coding-system (Lisp symbol)
3292 `iso-2022-7bit' by default.
3293
3294 o coding-category-iso-7-tight
3295
3296 Same as coding-category-iso-7 except that this can
3297 encode/decode only the specified charsets.
4ed46869
KH
3298
3299 o coding-category-iso-8-1
3300
3301 The category for a coding system which has the same code range
3302 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
3303 for DIMENSION1 charset. This doesn't use any locking shift
3304 and single shift functions. Assigned the coding-system (Lisp
3305 symbol) `iso-latin-1' by default.
4ed46869
KH
3306
3307 o coding-category-iso-8-2
3308
3309 The category for a coding system which has the same code range
3310 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
3311 for DIMENSION2 charset. This doesn't use any locking shift
3312 and single shift functions. Assigned the coding-system (Lisp
3313 symbol) `japanese-iso-8bit' by default.
4ed46869 3314
7717c392 3315 o coding-category-iso-7-else
4ed46869
KH
3316
3317 The category for a coding system which has the same code range
7717c392
KH
3318 as ISO2022 of 7-bit environment but uses locking shift or
3319 single shift functions. Assigned the coding-system (Lisp
3320 symbol) `iso-2022-7bit-lock' by default.
3321
3322 o coding-category-iso-8-else
3323
3324 The category for a coding system which has the same code range
3325 as ISO2022 of 8-bit environment but uses locking shift or
3326 single shift functions. Assigned the coding-system (Lisp
3327 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
3328
3329 o coding-category-big5
3330
3331 The category for a coding system which has the same code range
3332 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 3333 `cn-big5' by default.
4ed46869 3334
1397dc18
KH
3335 o coding-category-ccl
3336
3337 The category for a coding system of which encoder/decoder is
3338 written in CCL programs. The default value is nil, i.e., no
3339 coding system is assigned.
3340
4ed46869
KH
3341 o coding-category-binary
3342
3343 The category for a coding system not categorized in any of the
3344 above. Assigned the coding-system (Lisp symbol)
e0e989f6 3345 `no-conversion' by default.
4ed46869
KH
3346
3347 Each of them is a Lisp symbol and the value is an actual
3348 `coding-system's (this is also a Lisp symbol) assigned by a user.
3349 What Emacs does actually is to detect a category of coding system.
3350 Then, it uses a `coding-system' assigned to it. If Emacs can't
3351 decide only one possible category, it selects a category of the
3352 highest priority. Priorities of categories are also specified by a
3353 user in a Lisp variable `coding-category-list'.
3354
3355*/
3356
66cfb530
KH
3357static
3358int ascii_skip_code[256];
3359
d46c5b12 3360/* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4ed46869
KH
3361 If it detects possible coding systems, return an integer in which
3362 appropriate flag bits are set. Flag bits are defined by macros
d46c5b12 3363 CODING_CATEGORY_MASK_XXX in `coding.h'.
4ed46869 3364
d46c5b12
KH
3365 How many ASCII characters are at the head is returned as *SKIP. */
3366
3367static int
3368detect_coding_mask (source, src_bytes, priorities, skip)
3369 unsigned char *source;
3370 int src_bytes, *priorities, *skip;
4ed46869
KH
3371{
3372 register unsigned char c;
d46c5b12 3373 unsigned char *src = source, *src_end = source + src_bytes;
66cfb530 3374 unsigned int mask;
d46c5b12 3375 int i;
4ed46869
KH
3376
3377 /* At first, skip all ASCII characters and control characters except
3378 for three ISO2022 specific control characters. */
66cfb530
KH
3379 ascii_skip_code[ISO_CODE_SO] = 0;
3380 ascii_skip_code[ISO_CODE_SI] = 0;
3381 ascii_skip_code[ISO_CODE_ESC] = 0;
3382
bcf26d6a 3383 label_loop_detect_coding:
66cfb530 3384 while (src < src_end && ascii_skip_code[*src]) src++;
d46c5b12 3385 *skip = src - source;
4ed46869
KH
3386
3387 if (src >= src_end)
3388 /* We found nothing other than ASCII. There's nothing to do. */
d46c5b12 3389 return 0;
4ed46869 3390
8a8147d6 3391 c = *src;
4ed46869
KH
3392 /* The text seems to be encoded in some multilingual coding system.
3393 Now, try to find in which coding system the text is encoded. */
3394 if (c < 0x80)
bcf26d6a
KH
3395 {
3396 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3397 /* C is an ISO2022 specific control code of C0. */
3398 mask = detect_coding_iso2022 (src, src_end);
1b2af4b0 3399 if (mask == 0)
d46c5b12
KH
3400 {
3401 /* No valid ISO2022 code follows C. Try again. */
3402 src++;
66cfb530
KH
3403 if (c == ISO_CODE_ESC)
3404 ascii_skip_code[ISO_CODE_ESC] = 1;
3405 else
3406 ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
d46c5b12
KH
3407 goto label_loop_detect_coding;
3408 }
3409 if (priorities)
3410 goto label_return_highest_only;
bcf26d6a 3411 }
d46c5b12 3412 else
c4825358 3413 {
d46c5b12 3414 int try;
4ed46869 3415
d46c5b12
KH
3416 if (c < 0xA0)
3417 {
3418 /* C is the first byte of SJIS character code,
3419 or a leading-code of Emacs' internal format (emacs-mule). */
3420 try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3421
3422 /* Or, if C is a special latin extra code,
3423 or is an ISO2022 specific control code of C1 (SS2 or SS3),
3424 or is an ISO2022 control-sequence-introducer (CSI),
3425 we should also consider the possibility of ISO2022 codings. */
3426 if ((VECTORP (Vlatin_extra_code_table)
3427 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3428 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3429 || (c == ISO_CODE_CSI
3430 && (src < src_end
3431 && (*src == ']'
3432 || ((*src == '0' || *src == '1' || *src == '2')
3433 && src + 1 < src_end
3434 && src[1] == ']')))))
3435 try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3436 | CODING_CATEGORY_MASK_ISO_8BIT);
3437 }
c4825358 3438 else
d46c5b12
KH
3439 /* C is a character of ISO2022 in graphic plane right,
3440 or a SJIS's 1-byte character code (i.e. JISX0201),
3441 or the first byte of BIG5's 2-byte code. */
3442 try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3443 | CODING_CATEGORY_MASK_ISO_8BIT
3444 | CODING_CATEGORY_MASK_SJIS
3445 | CODING_CATEGORY_MASK_BIG5);
3446
1397dc18
KH
3447 /* Or, we may have to consider the possibility of CCL. */
3448 if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3449 && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3450 ->spec.ccl.valid_codes)[c])
3451 try |= CODING_CATEGORY_MASK_CCL;
3452
d46c5b12
KH
3453 mask = 0;
3454 if (priorities)
3455 {
3456 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3457 {
5ab13dd0 3458 if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
d46c5b12 3459 mask = detect_coding_iso2022 (src, src_end);
5ab13dd0 3460 else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
d46c5b12 3461 mask = detect_coding_sjis (src, src_end);
5ab13dd0 3462 else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
d46c5b12 3463 mask = detect_coding_big5 (src, src_end);
5ab13dd0 3464 else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
d46c5b12 3465 mask = detect_coding_emacs_mule (src, src_end);
89fa8b36 3466 else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
1397dc18 3467 mask = detect_coding_ccl (src, src_end);
5ab13dd0
RS
3468 else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3469 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3470 else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3471 mask = CODING_CATEGORY_MASK_BINARY;
d46c5b12
KH
3472 if (mask)
3473 goto label_return_highest_only;
3474 }
3475 return CODING_CATEGORY_MASK_RAW_TEXT;
3476 }
3477 if (try & CODING_CATEGORY_MASK_ISO)
3478 mask |= detect_coding_iso2022 (src, src_end);
3479 if (try & CODING_CATEGORY_MASK_SJIS)
3480 mask |= detect_coding_sjis (src, src_end);
3481 if (try & CODING_CATEGORY_MASK_BIG5)
3482 mask |= detect_coding_big5 (src, src_end);
3483 if (try & CODING_CATEGORY_MASK_EMACS_MULE)
1397dc18
KH
3484 mask |= detect_coding_emacs_mule (src, src_end);
3485 if (try & CODING_CATEGORY_MASK_CCL)
3486 mask |= detect_coding_ccl (src, src_end);
c4825358 3487 }
5ab13dd0 3488 return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
d46c5b12
KH
3489
3490 label_return_highest_only:
3491 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3492 {
3493 if (mask & priorities[i])
3494 return priorities[i];
3495 }
3496 return CODING_CATEGORY_MASK_RAW_TEXT;
4ed46869
KH
3497}
3498
3499/* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3500 The information of the detected coding system is set in CODING. */
3501
3502void
3503detect_coding (coding, src, src_bytes)
3504 struct coding_system *coding;
3505 unsigned char *src;
3506 int src_bytes;
3507{
d46c5b12
KH
3508 unsigned int idx;
3509 int skip, mask, i;
84d60297 3510 Lisp_Object val;
4ed46869 3511
84d60297 3512 val = Vcoding_category_list;
66cfb530 3513 mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
d46c5b12 3514 coding->heading_ascii = skip;
4ed46869 3515
d46c5b12
KH
3516 if (!mask) return;
3517
3518 /* We found a single coding system of the highest priority in MASK. */
3519 idx = 0;
3520 while (mask && ! (mask & 1)) mask >>= 1, idx++;
3521 if (! mask)
3522 idx = CODING_CATEGORY_IDX_RAW_TEXT;
4ed46869 3523
d46c5b12
KH
3524 val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3525
3526 if (coding->eol_type != CODING_EOL_UNDECIDED)
27901516 3527 {
84d60297 3528 Lisp_Object tmp;
d46c5b12 3529
84d60297 3530 tmp = Fget (val, Qeol_type);
d46c5b12
KH
3531 if (VECTORP (tmp))
3532 val = XVECTOR (tmp)->contents[coding->eol_type];
4ed46869 3533 }
d46c5b12
KH
3534 setup_coding_system (val, coding);
3535 /* Set this again because setup_coding_system reset this member. */
3536 coding->heading_ascii = skip;
4ed46869
KH
3537}
3538
d46c5b12
KH
3539/* Detect how end-of-line of a text of length SRC_BYTES pointed by
3540 SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3541 CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3542
3543 How many non-eol characters are at the head is returned as *SKIP. */
4ed46869 3544
bc4bc72a
RS
3545#define MAX_EOL_CHECK_COUNT 3
3546
d46c5b12
KH
3547static int
3548detect_eol_type (source, src_bytes, skip)
3549 unsigned char *source;
3550 int src_bytes, *skip;
4ed46869 3551{
d46c5b12 3552 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 3553 unsigned char c;
bc4bc72a
RS
3554 int total = 0; /* How many end-of-lines are found so far. */
3555 int eol_type = CODING_EOL_UNDECIDED;
3556 int this_eol_type;
4ed46869 3557
d46c5b12
KH
3558 *skip = 0;
3559
bc4bc72a 3560 while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4ed46869
KH
3561 {
3562 c = *src++;
bc4bc72a 3563 if (c == '\n' || c == '\r')
4ed46869 3564 {
d46c5b12
KH
3565 if (*skip == 0)
3566 *skip = src - 1 - source;
bc4bc72a
RS
3567 total++;
3568 if (c == '\n')
3569 this_eol_type = CODING_EOL_LF;
3570 else if (src >= src_end || *src != '\n')
3571 this_eol_type = CODING_EOL_CR;
4ed46869 3572 else
bc4bc72a
RS
3573 this_eol_type = CODING_EOL_CRLF, src++;
3574
3575 if (eol_type == CODING_EOL_UNDECIDED)
3576 /* This is the first end-of-line. */
3577 eol_type = this_eol_type;
3578 else if (eol_type != this_eol_type)
d46c5b12
KH
3579 {
3580 /* The found type is different from what found before. */
3581 eol_type = CODING_EOL_INCONSISTENT;
3582 break;
3583 }
4ed46869
KH
3584 }
3585 }
bc4bc72a 3586
d46c5b12
KH
3587 if (*skip == 0)
3588 *skip = src_end - source;
85a02ca4 3589 return eol_type;
4ed46869
KH
3590}
3591
3592/* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3593 is encoded. If it detects an appropriate format of end-of-line, it
3594 sets the information in *CODING. */
3595
3596void
3597detect_eol (coding, src, src_bytes)
3598 struct coding_system *coding;
3599 unsigned char *src;
3600 int src_bytes;
3601{
4608c386 3602 Lisp_Object val;
d46c5b12
KH
3603 int skip;
3604 int eol_type = detect_eol_type (src, src_bytes, &skip);
3605
3606 if (coding->heading_ascii > skip)
3607 coding->heading_ascii = skip;
3608 else
3609 skip = coding->heading_ascii;
4ed46869 3610
0ef69138 3611 if (eol_type == CODING_EOL_UNDECIDED)
4ed46869 3612 return;
27901516
KH
3613 if (eol_type == CODING_EOL_INCONSISTENT)
3614 {
3615#if 0
3616 /* This code is suppressed until we find a better way to
992f23f2 3617 distinguish raw text file and binary file. */
27901516
KH
3618
3619 /* If we have already detected that the coding is raw-text, the
3620 coding should actually be no-conversion. */
3621 if (coding->type == coding_type_raw_text)
3622 {
3623 setup_coding_system (Qno_conversion, coding);
3624 return;
3625 }
3626 /* Else, let's decode only text code anyway. */
3627#endif /* 0 */
1b2af4b0 3628 eol_type = CODING_EOL_LF;
27901516
KH
3629 }
3630
4608c386 3631 val = Fget (coding->symbol, Qeol_type);
4ed46869 3632 if (VECTORP (val) && XVECTOR (val)->size == 3)
d46c5b12
KH
3633 {
3634 setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3635 coding->heading_ascii = skip;
3636 }
3637}
3638
3639#define CONVERSION_BUFFER_EXTRA_ROOM 256
3640
3641#define DECODING_BUFFER_MAG(coding) \
3642 (coding->type == coding_type_iso2022 \
3643 ? 3 \
3644 : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \
3645 ? 2 \
3646 : (coding->type == coding_type_raw_text \
3647 ? 1 \
3648 : (coding->type == coding_type_ccl \
3649 ? coding->spec.ccl.decoder.buf_magnification \
3650 : 2))))
3651
3652/* Return maximum size (bytes) of a buffer enough for decoding
3653 SRC_BYTES of text encoded in CODING. */
3654
3655int
3656decoding_buffer_size (coding, src_bytes)
3657 struct coding_system *coding;
3658 int src_bytes;
3659{
3660 return (src_bytes * DECODING_BUFFER_MAG (coding)
3661 + CONVERSION_BUFFER_EXTRA_ROOM);
3662}
3663
3664/* Return maximum size (bytes) of a buffer enough for encoding
3665 SRC_BYTES of text to CODING. */
3666
3667int
3668encoding_buffer_size (coding, src_bytes)
3669 struct coding_system *coding;
3670 int src_bytes;
3671{
3672 int magnification;
3673
3674 if (coding->type == coding_type_ccl)
3675 magnification = coding->spec.ccl.encoder.buf_magnification;
3676 else
3677 magnification = 3;
3678
3679 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3680}
3681
3682#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
3683#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
3684#endif
3685
3686char *conversion_buffer;
3687int conversion_buffer_size;
3688
3689/* Return a pointer to a SIZE bytes of buffer to be used for encoding
3690 or decoding. Sufficient memory is allocated automatically. If we
3691 run out of memory, return NULL. */
3692
3693char *
3694get_conversion_buffer (size)
3695 int size;
3696{
3697 if (size > conversion_buffer_size)
3698 {
3699 char *buf;
3700 int real_size = conversion_buffer_size * 2;
3701
3702 while (real_size < size) real_size *= 2;
3703 buf = (char *) xmalloc (real_size);
3704 xfree (conversion_buffer);
3705 conversion_buffer = buf;
3706 conversion_buffer_size = real_size;
3707 }
3708 return conversion_buffer;
3709}
3710
3711int
3712ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3713 struct coding_system *coding;
3714 unsigned char *source, *destination;
3715 int src_bytes, dst_bytes, encodep;
3716{
3717 struct ccl_program *ccl
3718 = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3719 int result;
3720
ae9ff118 3721 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
7b179c2d 3722
d46c5b12
KH
3723 coding->produced = ccl_driver (ccl, source, destination,
3724 src_bytes, dst_bytes, &(coding->consumed));
69f76525 3725 coding->produced_char
48942766
KH
3726 = (encodep
3727 ? coding->produced
3728 : multibyte_chars_in_text (destination, coding->produced));
69f76525
KH
3729 coding->consumed_char
3730 = multibyte_chars_in_text (source, coding->consumed);
3731
d46c5b12
KH
3732 switch (ccl->status)
3733 {
3734 case CCL_STAT_SUSPEND_BY_SRC:
3735 result = CODING_FINISH_INSUFFICIENT_SRC;
3736 break;
3737 case CCL_STAT_SUSPEND_BY_DST:
3738 result = CODING_FINISH_INSUFFICIENT_DST;
3739 break;
9864ebce
KH
3740 case CCL_STAT_QUIT:
3741 case CCL_STAT_INVALID_CMD:
3742 result = CODING_FINISH_INTERRUPT;
3743 break;
d46c5b12
KH
3744 default:
3745 result = CODING_FINISH_NORMAL;
3746 break;
3747 }
3748 return result;
4ed46869
KH
3749}
3750
3751/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
3752 decoding, it may detect coding system and format of end-of-line if
52d41803
KH
3753 those are not yet decided.
3754
3755 This function does not make full use of DESTINATION buffer. For
3756 instance, if coding->type is coding_type_iso2022, it uses only
3757 (DST_BYTES - 7) bytes of DESTINATION buffer. In the case that
3758 DST_BYTES is decided by the function decoding_buffer_size, it
3759 contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
3760 So, this function can decode the full SOURCE. But, in the other
3761 case, if you want to avoid carry over, you must supply at least 7
3762 bytes more area in DESTINATION buffer than expected maximum bytes
3763 that will be produced by this function. */
4ed46869
KH
3764
3765int
d46c5b12 3766decode_coding (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
3767 struct coding_system *coding;
3768 unsigned char *source, *destination;
3769 int src_bytes, dst_bytes;
4ed46869 3770{
d46c5b12 3771 int result;
4ed46869 3772
d4e57bcd 3773 if (src_bytes <= 0
944bd420 3774 && coding->type != coding_type_ccl
d4e57bcd
KH
3775 && ! (coding->mode & CODING_MODE_LAST_BLOCK
3776 && CODING_REQUIRE_FLUSHING (coding)))
4ed46869 3777 {
d46c5b12
KH
3778 coding->produced = coding->produced_char = 0;
3779 coding->consumed = coding->consumed_char = 0;
fb88bf2d 3780 coding->fake_multibyte = 0;
d46c5b12 3781 return CODING_FINISH_NORMAL;
4ed46869
KH
3782 }
3783
0ef69138 3784 if (coding->type == coding_type_undecided)
4ed46869
KH
3785 detect_coding (coding, source, src_bytes);
3786
0ef69138 3787 if (coding->eol_type == CODING_EOL_UNDECIDED)
4ed46869
KH
3788 detect_eol (coding, source, src_bytes);
3789
4ed46869
KH
3790 switch (coding->type)
3791 {
0ef69138
KH
3792 case coding_type_emacs_mule:
3793 case coding_type_undecided:
27901516 3794 case coding_type_raw_text:
4ed46869 3795 if (coding->eol_type == CODING_EOL_LF
0ef69138 3796 || coding->eol_type == CODING_EOL_UNDECIDED)
4ed46869 3797 goto label_no_conversion;
d46c5b12 3798 result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
4ed46869
KH
3799 break;
3800
3801 case coding_type_sjis:
d46c5b12
KH
3802 result = decode_coding_sjis_big5 (coding, source, destination,
3803 src_bytes, dst_bytes, 1);
4ed46869
KH
3804 break;
3805
3806 case coding_type_iso2022:
d46c5b12
KH
3807 result = decode_coding_iso2022 (coding, source, destination,
3808 src_bytes, dst_bytes);
4ed46869
KH
3809 break;
3810
3811 case coding_type_big5:
d46c5b12
KH
3812 result = decode_coding_sjis_big5 (coding, source, destination,
3813 src_bytes, dst_bytes, 0);
4ed46869
KH
3814 break;
3815
3816 case coding_type_ccl:
d46c5b12
KH
3817 result = ccl_coding_driver (coding, source, destination,
3818 src_bytes, dst_bytes, 0);
3819 break;
3820
3821 default: /* i.e. case coding_type_no_conversion: */
3822 label_no_conversion:
3823 if (dst_bytes && src_bytes > dst_bytes)
3824 {
3825 coding->produced = dst_bytes;
3826 result = CODING_FINISH_INSUFFICIENT_DST;
3827 }
3828 else
3829 {
3830 coding->produced = src_bytes;
3831 result = CODING_FINISH_NORMAL;
3832 }
3833 if (dst_bytes)
3834 bcopy (source, destination, coding->produced);
3835 else
3836 safe_bcopy (source, destination, coding->produced);
fb88bf2d 3837 coding->fake_multibyte = 1;
d46c5b12
KH
3838 coding->consumed
3839 = coding->consumed_char = coding->produced_char = coding->produced;
4ed46869
KH
3840 break;
3841 }
3842
d46c5b12 3843 return result;
4ed46869
KH
3844}
3845
52d41803
KH
3846/* See "GENERAL NOTES about `encode_coding_XXX ()' functions".
3847
3848 This function does not make full use of DESTINATION buffer. For
3849 instance, if coding->type is coding_type_iso2022, it uses only
3850 (DST_BYTES - 20) bytes of DESTINATION buffer. In the case that
3851 DST_BYTES is decided by the function encoding_buffer_size, it
3852 contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
3853 So, this function can encode the full SOURCE. But, in the other
3854 case, if you want to avoid carry over, you must supply at least 20
3855 bytes more area in DESTINATION buffer than expected maximum bytes
3856 that will be produced by this function. */
4ed46869
KH
3857
3858int
d46c5b12 3859encode_coding (coding, source, destination, src_bytes, dst_bytes)
4ed46869
KH
3860 struct coding_system *coding;
3861 unsigned char *source, *destination;
3862 int src_bytes, dst_bytes;
4ed46869 3863{
d46c5b12 3864 int result;
4ed46869 3865
d4e57bcd
KH
3866 if (src_bytes <= 0
3867 && ! (coding->mode & CODING_MODE_LAST_BLOCK
3868 && CODING_REQUIRE_FLUSHING (coding)))
4ed46869 3869 {
d46c5b12
KH
3870 coding->produced = coding->produced_char = 0;
3871 coding->consumed = coding->consumed_char = 0;
fb88bf2d 3872 coding->fake_multibyte = 0;
d46c5b12
KH
3873 return CODING_FINISH_NORMAL;
3874 }
4ed46869 3875
d46c5b12
KH
3876 switch (coding->type)
3877 {
0ef69138
KH
3878 case coding_type_emacs_mule:
3879 case coding_type_undecided:
27901516 3880 case coding_type_raw_text:
4ed46869 3881 if (coding->eol_type == CODING_EOL_LF
0ef69138 3882 || coding->eol_type == CODING_EOL_UNDECIDED)
4ed46869 3883 goto label_no_conversion;
d46c5b12 3884 result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
4ed46869
KH
3885 break;
3886
3887 case coding_type_sjis:
d46c5b12
KH
3888 result = encode_coding_sjis_big5 (coding, source, destination,
3889 src_bytes, dst_bytes, 1);
4ed46869
KH
3890 break;
3891
3892 case coding_type_iso2022:
d46c5b12
KH
3893 result = encode_coding_iso2022 (coding, source, destination,
3894 src_bytes, dst_bytes);
4ed46869
KH
3895 break;
3896
3897 case coding_type_big5:
d46c5b12
KH
3898 result = encode_coding_sjis_big5 (coding, source, destination,
3899 src_bytes, dst_bytes, 0);
4ed46869
KH
3900 break;
3901
3902 case coding_type_ccl:
d46c5b12
KH
3903 result = ccl_coding_driver (coding, source, destination,
3904 src_bytes, dst_bytes, 1);
3905 break;
3906
3907 default: /* i.e. case coding_type_no_conversion: */
3908 label_no_conversion:
3909 if (dst_bytes && src_bytes > dst_bytes)
3910 {
3911 coding->produced = dst_bytes;
3912 result = CODING_FINISH_INSUFFICIENT_DST;
3913 }
3914 else
3915 {
3916 coding->produced = src_bytes;
3917 result = CODING_FINISH_NORMAL;
3918 }
3919 if (dst_bytes)
3920 bcopy (source, destination, coding->produced);
3921 else
3922 safe_bcopy (source, destination, coding->produced);
3923 if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3924 {
3925 unsigned char *p = destination, *pend = p + coding->produced;
3926 while (p < pend)
3927 if (*p++ == '\015') p[-1] = '\n';
3928 }
fb88bf2d 3929 coding->fake_multibyte = 1;
d46c5b12
KH
3930 coding->consumed
3931 = coding->consumed_char = coding->produced_char = coding->produced;
4ed46869
KH
3932 break;
3933 }
3934
d46c5b12 3935 return result;
4ed46869
KH
3936}
3937
fb88bf2d
KH
3938/* Scan text in the region between *BEG and *END (byte positions),
3939 skip characters which we don't have to decode by coding system
3940 CODING at the head and tail, then set *BEG and *END to the region
3941 of the text we actually have to convert. The caller should move
3942 the gap out of the region in advance.
4ed46869 3943
d46c5b12
KH
3944 If STR is not NULL, *BEG and *END are indices into STR. */
3945
3946static void
3947shrink_decoding_region (beg, end, coding, str)
3948 int *beg, *end;
3949 struct coding_system *coding;
3950 unsigned char *str;
3951{
fb88bf2d 3952 unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
d46c5b12 3953 int eol_conversion;
88993dfd 3954 Lisp_Object translation_table;
d46c5b12
KH
3955
3956 if (coding->type == coding_type_ccl
3957 || coding->type == coding_type_undecided
3958 || !NILP (coding->post_read_conversion))
3959 {
3960 /* We can't skip any data. */
3961 return;
3962 }
3963 else if (coding->type == coding_type_no_conversion)
3964 {
fb88bf2d
KH
3965 /* We need no conversion, but don't have to skip any data here.
3966 Decoding routine handles them effectively anyway. */
d46c5b12
KH
3967 return;
3968 }
3969
88993dfd
KH
3970 translation_table = coding->translation_table_for_decode;
3971 if (NILP (translation_table) && !NILP (Venable_character_translation))
3972 translation_table = Vstandard_translation_table_for_decode;
3973 if (CHAR_TABLE_P (translation_table))
3974 {
3975 int i;
3976 for (i = 0; i < 128; i++)
3977 if (!NILP (CHAR_TABLE_REF (translation_table, i)))
3978 break;
3979 if (i < 128)
3980 /* Some ASCII character should be tranlsated. We give up
3981 shrinking. */
3982 return;
3983 }
3984
aa60dea6
KH
3985 eol_conversion = (coding->eol_type != CODING_EOL_LF);
3986
3987 if ((! eol_conversion) && (coding->heading_ascii >= 0))
d46c5b12
KH
3988 /* Detection routine has already found how much we can skip at the
3989 head. */
3990 *beg += coding->heading_ascii;
3991
3992 if (str)
3993 {
3994 begp_orig = begp = str + *beg;
3995 endp_orig = endp = str + *end;
3996 }
3997 else
3998 {
fb88bf2d 3999 begp_orig = begp = BYTE_POS_ADDR (*beg);
d46c5b12
KH
4000 endp_orig = endp = begp + *end - *beg;
4001 }
4002
d46c5b12
KH
4003 switch (coding->type)
4004 {
4005 case coding_type_emacs_mule:
4006 case coding_type_raw_text:
4007 if (eol_conversion)
4008 {
4009 if (coding->heading_ascii < 0)
fb88bf2d 4010 while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
ee59c65f 4011 while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
fb88bf2d 4012 endp--;
ee59c65f
RS
4013 /* Do not consider LF as ascii if preceded by CR, since that
4014 confuses eol decoding. */
4015 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4016 endp++;
d46c5b12
KH
4017 }
4018 else
4019 begp = endp;
4020 break;
4021
4022 case coding_type_sjis:
4023 case coding_type_big5:
4024 /* We can skip all ASCII characters at the head. */
4025 if (coding->heading_ascii < 0)
4026 {
4027 if (eol_conversion)
de9d083c 4028 while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
d46c5b12
KH
4029 else
4030 while (begp < endp && *begp < 0x80) begp++;
4031 }
4032 /* We can skip all ASCII characters at the tail except for the
4033 second byte of SJIS or BIG5 code. */
4034 if (eol_conversion)
de9d083c 4035 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
d46c5b12
KH
4036 else
4037 while (begp < endp && endp[-1] < 0x80) endp--;
ee59c65f
RS
4038 /* Do not consider LF as ascii if preceded by CR, since that
4039 confuses eol decoding. */
4040 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4041 endp++;
d46c5b12
KH
4042 if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4043 endp++;
4044 break;
4045
4046 default: /* i.e. case coding_type_iso2022: */
622fece5
KH
4047 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4048 /* We can't skip any data. */
4049 break;
d46c5b12
KH
4050 if (coding->heading_ascii < 0)
4051 {
d46c5b12
KH
4052 /* We can skip all ASCII characters at the head except for a
4053 few control codes. */
4054 while (begp < endp && (c = *begp) < 0x80
4055 && c != ISO_CODE_CR && c != ISO_CODE_SO
4056 && c != ISO_CODE_SI && c != ISO_CODE_ESC
4057 && (!eol_conversion || c != ISO_CODE_LF))
4058 begp++;
4059 }
4060 switch (coding->category_idx)
4061 {
4062 case CODING_CATEGORY_IDX_ISO_8_1:
4063 case CODING_CATEGORY_IDX_ISO_8_2:
4064 /* We can skip all ASCII characters at the tail. */
4065 if (eol_conversion)
de9d083c 4066 while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
d46c5b12
KH
4067 else
4068 while (begp < endp && endp[-1] < 0x80) endp--;
ee59c65f
RS
4069 /* Do not consider LF as ascii if preceded by CR, since that
4070 confuses eol decoding. */
4071 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4072 endp++;
d46c5b12
KH
4073 break;
4074
4075 case CODING_CATEGORY_IDX_ISO_7:
4076 case CODING_CATEGORY_IDX_ISO_7_TIGHT:
de79a6a5
KH
4077 {
4078 /* We can skip all charactes at the tail except for 8-bit
4079 codes and ESC and the following 2-byte at the tail. */
4080 unsigned char *eight_bit = NULL;
4081
4082 if (eol_conversion)
4083 while (begp < endp
4084 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4085 {
4086 if (!eight_bit && c & 0x80) eight_bit = endp;
4087 endp--;
4088 }
4089 else
4090 while (begp < endp
4091 && (c = endp[-1]) != ISO_CODE_ESC)
4092 {
4093 if (!eight_bit && c & 0x80) eight_bit = endp;
4094 endp--;
4095 }
4096 /* Do not consider LF as ascii if preceded by CR, since that
4097 confuses eol decoding. */
4098 if (begp < endp && endp < endp_orig
4099 && endp[-1] == '\r' && endp[0] == '\n')
4100 endp++;
4101 if (begp < endp && endp[-1] == ISO_CODE_ESC)
4102 {
4103 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4104 /* This is an ASCII designation sequence. We can
4105 surely skip the tail. But, if we have
4106 encountered an 8-bit code, skip only the codes
4107 after that. */
4108 endp = eight_bit ? eight_bit : endp + 2;
4109 else
4110 /* Hmmm, we can't skip the tail. */
4111 endp = endp_orig;
4112 }
4113 else if (eight_bit)
4114 endp = eight_bit;
4115 }
d46c5b12
KH
4116 }
4117 }
4118 *beg += begp - begp_orig;
4119 *end += endp - endp_orig;
4120 return;
4121}
4122
4123/* Like shrink_decoding_region but for encoding. */
4124
4125static void
4126shrink_encoding_region (beg, end, coding, str)
4127 int *beg, *end;
4128 struct coding_system *coding;
4129 unsigned char *str;
4130{
4131 unsigned char *begp_orig, *begp, *endp_orig, *endp;
4132 int eol_conversion;
88993dfd 4133 Lisp_Object translation_table;
d46c5b12
KH
4134
4135 if (coding->type == coding_type_ccl)
4136 /* We can't skip any data. */
4137 return;
4138 else if (coding->type == coding_type_no_conversion)
4139 {
4140 /* We need no conversion. */
4141 *beg = *end;
4142 return;
4143 }
4144
88993dfd
KH
4145 translation_table = coding->translation_table_for_encode;
4146 if (NILP (translation_table) && !NILP (Venable_character_translation))
4147 translation_table = Vstandard_translation_table_for_encode;
4148 if (CHAR_TABLE_P (translation_table))
4149 {
4150 int i;
4151 for (i = 0; i < 128; i++)
4152 if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4153 break;
4154 if (i < 128)
4155 /* Some ASCII character should be tranlsated. We give up
4156 shrinking. */
4157 return;
4158 }
4159
d46c5b12
KH
4160 if (str)
4161 {
4162 begp_orig = begp = str + *beg;
4163 endp_orig = endp = str + *end;
4164 }
4165 else
4166 {
fb88bf2d 4167 begp_orig = begp = BYTE_POS_ADDR (*beg);
d46c5b12
KH
4168 endp_orig = endp = begp + *end - *beg;
4169 }
4170
4171 eol_conversion = (coding->eol_type == CODING_EOL_CR
4172 || coding->eol_type == CODING_EOL_CRLF);
4173
4174 /* Here, we don't have to check coding->pre_write_conversion because
4175 the caller is expected to have handled it already. */
4176 switch (coding->type)
4177 {
4178 case coding_type_undecided:
4179 case coding_type_emacs_mule:
4180 case coding_type_raw_text:
4181 if (eol_conversion)
4182 {
4183 while (begp < endp && *begp != '\n') begp++;
4184 while (begp < endp && endp[-1] != '\n') endp--;
4185 }
4186 else
4187 begp = endp;
4188 break;
4189
4190 case coding_type_iso2022:
622fece5
KH
4191 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4192 /* We can't skip any data. */
4193 break;
d46c5b12
KH
4194 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4195 {
4196 unsigned char *bol = begp;
4197 while (begp < endp && *begp < 0x80)
4198 {
4199 begp++;
4200 if (begp[-1] == '\n')
4201 bol = begp;
4202 }
4203 begp = bol;
4204 goto label_skip_tail;
4205 }
4206 /* fall down ... */
4207
4208 default:
4209 /* We can skip all ASCII characters at the head and tail. */
4210 if (eol_conversion)
4211 while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4212 else
4213 while (begp < endp && *begp < 0x80) begp++;
4214 label_skip_tail:
4215 if (eol_conversion)
4216 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4217 else
4218 while (begp < endp && *(endp - 1) < 0x80) endp--;
4219 break;
4220 }
4221
4222 *beg += begp - begp_orig;
4223 *end += endp - endp_orig;
4224 return;
4225}
4226
88993dfd
KH
/* Shrinking a conversion region has some overhead of its own, so a
   region is shrunk only when it is longer than this many bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region between *BEG and *END with
   shrink_encoding_region (if ENCODEP is nonzero) or
   shrink_decoding_region (otherwise), unless the region is too short
   for that to be worthwhile.  BEG and END are pointers to int.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \
  do { \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold) \
      break; \
    if (encodep) \
      shrink_encoding_region (beg, end, coding, str); \
    else \
      shrink_decoding_region (beg, end, coding, str); \
  } while (0)
4240
b843d1ae
KH
4241static Lisp_Object
4242code_convert_region_unwind (dummy)
4243 Lisp_Object dummy;
4244{
4245 inhibit_pre_post_conversion = 0;
4246 return Qnil;
4247}
4248
d46c5b12 4249/* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
fb88bf2d
KH
4250 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4251 coding system CODING, and return the status code of code conversion
4252 (currently, this value has no meaning).
4253
4254 How many characters (and bytes) are converted to how many
4255 characters (and bytes) are recorded in members of the structure
4256 CODING.
d46c5b12 4257
6e44253b 4258 If REPLACE is nonzero, we do various things as if the original text
d46c5b12 4259 is deleted and a new text is inserted. See the comments in
6e44253b 4260 replace_range (insdel.c) to know what we are doing. */
4ed46869
KH
4261
4262int
6e44253b
KH
4263code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4264 int from, from_byte, to, to_byte, encodep, replace;
4ed46869 4265 struct coding_system *coding;
4ed46869 4266{
fb88bf2d
KH
4267 int len = to - from, len_byte = to_byte - from_byte;
4268 int require, inserted, inserted_byte;
12410ef1 4269 int head_skip, tail_skip, total_skip;
84d60297 4270 Lisp_Object saved_coding_symbol;
fb88bf2d
KH
4271 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4272 int first = 1;
4273 int fake_multibyte = 0;
4274 unsigned char *src, *dst;
84d60297 4275 Lisp_Object deletion;
e133c8fa 4276 int orig_point = PT, orig_len = len;
6abb9bd9 4277 int prev_Z;
84d60297
RS
4278
4279 deletion = Qnil;
4280 saved_coding_symbol = Qnil;
d46c5b12 4281
83fa074f 4282 if (from < PT && PT < to)
e133c8fa
KH
4283 {
4284 TEMP_SET_PT_BOTH (from, from_byte);
4285 orig_point = from;
4286 }
83fa074f 4287
6e44253b 4288 if (replace)
d46c5b12 4289 {
fb88bf2d
KH
4290 int saved_from = from;
4291
d46c5b12 4292 prepare_to_modify_buffer (from, to, &from);
fb88bf2d
KH
4293 if (saved_from != from)
4294 {
4295 to = from + len;
4296 if (multibyte)
4297 from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4298 else
4299 from_byte = from, to_byte = to;
4300 len_byte = to_byte - from_byte;
4301 }
d46c5b12 4302 }
d46c5b12
KH
4303
4304 if (! encodep && CODING_REQUIRE_DETECTION (coding))
4305 {
12410ef1 4306 /* We must detect encoding of text and eol format. */
d46c5b12
KH
4307
4308 if (from < GPT && to > GPT)
4309 move_gap_both (from, from_byte);
4310 if (coding->type == coding_type_undecided)
4311 {
fb88bf2d 4312 detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
d46c5b12 4313 if (coding->type == coding_type_undecided)
12410ef1
KH
4314 /* It seems that the text contains only ASCII, but we
4315 should not left it undecided because the deeper
4316 decoding routine (decode_coding) tries to detect the
4317 encodings again in vain. */
d46c5b12
KH
4318 coding->type = coding_type_emacs_mule;
4319 }
4320 if (coding->eol_type == CODING_EOL_UNDECIDED)
4321 {
4322 saved_coding_symbol = coding->symbol;
4323 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4324 if (coding->eol_type == CODING_EOL_UNDECIDED)
4325 coding->eol_type = CODING_EOL_LF;
4326 /* We had better recover the original eol format if we
4327 encounter an inconsitent eol format while decoding. */
4328 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4329 }
4330 }
4331
fb88bf2d
KH
4332 coding->consumed_char = len, coding->consumed = len_byte;
4333
d46c5b12
KH
4334 if (encodep
4335 ? ! CODING_REQUIRE_ENCODING (coding)
4336 : ! CODING_REQUIRE_DECODING (coding))
fb88bf2d
KH
4337 {
4338 coding->produced = len_byte;
12410ef1
KH
4339 if (multibyte
4340 && ! replace
4341 /* See the comment of the member heading_ascii in coding.h. */
4342 && coding->heading_ascii < len_byte)
fb88bf2d 4343 {
6e44253b
KH
4344 /* We still may have to combine byte at the head and the
4345 tail of the text in the region. */
12410ef1 4346 if (from < GPT && GPT < to)
6e44253b 4347 move_gap_both (to, to_byte);
12410ef1
KH
4348 len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4349 adjust_after_insert (from, from_byte, to, to_byte, len);
4350 coding->produced_char = len;
fb88bf2d
KH
4351 }
4352 else
68e3a8f1
AS
4353 {
4354 if (!replace)
4355 adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4356 coding->produced_char = len_byte;
4357 }
fb88bf2d
KH
4358 return 0;
4359 }
d46c5b12
KH
4360
4361 /* Now we convert the text. */
4362
4363 /* For encoding, we must process pre-write-conversion in advance. */
4364 if (encodep
d46c5b12
KH
4365 && ! NILP (coding->pre_write_conversion)
4366 && SYMBOLP (coding->pre_write_conversion)
4367 && ! NILP (Ffboundp (coding->pre_write_conversion)))
4368 {
2b4f9037
KH
4369 /* The function in pre-write-conversion may put a new text in a
4370 new buffer. */
0007bdd0
KH
4371 struct buffer *prev = current_buffer;
4372 Lisp_Object new;
b843d1ae 4373 int count = specpdl_ptr - specpdl;
d46c5b12 4374
b843d1ae
KH
4375 record_unwind_protect (code_convert_region_unwind, Qnil);
4376 /* We should not call any more pre-write/post-read-conversion
4377 functions while this pre-write-conversion is running. */
4378 inhibit_pre_post_conversion = 1;
b39f748c
AS
4379 call2 (coding->pre_write_conversion,
4380 make_number (from), make_number (to));
b843d1ae
KH
4381 inhibit_pre_post_conversion = 0;
4382 /* Discard the unwind protect. */
4383 specpdl_ptr--;
4384
d46c5b12
KH
4385 if (current_buffer != prev)
4386 {
4387 len = ZV - BEGV;
0007bdd0 4388 new = Fcurrent_buffer ();
d46c5b12 4389 set_buffer_internal_1 (prev);
ddbc19ff 4390 del_range_2 (from, from_byte, to, to_byte);
e133c8fa 4391 TEMP_SET_PT_BOTH (from, from_byte);
0007bdd0
KH
4392 insert_from_buffer (XBUFFER (new), 1, len, 0);
4393 Fkill_buffer (new);
e133c8fa
KH
4394 if (orig_point >= to)
4395 orig_point += len - orig_len;
4396 else if (orig_point > from)
4397 orig_point = from;
4398 orig_len = len;
d46c5b12 4399 to = from + len;
e133c8fa 4400 from_byte = multibyte ? CHAR_TO_BYTE (from) : from_byte;
fb88bf2d 4401 to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
d46c5b12 4402 len_byte = to_byte - from_byte;
e133c8fa 4403 TEMP_SET_PT_BOTH (from, from_byte);
d46c5b12
KH
4404 }
4405 }
4406
12410ef1
KH
4407 if (replace)
4408 deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4409
d46c5b12 4410 /* Try to skip the heading and tailing ASCIIs. */
12410ef1
KH
4411 {
4412 int from_byte_orig = from_byte, to_byte_orig = to_byte;
4413
4414 if (from < GPT && GPT < to)
4415 move_gap_both (from, from_byte);
88993dfd 4416 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
d4e57bcd 4417 if (from_byte == to_byte
944bd420 4418 && coding->type != coding_type_ccl
d4e57bcd
KH
4419 && ! (coding->mode & CODING_MODE_LAST_BLOCK
4420 && CODING_REQUIRE_FLUSHING (coding)))
12410ef1
KH
4421 {
4422 coding->produced = len_byte;
4423 coding->produced_char = multibyte ? len : len_byte;
4424 if (!replace)
4425 /* We must record and adjust for this new text now. */
4426 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4427 return 0;
4428 }
fb88bf2d 4429
12410ef1
KH
4430 head_skip = from_byte - from_byte_orig;
4431 tail_skip = to_byte_orig - to_byte;
4432 total_skip = head_skip + tail_skip;
4433 from += head_skip;
4434 to -= tail_skip;
4435 len -= total_skip; len_byte -= total_skip;
4436 }
d46c5b12 4437
88993dfd 4438 /* The code conversion routine can not preserve text properties for
55d8d769
KH
4439 now. So, we must remove all text properties in the region.
4440 Here, we must suppress all modification hooks. */
88993dfd 4441 if (replace)
55d8d769
KH
4442 {
4443 int saved_inhibit_modification_hooks = inhibit_modification_hooks;
4444 inhibit_modification_hooks = 1;
4445 Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4446 inhibit_modification_hooks = saved_inhibit_modification_hooks;
4447 }
88993dfd 4448
fb88bf2d
KH
4449 /* For converion, we must put the gap before the text in addition to
4450 making the gap larger for efficient decoding. The required gap
4451 size starts from 2000 which is the magic number used in make_gap.
4452 But, after one batch of conversion, it will be incremented if we
4453 find that it is not enough . */
d46c5b12
KH
4454 require = 2000;
4455
4456 if (GAP_SIZE < require)
4457 make_gap (require - GAP_SIZE);
4458 move_gap_both (from, from_byte);
4459
d46c5b12 4460 inserted = inserted_byte = 0;
fb88bf2d
KH
4461 src = GAP_END_ADDR, dst = GPT_ADDR;
4462
4463 GAP_SIZE += len_byte;
4464 ZV -= len;
4465 Z -= len;
4466 ZV_BYTE -= len_byte;
4467 Z_BYTE -= len_byte;
4468
d9f9a1bc
GM
4469 if (GPT - BEG < BEG_UNCHANGED)
4470 BEG_UNCHANGED = GPT - BEG;
4471 if (Z - GPT < END_UNCHANGED)
4472 END_UNCHANGED = Z - GPT;
f2558efd 4473
d46c5b12
KH
4474 for (;;)
4475 {
fb88bf2d 4476 int result;
d46c5b12
KH
4477
4478 /* The buffer memory is changed from:
fb88bf2d
KH
4479 +--------+converted-text+---------+-------original-text------+---+
4480 |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4481 |<------------------- GAP_SIZE -------------------->| */
d46c5b12 4482 if (encodep)
fb88bf2d 4483 result = encode_coding (coding, src, dst, len_byte, 0);
d46c5b12 4484 else
fb88bf2d 4485 result = decode_coding (coding, src, dst, len_byte, 0);
d46c5b12
KH
4486 /* to:
4487 +--------+-------converted-text--------+--+---original-text--+---+
fb88bf2d
KH
4488 |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4489 |<------------------- GAP_SIZE -------------------->| */
4490 if (coding->fake_multibyte)
4491 fake_multibyte = 1;
d46c5b12 4492
fb88bf2d
KH
4493 if (!encodep && !multibyte)
4494 coding->produced_char = coding->produced;
d46c5b12
KH
4495 inserted += coding->produced_char;
4496 inserted_byte += coding->produced;
d46c5b12 4497 len_byte -= coding->consumed;
fb88bf2d
KH
4498 src += coding->consumed;
4499 dst += inserted_byte;
d46c5b12 4500
9864ebce
KH
4501 if (result == CODING_FINISH_NORMAL)
4502 {
4503 src += len_byte;
4504 break;
4505 }
d46c5b12
KH
4506 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4507 {
fb88bf2d 4508 unsigned char *pend = dst, *p = pend - inserted_byte;
38edf7d4 4509 Lisp_Object eol_type;
d46c5b12
KH
4510
4511 /* Encode LFs back to the original eol format (CR or CRLF). */
4512 if (coding->eol_type == CODING_EOL_CR)
4513 {
4514 while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4515 }
4516 else
4517 {
d46c5b12
KH
4518 int count = 0;
4519
fb88bf2d
KH
4520 while (p < pend) if (*p++ == '\n') count++;
4521 if (src - dst < count)
d46c5b12 4522 {
38edf7d4 4523 /* We don't have sufficient room for encoding LFs
fb88bf2d
KH
4524 back to CRLF. We must record converted and
4525 not-yet-converted text back to the buffer
4526 content, enlarge the gap, then record them out of
4527 the buffer contents again. */
4528 int add = len_byte + inserted_byte;
4529
4530 GAP_SIZE -= add;
4531 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4532 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4533 make_gap (count - GAP_SIZE);
4534 GAP_SIZE += add;
4535 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4536 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4537 /* Don't forget to update SRC, DST, and PEND. */
4538 src = GAP_END_ADDR - len_byte;
4539 dst = GPT_ADDR + inserted_byte;
4540 pend = dst;
d46c5b12 4541 }
d46c5b12
KH
4542 inserted += count;
4543 inserted_byte += count;
fb88bf2d
KH
4544 coding->produced += count;
4545 p = dst = pend + count;
4546 while (count)
4547 {
4548 *--p = *--pend;
4549 if (*p == '\n') count--, *--p = '\r';
4550 }
d46c5b12
KH
4551 }
4552
4553 /* Suppress eol-format conversion in the further conversion. */
4554 coding->eol_type = CODING_EOL_LF;
4555
38edf7d4
KH
4556 /* Set the coding system symbol to that for Unix-like EOL. */
4557 eol_type = Fget (saved_coding_symbol, Qeol_type);
4558 if (VECTORP (eol_type)
4559 && XVECTOR (eol_type)->size == 3
4560 && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
4561 coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
4562 else
4563 coding->symbol = saved_coding_symbol;
fb88bf2d
KH
4564
4565 continue;
d46c5b12
KH
4566 }
4567 if (len_byte <= 0)
944bd420
KH
4568 {
4569 if (coding->type != coding_type_ccl
4570 || coding->mode & CODING_MODE_LAST_BLOCK)
4571 break;
4572 coding->mode |= CODING_MODE_LAST_BLOCK;
4573 continue;
4574 }
d46c5b12
KH
4575 if (result == CODING_FINISH_INSUFFICIENT_SRC)
4576 {
4577 /* The source text ends in invalid codes. Let's just
4578 make them valid buffer contents, and finish conversion. */
fb88bf2d 4579 inserted += len_byte;
d46c5b12 4580 inserted_byte += len_byte;
fb88bf2d 4581 while (len_byte--)
ee59c65f 4582 *dst++ = *src++;
fb88bf2d 4583 fake_multibyte = 1;
d46c5b12
KH
4584 break;
4585 }
9864ebce
KH
4586 if (result == CODING_FINISH_INTERRUPT)
4587 {
4588 /* The conversion procedure was interrupted by a user. */
4589 fake_multibyte = 1;
4590 break;
4591 }
4592 /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
4593 if (coding->consumed < 1)
4594 {
4595 /* It's quite strange to require more memory without
4596 consuming any bytes. Perhaps CCL program bug. */
4597 fake_multibyte = 1;
4598 break;
4599 }
fb88bf2d
KH
4600 if (first)
4601 {
4602 /* We have just done the first batch of conversion which was
4603 stoped because of insufficient gap. Let's reconsider the
4604 required gap size (i.e. SRT - DST) now.
4605
4606 We have converted ORIG bytes (== coding->consumed) into
4607 NEW bytes (coding->produced). To convert the remaining
4608 LEN bytes, we may need REQUIRE bytes of gap, where:
4609 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4610 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4611 Here, we are sure that NEW >= ORIG. */
6e44253b
KH
4612 float ratio = coding->produced - coding->consumed;
4613 ratio /= coding->consumed;
4614 require = len_byte * ratio;
fb88bf2d
KH
4615 first = 0;
4616 }
4617 if ((src - dst) < (require + 2000))
4618 {
4619 /* See the comment above the previous call of make_gap. */
4620 int add = len_byte + inserted_byte;
4621
4622 GAP_SIZE -= add;
4623 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4624 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4625 make_gap (require + 2000);
4626 GAP_SIZE += add;
4627 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4628 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4629 /* Don't forget to update SRC, DST. */
4630 src = GAP_END_ADDR - len_byte;
4631 dst = GPT_ADDR + inserted_byte;
4632 }
d46c5b12 4633 }
fb88bf2d
KH
4634 if (src - dst > 0) *dst = 0; /* Put an anchor. */
4635
2b4f9037 4636 if (multibyte
88993dfd
KH
4637 && (encodep
4638 || fake_multibyte
4639 || (to - from) != (to_byte - from_byte)))
2b4f9037 4640 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
7553d0e1 4641
12410ef1
KH
4642 /* If we have shrinked the conversion area, adjust it now. */
4643 if (total_skip > 0)
4644 {
4645 if (tail_skip > 0)
4646 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4647 inserted += total_skip; inserted_byte += total_skip;
4648 GAP_SIZE += total_skip;
4649 GPT -= head_skip; GPT_BYTE -= head_skip;
4650 ZV -= total_skip; ZV_BYTE -= total_skip;
4651 Z -= total_skip; Z_BYTE -= total_skip;
4652 from -= head_skip; from_byte -= head_skip;
4653 to += tail_skip; to_byte += tail_skip;
4654 }
4655
6abb9bd9 4656 prev_Z = Z;
12410ef1 4657 adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
6abb9bd9 4658 inserted = Z - prev_Z;
4ed46869 4659
2b4f9037 4660 if (! encodep && ! NILP (coding->post_read_conversion))
d46c5b12 4661 {
2b4f9037 4662 Lisp_Object val;
b843d1ae 4663 int count = specpdl_ptr - specpdl;
4ed46869 4664
e133c8fa
KH
4665 if (from != PT)
4666 TEMP_SET_PT_BOTH (from, from_byte);
6abb9bd9 4667 prev_Z = Z;
b843d1ae
KH
4668 record_unwind_protect (code_convert_region_unwind, Qnil);
4669 /* We should not call any more pre-write/post-read-conversion
4670 functions while this post-read-conversion is running. */
4671 inhibit_pre_post_conversion = 1;
2b4f9037 4672 val = call1 (coding->post_read_conversion, make_number (inserted));
b843d1ae
KH
4673 inhibit_pre_post_conversion = 0;
4674 /* Discard the unwind protect. */
4675 specpdl_ptr--;
6abb9bd9 4676 CHECK_NUMBER (val, 0);
944bd420 4677 inserted += Z - prev_Z;
e133c8fa
KH
4678 }
4679
4680 if (orig_point >= from)
4681 {
4682 if (orig_point >= from + orig_len)
4683 orig_point += inserted - orig_len;
4684 else
4685 orig_point = from;
4686 TEMP_SET_PT (orig_point);
d46c5b12 4687 }
4ed46869 4688
2b4f9037
KH
4689 signal_after_change (from, to - from, inserted);
4690
fb88bf2d 4691 {
12410ef1
KH
4692 coding->consumed = to_byte - from_byte;
4693 coding->consumed_char = to - from;
4694 coding->produced = inserted_byte;
4695 coding->produced_char = inserted;
fb88bf2d 4696 }
7553d0e1 4697
fb88bf2d 4698 return 0;
d46c5b12
KH
4699}
4700
4701Lisp_Object
4702code_convert_string (str, coding, encodep, nocopy)
4703 Lisp_Object str;
4ed46869 4704 struct coding_system *coding;
d46c5b12 4705 int encodep, nocopy;
4ed46869 4706{
d46c5b12
KH
4707 int len;
4708 char *buf;
fc932ac6
RS
4709 int from = 0, to = XSTRING (str)->size;
4710 int to_byte = STRING_BYTES (XSTRING (str));
d46c5b12 4711 struct gcpro gcpro1;
84d60297 4712 Lisp_Object saved_coding_symbol;
d46c5b12 4713 int result;
4ed46869 4714
84d60297 4715 saved_coding_symbol = Qnil;
b843d1ae
KH
4716 if ((encodep && !NILP (coding->pre_write_conversion)
4717 || !encodep && !NILP (coding->post_read_conversion)))
d46c5b12
KH
4718 {
4719 /* Since we have to call Lisp functions which assume target text
b843d1ae
KH
4720 is in a buffer, after setting a temporary buffer, call
4721 code_convert_region. */
d46c5b12
KH
4722 int count = specpdl_ptr - specpdl;
4723 struct buffer *prev = current_buffer;
b843d1ae 4724 int multibyte = STRING_MULTIBYTE (str);
e133c8fa 4725
d46c5b12 4726 record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
b843d1ae
KH
4727 record_unwind_protect (code_convert_region_unwind, Qnil);
4728 inhibit_pre_post_conversion = 1;
4729 GCPRO1 (str);
d46c5b12
KH
4730 temp_output_buffer_setup (" *code-converting-work*");
4731 set_buffer_internal (XBUFFER (Vstandard_output));
b843d1ae
KH
4732 /* We must insert the contents of STR as is without
4733 unibyte<->multibyte conversion. For that, we adjust the
4734 multibyteness of the working buffer to that of STR. */
4735 Ferase_buffer (); /* for safety */
4736 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
4737 insert_from_string (str, 0, 0, to, to_byte, 0);
4738 UNGCPRO;
fb88bf2d 4739 code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
b843d1ae
KH
4740 /* Make a unibyte string if we are encoding, otherwise make a
4741 multibyte string. */
4742 Fset_buffer_multibyte (encodep ? Qnil : Qt);
d46c5b12 4743 str = make_buffer_string (BEGV, ZV, 0);
d46c5b12
KH
4744 return unbind_to (count, str);
4745 }
4ed46869 4746
d46c5b12
KH
4747 if (! encodep && CODING_REQUIRE_DETECTION (coding))
4748 {
4749 /* See the comments in code_convert_region. */
4750 if (coding->type == coding_type_undecided)
4751 {
4752 detect_coding (coding, XSTRING (str)->data, to_byte);
4753 if (coding->type == coding_type_undecided)
4754 coding->type = coding_type_emacs_mule;
4755 }
4756 if (coding->eol_type == CODING_EOL_UNDECIDED)
4757 {
4758 saved_coding_symbol = coding->symbol;
4759 detect_eol (coding, XSTRING (str)->data, to_byte);
4760 if (coding->eol_type == CODING_EOL_UNDECIDED)
4761 coding->eol_type = CODING_EOL_LF;
4762 /* We had better recover the original eol format if we
4763 encounter an inconsitent eol format while decoding. */
4764 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4765 }
4766 }
4ed46869 4767
d46c5b12
KH
4768 if (encodep
4769 ? ! CODING_REQUIRE_ENCODING (coding)
4770 : ! CODING_REQUIRE_DECODING (coding))
4771 from = to_byte;
4772 else
4773 {
4774 /* Try to skip the heading and tailing ASCIIs. */
88993dfd
KH
4775 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
4776 encodep);
d46c5b12 4777 }
e133c8fa
KH
4778 if (from == to_byte
4779 && coding->type != coding_type_ccl)
d46c5b12 4780 return (nocopy ? str : Fcopy_sequence (str));
4ed46869 4781
d46c5b12
KH
4782 if (encodep)
4783 len = encoding_buffer_size (coding, to_byte - from);
4784 else
4785 len = decoding_buffer_size (coding, to_byte - from);
fc932ac6 4786 len += from + STRING_BYTES (XSTRING (str)) - to_byte;
d46c5b12
KH
4787 GCPRO1 (str);
4788 buf = get_conversion_buffer (len);
4789 UNGCPRO;
4ed46869 4790
d46c5b12
KH
4791 if (from > 0)
4792 bcopy (XSTRING (str)->data, buf, from);
4793 result = (encodep
4794 ? encode_coding (coding, XSTRING (str)->data + from,
4795 buf + from, to_byte - from, len)
4796 : decode_coding (coding, XSTRING (str)->data + from,
f30cc612 4797 buf + from, to_byte - from, len));
d46c5b12 4798 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4ed46869 4799 {
d46c5b12
KH
4800 /* We simple try to decode the whole string again but without
4801 eol-conversion this time. */
4802 coding->eol_type = CODING_EOL_LF;
4803 coding->symbol = saved_coding_symbol;
4804 return code_convert_string (str, coding, encodep, nocopy);
4ed46869 4805 }
d46c5b12
KH
4806
4807 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
fc932ac6 4808 STRING_BYTES (XSTRING (str)) - to_byte);
d46c5b12 4809
fc932ac6 4810 len = from + STRING_BYTES (XSTRING (str)) - to_byte;
d46c5b12
KH
4811 if (encodep)
4812 str = make_unibyte_string (buf, len + coding->produced);
4813 else
826bfb8b
KH
4814 {
4815 int chars= (coding->fake_multibyte
4816 ? multibyte_chars_in_text (buf + from, coding->produced)
4817 : coding->produced_char);
4818 str = make_multibyte_string (buf, len + chars, len + coding->produced);
4819 }
4820
d46c5b12 4821 return str;
4ed46869
KH
4822}
4823
4824\f
4825#ifdef emacs
1397dc18 4826/*** 8. Emacs Lisp library functions ***/
4ed46869 4827
4ed46869
KH
4828DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4829 "Return t if OBJECT is nil or a coding-system.\n\
3a73fa5d
RS
4830See the documentation of `make-coding-system' for information\n\
4831about coding-system objects.")
4ed46869
KH
4832 (obj)
4833 Lisp_Object obj;
4834{
4608c386
KH
4835 if (NILP (obj))
4836 return Qt;
4837 if (!SYMBOLP (obj))
4838 return Qnil;
4839 /* Get coding-spec vector for OBJ. */
4840 obj = Fget (obj, Qcoding_system);
4841 return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4842 ? Qt : Qnil);
4ed46869
KH
4843}
4844
9d991de8
RS
4845DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4846 Sread_non_nil_coding_system, 1, 1, 0,
e0e989f6 4847 "Read a coding system from the minibuffer, prompting with string PROMPT.")
4ed46869
KH
4848 (prompt)
4849 Lisp_Object prompt;
4850{
e0e989f6 4851 Lisp_Object val;
9d991de8
RS
4852 do
4853 {
4608c386
KH
4854 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4855 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
4856 }
4857 while (XSTRING (val)->size == 0);
e0e989f6 4858 return (Fintern (val, Qnil));
4ed46869
KH
4859}
4860
9b787f3e
RS
4861DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4862 "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4863If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4864 (prompt, default_coding_system)
4865 Lisp_Object prompt, default_coding_system;
4ed46869 4866{
f44d27ce 4867 Lisp_Object val;
9b787f3e
RS
4868 if (SYMBOLP (default_coding_system))
4869 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 4870 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
4871 Qt, Qnil, Qcoding_system_history,
4872 default_coding_system, Qnil);
e0e989f6 4873 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
4874}
4875
4876DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4877 1, 1, 0,
4878 "Check validity of CODING-SYSTEM.\n\
3a73fa5d
RS
4879If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4880It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4ed46869
KH
4881The value of property should be a vector of length 5.")
4882 (coding_system)
4883 Lisp_Object coding_system;
4884{
4885 CHECK_SYMBOL (coding_system, 0);
4886 if (!NILP (Fcoding_system_p (coding_system)))
4887 return coding_system;
4888 while (1)
02ba4723 4889 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 4890}
3a73fa5d 4891\f
d46c5b12
KH
4892Lisp_Object
4893detect_coding_system (src, src_bytes, highest)
4894 unsigned char *src;
4895 int src_bytes, highest;
4ed46869
KH
4896{
4897 int coding_mask, eol_type;
d46c5b12
KH
4898 Lisp_Object val, tmp;
4899 int dummy;
4ed46869 4900
d46c5b12
KH
4901 coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4902 eol_type = detect_eol_type (src, src_bytes, &dummy);
4903 if (eol_type == CODING_EOL_INCONSISTENT)
25b02698 4904 eol_type = CODING_EOL_UNDECIDED;
4ed46869 4905
d46c5b12 4906 if (!coding_mask)
4ed46869 4907 {
27901516 4908 val = Qundecided;
d46c5b12 4909 if (eol_type != CODING_EOL_UNDECIDED)
4ed46869 4910 {
f44d27ce
RS
4911 Lisp_Object val2;
4912 val2 = Fget (Qundecided, Qeol_type);
4ed46869
KH
4913 if (VECTORP (val2))
4914 val = XVECTOR (val2)->contents[eol_type];
4915 }
80e803b4 4916 return (highest ? val : Fcons (val, Qnil));
4ed46869 4917 }
4ed46869 4918
d46c5b12
KH
4919 /* At first, gather possible coding systems in VAL. */
4920 val = Qnil;
03699b14 4921 for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCDR (tmp))
4ed46869 4922 {
d46c5b12 4923 int idx
03699b14 4924 = XFASTINT (Fget (XCAR (tmp), Qcoding_category_index));
d46c5b12 4925 if (coding_mask & (1 << idx))
4ed46869 4926 {
03699b14 4927 val = Fcons (Fsymbol_value (XCAR (tmp)), val);
d46c5b12
KH
4928 if (highest)
4929 break;
4ed46869
KH
4930 }
4931 }
d46c5b12
KH
4932 if (!highest)
4933 val = Fnreverse (val);
4ed46869 4934
65059037 4935 /* Then, replace the elements with subsidiary coding systems. */
03699b14 4936 for (tmp = val; !NILP (tmp); tmp = XCDR (tmp))
4ed46869 4937 {
65059037
RS
4938 if (eol_type != CODING_EOL_UNDECIDED
4939 && eol_type != CODING_EOL_INCONSISTENT)
4ed46869 4940 {
d46c5b12 4941 Lisp_Object eol;
03699b14 4942 eol = Fget (XCAR (tmp), Qeol_type);
d46c5b12 4943 if (VECTORP (eol))
03699b14 4944 XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
4ed46869
KH
4945 }
4946 }
03699b14 4947 return (highest ? XCAR (val) : val);
d46c5b12 4948}
4ed46869 4949
d46c5b12
KH
4950DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4951 2, 3, 0,
4952 "Detect coding system of the text in the region between START and END.\n\
4953Return a list of possible coding systems ordered by priority.\n\
4954\n\
80e803b4
KH
4955If only ASCII characters are found, it returns a list of single element\n\
4956`undecided' or its subsidiary coding system according to a detected\n\
4957end-of-line format.\n\
d46c5b12
KH
4958\n\
4959If optional argument HIGHEST is non-nil, return the coding system of\n\
4960highest priority.")
4961 (start, end, highest)
4962 Lisp_Object start, end, highest;
4963{
4964 int from, to;
4965 int from_byte, to_byte;
6289dd10 4966
d46c5b12
KH
4967 CHECK_NUMBER_COERCE_MARKER (start, 0);
4968 CHECK_NUMBER_COERCE_MARKER (end, 1);
4ed46869 4969
d46c5b12
KH
4970 validate_region (&start, &end);
4971 from = XINT (start), to = XINT (end);
4972 from_byte = CHAR_TO_BYTE (from);
4973 to_byte = CHAR_TO_BYTE (to);
6289dd10 4974
d46c5b12
KH
4975 if (from < GPT && to >= GPT)
4976 move_gap_both (to, to_byte);
4ed46869 4977
d46c5b12
KH
4978 return detect_coding_system (BYTE_POS_ADDR (from_byte),
4979 to_byte - from_byte,
4980 !NILP (highest));
4981}
6289dd10 4982
d46c5b12
KH
4983DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4984 1, 2, 0,
4985 "Detect coding system of the text in STRING.\n\
4986Return a list of possible coding systems ordered by priority.\n\
4987\n\
80e803b4
KH
4988If only ASCII characters are found, it returns a list of single element\n\
4989`undecided' or its subsidiary coding system according to a detected\n\
4990end-of-line format.\n\
d46c5b12
KH
4991\n\
4992If optional argument HIGHEST is non-nil, return the coding system of\n\
4993highest priority.")
4994 (string, highest)
4995 Lisp_Object string, highest;
4996{
4997 CHECK_STRING (string, 0);
4ed46869 4998
d46c5b12 4999 return detect_coding_system (XSTRING (string)->data,
fc932ac6 5000 STRING_BYTES (XSTRING (string)),
d46c5b12 5001 !NILP (highest));
4ed46869
KH
5002}
5003
4031e2bf
KH
5004Lisp_Object
5005code_convert_region1 (start, end, coding_system, encodep)
d46c5b12 5006 Lisp_Object start, end, coding_system;
4031e2bf 5007 int encodep;
3a73fa5d
RS
5008{
5009 struct coding_system coding;
4031e2bf 5010 int from, to, len;
3a73fa5d 5011
d46c5b12
KH
5012 CHECK_NUMBER_COERCE_MARKER (start, 0);
5013 CHECK_NUMBER_COERCE_MARKER (end, 1);
3a73fa5d
RS
5014 CHECK_SYMBOL (coding_system, 2);
5015
d46c5b12
KH
5016 validate_region (&start, &end);
5017 from = XFASTINT (start);
5018 to = XFASTINT (end);
5019
3a73fa5d 5020 if (NILP (coding_system))
d46c5b12
KH
5021 return make_number (to - from);
5022
3a73fa5d 5023 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
d46c5b12 5024 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
3a73fa5d 5025
d46c5b12 5026 coding.mode |= CODING_MODE_LAST_BLOCK;
fb88bf2d
KH
5027 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5028 &coding, encodep, 1);
f072a3e8 5029 Vlast_coding_system_used = coding.symbol;
fb88bf2d 5030 return make_number (coding.produced_char);
4031e2bf
KH
5031}
5032
5033DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5034 3, 3, "r\nzCoding system: ",
5035 "Decode the current region by specified coding system.\n\
5036When called from a program, takes three arguments:\n\
5037START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
f072a3e8
RS
5038This function sets `last-coding-system-used' to the precise coding system\n\
5039used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5040not fully specified.)\n\
5041It returns the length of the decoded text.")
4031e2bf
KH
5042 (start, end, coding_system)
5043 Lisp_Object start, end, coding_system;
5044{
5045 return code_convert_region1 (start, end, coding_system, 0);
3a73fa5d
RS
5046}
5047
5048DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5049 3, 3, "r\nzCoding system: ",
d46c5b12 5050 "Encode the current region by specified coding system.\n\
3a73fa5d 5051When called from a program, takes three arguments:\n\
d46c5b12 5052START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
f072a3e8
RS
5053This function sets `last-coding-system-used' to the precise coding system\n\
5054used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5055not fully specified.)\n\
5056It returns the length of the encoded text.")
d46c5b12
KH
5057 (start, end, coding_system)
5058 Lisp_Object start, end, coding_system;
3a73fa5d 5059{
4031e2bf
KH
5060 return code_convert_region1 (start, end, coding_system, 1);
5061}
3a73fa5d 5062
4031e2bf
KH
5063Lisp_Object
5064code_convert_string1 (string, coding_system, nocopy, encodep)
5065 Lisp_Object string, coding_system, nocopy;
5066 int encodep;
5067{
5068 struct coding_system coding;
3a73fa5d 5069
4031e2bf
KH
5070 CHECK_STRING (string, 0);
5071 CHECK_SYMBOL (coding_system, 1);
4ed46869 5072
d46c5b12 5073 if (NILP (coding_system))
4031e2bf 5074 return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4ed46869 5075
d46c5b12
KH
5076 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5077 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5f1cd180 5078
d46c5b12 5079 coding.mode |= CODING_MODE_LAST_BLOCK;
f072a3e8 5080 Vlast_coding_system_used = coding.symbol;
4031e2bf 5081 return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4ed46869
KH
5082}
5083
4ed46869 5084DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
e0e989f6
KH
5085 2, 3, 0,
5086 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
fe487a71 5087Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
f072a3e8
RS
5088if the decoding operation is trivial.\n\
5089This function sets `last-coding-system-used' to the precise coding system\n\
5090used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5091not fully specified.)")
e0e989f6
KH
5092 (string, coding_system, nocopy)
5093 Lisp_Object string, coding_system, nocopy;
4ed46869 5094{
f072a3e8 5095 return code_convert_string1 (string, coding_system, nocopy, 0);
4ed46869
KH
5096}
5097
5098DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
e0e989f6
KH
5099 2, 3, 0,
5100 "Encode STRING to CODING-SYSTEM, and return the result.\n\
fe487a71 5101Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
f072a3e8
RS
5102if the encoding operation is trivial.\n\
5103This function sets `last-coding-system-used' to the precise coding system\n\
5104used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5105not fully specified.)")
e0e989f6
KH
5106 (string, coding_system, nocopy)
5107 Lisp_Object string, coding_system, nocopy;
4ed46869 5108{
f072a3e8 5109 return code_convert_string1 (string, coding_system, nocopy, 1);
4ed46869 5110}
4031e2bf 5111
ecec61c1
KH
5112/* Encode or decode STRING according to CODING_SYSTEM.
5113 Do not set Vlast_coding_system_used. */
5114
5115Lisp_Object
5116code_convert_string_norecord (string, coding_system, encodep)
5117 Lisp_Object string, coding_system;
5118 int encodep;
5119{
5120 struct coding_system coding;
5121
5122 CHECK_STRING (string, 0);
5123 CHECK_SYMBOL (coding_system, 1);
5124
5125 if (NILP (coding_system))
5126 return string;
5127
5128 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5129 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5130
5131 coding.mode |= CODING_MODE_LAST_BLOCK;
5132 return code_convert_string (string, &coding, encodep, Qt);
5133}
3a73fa5d 5134\f
4ed46869 5135DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
55ab7be3 5136 "Decode a Japanese character which has CODE in shift_jis encoding.\n\
4ed46869
KH
5137Return the corresponding character.")
5138 (code)
5139 Lisp_Object code;
5140{
5141 unsigned char c1, c2, s1, s2;
5142 Lisp_Object val;
5143
5144 CHECK_NUMBER (code, 0);
5145 s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
55ab7be3
KH
5146 if (s1 == 0)
5147 {
c28a9453
KH
5148 if (s2 < 0x80)
5149 XSETFASTINT (val, s2);
5150 else if (s2 >= 0xA0 || s2 <= 0xDF)
5151 XSETFASTINT (val,
5152 MAKE_NON_ASCII_CHAR (charset_katakana_jisx0201, s2, 0));
5153 else
9da8350f 5154 error ("Invalid Shift JIS code: %x", XFASTINT (code));
55ab7be3
KH
5155 }
5156 else
5157 {
5158 if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
5159 || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
9da8350f 5160 error ("Invalid Shift JIS code: %x", XFASTINT (code));
55ab7be3
KH
5161 DECODE_SJIS (s1, s2, c1, c2);
5162 XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
5163 }
4ed46869
KH
5164 return val;
5165}
5166
5167DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
55ab7be3
KH
5168 "Encode a Japanese character CHAR to shift_jis encoding.\n\
5169Return the corresponding code in SJIS.")
4ed46869
KH
5170 (ch)
5171 Lisp_Object ch;
5172{
bcf26d6a 5173 int charset, c1, c2, s1, s2;
4ed46869
KH
5174 Lisp_Object val;
5175
5176 CHECK_NUMBER (ch, 0);
5177 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
c28a9453
KH
5178 if (charset == CHARSET_ASCII)
5179 {
5180 val = ch;
5181 }
5182 else if (charset == charset_jisx0208
5183 && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
4ed46869
KH
5184 {
5185 ENCODE_SJIS (c1, c2, s1, s2);
bcf26d6a 5186 XSETFASTINT (val, (s1 << 8) | s2);
4ed46869 5187 }
55ab7be3
KH
5188 else if (charset == charset_katakana_jisx0201
5189 && c1 > 0x20 && c2 < 0xE0)
5190 {
5191 XSETFASTINT (val, c1 | 0x80);
5192 }
4ed46869 5193 else
55ab7be3 5194 error ("Can't encode to shift_jis: %d", XFASTINT (ch));
4ed46869
KH
5195 return val;
5196}
5197
5198DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
c28a9453 5199 "Decode a Big5 character which has CODE in BIG5 coding system.\n\
4ed46869
KH
5200Return the corresponding character.")
5201 (code)
5202 Lisp_Object code;
5203{
5204 int charset;
5205 unsigned char b1, b2, c1, c2;
5206 Lisp_Object val;
5207
5208 CHECK_NUMBER (code, 0);
5209 b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
c28a9453
KH
5210 if (b1 == 0)
5211 {
5212 if (b2 >= 0x80)
9da8350f 5213 error ("Invalid BIG5 code: %x", XFASTINT (code));
c28a9453
KH
5214 val = code;
5215 }
5216 else
5217 {
5218 if ((b1 < 0xA1 || b1 > 0xFE)
5219 || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
9da8350f 5220 error ("Invalid BIG5 code: %x", XFASTINT (code));
c28a9453
KH
5221 DECODE_BIG5 (b1, b2, charset, c1, c2);
5222 XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
5223 }
4ed46869
KH
5224 return val;
5225}
5226
5227DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
d46c5b12 5228 "Encode the Big5 character CHAR to BIG5 coding system.\n\
4ed46869
KH
5229Return the corresponding character code in Big5.")
5230 (ch)
5231 Lisp_Object ch;
5232{
bcf26d6a 5233 int charset, c1, c2, b1, b2;
4ed46869
KH
5234 Lisp_Object val;
5235
5236 CHECK_NUMBER (ch, 0);
5237 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
c28a9453
KH
5238 if (charset == CHARSET_ASCII)
5239 {
5240 val = ch;
5241 }
5242 else if ((charset == charset_big5_1
5243 && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
5244 || (charset == charset_big5_2
5245 && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
4ed46869
KH
5246 {
5247 ENCODE_BIG5 (charset, c1, c2, b1, b2);
bcf26d6a 5248 XSETFASTINT (val, (b1 << 8) | b2);
4ed46869
KH
5249 }
5250 else
c28a9453 5251 error ("Can't encode to Big5: %d", XFASTINT (ch));
4ed46869
KH
5252 return val;
5253}
3a73fa5d 5254\f
1ba9e4ab
KH
5255DEFUN ("set-terminal-coding-system-internal",
5256 Fset_terminal_coding_system_internal,
5257 Sset_terminal_coding_system_internal, 1, 1, 0, "")
4ed46869
KH
5258 (coding_system)
5259 Lisp_Object coding_system;
5260{
5261 CHECK_SYMBOL (coding_system, 0);
5262 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
70c22245 5263 /* We had better not send unsafe characters to terminal. */
6e85d753
KH
5264 terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
5265
4ed46869
KH
5266 return Qnil;
5267}
5268
c4825358
KH
5269DEFUN ("set-safe-terminal-coding-system-internal",
5270 Fset_safe_terminal_coding_system_internal,
5271 Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
5272 (coding_system)
5273 Lisp_Object coding_system;
5274{
5275 CHECK_SYMBOL (coding_system, 0);
5276 setup_coding_system (Fcheck_coding_system (coding_system),
5277 &safe_terminal_coding);
5278 return Qnil;
5279}
5280
4ed46869
KH
5281DEFUN ("terminal-coding-system",
5282 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
3a73fa5d 5283 "Return coding system specified for terminal output.")
4ed46869
KH
5284 ()
5285{
5286 return terminal_coding.symbol;
5287}
5288
1ba9e4ab
KH
5289DEFUN ("set-keyboard-coding-system-internal",
5290 Fset_keyboard_coding_system_internal,
5291 Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4ed46869
KH
5292 (coding_system)
5293 Lisp_Object coding_system;
5294{
5295 CHECK_SYMBOL (coding_system, 0);
5296 setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
5297 return Qnil;
5298}
5299
5300DEFUN ("keyboard-coding-system",
5301 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
3a73fa5d 5302 "Return coding system specified for decoding keyboard input.")
4ed46869
KH
5303 ()
5304{
5305 return keyboard_coding.symbol;
5306}
5307
5308\f
a5d301df
KH
5309DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
5310 Sfind_operation_coding_system, 1, MANY, 0,
5311 "Choose a coding system for an operation based on the target name.\n\
69f76525 5312The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
9ce27fde
KH
5313DECODING-SYSTEM is the coding system to use for decoding\n\
5314\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
5315for encoding (in case OPERATION does encoding).\n\
ccdb79f5
RS
5316\n\
5317The first argument OPERATION specifies an I/O primitive:\n\
5318 For file I/O, `insert-file-contents' or `write-region'.\n\
5319 For process I/O, `call-process', `call-process-region', or `start-process'.\n\
5320 For network I/O, `open-network-stream'.\n\
5321\n\
5322The remaining arguments should be the same arguments that were passed\n\
5323to the primitive. Depending on which primitive, one of those arguments\n\
5324is selected as the TARGET. For example, if OPERATION does file I/O,\n\
5325whichever argument specifies the file name is TARGET.\n\
5326\n\
5327TARGET has a meaning which depends on OPERATION:\n\
4ed46869
KH
5328 For file I/O, TARGET is a file name.\n\
5329 For process I/O, TARGET is a process name.\n\
5330 For network I/O, TARGET is a service name or a port number\n\
5331\n\
02ba4723
KH
5332This function looks up what specified for TARGET in,\n\
5333`file-coding-system-alist', `process-coding-system-alist',\n\
5334or `network-coding-system-alist' depending on OPERATION.\n\
5335They may specify a coding system, a cons of coding systems,\n\
5336or a function symbol to call.\n\
5337In the last case, we call the function with one argument,\n\
9ce27fde 5338which is a list of all the arguments given to this function.")
4ed46869
KH
5339 (nargs, args)
5340 int nargs;
5341 Lisp_Object *args;
5342{
5343 Lisp_Object operation, target_idx, target, val;
5344 register Lisp_Object chain;
5345
5346 if (nargs < 2)
5347 error ("Too few arguments");
5348 operation = args[0];
5349 if (!SYMBOLP (operation)
5350 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
5351 error ("Invalid first arguement");
5352 if (nargs < 1 + XINT (target_idx))
5353 error ("Too few arguments for operation: %s",
5354 XSYMBOL (operation)->name->data);
5355 target = args[XINT (target_idx) + 1];
5356 if (!(STRINGP (target)
5357 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
5358 error ("Invalid %dth argument", XINT (target_idx) + 1);
5359
2e34157c
RS
5360 chain = ((EQ (operation, Qinsert_file_contents)
5361 || EQ (operation, Qwrite_region))
02ba4723 5362 ? Vfile_coding_system_alist
2e34157c 5363 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
5364 ? Vnetwork_coding_system_alist
5365 : Vprocess_coding_system_alist));
4ed46869
KH
5366 if (NILP (chain))
5367 return Qnil;
5368
03699b14 5369 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 5370 {
f44d27ce 5371 Lisp_Object elt;
03699b14 5372 elt = XCAR (chain);
4ed46869
KH
5373
5374 if (CONSP (elt)
5375 && ((STRINGP (target)
03699b14
KR
5376 && STRINGP (XCAR (elt))
5377 && fast_string_match (XCAR (elt), target) >= 0)
5378 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 5379 {
03699b14 5380 val = XCDR (elt);
b19fd4c5
KH
5381 /* Here, if VAL is both a valid coding system and a valid
5382 function symbol, we return VAL as a coding system. */
02ba4723
KH
5383 if (CONSP (val))
5384 return val;
5385 if (! SYMBOLP (val))
5386 return Qnil;
5387 if (! NILP (Fcoding_system_p (val)))
5388 return Fcons (val, val);
b19fd4c5
KH
5389 if (! NILP (Ffboundp (val)))
5390 {
5391 val = call1 (val, Flist (nargs, args));
5392 if (CONSP (val))
5393 return val;
5394 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
5395 return Fcons (val, val);
5396 }
02ba4723
KH
5397 return Qnil;
5398 }
4ed46869
KH
5399 }
5400 return Qnil;
5401}
5402
1397dc18
KH
5403DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal,
5404 Supdate_coding_systems_internal, 0, 0, 0,
5405 "Update internal database for ISO2022 and CCL based coding systems.\n\
d46c5b12
KH
5406When values of the following coding categories are changed, you must\n\
5407call this function:\n\
5408 coding-category-iso-7, coding-category-iso-7-tight,\n\
5409 coding-category-iso-8-1, coding-category-iso-8-2,\n\
1397dc18
KH
5410 coding-category-iso-7-else, coding-category-iso-8-else,\n\
5411 coding-category-ccl")
d46c5b12
KH
5412 ()
5413{
5414 int i;
5415
1397dc18 5416 for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
d46c5b12 5417 {
1397dc18
KH
5418 Lisp_Object val;
5419
5420 val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5421 if (!NILP (val))
5422 {
5423 if (! coding_system_table[i])
5424 coding_system_table[i] = ((struct coding_system *)
5425 xmalloc (sizeof (struct coding_system)));
5426 setup_coding_system (val, coding_system_table[i]);
5427 }
5428 else if (coding_system_table[i])
5429 {
5430 xfree (coding_system_table[i]);
5431 coding_system_table[i] = NULL;
5432 }
d46c5b12 5433 }
1397dc18 5434
d46c5b12
KH
5435 return Qnil;
5436}
5437
66cfb530
KH
5438DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5439 Sset_coding_priority_internal, 0, 0, 0,
5440 "Update internal database for the current value of `coding-category-list'.\n\
5441This function is internal use only.")
5442 ()
5443{
5444 int i = 0, idx;
84d60297
RS
5445 Lisp_Object val;
5446
5447 val = Vcoding_category_list;
66cfb530
KH
5448
5449 while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5450 {
03699b14 5451 if (! SYMBOLP (XCAR (val)))
66cfb530 5452 break;
03699b14 5453 idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
66cfb530
KH
5454 if (idx >= CODING_CATEGORY_IDX_MAX)
5455 break;
5456 coding_priorities[i++] = (1 << idx);
03699b14 5457 val = XCDR (val);
66cfb530
KH
5458 }
5459 /* If coding-category-list is valid and contains all coding
5460 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
5461 the following code saves Emacs from craching. */
5462 while (i < CODING_CATEGORY_IDX_MAX)
5463 coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5464
5465 return Qnil;
5466}
5467
4ed46869
KH
5468#endif /* emacs */
5469
5470\f
1397dc18 5471/*** 9. Post-amble ***/
4ed46869 5472
6d74c3aa
KH
5473void
5474init_coding ()
5475{
5476 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5477}
5478
dfcf069d 5479void
4ed46869
KH
5480init_coding_once ()
5481{
5482 int i;
5483
0ef69138 5484 /* Emacs' internal format specific initialize routine. */
4ed46869
KH
5485 for (i = 0; i <= 0x20; i++)
5486 emacs_code_class[i] = EMACS_control_code;
5487 emacs_code_class[0x0A] = EMACS_linefeed_code;
5488 emacs_code_class[0x0D] = EMACS_carriage_return_code;
5489 for (i = 0x21 ; i < 0x7F; i++)
5490 emacs_code_class[i] = EMACS_ascii_code;
5491 emacs_code_class[0x7F] = EMACS_control_code;
5492 emacs_code_class[0x80] = EMACS_leading_code_composition;
5493 for (i = 0x81; i < 0xFF; i++)
5494 emacs_code_class[i] = EMACS_invalid_code;
5495 emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5496 emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5497 emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5498 emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5499
5500 /* ISO2022 specific initialize routine. */
5501 for (i = 0; i < 0x20; i++)
5502 iso_code_class[i] = ISO_control_code;
5503 for (i = 0x21; i < 0x7F; i++)
5504 iso_code_class[i] = ISO_graphic_plane_0;
5505 for (i = 0x80; i < 0xA0; i++)
5506 iso_code_class[i] = ISO_control_code;
5507 for (i = 0xA1; i < 0xFF; i++)
5508 iso_code_class[i] = ISO_graphic_plane_1;
5509 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5510 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5511 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5512 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5513 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5514 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5515 iso_code_class[ISO_CODE_ESC] = ISO_escape;
5516 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5517 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5518 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5519
e0e989f6 5520 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
e0e989f6
KH
5521
5522 setup_coding_system (Qnil, &keyboard_coding);
5523 setup_coding_system (Qnil, &terminal_coding);
c4825358 5524 setup_coding_system (Qnil, &safe_terminal_coding);
6bc51348 5525 setup_coding_system (Qnil, &default_buffer_file_coding);
9ce27fde 5526
d46c5b12
KH
5527 bzero (coding_system_table, sizeof coding_system_table);
5528
66cfb530
KH
5529 bzero (ascii_skip_code, sizeof ascii_skip_code);
5530 for (i = 0; i < 128; i++)
5531 ascii_skip_code[i] = 1;
5532
9ce27fde
KH
5533#if defined (MSDOS) || defined (WINDOWSNT)
5534 system_eol_type = CODING_EOL_CRLF;
5535#else
5536 system_eol_type = CODING_EOL_LF;
5537#endif
b843d1ae
KH
5538
5539 inhibit_pre_post_conversion = 0;
e0e989f6
KH
5540}
5541
5542#ifdef emacs
5543
dfcf069d 5544void
e0e989f6
KH
5545syms_of_coding ()
5546{
5547 Qtarget_idx = intern ("target-idx");
5548 staticpro (&Qtarget_idx);
5549
bb0115a2
RS
5550 Qcoding_system_history = intern ("coding-system-history");
5551 staticpro (&Qcoding_system_history);
5552 Fset (Qcoding_system_history, Qnil);
5553
9ce27fde 5554 /* Target FILENAME is the first argument. */
e0e989f6 5555 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 5556 /* Target FILENAME is the third argument. */
e0e989f6
KH
5557 Fput (Qwrite_region, Qtarget_idx, make_number (2));
5558
5559 Qcall_process = intern ("call-process");
5560 staticpro (&Qcall_process);
9ce27fde 5561 /* Target PROGRAM is the first argument. */
e0e989f6
KH
5562 Fput (Qcall_process, Qtarget_idx, make_number (0));
5563
5564 Qcall_process_region = intern ("call-process-region");
5565 staticpro (&Qcall_process_region);
9ce27fde 5566 /* Target PROGRAM is the third argument. */
e0e989f6
KH
5567 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5568
5569 Qstart_process = intern ("start-process");
5570 staticpro (&Qstart_process);
9ce27fde 5571 /* Target PROGRAM is the third argument. */
e0e989f6
KH
5572 Fput (Qstart_process, Qtarget_idx, make_number (2));
5573
5574 Qopen_network_stream = intern ("open-network-stream");
5575 staticpro (&Qopen_network_stream);
9ce27fde 5576 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
5577 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5578
4ed46869
KH
5579 Qcoding_system = intern ("coding-system");
5580 staticpro (&Qcoding_system);
5581
5582 Qeol_type = intern ("eol-type");
5583 staticpro (&Qeol_type);
5584
5585 Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5586 staticpro (&Qbuffer_file_coding_system);
5587
5588 Qpost_read_conversion = intern ("post-read-conversion");
5589 staticpro (&Qpost_read_conversion);
5590
5591 Qpre_write_conversion = intern ("pre-write-conversion");
5592 staticpro (&Qpre_write_conversion);
5593
27901516
KH
5594 Qno_conversion = intern ("no-conversion");
5595 staticpro (&Qno_conversion);
5596
5597 Qundecided = intern ("undecided");
5598 staticpro (&Qundecided);
5599
4ed46869
KH
5600 Qcoding_system_p = intern ("coding-system-p");
5601 staticpro (&Qcoding_system_p);
5602
5603 Qcoding_system_error = intern ("coding-system-error");
5604 staticpro (&Qcoding_system_error);
5605
5606 Fput (Qcoding_system_error, Qerror_conditions,
5607 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5608 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 5609 build_string ("Invalid coding system"));
4ed46869 5610
d46c5b12
KH
5611 Qcoding_category = intern ("coding-category");
5612 staticpro (&Qcoding_category);
4ed46869
KH
5613 Qcoding_category_index = intern ("coding-category-index");
5614 staticpro (&Qcoding_category_index);
5615
d46c5b12
KH
5616 Vcoding_category_table
5617 = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5618 staticpro (&Vcoding_category_table);
4ed46869
KH
5619 {
5620 int i;
5621 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5622 {
d46c5b12
KH
5623 XVECTOR (Vcoding_category_table)->contents[i]
5624 = intern (coding_category_name[i]);
5625 Fput (XVECTOR (Vcoding_category_table)->contents[i],
5626 Qcoding_category_index, make_number (i));
4ed46869
KH
5627 }
5628 }
5629
f967223b
KH
5630 Qtranslation_table = intern ("translation-table");
5631 staticpro (&Qtranslation_table);
1397dc18 5632 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
bdd9fb48 5633
f967223b
KH
5634 Qtranslation_table_id = intern ("translation-table-id");
5635 staticpro (&Qtranslation_table_id);
84fbb8a0 5636
f967223b
KH
5637 Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5638 staticpro (&Qtranslation_table_for_decode);
a5d301df 5639
f967223b
KH
5640 Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5641 staticpro (&Qtranslation_table_for_encode);
a5d301df 5642
70c22245
KH
5643 Qsafe_charsets = intern ("safe-charsets");
5644 staticpro (&Qsafe_charsets);
5645
1397dc18
KH
5646 Qvalid_codes = intern ("valid-codes");
5647 staticpro (&Qvalid_codes);
5648
9ce27fde
KH
5649 Qemacs_mule = intern ("emacs-mule");
5650 staticpro (&Qemacs_mule);
5651
d46c5b12
KH
5652 Qraw_text = intern ("raw-text");
5653 staticpro (&Qraw_text);
5654
4ed46869
KH
5655 defsubr (&Scoding_system_p);
5656 defsubr (&Sread_coding_system);
5657 defsubr (&Sread_non_nil_coding_system);
5658 defsubr (&Scheck_coding_system);
5659 defsubr (&Sdetect_coding_region);
d46c5b12 5660 defsubr (&Sdetect_coding_string);
4ed46869
KH
5661 defsubr (&Sdecode_coding_region);
5662 defsubr (&Sencode_coding_region);
5663 defsubr (&Sdecode_coding_string);
5664 defsubr (&Sencode_coding_string);
5665 defsubr (&Sdecode_sjis_char);
5666 defsubr (&Sencode_sjis_char);
5667 defsubr (&Sdecode_big5_char);
5668 defsubr (&Sencode_big5_char);
1ba9e4ab 5669 defsubr (&Sset_terminal_coding_system_internal);
c4825358 5670 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 5671 defsubr (&Sterminal_coding_system);
1ba9e4ab 5672 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 5673 defsubr (&Skeyboard_coding_system);
a5d301df 5674 defsubr (&Sfind_operation_coding_system);
1397dc18 5675 defsubr (&Supdate_coding_systems_internal);
66cfb530 5676 defsubr (&Sset_coding_priority_internal);
4ed46869 5677
4608c386
KH
5678 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5679 "List of coding systems.\n\
5680\n\
5681Do not alter the value of this variable manually. This variable should be\n\
5682updated by the functions `make-coding-system' and\n\
5683`define-coding-system-alias'.");
5684 Vcoding_system_list = Qnil;
5685
5686 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5687 "Alist of coding system names.\n\
5688Each element is one element list of coding system name.\n\
5689This variable is given to `completing-read' as TABLE argument.\n\
5690\n\
5691Do not alter the value of this variable manually. This variable should be\n\
5692updated by the functions `make-coding-system' and\n\
5693`define-coding-system-alias'.");
5694 Vcoding_system_alist = Qnil;
5695
4ed46869
KH
5696 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5697 "List of coding-categories (symbols) ordered by priority.");
5698 {
5699 int i;
5700
5701 Vcoding_category_list = Qnil;
5702 for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5703 Vcoding_category_list
d46c5b12
KH
5704 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5705 Vcoding_category_list);
4ed46869
KH
5706 }
5707
5708 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10bff6f1 5709 "Specify the coding system for read operations.\n\
2ebb362d 5710It is useful to bind this variable with `let', but do not set it globally.\n\
4ed46869 5711If the value is a coding system, it is used for decoding on read operation.\n\
a67a9c66 5712If not, an appropriate element is used from one of the coding system alists:\n\
10bff6f1 5713There are three such tables, `file-coding-system-alist',\n\
a67a9c66 5714`process-coding-system-alist', and `network-coding-system-alist'.");
4ed46869
KH
5715 Vcoding_system_for_read = Qnil;
5716
5717 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10bff6f1 5718 "Specify the coding system for write operations.\n\
928aedd8
RS
5719Programs bind this variable with `let', but you should not set it globally.\n\
5720If the value is a coding system, it is used for encoding of output,\n\
5721when writing it to a file and when sending it to a file or subprocess.\n\
5722\n\
5723If this does not specify a coding system, an appropriate element\n\
5724is used from one of the coding system alists:\n\
10bff6f1 5725There are three such tables, `file-coding-system-alist',\n\
928aedd8
RS
5726`process-coding-system-alist', and `network-coding-system-alist'.\n\
5727For output to files, if the above procedure does not specify a coding system,\n\
5728the value of `buffer-file-coding-system' is used.");
4ed46869
KH
5729 Vcoding_system_for_write = Qnil;
5730
5731 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
a67a9c66 5732 "Coding system used in the latest file or process I/O.");
4ed46869
KH
5733 Vlast_coding_system_used = Qnil;
5734
9ce27fde 5735 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
f07f4a24 5736 "*Non-nil means always inhibit code conversion of end-of-line format.\n\
94c7a214
DL
5737See info node `Coding Systems' and info node `Text and Binary' concerning\n\
5738such conversion.");
9ce27fde
KH
5739 inhibit_eol_conversion = 0;
5740
ed29121d
EZ
5741 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5742 "Non-nil means process buffer inherits coding system of process output.\n\
5743Bind it to t if the process output is to be treated as if it were a file\n\
5744read from some filesystem.");
5745 inherit_process_coding_system = 0;
5746
02ba4723
KH
5747 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5748 "Alist to decide a coding system to use for a file I/O operation.\n\
5749The format is ((PATTERN . VAL) ...),\n\
5750where PATTERN is a regular expression matching a file name,\n\
5751VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5752If VAL is a coding system, it is used for both decoding and encoding\n\
5753the file contents.\n\
5754If VAL is a cons of coding systems, the car part is used for decoding,\n\
5755and the cdr part is used for encoding.\n\
5756If VAL is a function symbol, the function must return a coding system\n\
5757or a cons of coding systems which are used as above.\n\
e0e989f6 5758\n\
a85a871a 5759See also the function `find-operation-coding-system'\n\
eda284ac 5760and the variable `auto-coding-alist'.");
02ba4723
KH
5761 Vfile_coding_system_alist = Qnil;
5762
5763 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5764 "Alist to decide a coding system to use for a process I/O operation.\n\
5765The format is ((PATTERN . VAL) ...),\n\
5766where PATTERN is a regular expression matching a program name,\n\
5767VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5768If VAL is a coding system, it is used for both decoding what received\n\
5769from the program and encoding what sent to the program.\n\
5770If VAL is a cons of coding systems, the car part is used for decoding,\n\
5771and the cdr part is used for encoding.\n\
5772If VAL is a function symbol, the function must return a coding system\n\
5773or a cons of coding systems which are used as above.\n\
4ed46869 5774\n\
9ce27fde 5775See also the function `find-operation-coding-system'.");
02ba4723
KH
5776 Vprocess_coding_system_alist = Qnil;
5777
5778 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5779 "Alist to decide a coding system to use for a network I/O operation.\n\
5780The format is ((PATTERN . VAL) ...),\n\
5781where PATTERN is a regular expression matching a network service name\n\
5782or is a port number to connect to,\n\
5783VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5784If VAL is a coding system, it is used for both decoding what received\n\
5785from the network stream and encoding what sent to the network stream.\n\
5786If VAL is a cons of coding systems, the car part is used for decoding,\n\
5787and the cdr part is used for encoding.\n\
5788If VAL is a function symbol, the function must return a coding system\n\
5789or a cons of coding systems which are used as above.\n\
4ed46869 5790\n\
9ce27fde 5791See also the function `find-operation-coding-system'.");
02ba4723 5792 Vnetwork_coding_system_alist = Qnil;
4ed46869 5793
68c45bf0
PE
5794 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
5795 "Coding system to use with system messages.");
5796 Vlocale_coding_system = Qnil;
5797
7722baf9
EZ
5798 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
5799 "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
5800 eol_mnemonic_unix = build_string (":");
4ed46869 5801
7722baf9
EZ
5802 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
5803 "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
5804 eol_mnemonic_dos = build_string ("\\");
4ed46869 5805
7722baf9
EZ
5806 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
5807 "*String displayed in mode line for MAC-like (CR) end-of-line format.");
5808 eol_mnemonic_mac = build_string ("/");
4ed46869 5809
7722baf9
EZ
5810 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5811 "*String displayed in mode line when end-of-line format is not yet determined.");
5812 eol_mnemonic_undecided = build_string (":");
4ed46869 5813
84fbb8a0 5814 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
f967223b 5815 "*Non-nil enables character translation while encoding and decoding.");
84fbb8a0 5816 Venable_character_translation = Qt;
bdd9fb48 5817
f967223b
KH
5818 DEFVAR_LISP ("standard-translation-table-for-decode",
5819 &Vstandard_translation_table_for_decode,
84fbb8a0 5820 "Table for translating characters while decoding.");
f967223b 5821 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 5822
f967223b
KH
5823 DEFVAR_LISP ("standard-translation-table-for-encode",
5824 &Vstandard_translation_table_for_encode,
84fbb8a0 5825 "Table for translationg characters while encoding.");
f967223b 5826 Vstandard_translation_table_for_encode = Qnil;
4ed46869
KH
5827
5828 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5829 "Alist of charsets vs revision numbers.\n\
5830While encoding, if a charset (car part of an element) is found,\n\
5831designate it with the escape sequence identifing revision (cdr part of the element).");
5832 Vcharset_revision_alist = Qnil;
02ba4723
KH
5833
5834 DEFVAR_LISP ("default-process-coding-system",
5835 &Vdefault_process_coding_system,
5836 "Cons of coding systems used for process I/O by default.\n\
5837The car part is used for decoding a process output,\n\
5838the cdr part is used for encoding a text to be sent to a process.");
5839 Vdefault_process_coding_system = Qnil;
c4825358 5840
3f003981
KH
5841 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5842 "Table of extra Latin codes in the range 128..159 (inclusive).\n\
c4825358
KH
5843This is a vector of length 256.\n\
5844If Nth element is non-nil, the existence of code N in a file\n\
bb0115a2 5845\(or output of subprocess) doesn't prevent it to be detected as\n\
3f003981
KH
5846a coding system of ISO 2022 variant which has a flag\n\
5847`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
c4825358
KH
5848or reading output of a subprocess.\n\
5849Only 128th through 159th elements has a meaning.");
3f003981 5850 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
5851
5852 DEFVAR_LISP ("select-safe-coding-system-function",
5853 &Vselect_safe_coding_system_function,
5854 "Function to call to select safe coding system for encoding a text.\n\
5855\n\
5856If set, this function is called to force a user to select a proper\n\
5857coding system which can encode the text in the case that a default\n\
5858coding system used in each operation can't encode the text.\n\
5859\n\
a85a871a 5860The default value is `select-safe-coding-system' (which see).");
d46c5b12
KH
5861 Vselect_safe_coding_system_function = Qnil;
5862
4ed46869
KH
5863}
5864
68c45bf0
PE
5865char *
5866emacs_strerror (error_number)
5867 int error_number;
5868{
5869 char *str;
5870
5871 synchronize_messages_locale ();
5872 str = strerror (error_number);
5873
5874 if (! NILP (Vlocale_coding_system))
5875 {
5876 Lisp_Object dec = code_convert_string_norecord (build_string (str),
5877 Vlocale_coding_system,
5878 0);
5879 str = (char *) XSTRING (dec)->data;
5880 }
5881
5882 return str;
5883}
5884
4ed46869 5885#endif /* emacs */