(coding_set_source): Delete the local variable beg_byte.
[bpt/emacs.git] / src / coding.c
1 /* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9 This file is part of GNU Emacs.
10
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
14 any later version.
15
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
25
26 /*** TABLE OF CONTENTS ***
27
28 0. General comments
29 1. Preamble
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
41
42 */
43
44 /*** 0. General comments ***
45
46
47 CODING SYSTEM
48
49 Coding system is an object for a encoding mechanism that contains
50 information about how to convert byte sequence to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcharset_hash_table. The conversion from a
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
65 the mechanism of encoding. Here's a brief descrition about type.
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by code converter defined for each
75 character set.
76
77 o Old Emacs' internal format (emacs-mule)
78
79 The coding system adopted by an old versions of Emacs (20 and 21).
80
81 o ISO2022-base coding system
82
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
92 section 8.
93
94 o BIG5
95
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
101
102 o CCL
103
104 If a user wants to decode/encode a text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
108
109 o Raw-text
110
111 A coding system for a text containing raw eight-bit data. Emacs
112 treat each byte of source text as a character (except for
113 end-of-line conversion).
114
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
118
119
120 END-OF-LINE FORMAT
121
122 How end-of-line of a text is encoded depends on a system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
127
128 Since text characters encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
131
132 STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
138
139 */
140
141 /* COMMON MACROS */
142
143
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
157 #if 0
158 static int
159 detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
162 {
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
188 }
189 #endif
190
191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
197
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
202
203 Below is the template of these functions. */
204
205 #if 0
206 static void
207 decode_coding_XXXX (coding)
208 struct coding_system *coding;
209 {
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
243 }
244 #endif
245
246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
252
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
257
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
261
262 Below is a template of these functions. */
263 #if 0
264 static void
265 encode_coding_XXX (coding)
266 struct coding_system *coding;
267 {
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
285 }
286 #endif
287
288 \f
289 /*** 1. Preamble ***/
290
291 #include <config.h>
292 #include <stdio.h>
293
294 #include "lisp.h"
295 #include "buffer.h"
296 #include "character.h"
297 #include "charset.h"
298 #include "ccl.h"
299 #include "composite.h"
300 #include "coding.h"
301 #include "window.h"
302
303 Lisp_Object Vcoding_system_hash_table;
304
305 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
306 Lisp_Object Qunix, Qdos, Qmac;
307 Lisp_Object Qbuffer_file_coding_system;
308 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
309 Lisp_Object Qdefault_char;
310 Lisp_Object Qno_conversion, Qundecided;
311 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
312 Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
313 Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
314 Lisp_Object Qcoding_system_history;
315 Lisp_Object Qvalid_codes;
316
317 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
318 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
319 Lisp_Object Qstart_process, Qopen_network_stream;
320 Lisp_Object Qtarget_idx;
321
322 Lisp_Object Vselect_safe_coding_system_function;
323
324 /* Mnemonic string for each format of end-of-line. */
325 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
326 /* Mnemonic string to indicate format of end-of-line is not yet
327 decided. */
328 Lisp_Object eol_mnemonic_undecided;
329
330 #ifdef emacs
331
332 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
333
334 Lisp_Object Qcoding_system_p, Qcoding_system_error;
335
336 /* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338 Lisp_Object Qemacs_mule, Qraw_text;
339
340 /* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342 /* Coding-system for reading files and receiving data from process. */
343 Lisp_Object Vcoding_system_for_read;
344 /* Coding-system for writing files and sending data to process. */
345 Lisp_Object Vcoding_system_for_write;
346 /* Coding-system actually used in the latest I/O. */
347 Lisp_Object Vlast_coding_system_used;
348
349 /* A vector of length 256 which contains information about special
350 Latin codes (especially for dealing with Microsoft codes). */
351 Lisp_Object Vlatin_extra_code_table;
352
353 /* Flag to inhibit code conversion of end-of-line format. */
354 int inhibit_eol_conversion;
355
356 /* Flag to inhibit ISO2022 escape sequence detection. */
357 int inhibit_iso_escape_detection;
358
359 /* Flag to make buffer-file-coding-system inherit from process-coding. */
360 int inherit_process_coding_system;
361
362 /* Coding system to be used to encode text for terminal display. */
363 struct coding_system terminal_coding;
364
365 /* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367 struct coding_system safe_terminal_coding;
368
369 /* Coding system of what is sent from terminal keyboard. */
370 struct coding_system keyboard_coding;
371
372 Lisp_Object Vfile_coding_system_alist;
373 Lisp_Object Vprocess_coding_system_alist;
374 Lisp_Object Vnetwork_coding_system_alist;
375
376 Lisp_Object Vlocale_coding_system;
377
378 #endif /* emacs */
379
380 /* Flag to tell if we look up translation table on character code
381 conversion. */
382 Lisp_Object Venable_character_translation;
383 /* Standard translation table to look up on decoding (reading). */
384 Lisp_Object Vstandard_translation_table_for_decode;
385 /* Standard translation table to look up on encoding (writing). */
386 Lisp_Object Vstandard_translation_table_for_encode;
387
388 Lisp_Object Qtranslation_table;
389 Lisp_Object Qtranslation_table_id;
390 Lisp_Object Qtranslation_table_for_decode;
391 Lisp_Object Qtranslation_table_for_encode;
392
393 /* Alist of charsets vs revision number. */
394 static Lisp_Object Vcharset_revision_table;
395
396 /* Default coding systems used for process I/O. */
397 Lisp_Object Vdefault_process_coding_system;
398
399 /* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403 static int inhibit_pre_post_conversion;
404
405 /* Char-table containing safe coding systems of each character. */
406 Lisp_Object Vchar_coding_system_table;
407 Lisp_Object Qchar_coding_system;
408
409 /* Two special coding systems. */
410 Lisp_Object Vsjis_coding_system;
411 Lisp_Object Vbig5_coding_system;
412
413
414 static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
415 static void decode_coding_utf_8 P_ ((struct coding_system *));
416 static int encode_coding_utf_8 P_ ((struct coding_system *));
417
418 static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
419 static void decode_coding_utf_16 P_ ((struct coding_system *));
420 static int encode_coding_utf_16 P_ ((struct coding_system *));
421
422 static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
423 static void decode_coding_iso_2022 P_ ((struct coding_system *));
424 static int encode_coding_iso_2022 P_ ((struct coding_system *));
425
426 static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
427 static void decode_coding_emacs_mule P_ ((struct coding_system *));
428 static int encode_coding_emacs_mule P_ ((struct coding_system *));
429
430 static int detect_coding_sjis P_ ((struct coding_system *, int *));
431 static void decode_coding_sjis P_ ((struct coding_system *));
432 static int encode_coding_sjis P_ ((struct coding_system *));
433
434 static int detect_coding_big5 P_ ((struct coding_system *, int *));
435 static void decode_coding_big5 P_ ((struct coding_system *));
436 static int encode_coding_big5 P_ ((struct coding_system *));
437
438 static int detect_coding_ccl P_ ((struct coding_system *, int *));
439 static void decode_coding_ccl P_ ((struct coding_system *));
440 static int encode_coding_ccl P_ ((struct coding_system *));
441
442 static void decode_coding_raw_text P_ ((struct coding_system *));
443 static int encode_coding_raw_text P_ ((struct coding_system *));
444
445
446 /* ISO2022 section */
447
448 #define CODING_ISO_INITIAL(coding, reg) \
449 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
450 coding_attr_iso_initial), \
451 reg)))
452
453
454 #define CODING_ISO_REQUEST(coding, charset_id) \
455 ((charset_id <= (coding)->max_charset_id \
456 ? (coding)->safe_charsets[charset_id] \
457 : -1))
458
459
460 #define CODING_ISO_FLAGS(coding) \
461 ((coding)->spec.iso_2022.flags)
462 #define CODING_ISO_DESIGNATION(coding, reg) \
463 ((coding)->spec.iso_2022.current_designation[reg])
464 #define CODING_ISO_INVOCATION(coding, plane) \
465 ((coding)->spec.iso_2022.current_invocation[plane])
466 #define CODING_ISO_SINGLE_SHIFTING(coding) \
467 ((coding)->spec.iso_2022.single_shifting)
468 #define CODING_ISO_BOL(coding) \
469 ((coding)->spec.iso_2022.bol)
470 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
471 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
472
473 /* Control characters of ISO2022. */
474 /* code */ /* function */
475 #define ISO_CODE_LF 0x0A /* line-feed */
476 #define ISO_CODE_CR 0x0D /* carriage-return */
477 #define ISO_CODE_SO 0x0E /* shift-out */
478 #define ISO_CODE_SI 0x0F /* shift-in */
479 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
480 #define ISO_CODE_ESC 0x1B /* escape */
481 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
482 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
483 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
484
485 /* All code (1-byte) of ISO2022 is classified into one of the
486 followings. */
487 enum iso_code_class_type
488 {
489 ISO_control_0, /* Control codes in the range
490 0x00..0x1F and 0x7F, except for the
491 following 5 codes. */
492 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
493 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
494 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
495 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
496 ISO_escape, /* ISO_CODE_SO (0x1B) */
497 ISO_control_1, /* Control codes in the range
498 0x80..0x9F, except for the
499 following 3 codes. */
500 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
501 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
502 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
503 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
504 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
505 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
506 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
507 };
508
509 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
510 `iso-flags' attribute of an iso2022 coding system. */
511
512 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
513 instead of the correct short-form sequence (e.g. ESC $ A). */
514 #define CODING_ISO_FLAG_LONG_FORM 0x0001
515
516 /* If set, reset graphic planes and registers at end-of-line to the
517 initial state. */
518 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
519
520 /* If set, reset graphic planes and registers before any control
521 characters to the initial state. */
522 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
523
524 /* If set, encode by 7-bit environment. */
525 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
526
527 /* If set, use locking-shift function. */
528 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
529
530 /* If set, use single-shift function. Overwrite
531 CODING_ISO_FLAG_LOCKING_SHIFT. */
532 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
533
534 /* If set, use designation escape sequence. */
535 #define CODING_ISO_FLAG_DESIGNATION 0x0040
536
537 /* If set, produce revision number sequence. */
538 #define CODING_ISO_FLAG_REVISION 0x0080
539
540 /* If set, produce ISO6429's direction specifying sequence. */
541 #define CODING_ISO_FLAG_DIRECTION 0x0100
542
543 /* If set, assume designation states are reset at beginning of line on
544 output. */
545 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
546
547 /* If set, designation sequence should be placed at beginning of line
548 on output. */
549 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
550
551 /* If set, do not encode unsafe charactes on output. */
552 #define CODING_ISO_FLAG_SAFE 0x0800
553
554 /* If set, extra latin codes (128..159) are accepted as a valid code
555 on input. */
556 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
557
558 #define CODING_ISO_FLAG_COMPOSITION 0x2000
559
560 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
561
562 #define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
563
564 /* A character to be produced on output if encoding of the original
565 character is prohibited by CODING_ISO_FLAG_SAFE. */
566 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
567
568
569 /* UTF-16 section */
570 #define CODING_UTF_16_BOM(coding) \
571 ((coding)->spec.utf_16.bom)
572
573 #define CODING_UTF_16_ENDIAN(coding) \
574 ((coding)->spec.utf_16.endian)
575
576 #define CODING_UTF_16_SURROGATE(coding) \
577 ((coding)->spec.utf_16.surrogate)
578
579
580 /* CCL section */
581 #define CODING_CCL_DECODER(coding) \
582 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
583 #define CODING_CCL_ENCODER(coding) \
584 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
585 #define CODING_CCL_VALIDS(coding) \
586 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
587 ->data)
588
589 /* Index for each coding category in `coding_category_table' */
590
591 enum coding_category
592 {
593 coding_category_iso_7,
594 coding_category_iso_7_tight,
595 coding_category_iso_8_1,
596 coding_category_iso_8_2,
597 coding_category_iso_7_else,
598 coding_category_iso_8_else,
599 coding_category_utf_8,
600 coding_category_utf_16_auto,
601 coding_category_utf_16_be,
602 coding_category_utf_16_le,
603 coding_category_utf_16_be_nosig,
604 coding_category_utf_16_le_nosig,
605 coding_category_charset,
606 coding_category_sjis,
607 coding_category_big5,
608 coding_category_ccl,
609 coding_category_emacs_mule,
610 /* All above are targets of code detection. */
611 coding_category_raw_text,
612 coding_category_undecided,
613 coding_category_max
614 };
615
616 /* Definitions of flag bits used in detect_coding_XXXX. */
617 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
618 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
619 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
620 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
621 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
622 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
623 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
624 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
625 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
626 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
627 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
628 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
629 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
630 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
631 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
632 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
633
634 /* This value is returned if detect_coding_mask () find nothing other
635 than ASCII characters. */
636 #define CATEGORY_MASK_ANY \
637 (CATEGORY_MASK_ISO_7 \
638 | CATEGORY_MASK_ISO_7_TIGHT \
639 | CATEGORY_MASK_ISO_8_1 \
640 | CATEGORY_MASK_ISO_8_2 \
641 | CATEGORY_MASK_ISO_7_ELSE \
642 | CATEGORY_MASK_ISO_8_ELSE \
643 | CATEGORY_MASK_UTF_8 \
644 | CATEGORY_MASK_UTF_16_BE \
645 | CATEGORY_MASK_UTF_16_LE \
646 | CATEGORY_MASK_UTF_16_BE_NOSIG \
647 | CATEGORY_MASK_UTF_16_LE_NOSIG \
648 | CATEGORY_MASK_CHARSET \
649 | CATEGORY_MASK_SJIS \
650 | CATEGORY_MASK_BIG5 \
651 | CATEGORY_MASK_CCL \
652 | CATEGORY_MASK_EMACS_MULE)
653
654
655 #define CATEGORY_MASK_ISO_7BIT \
656 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
657
658 #define CATEGORY_MASK_ISO_8BIT \
659 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
660
661 #define CATEGORY_MASK_ISO_ELSE \
662 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
663
664 #define CATEGORY_MASK_ISO_ESCAPE \
665 (CATEGORY_MASK_ISO_7 \
666 | CATEGORY_MASK_ISO_7_TIGHT \
667 | CATEGORY_MASK_ISO_7_ELSE \
668 | CATEGORY_MASK_ISO_8_ELSE)
669
670 #define CATEGORY_MASK_ISO \
671 ( CATEGORY_MASK_ISO_7BIT \
672 | CATEGORY_MASK_ISO_8BIT \
673 | CATEGORY_MASK_ISO_ELSE)
674
675 #define CATEGORY_MASK_UTF_16 \
676 (CATEGORY_MASK_UTF_16_BE \
677 | CATEGORY_MASK_UTF_16_LE \
678 | CATEGORY_MASK_UTF_16_BE_NOSIG \
679 | CATEGORY_MASK_UTF_16_LE_NOSIG)
680
681
682 /* List of symbols `coding-category-xxx' ordered by priority. This
683 variable is exposed to Emacs Lisp. */
684 static Lisp_Object Vcoding_category_list;
685
686 /* Table of coding categories (Lisp symbols). This variable is for
687 internal use oly. */
688 static Lisp_Object Vcoding_category_table;
689
690 /* Table of coding-categories ordered by priority. */
691 static enum coding_category coding_priorities[coding_category_max];
692
693 /* Nth element is a coding context for the coding system bound to the
694 Nth coding category. */
695 static struct coding_system coding_categories[coding_category_max];
696
697 static int detected_mask[coding_category_raw_text] =
698 { CATEGORY_MASK_ISO,
699 CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_ISO,
702 CATEGORY_MASK_ISO,
703 CATEGORY_MASK_ISO,
704 CATEGORY_MASK_UTF_8,
705 CATEGORY_MASK_UTF_16,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_UTF_16,
708 CATEGORY_MASK_UTF_16,
709 CATEGORY_MASK_UTF_16,
710 CATEGORY_MASK_CHARSET,
711 CATEGORY_MASK_SJIS,
712 CATEGORY_MASK_BIG5,
713 CATEGORY_MASK_CCL,
714 CATEGORY_MASK_EMACS_MULE
715 };
716
717 /*** Commonly used macros and functions ***/
718
719 #ifndef min
720 #define min(a, b) ((a) < (b) ? (a) : (b))
721 #endif
722 #ifndef max
723 #define max(a, b) ((a) > (b) ? (a) : (b))
724 #endif
725
726 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
727 do { \
728 attrs = CODING_ID_ATTRS (coding->id); \
729 eol_type = CODING_ID_EOL_TYPE (coding->id); \
730 if (VECTORP (eol_type)) \
731 eol_type = Qunix; \
732 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
733 } while (0)
734
735
736 /* Safely get one byte from the source text pointed by SRC which ends
737 at SRC_END, and set C to that byte. If there are not enough bytes
738 in the source, it jumps to `no_more_source'. The caller
739 should declare and set these variables appropriately in advance:
740 src, src_end, multibytep
741 */
742
743 #define ONE_MORE_BYTE(c) \
744 do { \
745 if (src == src_end) \
746 { \
747 if (src_base < src) \
748 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
749 goto no_more_source; \
750 } \
751 c = *src++; \
752 if (multibytep && (c & 0x80)) \
753 { \
754 if ((c & 0xFE) != 0xC0) \
755 error ("Undecodable char found"); \
756 c = ((c & 1) << 6) | *src++; \
757 } \
758 consumed_chars++; \
759 } while (0)
760
761
762 #define ONE_MORE_BYTE_NO_CHECK(c) \
763 do { \
764 c = *src++; \
765 if (multibytep && (c & 0x80)) \
766 { \
767 if ((c & 0xFE) != 0xC0) \
768 error ("Undecodable char found"); \
769 c = ((c & 1) << 6) | *src++; \
770 } \
771 } while (0)
772
773
774 /* Store a byte C in the place pointed by DST and increment DST to the
775 next free point, and increment PRODUCED_CHARS. The caller should
776 assure that C is 0..127, and declare and set the variable `dst'
777 appropriately in advance.
778 */
779
780
781 #define EMIT_ONE_ASCII_BYTE(c) \
782 do { \
783 produced_chars++; \
784 *dst++ = (c); \
785 } while (0)
786
787
788 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
789
790 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
791 do { \
792 produced_chars += 2; \
793 *dst++ = (c1), *dst++ = (c2); \
794 } while (0)
795
796
797 /* Store a byte C in the place pointed by DST and increment DST to the
798 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
799 nonzero, store in an appropriate multibyte from. The caller should
800 declare and set the variables `dst' and `multibytep' appropriately
801 in advance. */
802
803 #define EMIT_ONE_BYTE(c) \
804 do { \
805 produced_chars++; \
806 if (multibytep) \
807 { \
808 int ch = (c); \
809 if (ch >= 0x80) \
810 ch = BYTE8_TO_CHAR (ch); \
811 CHAR_STRING_ADVANCE (ch, dst); \
812 } \
813 else \
814 *dst++ = (c); \
815 } while (0)
816
817
818 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
819
820 #define EMIT_TWO_BYTES(c1, c2) \
821 do { \
822 produced_chars += 2; \
823 if (multibytep) \
824 { \
825 int ch; \
826 \
827 ch = (c1); \
828 if (ch >= 0x80) \
829 ch = BYTE8_TO_CHAR (ch); \
830 CHAR_STRING_ADVANCE (ch, dst); \
831 ch = (c2); \
832 if (ch >= 0x80) \
833 ch = BYTE8_TO_CHAR (ch); \
834 CHAR_STRING_ADVANCE (ch, dst); \
835 } \
836 else \
837 { \
838 *dst++ = (c1); \
839 *dst++ = (c2); \
840 } \
841 } while (0)
842
843
844 #define EMIT_THREE_BYTES(c1, c2, c3) \
845 do { \
846 EMIT_ONE_BYTE (c1); \
847 EMIT_TWO_BYTES (c2, c3); \
848 } while (0)
849
850
851 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
852 do { \
853 EMIT_TWO_BYTES (c1, c2); \
854 EMIT_TWO_BYTES (c3, c4); \
855 } while (0)
856
857
858 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
859 do { \
860 charset_map_loaded = 0; \
861 c = DECODE_CHAR (charset, code); \
862 if (charset_map_loaded) \
863 { \
864 unsigned char *orig = coding->source; \
865 EMACS_INT offset; \
866 \
867 coding_set_source (coding); \
868 offset = coding->source - orig; \
869 src += offset; \
870 src_base += offset; \
871 src_end += offset; \
872 } \
873 } while (0)
874
875
876 #define ASSURE_DESTINATION(bytes) \
877 do { \
878 if (dst + (bytes) >= dst_end) \
879 { \
880 int more_bytes = charbuf_end - charbuf + (bytes); \
881 \
882 dst = alloc_destination (coding, more_bytes, dst); \
883 dst_end = coding->destination + coding->dst_bytes; \
884 } \
885 } while (0)
886
887
888
889 static void
890 coding_set_source (coding)
891 struct coding_system *coding;
892 {
893 if (BUFFERP (coding->src_object))
894 {
895 if (coding->src_pos < 0)
896 coding->source = GAP_END_ADDR + coding->src_pos_byte;
897 else
898 {
899 struct buffer *buf = XBUFFER (coding->src_object);
900 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
901 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
902
903 coding->source = beg_addr + coding->src_pos_byte - 1;
904 if (coding->src_pos_byte >= gpt_byte)
905 coding->source += BUF_GAP_SIZE (buf);
906 }
907 }
908 else if (STRINGP (coding->src_object))
909 {
910 coding->source = (XSTRING (coding->src_object)->data
911 + coding->src_pos_byte);
912 }
913 else
914 /* Otherwise, the source is C string and is never relocated
915 automatically. Thus we don't have to update anything. */
916 ;
917 }
918
919 static void
920 coding_set_destination (coding)
921 struct coding_system *coding;
922 {
923 if (BUFFERP (coding->dst_object))
924 {
925 /* We are sure that coding->dst_pos_byte is before the gap of the
926 buffer. */
927 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
928 + coding->dst_pos_byte - 1);
929 if (coding->src_pos < 0)
930 coding->dst_bytes = (GAP_END_ADDR
931 - (coding->src_bytes - coding->consumed)
932 - coding->destination);
933 else
934 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
935 - coding->destination);
936 }
937 else
938 /* Otherwise, the destination is C string and is never relocated
939 automatically. Thus we don't have to update anything. */
940 ;
941 }
942
943
944 static void
945 coding_alloc_by_realloc (coding, bytes)
946 struct coding_system *coding;
947 EMACS_INT bytes;
948 {
949 coding->destination = (unsigned char *) xrealloc (coding->destination,
950 coding->dst_bytes + bytes);
951 coding->dst_bytes += bytes;
952 }
953
954 static void
955 coding_alloc_by_making_gap (coding, bytes)
956 struct coding_system *coding;
957 EMACS_INT bytes;
958 {
959 if (BUFFERP (coding->dst_object)
960 && EQ (coding->src_object, coding->dst_object))
961 {
962 EMACS_INT add = coding->src_bytes - coding->consumed;
963
964 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
965 make_gap (bytes);
966 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
967 }
968 else
969 {
970 Lisp_Object this_buffer;
971
972 this_buffer = Fcurrent_buffer ();
973 set_buffer_internal (XBUFFER (coding->dst_object));
974 make_gap (bytes);
975 set_buffer_internal (XBUFFER (this_buffer));
976 }
977 }
978
979
980 static unsigned char *
981 alloc_destination (coding, nbytes, dst)
982 struct coding_system *coding;
983 int nbytes;
984 unsigned char *dst;
985 {
986 EMACS_INT offset = dst - coding->destination;
987
988 if (BUFFERP (coding->dst_object))
989 coding_alloc_by_making_gap (coding, nbytes);
990 else
991 coding_alloc_by_realloc (coding, nbytes);
992 coding->result = CODING_RESULT_SUCCESS;
993 coding_set_destination (coding);
994 dst = coding->destination + offset;
995 return dst;
996 }
997
998 \f
999 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1000
1001
1002
1003 \f
1004 /*** 3. UTF-8 ***/
1005
1006 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1007 Check if a text is encoded in UTF-8. If it is, return
1008 CATEGORY_MASK_UTF_8, else return 0. */
1009
1010 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1011 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1012 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1013 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1014 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1015 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1016
1017 static int
1018 detect_coding_utf_8 (coding, mask)
1019 struct coding_system *coding;
1020 int *mask;
1021 {
1022 unsigned char *src = coding->source, *src_base = src;
1023 unsigned char *src_end = coding->source + coding->src_bytes;
1024 int multibytep = coding->src_multibyte;
1025 int consumed_chars = 0;
1026 int found = 0;
1027
1028 /* A coding system of this category is always ASCII compatible. */
1029 src += coding->head_ascii;
1030
1031 while (1)
1032 {
1033 int c, c1, c2, c3, c4;
1034
1035 ONE_MORE_BYTE (c);
1036 if (UTF_8_1_OCTET_P (c))
1037 continue;
1038 ONE_MORE_BYTE (c1);
1039 if (! UTF_8_EXTRA_OCTET_P (c1))
1040 break;
1041 if (UTF_8_2_OCTET_LEADING_P (c))
1042 {
1043 found++;
1044 continue;
1045 }
1046 ONE_MORE_BYTE (c2);
1047 if (! UTF_8_EXTRA_OCTET_P (c2))
1048 break;
1049 if (UTF_8_3_OCTET_LEADING_P (c))
1050 {
1051 found++;
1052 continue;
1053 }
1054 ONE_MORE_BYTE (c3);
1055 if (! UTF_8_EXTRA_OCTET_P (c3))
1056 break;
1057 if (UTF_8_4_OCTET_LEADING_P (c))
1058 {
1059 found++;
1060 continue;
1061 }
1062 ONE_MORE_BYTE (c4);
1063 if (! UTF_8_EXTRA_OCTET_P (c4))
1064 break;
1065 if (UTF_8_5_OCTET_LEADING_P (c))
1066 {
1067 found++;
1068 continue;
1069 }
1070 break;
1071 }
1072 *mask &= ~CATEGORY_MASK_UTF_8;
1073 return 0;
1074
1075 no_more_source:
1076 if (! found)
1077 return 0;
1078 *mask &= CATEGORY_MASK_UTF_8;
1079 return 1;
1080 }
1081
1082
1083 static void
1084 decode_coding_utf_8 (coding)
1085 struct coding_system *coding;
1086 {
1087 unsigned char *src = coding->source + coding->consumed;
1088 unsigned char *src_end = coding->source + coding->src_bytes;
1089 unsigned char *src_base;
1090 int *charbuf = coding->charbuf;
1091 int *charbuf_end = charbuf + coding->charbuf_size;
1092 int consumed_chars = 0, consumed_chars_base;
1093 int multibytep = coding->src_multibyte;
1094 Lisp_Object attr, eol_type, charset_list;
1095
1096 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1097
1098 while (1)
1099 {
1100 int c, c1, c2, c3, c4, c5;
1101
1102 src_base = src;
1103 consumed_chars_base = consumed_chars;
1104
1105 if (charbuf >= charbuf_end)
1106 break;
1107
1108 ONE_MORE_BYTE (c1);
1109 if (UTF_8_1_OCTET_P(c1))
1110 {
1111 c = c1;
1112 if (c == '\r')
1113 {
1114 if (EQ (eol_type, Qdos))
1115 {
1116 if (src == src_end)
1117 goto no_more_source;
1118 if (*src == '\n')
1119 ONE_MORE_BYTE (c);
1120 }
1121 else if (EQ (eol_type, Qmac))
1122 c = '\n';
1123 }
1124 }
1125 else
1126 {
1127 ONE_MORE_BYTE (c2);
1128 if (! UTF_8_EXTRA_OCTET_P (c2))
1129 goto invalid_code;
1130 if (UTF_8_2_OCTET_LEADING_P (c1))
1131 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1132 else
1133 {
1134 ONE_MORE_BYTE (c3);
1135 if (! UTF_8_EXTRA_OCTET_P (c3))
1136 goto invalid_code;
1137 if (UTF_8_3_OCTET_LEADING_P (c1))
1138 c = (((c1 & 0xF) << 12)
1139 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1140 else
1141 {
1142 ONE_MORE_BYTE (c4);
1143 if (! UTF_8_EXTRA_OCTET_P (c4))
1144 goto invalid_code;
1145 if (UTF_8_4_OCTET_LEADING_P (c1))
1146 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1147 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1148 else
1149 {
1150 ONE_MORE_BYTE (c5);
1151 if (! UTF_8_EXTRA_OCTET_P (c5))
1152 goto invalid_code;
1153 if (UTF_8_5_OCTET_LEADING_P (c1))
1154 {
1155 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1156 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1157 | (c5 & 0x3F));
1158 if (c > MAX_CHAR)
1159 goto invalid_code;
1160 }
1161 else
1162 goto invalid_code;
1163 }
1164 }
1165 }
1166 }
1167
1168 *charbuf++ = c;
1169 continue;
1170
1171 invalid_code:
1172 src = src_base;
1173 consumed_chars = consumed_chars_base;
1174 ONE_MORE_BYTE (c);
1175 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1176 coding->errors++;
1177 }
1178
1179 no_more_source:
1180 coding->consumed_char += consumed_chars_base;
1181 coding->consumed = src_base - coding->source;
1182 coding->charbuf_used = charbuf - coding->charbuf;
1183 }
1184
1185
1186 static int
1187 encode_coding_utf_8 (coding)
1188 struct coding_system *coding;
1189 {
1190 int multibytep = coding->dst_multibyte;
1191 int *charbuf = coding->charbuf;
1192 int *charbuf_end = charbuf + coding->charbuf_used;
1193 unsigned char *dst = coding->destination + coding->produced;
1194 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1195 int produced_chars = 0;
1196 int c;
1197
1198 if (multibytep)
1199 {
1200 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1201
1202 while (charbuf < charbuf_end)
1203 {
1204 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1205
1206 ASSURE_DESTINATION (safe_room);
1207 c = *charbuf++;
1208 CHAR_STRING_ADVANCE (c, pend);
1209 for (p = str; p < pend; p++)
1210 EMIT_ONE_BYTE (*p);
1211 }
1212 }
1213 else
1214 {
1215 int safe_room = MAX_MULTIBYTE_LENGTH;
1216
1217 while (charbuf < charbuf_end)
1218 {
1219 ASSURE_DESTINATION (safe_room);
1220 c = *charbuf++;
1221 dst += CHAR_STRING (c, dst);
1222 produced_chars++;
1223 }
1224 }
1225 coding->result = CODING_RESULT_SUCCESS;
1226 coding->produced_char += produced_chars;
1227 coding->produced = dst - coding->destination;
1228 return 0;
1229 }
1230
1231
1232 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1233 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1234 Little Endian (otherwise). If it is, return
1235 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1236 else return 0. */
1237
1238 #define UTF_16_HIGH_SURROGATE_P(val) \
1239 (((val) & 0xFC00) == 0xD800)
1240
1241 #define UTF_16_LOW_SURROGATE_P(val) \
1242 (((val) & 0xFC00) == 0xDC00)
1243
1244 #define UTF_16_INVALID_P(val) \
1245 (((val) == 0xFFFE) \
1246 || ((val) == 0xFFFF) \
1247 || UTF_16_LOW_SURROGATE_P (val))
1248
1249
1250 static int
1251 detect_coding_utf_16 (coding, mask)
1252 struct coding_system *coding;
1253 int *mask;
1254 {
1255 unsigned char *src = coding->source, *src_base = src;
1256 unsigned char *src_end = coding->source + coding->src_bytes;
1257 int multibytep = coding->src_multibyte;
1258 int consumed_chars = 0;
1259 int c1, c2;
1260
1261 ONE_MORE_BYTE (c1);
1262 ONE_MORE_BYTE (c2);
1263
1264 if ((c1 == 0xFF) && (c2 == 0xFE))
1265 {
1266 *mask &= CATEGORY_MASK_UTF_16_LE;
1267 return 1;
1268 }
1269 else if ((c1 == 0xFE) && (c2 == 0xFF))
1270 {
1271 *mask &= CATEGORY_MASK_UTF_16_BE;
1272 return 1;
1273 }
1274 no_more_source:
1275 return 0;
1276 }
1277
1278 static void
1279 decode_coding_utf_16 (coding)
1280 struct coding_system *coding;
1281 {
1282 unsigned char *src = coding->source + coding->consumed;
1283 unsigned char *src_end = coding->source + coding->src_bytes;
1284 unsigned char *src_base;
1285 int *charbuf = coding->charbuf;
1286 int *charbuf_end = charbuf + coding->charbuf_size;
1287 int consumed_chars = 0, consumed_chars_base;
1288 int multibytep = coding->src_multibyte;
1289 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1290 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1291 int surrogate = CODING_UTF_16_SURROGATE (coding);
1292 Lisp_Object attr, eol_type, charset_list;
1293
1294 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1295
1296 if (bom != utf_16_without_bom)
1297 {
1298 int c, c1, c2;
1299
1300 src_base = src;
1301 ONE_MORE_BYTE (c1);
1302 ONE_MORE_BYTE (c2);
1303 c = (c1 << 8) | c2;
1304 if (bom == utf_16_with_bom)
1305 {
1306 if (endian == utf_16_big_endian
1307 ? c != 0xFFFE : c != 0xFEFF)
1308 {
1309 /* We are sure that there's enouph room at CHARBUF. */
1310 *charbuf++ = c1;
1311 *charbuf++ = c2;
1312 coding->errors++;
1313 }
1314 }
1315 else
1316 {
1317 if (c == 0xFFFE)
1318 CODING_UTF_16_ENDIAN (coding)
1319 = endian = utf_16_big_endian;
1320 else if (c == 0xFEFF)
1321 CODING_UTF_16_ENDIAN (coding)
1322 = endian = utf_16_little_endian;
1323 else
1324 {
1325 CODING_UTF_16_ENDIAN (coding)
1326 = endian = utf_16_big_endian;
1327 src = src_base;
1328 }
1329 }
1330 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1331 }
1332
1333 while (1)
1334 {
1335 int c, c1, c2;
1336
1337 src_base = src;
1338 consumed_chars_base = consumed_chars;
1339
1340 if (charbuf + 2 >= charbuf_end)
1341 break;
1342
1343 ONE_MORE_BYTE (c1);
1344 ONE_MORE_BYTE (c2);
1345 c = (endian == utf_16_big_endian
1346 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1347 if (surrogate)
1348 {
1349 if (! UTF_16_LOW_SURROGATE_P (c))
1350 {
1351 if (endian == utf_16_big_endian)
1352 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1353 else
1354 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1355 *charbuf++ = c1;
1356 *charbuf++ = c2;
1357 coding->errors++;
1358 if (UTF_16_HIGH_SURROGATE_P (c))
1359 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1360 else
1361 *charbuf++ = c;
1362 }
1363 else
1364 {
1365 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1366 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1367 *charbuf++ = c;
1368 }
1369 }
1370 else
1371 {
1372 if (UTF_16_HIGH_SURROGATE_P (c))
1373 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1374 else
1375 *charbuf++ = c;
1376 }
1377 }
1378
1379 no_more_source:
1380 coding->consumed_char += consumed_chars_base;
1381 coding->consumed = src_base - coding->source;
1382 coding->charbuf_used = charbuf - coding->charbuf;
1383 }
1384
1385 static int
1386 encode_coding_utf_16 (coding)
1387 struct coding_system *coding;
1388 {
1389 int multibytep = coding->dst_multibyte;
1390 int *charbuf = coding->charbuf;
1391 int *charbuf_end = charbuf + coding->charbuf_used;
1392 unsigned char *dst = coding->destination + coding->produced;
1393 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1394 int safe_room = 8;
1395 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1396 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1397 int produced_chars = 0;
1398 Lisp_Object attrs, eol_type, charset_list;
1399 int c;
1400
1401 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1402
1403 if (bom == utf_16_with_bom)
1404 {
1405 ASSURE_DESTINATION (safe_room);
1406 if (big_endian)
1407 EMIT_TWO_BYTES (0xFF, 0xFE);
1408 else
1409 EMIT_TWO_BYTES (0xFE, 0xFF);
1410 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1411 }
1412
1413 while (charbuf < charbuf_end)
1414 {
1415 ASSURE_DESTINATION (safe_room);
1416 c = *charbuf++;
1417 if (c >= MAX_UNICODE_CHAR)
1418 c = coding->default_char;
1419
1420 if (c < 0x10000)
1421 {
1422 if (big_endian)
1423 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1424 else
1425 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1426 }
1427 else
1428 {
1429 int c1, c2;
1430
1431 c -= 0x10000;
1432 c1 = (c >> 10) + 0xD800;
1433 c2 = (c & 0x3FF) + 0xDC00;
1434 if (big_endian)
1435 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1436 else
1437 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1438 }
1439 }
1440 coding->result = CODING_RESULT_SUCCESS;
1441 coding->produced = dst - coding->destination;
1442 coding->produced_char += produced_chars;
1443 return 0;
1444 }
1445
1446 \f
1447 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1448
1449 /* Emacs' internal format for representation of multiple character
1450 sets is a kind of multi-byte encoding, i.e. characters are
1451 represented by variable-length sequences of one-byte codes.
1452
1453 ASCII characters and control characters (e.g. `tab', `newline') are
1454 represented by one-byte sequences which are their ASCII codes, in
1455 the range 0x00 through 0x7F.
1456
1457 8-bit characters of the range 0x80..0x9F are represented by
1458 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1459 code + 0x20).
1460
1461 8-bit characters of the range 0xA0..0xFF are represented by
1462 one-byte sequences which are their 8-bit code.
1463
1464 The other characters are represented by a sequence of `base
1465 leading-code', optional `extended leading-code', and one or two
1466 `position-code's. The length of the sequence is determined by the
1467 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1468 whereas extended leading-code and position-code take the range 0xA0
1469 through 0xFF. See `charset.h' for more details about leading-code
1470 and position-code.
1471
1472 --- CODE RANGE of Emacs' internal format ---
1473 character set range
1474 ------------- -----
1475 ascii 0x00..0x7F
1476 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1477 eight-bit-graphic 0xA0..0xBF
1478 ELSE 0x81..0x9D + [0xA0..0xFF]+
1479 ---------------------------------------------
1480
1481 As this is the internal character representation, the format is
1482 usually not used externally (i.e. in a file or in a data sent to a
1483 process). But, it is possible to have a text externally in this
1484 format (i.e. by encoding by the coding system `emacs-mule').
1485
1486 In that case, a sequence of one-byte codes has a slightly different
1487 form.
1488
1489 At first, all characters in eight-bit-control are represented by
1490 one-byte sequences which are their 8-bit code.
1491
1492 Next, character composition data are represented by the byte
1493 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1494 where,
1495 METHOD is 0xF0 plus one of composition method (enum
1496 composition_method),
1497
1498 BYTES is 0xA0 plus a byte length of this composition data,
1499
1500 CHARS is 0x20 plus a number of characters composed by this
1501 data,
1502
1503 COMPONENTs are characters of multibye form or composition
1504 rules encoded by two-byte of ASCII codes.
1505
1506 In addition, for backward compatibility, the following formats are
1507 also recognized as composition data on decoding.
1508
1509 0x80 MSEQ ...
1510 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1511
1512 Here,
1513 MSEQ is a multibyte form but in these special format:
1514 ASCII: 0xA0 ASCII_CODE+0x80,
1515 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1516 RULE is a one byte code of the range 0xA0..0xF0 that
1517 represents a composition rule.
1518 */
1519
1520 char emacs_mule_bytes[256];
1521
1522 /* Leading-code followed by extended leading-code. */
1523 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1524 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1525 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1526 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1527
1528
1529 int
1530 emacs_mule_char (coding, composition, nbytes, nchars)
1531 struct coding_system *coding;
1532 int composition;
1533 int *nbytes, *nchars;
1534 {
1535 unsigned char *src = coding->source + coding->consumed;
1536 unsigned char *src_end = coding->source + coding->src_bytes;
1537 int multibytep = coding->src_multibyte;
1538 unsigned char *src_base = src;
1539 struct charset *charset;
1540 unsigned code;
1541 int c;
1542 int consumed_chars = 0;
1543
1544 ONE_MORE_BYTE (c);
1545 if (composition)
1546 {
1547 c -= 0x20;
1548 if (c == 0x80)
1549 {
1550 ONE_MORE_BYTE (c);
1551 if (c < 0xA0)
1552 goto invalid_code;
1553 *nbytes = src - src_base;
1554 *nchars = consumed_chars;
1555 return (c - 0x80);
1556 }
1557 }
1558
1559 switch (emacs_mule_bytes[c])
1560 {
1561 case 2:
1562 if (! (charset = emacs_mule_charset[c]))
1563 goto invalid_code;
1564 ONE_MORE_BYTE (c);
1565 code = c & 0x7F;
1566 break;
1567
1568 case 3:
1569 if (c == LEADING_CODE_PRIVATE_11
1570 || c == LEADING_CODE_PRIVATE_12)
1571 {
1572 ONE_MORE_BYTE (c);
1573 if (! (charset = emacs_mule_charset[c]))
1574 goto invalid_code;
1575 ONE_MORE_BYTE (c);
1576 code = c & 0x7F;
1577 }
1578 else
1579 {
1580 if (! (charset = emacs_mule_charset[c]))
1581 goto invalid_code;
1582 ONE_MORE_BYTE (c);
1583 code = (c & 0x7F) << 7;
1584 ONE_MORE_BYTE (c);
1585 code |= c & 0x7F;
1586 }
1587 break;
1588
1589 case 4:
1590 if (! (charset = emacs_mule_charset[c]))
1591 goto invalid_code;
1592 ONE_MORE_BYTE (c);
1593 code = (c & 0x7F) << 7;
1594 ONE_MORE_BYTE (c);
1595 code |= c & 0x7F;
1596 break;
1597
1598 case 1:
1599 code = c;
1600 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1601 : code < 0xA0 ? charset_8_bit_control
1602 : charset_8_bit_graphic);
1603 break;
1604
1605 default:
1606 abort ();
1607 }
1608 c = DECODE_CHAR (charset, code);
1609 if (c < 0)
1610 goto invalid_code;
1611 *nbytes = src - src_base;
1612 *nchars = consumed_chars;
1613 return c;
1614
1615 no_more_source:
1616 return -2;
1617
1618 invalid_code:
1619 return -1;
1620 }
1621
1622
1623 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1624 Check if a text is encoded in `emacs-mule'. */
1625
1626 static int
1627 detect_coding_emacs_mule (coding, mask)
1628 struct coding_system *coding;
1629 int *mask;
1630 {
1631 unsigned char *src = coding->source, *src_base = src;
1632 unsigned char *src_end = coding->source + coding->src_bytes;
1633 int multibytep = coding->src_multibyte;
1634 int consumed_chars = 0;
1635 int c;
1636 int found = 0;
1637
1638 /* A coding system of this category is always ASCII compatible. */
1639 src += coding->head_ascii;
1640
1641 while (1)
1642 {
1643 ONE_MORE_BYTE (c);
1644
1645 if (c == 0x80)
1646 {
1647 /* Perhaps the start of composite character. We simple skip
1648 it because analyzing it is too heavy for detecting. But,
1649 at least, we check that the composite character
1650 constitues of more than 4 bytes. */
1651 unsigned char *src_base;
1652
1653 repeat:
1654 src_base = src;
1655 do
1656 {
1657 ONE_MORE_BYTE (c);
1658 }
1659 while (c >= 0xA0);
1660
1661 if (src - src_base <= 4)
1662 break;
1663 found = 1;
1664 if (c == 0x80)
1665 goto repeat;
1666 }
1667
1668 if (c < 0x80)
1669 {
1670 if (c < 0x20
1671 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1672 break;
1673 }
1674 else
1675 {
1676 unsigned char *src_base = src - 1;
1677
1678 do
1679 {
1680 ONE_MORE_BYTE (c);
1681 }
1682 while (c >= 0xA0);
1683 if (src - src_base != emacs_mule_bytes[*src_base])
1684 break;
1685 found = 1;
1686 }
1687 }
1688 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1689 return 0;
1690
1691 no_more_source:
1692 if (!found)
1693 return 0;
1694 *mask &= CATEGORY_MASK_EMACS_MULE;
1695 return 1;
1696 }
1697
1698
1699 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1700
1701 /* Decode a character represented as a component of composition
1702 sequence of Emacs 20/21 style at SRC. Set C to that character and
1703 update SRC to the head of next character (or an encoded composition
1704 rule). If SRC doesn't points a composition component, set C to -1.
1705 If SRC points an invalid byte sequence, global exit by a return
1706 value 0. */
1707
1708 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1709 if (1) \
1710 { \
1711 int c; \
1712 int nbytes, nchars; \
1713 \
1714 if (src == src_end) \
1715 break; \
1716 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \
1717 if (c < 0) \
1718 { \
1719 if (c == -2) \
1720 break; \
1721 goto invalid_code; \
1722 } \
1723 *buf++ = c; \
1724 src += nbytes; \
1725 consumed_chars += nchars; \
1726 } \
1727 else
1728
1729
1730 /* Decode a composition rule represented as a component of composition
1731 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC
1732 points an invalid byte sequence, set C to -1. */
1733
1734 #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \
1735 do { \
1736 int c, gref, nref; \
1737 \
1738 if (src < src_end) \
1739 goto invalid_code; \
1740 ONE_MORE_BYTE_NO_CHECK (c); \
1741 c -= 0xA0; \
1742 if (c < 0 || c >= 81) \
1743 goto invalid_code; \
1744 \
1745 gref = c / 9, nref = c % 9; \
1746 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1747 } while (0)
1748
1749
1750 #define ADD_COMPOSITION_DATA(buf, method, nchars) \
1751 do { \
1752 *buf++ = -5; \
1753 *buf++ = coding->produced_char + char_offset; \
1754 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1755 *buf++ = method; \
1756 *buf++ = nchars; \
1757 } while (0)
1758
1759
1760 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1761 do { \
1762 /* Emacs 21 style format. The first three bytes at SRC are \
1763 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1764 the byte length of this composition information, CHARS is the \
1765 number of characters composed by this composition. */ \
1766 enum composition_method method = c - 0xF0; \
1767 int consumed_chars_limit; \
1768 int nbytes, nchars; \
1769 \
1770 ONE_MORE_BYTE (c); \
1771 nbytes = c - 0xA0; \
1772 if (nbytes < 3) \
1773 goto invalid_code; \
1774 ONE_MORE_BYTE (c); \
1775 nchars = c - 0xA0; \
1776 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1777 consumed_chars_limit = consumed_chars_base + nbytes; \
1778 if (method != COMPOSITION_RELATIVE) \
1779 { \
1780 int i = 0; \
1781 while (consumed_chars < consumed_chars_limit) \
1782 { \
1783 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1784 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \
1785 else \
1786 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1787 } \
1788 if (consumed_chars < consumed_chars_limit) \
1789 goto invalid_code; \
1790 } \
1791 } while (0)
1792
1793
1794 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1795 do { \
1796 /* Emacs 20 style format for relative composition. */ \
1797 /* Store multibyte form of characters to be composed. */ \
1798 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1799 int *buf = components; \
1800 int i, j; \
1801 \
1802 src = src_base; \
1803 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1804 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1805 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1806 if (i < 2) \
1807 goto invalid_code; \
1808 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1809 for (j = 0; j < i; j++) \
1810 *charbuf++ = components[j]; \
1811 } while (0)
1812
1813
1814 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1815 do { \
1816 /* Emacs 20 style format for rule-base composition. */ \
1817 /* Store multibyte form of characters to be composed. */ \
1818 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1819 int *buf = components; \
1820 int i, j; \
1821 \
1822 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1823 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1824 { \
1825 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \
1826 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1827 } \
1828 if (i < 1 || (buf - components) % 2 == 0) \
1829 goto invalid_code; \
1830 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1831 goto no_more_source; \
1832 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1833 for (j = 0; j < i; j++) \
1834 *charbuf++ = components[j]; \
1835 for (j = 0; j < i; j += 2) \
1836 *charbuf++ = components[j]; \
1837 } while (0)
1838
1839
1840 static void
1841 decode_coding_emacs_mule (coding)
1842 struct coding_system *coding;
1843 {
1844 unsigned char *src = coding->source + coding->consumed;
1845 unsigned char *src_end = coding->source + coding->src_bytes;
1846 unsigned char *src_base;
1847 int *charbuf = coding->charbuf;
1848 int *charbuf_end = charbuf + coding->charbuf_size;
1849 int consumed_chars = 0, consumed_chars_base;
1850 int char_offset = 0;
1851 int multibytep = coding->src_multibyte;
1852 Lisp_Object attrs, eol_type, charset_list;
1853
1854 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1855
1856 while (1)
1857 {
1858 int c;
1859
1860 src_base = src;
1861 consumed_chars_base = consumed_chars;
1862
1863 if (charbuf >= charbuf_end)
1864 break;
1865
1866 ONE_MORE_BYTE (c);
1867
1868 if (c < 0x80)
1869 {
1870 if (c == '\r')
1871 {
1872 if (EQ (eol_type, Qdos))
1873 {
1874 if (src == src_end)
1875 goto no_more_source;
1876 if (*src == '\n')
1877 ONE_MORE_BYTE (c);
1878 }
1879 else if (EQ (eol_type, Qmac))
1880 c = '\n';
1881 }
1882 *charbuf++ = c;
1883 char_offset++;
1884 }
1885 else if (c == 0x80)
1886 {
1887 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1888 break;
1889 ONE_MORE_BYTE (c);
1890 if (c - 0xF0 >= COMPOSITION_RELATIVE
1891 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
1892 DECODE_EMACS_MULE_21_COMPOSITION (c);
1893 else if (c < 0xC0)
1894 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1895 else if (c == 0xFF)
1896 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1897 else
1898 goto invalid_code;
1899 }
1900 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1901 {
1902 int nbytes, nchars;
1903 src--;
1904 c = emacs_mule_char (coding, 0, &nbytes, &nchars);
1905 if (c < 0)
1906 {
1907 if (c == -2)
1908 break;
1909 goto invalid_code;
1910 }
1911 *charbuf++ = c;
1912 char_offset++;
1913 }
1914 continue;
1915
1916 invalid_code:
1917 src = src_base;
1918 consumed_chars = consumed_chars_base;
1919 ONE_MORE_BYTE (c);
1920 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1921 coding->errors++;
1922 }
1923
1924 no_more_source:
1925 coding->consumed_char += consumed_chars_base;
1926 coding->consumed = src_base - coding->source;
1927 coding->charbuf_used = charbuf - coding->charbuf;
1928 }
1929
1930
1931 #define EMACS_MULE_LEADING_CODES(id, codes) \
1932 do { \
1933 if (id < 0xA0) \
1934 codes[0] = id, codes[1] = 0; \
1935 else if (id < 0xE0) \
1936 codes[0] = 0x9A, codes[1] = id; \
1937 else if (id < 0xF0) \
1938 codes[0] = 0x9B, codes[1] = id; \
1939 else if (id < 0xF5) \
1940 codes[0] = 0x9C, codes[1] = id; \
1941 else \
1942 codes[0] = 0x9D, codes[1] = id; \
1943 } while (0);
1944
1945
1946 static int
1947 encode_coding_emacs_mule (coding)
1948 struct coding_system *coding;
1949 {
1950 int multibytep = coding->dst_multibyte;
1951 int *charbuf = coding->charbuf;
1952 int *charbuf_end = charbuf + coding->charbuf_used;
1953 unsigned char *dst = coding->destination + coding->produced;
1954 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1955 int safe_room = 8;
1956 int produced_chars = 0;
1957 Lisp_Object attrs, eol_type, charset_list;
1958 int c;
1959
1960 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1961
1962 while (charbuf < charbuf_end)
1963 {
1964 ASSURE_DESTINATION (safe_room);
1965 c = *charbuf++;
1966 if (ASCII_CHAR_P (c))
1967 EMIT_ONE_ASCII_BYTE (c);
1968 else
1969 {
1970 struct charset *charset;
1971 unsigned code;
1972 int dimension;
1973 int emacs_mule_id;
1974 unsigned char leading_codes[2];
1975
1976 charset = char_charset (c, charset_list, &code);
1977 if (! charset)
1978 {
1979 c = coding->default_char;
1980 if (ASCII_CHAR_P (c))
1981 {
1982 EMIT_ONE_ASCII_BYTE (c);
1983 continue;
1984 }
1985 charset = char_charset (c, charset_list, &code);
1986 }
1987 dimension = CHARSET_DIMENSION (charset);
1988 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
1989 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
1990 EMIT_ONE_BYTE (leading_codes[0]);
1991 if (leading_codes[1])
1992 EMIT_ONE_BYTE (leading_codes[1]);
1993 if (dimension == 1)
1994 EMIT_ONE_BYTE (code);
1995 else
1996 {
1997 EMIT_ONE_BYTE (code >> 8);
1998 EMIT_ONE_BYTE (code & 0xFF);
1999 }
2000 }
2001 }
2002 coding->result = CODING_RESULT_SUCCESS;
2003 coding->produced_char += produced_chars;
2004 coding->produced = dst - coding->destination;
2005 return 0;
2006 }
2007
2008 \f
2009 /*** 7. ISO2022 handlers ***/
2010
2011 /* The following note describes the coding system ISO2022 briefly.
2012 Since the intention of this note is to help understand the
2013 functions in this file, some parts are NOT ACCURATE or OVERLY
2014 SIMPLIFIED. For thorough understanding, please refer to the
2015 original document of ISO2022.
2016
2017 ISO2022 provides many mechanisms to encode several character sets
2018 in 7-bit and 8-bit environments. For 7-bite environments, all text
2019 is encoded using bytes less than 128. This may make the encoded
2020 text a little bit longer, but the text passes more easily through
2021 several gateways, some of which strip off MSB (Most Signigant Bit).
2022
2023 There are two kinds of character sets: control character set and
2024 graphic character set. The former contains control characters such
2025 as `newline' and `escape' to provide control functions (control
2026 functions are also provided by escape sequences). The latter
2027 contains graphic characters such as 'A' and '-'. Emacs recognizes
2028 two control character sets and many graphic character sets.
2029
2030 Graphic character sets are classified into one of the following
2031 four classes, according to the number of bytes (DIMENSION) and
2032 number of characters in one dimension (CHARS) of the set:
2033 - DIMENSION1_CHARS94
2034 - DIMENSION1_CHARS96
2035 - DIMENSION2_CHARS94
2036 - DIMENSION2_CHARS96
2037
2038 In addition, each character set is assigned an identification tag,
2039 unique for each set, called "final character" (denoted as <F>
2040 hereafter). The <F> of each character set is decided by ECMA(*)
2041 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2042 (0x30..0x3F are for private use only).
2043
2044 Note (*): ECMA = European Computer Manufacturers Association
2045
2046 Here are examples of graphic character set [NAME(<F>)]:
2047 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2048 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2049 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2050 o DIMENSION2_CHARS96 -- none for the moment
2051
2052 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2053 C0 [0x00..0x1F] -- control character plane 0
2054 GL [0x20..0x7F] -- graphic character plane 0
2055 C1 [0x80..0x9F] -- control character plane 1
2056 GR [0xA0..0xFF] -- graphic character plane 1
2057
2058 A control character set is directly designated and invoked to C0 or
2059 C1 by an escape sequence. The most common case is that:
2060 - ISO646's control character set is designated/invoked to C0, and
2061 - ISO6429's control character set is designated/invoked to C1,
2062 and usually these designations/invocations are omitted in encoded
2063 text. In a 7-bit environment, only C0 can be used, and a control
2064 character for C1 is encoded by an appropriate escape sequence to
2065 fit into the environment. All control characters for C1 are
2066 defined to have corresponding escape sequences.
2067
2068 A graphic character set is at first designated to one of four
2069 graphic registers (G0 through G3), then these graphic registers are
2070 invoked to GL or GR. These designations and invocations can be
2071 done independently. The most common case is that G0 is invoked to
2072 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2073 these invocations and designations are omitted in encoded text.
2074 In a 7-bit environment, only GL can be used.
2075
2076 When a graphic character set of CHARS94 is invoked to GL, codes
2077 0x20 and 0x7F of the GL area work as control characters SPACE and
2078 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2079 be used.
2080
2081 There are two ways of invocation: locking-shift and single-shift.
2082 With locking-shift, the invocation lasts until the next different
2083 invocation, whereas with single-shift, the invocation affects the
2084 following character only and doesn't affect the locking-shift
2085 state. Invocations are done by the following control characters or
2086 escape sequences:
2087
2088 ----------------------------------------------------------------------
2089 abbrev function cntrl escape seq description
2090 ----------------------------------------------------------------------
2091 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2092 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2093 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2094 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2095 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2096 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2097 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2098 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2099 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2100 ----------------------------------------------------------------------
2101 (*) These are not used by any known coding system.
2102
2103 Control characters for these functions are defined by macros
2104 ISO_CODE_XXX in `coding.h'.
2105
2106 Designations are done by the following escape sequences:
2107 ----------------------------------------------------------------------
2108 escape sequence description
2109 ----------------------------------------------------------------------
2110 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2111 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2112 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2113 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2114 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2115 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2116 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2117 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2118 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2119 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2120 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2121 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2122 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2123 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2124 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2125 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2126 ----------------------------------------------------------------------
2127
2128 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2129 of dimension 1, chars 94, and final character <F>, etc...
2130
2131 Note (*): Although these designations are not allowed in ISO2022,
2132 Emacs accepts them on decoding, and produces them on encoding
2133 CHARS96 character sets in a coding system which is characterized as
2134 7-bit environment, non-locking-shift, and non-single-shift.
2135
2136 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2137 '(' must be omitted. We refer to this as "short-form" hereafter.
2138
2139 Now you may notice that there are a lot of ways for encoding the
2140 same multilingual text in ISO2022. Actually, there exist many
2141 coding systems such as Compound Text (used in X11's inter client
2142 communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2143 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
2144 localized platforms), and all of these are variants of ISO2022.
2145
2146 In addition to the above, Emacs handles two more kinds of escape
2147 sequences: ISO6429's direction specification and Emacs' private
2148 sequence for specifying character composition.
2149
2150 ISO6429's direction specification takes the following form:
2151 o CSI ']' -- end of the current direction
2152 o CSI '0' ']' -- end of the current direction
2153 o CSI '1' ']' -- start of left-to-right text
2154 o CSI '2' ']' -- start of right-to-left text
2155 The control character CSI (0x9B: control sequence introducer) is
2156 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2157
2158 Character composition specification takes the following form:
2159 o ESC '0' -- start relative composition
2160 o ESC '1' -- end composition
2161 o ESC '2' -- start rule-base composition (*)
2162 o ESC '3' -- start relative composition with alternate chars (**)
2163 o ESC '4' -- start rule-base composition with alternate chars (**)
2164 Since these are not standard escape sequences of any ISO standard,
2165 the use of them for these meaning is restricted to Emacs only.
2166
2167 (*) This form is used only in Emacs 20.5 and the older versions,
2168 but the newer versions can safely decode it.
2169 (**) This form is used only in Emacs 21.1 and the newer versions,
2170 and the older versions can't decode it.
2171
2172 Here's a list of examples usages of these composition escape
2173 sequences (categorized by `enum composition_method').
2174
2175 COMPOSITION_RELATIVE:
2176 ESC 0 CHAR [ CHAR ] ESC 1
2177 COMPOSITOIN_WITH_RULE:
2178 ESC 2 CHAR [ RULE CHAR ] ESC 1
2179 COMPOSITION_WITH_ALTCHARS:
2180 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2181 COMPOSITION_WITH_RULE_ALTCHARS:
2182 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2183
2184 enum iso_code_class_type iso_code_class[256];
2185
2186 #define SAFE_CHARSET_P(coding, id) \
2187 ((id) <= (coding)->max_charset_id \
2188 && (coding)->safe_charsets[id] >= 0)
2189
2190
2191 #define SHIFT_OUT_OK(category) \
2192 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2193
2194 static void
2195 setup_iso_safe_charsets (Lisp_Object attrs)
2196 {
2197 Lisp_Object charset_list, safe_charsets;
2198 Lisp_Object request;
2199 Lisp_Object reg_usage;
2200 Lisp_Object tail;
2201 int reg94, reg96;
2202 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2203 int max_charset_id;
2204
2205 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2206 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2207 && ! EQ (charset_list, Viso_2022_charset_list))
2208 {
2209 CODING_ATTR_CHARSET_LIST (attrs)
2210 = charset_list = Viso_2022_charset_list;
2211 ASET (attrs, coding_attr_safe_charsets, Qnil);
2212 }
2213
2214 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2215 return;
2216
2217 max_charset_id = 0;
2218 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2219 {
2220 int id = XINT (XCAR (tail));
2221 if (max_charset_id < id)
2222 max_charset_id = id;
2223 }
2224
2225 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2226 make_number (255));
2227 request = AREF (attrs, coding_attr_iso_request);
2228 reg_usage = AREF (attrs, coding_attr_iso_usage);
2229 reg94 = XINT (XCAR (reg_usage));
2230 reg96 = XINT (XCDR (reg_usage));
2231
2232 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2233 {
2234 Lisp_Object id;
2235 Lisp_Object reg;
2236 struct charset *charset;
2237
2238 id = XCAR (tail);
2239 charset = CHARSET_FROM_ID (XINT (id));
2240 reg = Fcdr (Fassq (request, id));
2241 if (! NILP (reg))
2242 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2243 else if (charset->iso_chars_96)
2244 {
2245 if (reg96 < 4)
2246 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2247 }
2248 else
2249 {
2250 if (reg94 < 4)
2251 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2252 }
2253 }
2254 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2255 }
2256
2257
2258 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2259 Check if a text is encoded in ISO2022. If it is, returns an
2260 integer in which appropriate flag bits any of:
2261 CATEGORY_MASK_ISO_7
2262 CATEGORY_MASK_ISO_7_TIGHT
2263 CATEGORY_MASK_ISO_8_1
2264 CATEGORY_MASK_ISO_8_2
2265 CATEGORY_MASK_ISO_7_ELSE
2266 CATEGORY_MASK_ISO_8_ELSE
2267 are set. If a code which should never appear in ISO2022 is found,
2268 returns 0. */
2269
2270 static int
2271 detect_coding_iso_2022 (coding, mask)
2272 struct coding_system *coding;
2273 int *mask;
2274 {
2275 unsigned char *src = coding->source, *src_base = src;
2276 unsigned char *src_end = coding->source + coding->src_bytes;
2277 int multibytep = coding->src_multibyte;
2278 int mask_iso = CATEGORY_MASK_ISO;
2279 int mask_found = 0, mask_8bit_found = 0;
2280 int reg[4], shift_out = 0, single_shifting = 0;
2281 int id;
2282 int c, c1;
2283 int consumed_chars = 0;
2284 int i;
2285
2286 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2287 {
2288 struct coding_system *this = &(coding_categories[i]);
2289 Lisp_Object attrs, val;
2290
2291 attrs = CODING_ID_ATTRS (this->id);
2292 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2293 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2294 setup_iso_safe_charsets (attrs);
2295 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2296 this->max_charset_id = XSTRING (val)->size - 1;
2297 this->safe_charsets = (char *) XSTRING (val)->data;
2298 }
2299
2300 /* A coding system of this category is always ASCII compatible. */
2301 src += coding->head_ascii;
2302
2303 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2304 while (mask_iso && src < src_end)
2305 {
2306 ONE_MORE_BYTE (c);
2307 switch (c)
2308 {
2309 case ISO_CODE_ESC:
2310 if (inhibit_iso_escape_detection)
2311 break;
2312 single_shifting = 0;
2313 ONE_MORE_BYTE (c);
2314 if (c >= '(' && c <= '/')
2315 {
2316 /* Designation sequence for a charset of dimension 1. */
2317 ONE_MORE_BYTE (c1);
2318 if (c1 < ' ' || c1 >= 0x80
2319 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2320 /* Invalid designation sequence. Just ignore. */
2321 break;
2322 reg[(c - '(') % 4] = id;
2323 }
2324 else if (c == '$')
2325 {
2326 /* Designation sequence for a charset of dimension 2. */
2327 ONE_MORE_BYTE (c);
2328 if (c >= '@' && c <= 'B')
2329 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2330 reg[0] = id = iso_charset_table[1][0][c];
2331 else if (c >= '(' && c <= '/')
2332 {
2333 ONE_MORE_BYTE (c1);
2334 if (c1 < ' ' || c1 >= 0x80
2335 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2336 /* Invalid designation sequence. Just ignore. */
2337 break;
2338 reg[(c - '(') % 4] = id;
2339 }
2340 else
2341 /* Invalid designation sequence. Just ignore. */
2342 break;
2343 }
2344 else if (c == 'N' || c == 'O')
2345 {
2346 /* ESC <Fe> for SS2 or SS3. */
2347 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
2348 break;
2349 }
2350 else if (c >= '0' && c <= '4')
2351 {
2352 /* ESC <Fp> for start/end composition. */
2353 mask_found |= CATEGORY_MASK_ISO;
2354 break;
2355 }
2356 else
2357 {
2358 /* Invalid escape sequence. */
2359 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2360 break;
2361 }
2362
2363 /* We found a valid designation sequence for CHARSET. */
2364 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2365 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2366 id))
2367 mask_found |= CATEGORY_MASK_ISO_7;
2368 else
2369 mask_iso &= ~CATEGORY_MASK_ISO_7;
2370 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2371 id))
2372 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
2373 else
2374 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2375 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2376 id))
2377 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
2378 else
2379 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2380 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2381 id))
2382 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
2383 else
2384 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
2385 break;
2386
2387 case ISO_CODE_SO:
2388 if (inhibit_iso_escape_detection)
2389 break;
2390 single_shifting = 0;
2391 if (shift_out == 0
2392 && (reg[1] >= 0
2393 || SHIFT_OUT_OK (coding_category_iso_7_else)
2394 || SHIFT_OUT_OK (coding_category_iso_8_else)))
2395 {
2396 /* Locking shift out. */
2397 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2398 mask_found |= CATEGORY_MASK_ISO_ELSE;
2399 }
2400 break;
2401
2402 case ISO_CODE_SI:
2403 if (inhibit_iso_escape_detection)
2404 break;
2405 single_shifting = 0;
2406 if (shift_out == 1)
2407 {
2408 /* Locking shift in. */
2409 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2410 mask_found |= CATEGORY_MASK_ISO_ELSE;
2411 }
2412 break;
2413
2414 case ISO_CODE_CSI:
2415 single_shifting = 0;
2416 case ISO_CODE_SS2:
2417 case ISO_CODE_SS3:
2418 {
2419 int newmask = CATEGORY_MASK_ISO_8_ELSE;
2420
2421 if (inhibit_iso_escape_detection)
2422 break;
2423 if (c != ISO_CODE_CSI)
2424 {
2425 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2426 & CODING_ISO_FLAG_SINGLE_SHIFT)
2427 newmask |= CATEGORY_MASK_ISO_8_1;
2428 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2429 & CODING_ISO_FLAG_SINGLE_SHIFT)
2430 newmask |= CATEGORY_MASK_ISO_8_2;
2431 single_shifting = 1;
2432 }
2433 if (VECTORP (Vlatin_extra_code_table)
2434 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2435 {
2436 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2437 & CODING_ISO_FLAG_LATIN_EXTRA)
2438 newmask |= CATEGORY_MASK_ISO_8_1;
2439 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2440 & CODING_ISO_FLAG_LATIN_EXTRA)
2441 newmask |= CATEGORY_MASK_ISO_8_2;
2442 }
2443 mask_iso &= newmask;
2444 mask_found |= newmask;
2445 }
2446 break;
2447
2448 default:
2449 if (c < 0x80)
2450 {
2451 single_shifting = 0;
2452 break;
2453 }
2454 else if (c < 0xA0)
2455 {
2456 single_shifting = 0;
2457 mask_8bit_found = 1;
2458 if (VECTORP (Vlatin_extra_code_table)
2459 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2460 {
2461 int newmask = 0;
2462
2463 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2464 & CODING_ISO_FLAG_LATIN_EXTRA)
2465 newmask |= CATEGORY_MASK_ISO_8_1;
2466 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2467 & CODING_ISO_FLAG_LATIN_EXTRA)
2468 newmask |= CATEGORY_MASK_ISO_8_2;
2469 mask_iso &= newmask;
2470 mask_found |= newmask;
2471 }
2472 else
2473 return 0;
2474 }
2475 else
2476 {
2477 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2478 | CATEGORY_MASK_ISO_7_ELSE);
2479 mask_found |= CATEGORY_MASK_ISO_8_1;
2480 mask_8bit_found = 1;
2481 /* Check the length of succeeding codes of the range
2482 0xA0..0FF. If the byte length is odd, we exclude
2483 CATEGORY_MASK_ISO_8_2. We can check this only
2484 when we are not single shifting. */
2485 if (!single_shifting
2486 && mask_iso & CATEGORY_MASK_ISO_8_2)
2487 {
2488 int i = 1;
2489 while (src < src_end)
2490 {
2491 ONE_MORE_BYTE (c);
2492 if (c < 0xA0)
2493 break;
2494 i++;
2495 }
2496
2497 if (i & 1 && src < src_end)
2498 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
2499 else
2500 mask_found |= CATEGORY_MASK_ISO_8_2;
2501 }
2502 }
2503 break;
2504 }
2505 }
2506 no_more_source:
2507 if (!mask_iso)
2508 {
2509 *mask &= ~CATEGORY_MASK_ISO;
2510 return 0;
2511 }
2512 if (!mask_found)
2513 return 0;
2514 *mask &= mask_iso & mask_found;
2515 if (! mask_8bit_found)
2516 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2517 return 1;
2518 }
2519
2520
2521 /* Set designation state into CODING. */
2522 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2523 do { \
2524 int id, prev; \
2525 \
2526 if (final < '0' || final >= 128 \
2527 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2528 || !SAFE_CHARSET_P (coding, id)) \
2529 { \
2530 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2531 goto invalid_code; \
2532 } \
2533 prev = CODING_ISO_DESIGNATION (coding, reg); \
2534 CODING_ISO_DESIGNATION (coding, reg) = id; \
2535 /* If there was an invalid designation to REG previously, and this \
2536 designation is ASCII to REG, we should keep this designation \
2537 sequence. */ \
2538 if (prev == -2 && id == charset_ascii) \
2539 goto invalid_code; \
2540 } while (0)
2541
2542
2543 #define MAYBE_FINISH_COMPOSITION() \
2544 do { \
2545 int i; \
2546 if (composition_state == COMPOSING_NO) \
2547 break; \
2548 /* It is assured that we have enough room for producing \
2549 characters stored in the table `components'. */ \
2550 if (charbuf + component_idx > charbuf_end) \
2551 goto no_more_source; \
2552 composition_state = COMPOSING_NO; \
2553 if (method == COMPOSITION_RELATIVE \
2554 || method == COMPOSITION_WITH_ALTCHARS) \
2555 { \
2556 for (i = 0; i < component_idx; i++) \
2557 *charbuf++ = components[i]; \
2558 char_offset += component_idx; \
2559 } \
2560 else \
2561 { \
2562 for (i = 0; i < component_idx; i += 2) \
2563 *charbuf++ = components[i]; \
2564 char_offset += (component_idx / 2) + 1; \
2565 } \
2566 } while (0)
2567
2568
2569 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2570 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2571 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2572 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2573 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2574 */
2575
2576 #define DECODE_COMPOSITION_START(c1) \
2577 do { \
2578 if (c1 == '0' \
2579 && composition_state == COMPOSING_COMPONENT_CHAR) \
2580 { \
2581 component_len = component_idx; \
2582 composition_state = COMPOSING_CHAR; \
2583 } \
2584 else \
2585 { \
2586 unsigned char *p; \
2587 \
2588 MAYBE_FINISH_COMPOSITION (); \
2589 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2590 goto no_more_source; \
2591 for (p = src; p < src_end - 1; p++) \
2592 if (*p == ISO_CODE_ESC && p[1] == '1') \
2593 break; \
2594 if (p == src_end - 1) \
2595 { \
2596 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2597 goto invalid_code; \
2598 goto no_more_source; \
2599 } \
2600 \
2601 /* This is surely the start of a composition. */ \
2602 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2603 : c1 == '2' ? COMPOSITION_WITH_RULE \
2604 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2605 : COMPOSITION_WITH_RULE_ALTCHARS); \
2606 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2607 : COMPOSING_COMPONENT_CHAR); \
2608 component_idx = component_len = 0; \
2609 } \
2610 } while (0)
2611
2612
2613 /* Handle compositoin end sequence ESC 1. */
2614
2615 #define DECODE_COMPOSITION_END() \
2616 do { \
2617 int nchars = (component_len > 0 ? component_idx - component_len \
2618 : method == COMPOSITION_RELATIVE ? component_idx \
2619 : (component_idx + 1) / 2); \
2620 int i; \
2621 int *saved_charbuf = charbuf; \
2622 \
2623 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2624 if (method != COMPOSITION_RELATIVE) \
2625 { \
2626 if (component_len == 0) \
2627 for (i = 0; i < component_idx; i++) \
2628 *charbuf++ = components[i]; \
2629 else \
2630 for (i = 0; i < component_len; i++) \
2631 *charbuf++ = components[i]; \
2632 *saved_charbuf = saved_charbuf - charbuf; \
2633 } \
2634 if (method == COMPOSITION_WITH_RULE) \
2635 for (i = 0; i < component_idx; i += 2, char_offset++) \
2636 *charbuf++ = components[i]; \
2637 else \
2638 for (i = component_len; i < component_idx; i++, char_offset++) \
2639 *charbuf++ = components[i]; \
2640 coding->annotated = 1; \
2641 composition_state = COMPOSING_NO; \
2642 } while (0)
2643
2644
2645 /* Decode a composition rule from the byte C1 (and maybe one more byte
2646 from SRC) and store one encoded composition rule in
2647 coding->cmp_data. */
2648
2649 #define DECODE_COMPOSITION_RULE(c1) \
2650 do { \
2651 (c1) -= 32; \
2652 if (c1 < 81) /* old format (before ver.21) */ \
2653 { \
2654 int gref = (c1) / 9; \
2655 int nref = (c1) % 9; \
2656 if (gref == 4) gref = 10; \
2657 if (nref == 4) nref = 10; \
2658 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2659 } \
2660 else if (c1 < 93) /* new format (after ver.21) */ \
2661 { \
2662 ONE_MORE_BYTE (c2); \
2663 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2664 } \
2665 else \
2666 c1 = 0; \
2667 } while (0)
2668
2669
2670 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2671
2672 static void
2673 decode_coding_iso_2022 (coding)
2674 struct coding_system *coding;
2675 {
2676 unsigned char *src = coding->source + coding->consumed;
2677 unsigned char *src_end = coding->source + coding->src_bytes;
2678 unsigned char *src_base;
2679 int *charbuf = coding->charbuf;
2680 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2681 int consumed_chars = 0, consumed_chars_base;
2682 int char_offset = 0;
2683 int multibytep = coding->src_multibyte;
2684 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2685 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2686 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2687 struct charset *charset;
2688 int c;
2689 /* For handling composition sequence. */
2690 #define COMPOSING_NO 0
2691 #define COMPOSING_CHAR 1
2692 #define COMPOSING_RULE 2
2693 #define COMPOSING_COMPONENT_CHAR 3
2694 #define COMPOSING_COMPONENT_RULE 4
2695
2696 int composition_state = COMPOSING_NO;
2697 enum composition_method method;
2698 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2699 int component_idx;
2700 int component_len;
2701 Lisp_Object attrs, eol_type, charset_list;
2702
2703 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2704 setup_iso_safe_charsets (attrs);
2705
2706 while (1)
2707 {
2708 int c1, c2;
2709
2710 src_base = src;
2711 consumed_chars_base = consumed_chars;
2712
2713 if (charbuf >= charbuf_end)
2714 break;
2715
2716 ONE_MORE_BYTE (c1);
2717
2718 /* We produce no character or one character. */
2719 switch (iso_code_class [c1])
2720 {
2721 case ISO_0x20_or_0x7F:
2722 if (composition_state != COMPOSING_NO)
2723 {
2724 if (composition_state == COMPOSING_RULE
2725 || composition_state == COMPOSING_COMPONENT_RULE)
2726 {
2727 DECODE_COMPOSITION_RULE (c1);
2728 components[component_idx++] = c1;
2729 composition_state--;
2730 continue;
2731 }
2732 else if (method == COMPOSITION_WITH_RULE)
2733 composition_state = COMPOSING_RULE;
2734 else if (method == COMPOSITION_WITH_RULE_ALTCHARS
2735 && composition_state == COMPOSING_COMPONENT_CHAR)
2736 composition_state = COMPOSING_COMPONENT_CHAR;
2737 }
2738 if (charset_id_0 < 0
2739 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2740 {
2741 /* This is SPACE or DEL. */
2742 charset = CHARSET_FROM_ID (charset_ascii);
2743 break;
2744 }
2745 /* This is a graphic character, we fall down ... */
2746
2747 case ISO_graphic_plane_0:
2748 if (composition_state == COMPOSING_RULE)
2749 {
2750 DECODE_COMPOSITION_RULE (c1);
2751 components[component_idx++] = c1;
2752 composition_state = COMPOSING_CHAR;
2753 }
2754 charset = CHARSET_FROM_ID (charset_id_0);
2755 break;
2756
2757 case ISO_0xA0_or_0xFF:
2758 if (charset_id_1 < 0
2759 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2760 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2761 goto invalid_code;
2762 /* This is a graphic character, we fall down ... */
2763
2764 case ISO_graphic_plane_1:
2765 if (charset_id_1 < 0)
2766 goto invalid_code;
2767 charset = CHARSET_FROM_ID (charset_id_1);
2768 break;
2769
2770 case ISO_carriage_return:
2771 if (c1 == '\r')
2772 {
2773 if (EQ (eol_type, Qdos))
2774 {
2775 if (src == src_end)
2776 goto no_more_source;
2777 if (*src == '\n')
2778 ONE_MORE_BYTE (c1);
2779 }
2780 else if (EQ (eol_type, Qmac))
2781 c1 = '\n';
2782 }
2783 /* fall through */
2784
2785 case ISO_control_0:
2786 MAYBE_FINISH_COMPOSITION ();
2787 charset = CHARSET_FROM_ID (charset_ascii);
2788 break;
2789
2790 case ISO_control_1:
2791 MAYBE_FINISH_COMPOSITION ();
2792 goto invalid_code;
2793
2794 case ISO_shift_out:
2795 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2796 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2797 goto invalid_code;
2798 CODING_ISO_INVOCATION (coding, 0) = 1;
2799 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2800 continue;
2801
2802 case ISO_shift_in:
2803 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2804 goto invalid_code;
2805 CODING_ISO_INVOCATION (coding, 0) = 0;
2806 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2807 continue;
2808
2809 case ISO_single_shift_2_7:
2810 case ISO_single_shift_2:
2811 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2812 goto invalid_code;
2813 /* SS2 is handled as an escape sequence of ESC 'N' */
2814 c1 = 'N';
2815 goto label_escape_sequence;
2816
2817 case ISO_single_shift_3:
2818 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2819 goto invalid_code;
2820 /* SS2 is handled as an escape sequence of ESC 'O' */
2821 c1 = 'O';
2822 goto label_escape_sequence;
2823
2824 case ISO_control_sequence_introducer:
2825 /* CSI is handled as an escape sequence of ESC '[' ... */
2826 c1 = '[';
2827 goto label_escape_sequence;
2828
2829 case ISO_escape:
2830 ONE_MORE_BYTE (c1);
2831 label_escape_sequence:
2832 /* Escape sequences handled here are invocation,
2833 designation, direction specification, and character
2834 composition specification. */
2835 switch (c1)
2836 {
2837 case '&': /* revision of following character set */
2838 ONE_MORE_BYTE (c1);
2839 if (!(c1 >= '@' && c1 <= '~'))
2840 goto invalid_code;
2841 ONE_MORE_BYTE (c1);
2842 if (c1 != ISO_CODE_ESC)
2843 goto invalid_code;
2844 ONE_MORE_BYTE (c1);
2845 goto label_escape_sequence;
2846
2847 case '$': /* designation of 2-byte character set */
2848 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2849 goto invalid_code;
2850 ONE_MORE_BYTE (c1);
2851 if (c1 >= '@' && c1 <= 'B')
2852 { /* designation of JISX0208.1978, GB2312.1980,
2853 or JISX0208.1980 */
2854 DECODE_DESIGNATION (0, 2, 0, c1);
2855 }
2856 else if (c1 >= 0x28 && c1 <= 0x2B)
2857 { /* designation of DIMENSION2_CHARS94 character set */
2858 ONE_MORE_BYTE (c2);
2859 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2860 }
2861 else if (c1 >= 0x2C && c1 <= 0x2F)
2862 { /* designation of DIMENSION2_CHARS96 character set */
2863 ONE_MORE_BYTE (c2);
2864 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2865 }
2866 else
2867 goto invalid_code;
2868 /* We must update these variables now. */
2869 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2870 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2871 continue;
2872
2873 case 'n': /* invocation of locking-shift-2 */
2874 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2875 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2876 goto invalid_code;
2877 CODING_ISO_INVOCATION (coding, 0) = 2;
2878 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2879 continue;
2880
2881 case 'o': /* invocation of locking-shift-3 */
2882 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2883 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2884 goto invalid_code;
2885 CODING_ISO_INVOCATION (coding, 0) = 3;
2886 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2887 continue;
2888
2889 case 'N': /* invocation of single-shift-2 */
2890 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2891 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2892 goto invalid_code;
2893 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
2894 ONE_MORE_BYTE (c1);
2895 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2896 goto invalid_code;
2897 break;
2898
2899 case 'O': /* invocation of single-shift-3 */
2900 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2901 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2902 goto invalid_code;
2903 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
2904 ONE_MORE_BYTE (c1);
2905 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2906 goto invalid_code;
2907 break;
2908
2909 case '0': case '2': case '3': case '4': /* start composition */
2910 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2911 goto invalid_code;
2912 DECODE_COMPOSITION_START (c1);
2913 continue;
2914
2915 case '1': /* end composition */
2916 if (composition_state == COMPOSING_NO)
2917 goto invalid_code;
2918 DECODE_COMPOSITION_END ();
2919 continue;
2920
2921 case '[': /* specification of direction */
2922 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2923 goto invalid_code;
2924 /* For the moment, nested direction is not supported.
2925 So, `coding->mode & CODING_MODE_DIRECTION' zero means
2926 left-to-right, and nozero means right-to-left. */
2927 ONE_MORE_BYTE (c1);
2928 switch (c1)
2929 {
2930 case ']': /* end of the current direction */
2931 coding->mode &= ~CODING_MODE_DIRECTION;
2932
2933 case '0': /* end of the current direction */
2934 case '1': /* start of left-to-right direction */
2935 ONE_MORE_BYTE (c1);
2936 if (c1 == ']')
2937 coding->mode &= ~CODING_MODE_DIRECTION;
2938 else
2939 goto invalid_code;
2940 break;
2941
2942 case '2': /* start of right-to-left direction */
2943 ONE_MORE_BYTE (c1);
2944 if (c1 == ']')
2945 coding->mode |= CODING_MODE_DIRECTION;
2946 else
2947 goto invalid_code;
2948 break;
2949
2950 default:
2951 goto invalid_code;
2952 }
2953 continue;
2954
2955 default:
2956 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2957 goto invalid_code;
2958 if (c1 >= 0x28 && c1 <= 0x2B)
2959 { /* designation of DIMENSION1_CHARS94 character set */
2960 ONE_MORE_BYTE (c2);
2961 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
2962 }
2963 else if (c1 >= 0x2C && c1 <= 0x2F)
2964 { /* designation of DIMENSION1_CHARS96 character set */
2965 ONE_MORE_BYTE (c2);
2966 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
2967 }
2968 else
2969 goto invalid_code;
2970 /* We must update these variables now. */
2971 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2972 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2973 continue;
2974 }
2975 }
2976
2977 /* Now we know CHARSET and 1st position code C1 of a character.
2978 Produce a decoded character while getting 2nd position code
2979 C2 if necessary. */
2980 c1 &= 0x7F;
2981 if (CHARSET_DIMENSION (charset) > 1)
2982 {
2983 ONE_MORE_BYTE (c2);
2984 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
2985 /* C2 is not in a valid range. */
2986 goto invalid_code;
2987 c1 = (c1 << 8) | (c2 & 0x7F);
2988 if (CHARSET_DIMENSION (charset) > 2)
2989 {
2990 ONE_MORE_BYTE (c2);
2991 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
2992 /* C2 is not in a valid range. */
2993 goto invalid_code;
2994 c1 = (c1 << 8) | (c2 & 0x7F);
2995 }
2996 }
2997
2998 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
2999 if (c < 0)
3000 {
3001 MAYBE_FINISH_COMPOSITION ();
3002 for (; src_base < src; src_base++, char_offset++)
3003 {
3004 if (ASCII_BYTE_P (*src_base))
3005 *charbuf++ = *src_base;
3006 else
3007 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3008 }
3009 }
3010 else if (composition_state == COMPOSING_NO)
3011 {
3012 *charbuf++ = c;
3013 char_offset++;
3014 }
3015 else
3016 components[component_idx++] = c;
3017 continue;
3018
3019 invalid_code:
3020 MAYBE_FINISH_COMPOSITION ();
3021 src = src_base;
3022 consumed_chars = consumed_chars_base;
3023 ONE_MORE_BYTE (c);
3024 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3025 coding->errors++;
3026 }
3027
3028 no_more_source:
3029 coding->consumed_char += consumed_chars_base;
3030 coding->consumed = src_base - coding->source;
3031 coding->charbuf_used = charbuf - coding->charbuf;
3032 }
3033
3034
3035 /* ISO2022 encoding stuff. */
3036
3037 /*
3038 It is not enough to say just "ISO2022" on encoding, we have to
3039 specify more details. In Emacs, each coding system of ISO2022
3040 variant has the following specifications:
3041 1. Initial designation to G0 thru G3.
3042 2. Allows short-form designation?
3043 3. ASCII should be designated to G0 before control characters?
3044 4. ASCII should be designated to G0 at end of line?
3045 5. 7-bit environment or 8-bit environment?
3046 6. Use locking-shift?
3047 7. Use Single-shift?
3048 And the following two are only for Japanese:
3049 8. Use ASCII in place of JIS0201-1976-Roman?
3050 9. Use JISX0208-1983 in place of JISX0208-1978?
3051 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3052 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3053 details.
3054 */
3055
3056 /* Produce codes (escape sequence) for designating CHARSET to graphic
3057 register REG at DST, and increment DST. If <final-char> of CHARSET is
3058 '@', 'A', or 'B' and the coding system CODING allows, produce
3059 designation sequence of short-form. */
3060
3061 #define ENCODE_DESIGNATION(charset, reg, coding) \
3062 do { \
3063 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3064 char *intermediate_char_94 = "()*+"; \
3065 char *intermediate_char_96 = ",-./"; \
3066 int revision = -1; \
3067 int c; \
3068 \
3069 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3070 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3071 \
3072 if (revision >= 0) \
3073 { \
3074 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3075 EMIT_ONE_BYTE ('@' + revision); \
3076 } \
3077 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3078 if (CHARSET_DIMENSION (charset) == 1) \
3079 { \
3080 if (! CHARSET_ISO_CHARS_96 (charset)) \
3081 c = intermediate_char_94[reg]; \
3082 else \
3083 c = intermediate_char_96[reg]; \
3084 EMIT_ONE_ASCII_BYTE (c); \
3085 } \
3086 else \
3087 { \
3088 EMIT_ONE_ASCII_BYTE ('$'); \
3089 if (! CHARSET_ISO_CHARS_96 (charset)) \
3090 { \
3091 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3092 || reg != 0 \
3093 || final_char < '@' || final_char > 'B') \
3094 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3095 } \
3096 else \
3097 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3098 } \
3099 EMIT_ONE_ASCII_BYTE (final_char); \
3100 \
3101 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3102 } while (0)
3103
3104
3105 /* The following two macros produce codes (control character or escape
3106 sequence) for ISO2022 single-shift functions (single-shift-2 and
3107 single-shift-3). */
3108
3109 #define ENCODE_SINGLE_SHIFT_2 \
3110 do { \
3111 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3112 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3113 else \
3114 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3115 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3116 } while (0)
3117
3118
3119 #define ENCODE_SINGLE_SHIFT_3 \
3120 do { \
3121 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3122 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3123 else \
3124 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3125 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3126 } while (0)
3127
3128
3129 /* The following four macros produce codes (control character or
3130 escape sequence) for ISO2022 locking-shift functions (shift-in,
3131 shift-out, locking-shift-2, and locking-shift-3). */
3132
3133 #define ENCODE_SHIFT_IN \
3134 do { \
3135 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3136 CODING_ISO_INVOCATION (coding, 0) = 0; \
3137 } while (0)
3138
3139
3140 #define ENCODE_SHIFT_OUT \
3141 do { \
3142 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3143 CODING_ISO_INVOCATION (coding, 0) = 1; \
3144 } while (0)
3145
3146
3147 #define ENCODE_LOCKING_SHIFT_2 \
3148 do { \
3149 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3150 CODING_ISO_INVOCATION (coding, 0) = 2; \
3151 } while (0)
3152
3153
3154 #define ENCODE_LOCKING_SHIFT_3 \
3155 do { \
3156 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3157 CODING_ISO_INVOCATION (coding, 0) = 3; \
3158 } while (0)
3159
3160
3161 /* Produce codes for a DIMENSION1 character whose character set is
3162 CHARSET and whose position-code is C1. Designation and invocation
3163 sequences are also produced in advance if necessary. */
3164
3165 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3166 do { \
3167 int id = CHARSET_ID (charset); \
3168 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3169 { \
3170 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3171 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3172 else \
3173 EMIT_ONE_BYTE (c1 | 0x80); \
3174 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3175 break; \
3176 } \
3177 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3178 { \
3179 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3180 break; \
3181 } \
3182 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3183 { \
3184 EMIT_ONE_BYTE (c1 | 0x80); \
3185 break; \
3186 } \
3187 else \
3188 /* Since CHARSET is not yet invoked to any graphic planes, we \
3189 must invoke it, or, at first, designate it to some graphic \
3190 register. Then repeat the loop to actually produce the \
3191 character. */ \
3192 dst = encode_invocation_designation (charset, coding, dst, \
3193 &produced_chars); \
3194 } while (1)
3195
3196
3197 /* Produce codes for a DIMENSION2 character whose character set is
3198 CHARSET and whose position-codes are C1 and C2. Designation and
3199 invocation codes are also produced in advance if necessary. */
3200
3201 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3202 do { \
3203 int id = CHARSET_ID (charset); \
3204 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3205 { \
3206 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3207 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3208 else \
3209 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3210 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3211 break; \
3212 } \
3213 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3214 { \
3215 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3216 break; \
3217 } \
3218 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3219 { \
3220 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3221 break; \
3222 } \
3223 else \
3224 /* Since CHARSET is not yet invoked to any graphic planes, we \
3225 must invoke it, or, at first, designate it to some graphic \
3226 register. Then repeat the loop to actually produce the \
3227 character. */ \
3228 dst = encode_invocation_designation (charset, coding, dst, \
3229 &produced_chars); \
3230 } while (1)
3231
3232
3233 #define ENCODE_ISO_CHARACTER(charset, c) \
3234 do { \
3235 int code = ENCODE_CHAR ((charset),(c)); \
3236 \
3237 if (CHARSET_DIMENSION (charset) == 1) \
3238 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3239 else \
3240 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3241 } while (0)
3242
3243
3244 /* Produce designation and invocation codes at a place pointed by DST
3245 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3246 Return new DST. */
3247
3248 unsigned char *
3249 encode_invocation_designation (charset, coding, dst, p_nchars)
3250 struct charset *charset;
3251 struct coding_system *coding;
3252 unsigned char *dst;
3253 int *p_nchars;
3254 {
3255 int multibytep = coding->dst_multibyte;
3256 int produced_chars = *p_nchars;
3257 int reg; /* graphic register number */
3258 int id = CHARSET_ID (charset);
3259
3260 /* At first, check designations. */
3261 for (reg = 0; reg < 4; reg++)
3262 if (id == CODING_ISO_DESIGNATION (coding, reg))
3263 break;
3264
3265 if (reg >= 4)
3266 {
3267 /* CHARSET is not yet designated to any graphic registers. */
3268 /* At first check the requested designation. */
3269 reg = CODING_ISO_REQUEST (coding, id);
3270 if (reg < 0)
3271 /* Since CHARSET requests no special designation, designate it
3272 to graphic register 0. */
3273 reg = 0;
3274
3275 ENCODE_DESIGNATION (charset, reg, coding);
3276 }
3277
3278 if (CODING_ISO_INVOCATION (coding, 0) != reg
3279 && CODING_ISO_INVOCATION (coding, 1) != reg)
3280 {
3281 /* Since the graphic register REG is not invoked to any graphic
3282 planes, invoke it to graphic plane 0. */
3283 switch (reg)
3284 {
3285 case 0: /* graphic register 0 */
3286 ENCODE_SHIFT_IN;
3287 break;
3288
3289 case 1: /* graphic register 1 */
3290 ENCODE_SHIFT_OUT;
3291 break;
3292
3293 case 2: /* graphic register 2 */
3294 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3295 ENCODE_SINGLE_SHIFT_2;
3296 else
3297 ENCODE_LOCKING_SHIFT_2;
3298 break;
3299
3300 case 3: /* graphic register 3 */
3301 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3302 ENCODE_SINGLE_SHIFT_3;
3303 else
3304 ENCODE_LOCKING_SHIFT_3;
3305 break;
3306 }
3307 }
3308
3309 *p_nchars = produced_chars;
3310 return dst;
3311 }
3312
3313 /* The following three macros produce codes for indicating direction
3314 of text. */
3315 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
3316 do { \
3317 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3318 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
3319 else \
3320 EMIT_ONE_BYTE (ISO_CODE_CSI); \
3321 } while (0)
3322
3323
3324 #define ENCODE_DIRECTION_R2L() \
3325 do { \
3326 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3327 EMIT_TWO_ASCII_BYTES ('2', ']'); \
3328 } while (0)
3329
3330
3331 #define ENCODE_DIRECTION_L2R() \
3332 do { \
3333 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3334 EMIT_TWO_ASCII_BYTES ('0', ']'); \
3335 } while (0)
3336
3337
3338 /* Produce codes for designation and invocation to reset the graphic
3339 planes and registers to initial state. */
3340 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3341 do { \
3342 int reg; \
3343 struct charset *charset; \
3344 \
3345 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3346 ENCODE_SHIFT_IN; \
3347 for (reg = 0; reg < 4; reg++) \
3348 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3349 && (CODING_ISO_DESIGNATION (coding, reg) \
3350 != CODING_ISO_INITIAL (coding, reg))) \
3351 { \
3352 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3353 ENCODE_DESIGNATION (charset, reg, coding); \
3354 } \
3355 } while (0)
3356
3357
3358 /* Produce designation sequences of charsets in the line started from
3359 SRC to a place pointed by DST, and return updated DST.
3360
3361 If the current block ends before any end-of-line, we may fail to
3362 find all the necessary designations. */
3363
3364 static unsigned char *
3365 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3366 struct coding_system *coding;
3367 int *charbuf, *charbuf_end;
3368 unsigned char *dst;
3369 {
3370 struct charset *charset;
3371 /* Table of charsets to be designated to each graphic register. */
3372 int r[4];
3373 int c, found = 0, reg;
3374 int produced_chars = 0;
3375 int multibytep = coding->dst_multibyte;
3376 Lisp_Object attrs;
3377 Lisp_Object charset_list;
3378
3379 attrs = CODING_ID_ATTRS (coding->id);
3380 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3381 if (EQ (charset_list, Qiso_2022))
3382 charset_list = Viso_2022_charset_list;
3383
3384 for (reg = 0; reg < 4; reg++)
3385 r[reg] = -1;
3386
3387 while (found < 4)
3388 {
3389 int id;
3390
3391 c = *charbuf++;
3392 if (c == '\n')
3393 break;
3394 charset = char_charset (c, charset_list, NULL);
3395 id = CHARSET_ID (charset);
3396 reg = CODING_ISO_REQUEST (coding, id);
3397 if (reg >= 0 && r[reg] < 0)
3398 {
3399 found++;
3400 r[reg] = id;
3401 }
3402 }
3403
3404 if (found)
3405 {
3406 for (reg = 0; reg < 4; reg++)
3407 if (r[reg] >= 0
3408 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3409 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3410 }
3411
3412 return dst;
3413 }
3414
3415 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3416
3417 static int
3418 encode_coding_iso_2022 (coding)
3419 struct coding_system *coding;
3420 {
3421 int multibytep = coding->dst_multibyte;
3422 int *charbuf = coding->charbuf;
3423 int *charbuf_end = charbuf + coding->charbuf_used;
3424 unsigned char *dst = coding->destination + coding->produced;
3425 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3426 int safe_room = 16;
3427 int bol_designation
3428 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3429 && CODING_ISO_BOL (coding));
3430 int produced_chars = 0;
3431 Lisp_Object attrs, eol_type, charset_list;
3432 int ascii_compatible;
3433 int c;
3434
3435 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3436
3437 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3438
3439 while (charbuf < charbuf_end)
3440 {
3441 ASSURE_DESTINATION (safe_room);
3442
3443 if (bol_designation)
3444 {
3445 unsigned char *dst_prev = dst;
3446
3447 /* We have to produce designation sequences if any now. */
3448 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3449 bol_designation = 0;
3450 /* We are sure that designation sequences are all ASCII bytes. */
3451 produced_chars += dst - dst_prev;
3452 }
3453
3454 c = *charbuf++;
3455
3456 /* Now encode the character C. */
3457 if (c < 0x20 || c == 0x7F)
3458 {
3459 if (c == '\n'
3460 || (c == '\r' && EQ (eol_type, Qmac)))
3461 {
3462 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3463 ENCODE_RESET_PLANE_AND_REGISTER ();
3464 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3465 {
3466 int i;
3467
3468 for (i = 0; i < 4; i++)
3469 CODING_ISO_DESIGNATION (coding, i)
3470 = CODING_ISO_INITIAL (coding, i);
3471 }
3472 bol_designation
3473 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3474 }
3475 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3476 ENCODE_RESET_PLANE_AND_REGISTER ();
3477 EMIT_ONE_ASCII_BYTE (c);
3478 }
3479 else if (ASCII_CHAR_P (c))
3480 {
3481 if (ascii_compatible)
3482 EMIT_ONE_ASCII_BYTE (c);
3483 else
3484 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii), c);
3485 }
3486 else
3487 {
3488 struct charset *charset = char_charset (c, charset_list, NULL);
3489
3490 if (!charset)
3491 {
3492 c = coding->default_char;
3493 charset = char_charset (c, charset_list, NULL);
3494 }
3495 ENCODE_ISO_CHARACTER (charset, c);
3496 }
3497 }
3498
3499 if (coding->mode & CODING_MODE_LAST_BLOCK
3500 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3501 {
3502 ASSURE_DESTINATION (safe_room);
3503 ENCODE_RESET_PLANE_AND_REGISTER ();
3504 }
3505 coding->result = CODING_RESULT_SUCCESS;
3506 CODING_ISO_BOL (coding) = bol_designation;
3507 coding->produced_char += produced_chars;
3508 coding->produced = dst - coding->destination;
3509 return 0;
3510 }
3511
3512 \f
3513 /*** 8,9. SJIS and BIG5 handlers ***/
3514
3515 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3516 quite widely. So, for the moment, Emacs supports them in the bare
3517 C code. But, in the future, they may be supported only by CCL. */
3518
3519 /* SJIS is a coding system encoding three character sets: ASCII, right
3520 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3521 as is. A character of charset katakana-jisx0201 is encoded by
3522 "position-code + 0x80". A character of charset japanese-jisx0208
3523 is encoded in 2-byte but two position-codes are divided and shifted
3524 so that it fit in the range below.
3525
3526 --- CODE RANGE of SJIS ---
3527 (character set) (range)
3528 ASCII 0x00 .. 0x7F
3529 KATAKANA-JISX0201 0xA0 .. 0xDF
3530 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3531 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3532 -------------------------------
3533
3534 */
3535
3536 /* BIG5 is a coding system encoding two character sets: ASCII and
3537 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3538 character set and is encoded in two-byte.
3539
3540 --- CODE RANGE of BIG5 ---
3541 (character set) (range)
3542 ASCII 0x00 .. 0x7F
3543 Big5 (1st byte) 0xA1 .. 0xFE
3544 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3545 --------------------------
3546
3547 */
3548
3549 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3550 Check if a text is encoded in SJIS. If it is, return
3551 CATEGORY_MASK_SJIS, else return 0. */
3552
3553 static int
3554 detect_coding_sjis (coding, mask)
3555 struct coding_system *coding;
3556 int *mask;
3557 {
3558 unsigned char *src = coding->source, *src_base = src;
3559 unsigned char *src_end = coding->source + coding->src_bytes;
3560 int multibytep = coding->src_multibyte;
3561 int consumed_chars = 0;
3562 int found = 0;
3563 int c;
3564
3565 /* A coding system of this category is always ASCII compatible. */
3566 src += coding->head_ascii;
3567
3568 while (1)
3569 {
3570 ONE_MORE_BYTE (c);
3571 if (c < 0x80)
3572 continue;
3573 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3574 {
3575 ONE_MORE_BYTE (c);
3576 if (c < 0x40 || c == 0x7F || c > 0xFC)
3577 break;
3578 found = 1;
3579 }
3580 else if (c >= 0xA0 && c < 0xE0)
3581 found = 1;
3582 else
3583 break;
3584 }
3585 *mask &= ~CATEGORY_MASK_SJIS;
3586 return 0;
3587
3588 no_more_source:
3589 if (!found)
3590 return 0;
3591 *mask &= CATEGORY_MASK_SJIS;
3592 return 1;
3593 }
3594
3595 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3596 Check if a text is encoded in BIG5. If it is, return
3597 CATEGORY_MASK_BIG5, else return 0. */
3598
3599 static int
3600 detect_coding_big5 (coding, mask)
3601 struct coding_system *coding;
3602 int *mask;
3603 {
3604 unsigned char *src = coding->source, *src_base = src;
3605 unsigned char *src_end = coding->source + coding->src_bytes;
3606 int multibytep = coding->src_multibyte;
3607 int consumed_chars = 0;
3608 int found = 0;
3609 int c;
3610
3611 /* A coding system of this category is always ASCII compatible. */
3612 src += coding->head_ascii;
3613
3614 while (1)
3615 {
3616 ONE_MORE_BYTE (c);
3617 if (c < 0x80)
3618 continue;
3619 if (c >= 0xA1)
3620 {
3621 ONE_MORE_BYTE (c);
3622 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3623 return 0;
3624 found = 1;
3625 }
3626 else
3627 break;
3628 }
3629 *mask &= ~CATEGORY_MASK_BIG5;
3630 return 0;
3631
3632 no_more_source:
3633 if (!found)
3634 return 0;
3635 *mask &= CATEGORY_MASK_BIG5;
3636 return 1;
3637 }
3638
3639 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3640 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3641
3642 static void
3643 decode_coding_sjis (coding)
3644 struct coding_system *coding;
3645 {
3646 unsigned char *src = coding->source + coding->consumed;
3647 unsigned char *src_end = coding->source + coding->src_bytes;
3648 unsigned char *src_base;
3649 int *charbuf = coding->charbuf;
3650 int *charbuf_end = charbuf + coding->charbuf_size;
3651 int consumed_chars = 0, consumed_chars_base;
3652 int multibytep = coding->src_multibyte;
3653 struct charset *charset_roman, *charset_kanji, *charset_kana;
3654 Lisp_Object attrs, eol_type, charset_list, val;
3655
3656 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3657
3658 val = charset_list;
3659 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3660 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3661 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
3662
3663 while (1)
3664 {
3665 int c, c1;
3666
3667 src_base = src;
3668 consumed_chars_base = consumed_chars;
3669
3670 if (charbuf >= charbuf_end)
3671 break;
3672
3673 ONE_MORE_BYTE (c);
3674
3675 if (c == '\r')
3676 {
3677 if (EQ (eol_type, Qdos))
3678 {
3679 if (src == src_end)
3680 goto no_more_source;
3681 if (*src == '\n')
3682 ONE_MORE_BYTE (c);
3683 }
3684 else if (EQ (eol_type, Qmac))
3685 c = '\n';
3686 }
3687 else
3688 {
3689 struct charset *charset;
3690
3691 if (c < 0x80)
3692 charset = charset_roman;
3693 else
3694 {
3695 if (c >= 0xF0)
3696 goto invalid_code;
3697 if (c < 0xA0 || c >= 0xE0)
3698 {
3699 /* SJIS -> JISX0208 */
3700 ONE_MORE_BYTE (c1);
3701 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3702 goto invalid_code;
3703 c = (c << 8) | c1;
3704 SJIS_TO_JIS (c);
3705 charset = charset_kanji;
3706 }
3707 else
3708 /* SJIS -> JISX0201-Kana */
3709 charset = charset_kana;
3710 }
3711 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3712 }
3713 *charbuf++ = c;
3714 continue;
3715
3716 invalid_code:
3717 src = src_base;
3718 consumed_chars = consumed_chars_base;
3719 ONE_MORE_BYTE (c);
3720 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3721 coding->errors++;
3722 }
3723
3724 no_more_source:
3725 coding->consumed_char += consumed_chars_base;
3726 coding->consumed = src_base - coding->source;
3727 coding->charbuf_used = charbuf - coding->charbuf;
3728 }
3729
3730 static void
3731 decode_coding_big5 (coding)
3732 struct coding_system *coding;
3733 {
3734 unsigned char *src = coding->source + coding->consumed;
3735 unsigned char *src_end = coding->source + coding->src_bytes;
3736 unsigned char *src_base;
3737 int *charbuf = coding->charbuf;
3738 int *charbuf_end = charbuf + coding->charbuf_size;
3739 int consumed_chars = 0, consumed_chars_base;
3740 int multibytep = coding->src_multibyte;
3741 struct charset *charset_roman, *charset_big5;
3742 Lisp_Object attrs, eol_type, charset_list, val;
3743
3744 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3745 val = charset_list;
3746 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3747 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3748
3749 while (1)
3750 {
3751 int c, c1;
3752
3753 src_base = src;
3754 consumed_chars_base = consumed_chars;
3755
3756 if (charbuf >= charbuf_end)
3757 break;
3758
3759 ONE_MORE_BYTE (c);
3760
3761 if (c == '\r')
3762 {
3763 if (EQ (eol_type, Qdos))
3764 {
3765 if (src == src_end)
3766 goto no_more_source;
3767 if (*src == '\n')
3768 ONE_MORE_BYTE (c);
3769 }
3770 else if (EQ (eol_type, Qmac))
3771 c = '\n';
3772 }
3773 else
3774 {
3775 struct charset *charset;
3776 if (c < 0x80)
3777 charset = charset_roman;
3778 else
3779 {
3780 /* BIG5 -> Big5 */
3781 if (c < 0xA1 || c > 0xFE)
3782 goto invalid_code;
3783 ONE_MORE_BYTE (c1);
3784 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3785 goto invalid_code;
3786 c = c << 8 | c1;
3787 charset = charset_big5;
3788 }
3789 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3790 }
3791
3792 *charbuf++ = c;
3793 continue;
3794
3795 invalid_code:
3796 src = src_base;
3797 consumed_chars = consumed_chars_base;
3798 ONE_MORE_BYTE (c);
3799 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3800 coding->errors++;
3801 }
3802
3803 no_more_source:
3804 coding->consumed_char += consumed_chars_base;
3805 coding->consumed = src_base - coding->source;
3806 coding->charbuf_used = charbuf - coding->charbuf;
3807 }
3808
3809 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3810 This function can encode charsets `ascii', `katakana-jisx0201',
3811 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3812 are sure that all these charsets are registered as official charset
3813 (i.e. do not have extended leading-codes). Characters of other
3814 charsets are produced without any encoding. If SJIS_P is 1, encode
3815 SJIS text, else encode BIG5 text. */
3816
3817 static int
3818 encode_coding_sjis (coding)
3819 struct coding_system *coding;
3820 {
3821 int multibytep = coding->dst_multibyte;
3822 int *charbuf = coding->charbuf;
3823 int *charbuf_end = charbuf + coding->charbuf_used;
3824 unsigned char *dst = coding->destination + coding->produced;
3825 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3826 int safe_room = 4;
3827 int produced_chars = 0;
3828 Lisp_Object attrs, eol_type, charset_list, val;
3829 int ascii_compatible;
3830 struct charset *charset_roman, *charset_kanji, *charset_kana;
3831 int c;
3832
3833 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3834 val = charset_list;
3835 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3836 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3837 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3838
3839 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3840
3841 while (charbuf < charbuf_end)
3842 {
3843 ASSURE_DESTINATION (safe_room);
3844 c = *charbuf++;
3845 /* Now encode the character C. */
3846 if (ASCII_CHAR_P (c) && ascii_compatible)
3847 EMIT_ONE_ASCII_BYTE (c);
3848 else
3849 {
3850 unsigned code;
3851 struct charset *charset = char_charset (c, charset_list, &code);
3852
3853 if (!charset)
3854 {
3855 c = coding->default_char;
3856 charset = char_charset (c, charset_list, &code);
3857 }
3858 if (code == CHARSET_INVALID_CODE (charset))
3859 abort ();
3860 if (charset == charset_kanji)
3861 {
3862 int c1, c2;
3863 JIS_TO_SJIS (code);
3864 c1 = code >> 8, c2 = code & 0xFF;
3865 EMIT_TWO_BYTES (c1, c2);
3866 }
3867 else if (charset == charset_kana)
3868 EMIT_ONE_BYTE (code | 0x80);
3869 else
3870 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3871 }
3872 }
3873 coding->result = CODING_RESULT_SUCCESS;
3874 coding->produced_char += produced_chars;
3875 coding->produced = dst - coding->destination;
3876 return 0;
3877 }
3878
3879 static int
3880 encode_coding_big5 (coding)
3881 struct coding_system *coding;
3882 {
3883 int multibytep = coding->dst_multibyte;
3884 int *charbuf = coding->charbuf;
3885 int *charbuf_end = charbuf + coding->charbuf_used;
3886 unsigned char *dst = coding->destination + coding->produced;
3887 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3888 int safe_room = 4;
3889 int produced_chars = 0;
3890 Lisp_Object attrs, eol_type, charset_list, val;
3891 int ascii_compatible;
3892 struct charset *charset_roman, *charset_big5;
3893 int c;
3894
3895 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3896 val = charset_list;
3897 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3898 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3899 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3900
3901 while (charbuf < charbuf_end)
3902 {
3903 ASSURE_DESTINATION (safe_room);
3904 c = *charbuf++;
3905 /* Now encode the character C. */
3906 if (ASCII_CHAR_P (c) && ascii_compatible)
3907 EMIT_ONE_ASCII_BYTE (c);
3908 else
3909 {
3910 unsigned code;
3911 struct charset *charset = char_charset (c, charset_list, &code);
3912
3913 if (! charset)
3914 {
3915 c = coding->default_char;
3916 charset = char_charset (c, charset_list, &code);
3917 }
3918 if (code == CHARSET_INVALID_CODE (charset))
3919 abort ();
3920 if (charset == charset_big5)
3921 {
3922 int c1, c2;
3923
3924 c1 = code >> 8, c2 = code & 0xFF;
3925 EMIT_TWO_BYTES (c1, c2);
3926 }
3927 else
3928 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3929 }
3930 }
3931 coding->result = CODING_RESULT_SUCCESS;
3932 coding->produced_char += produced_chars;
3933 coding->produced = dst - coding->destination;
3934 return 0;
3935 }
3936
3937 \f
3938 /*** 10. CCL handlers ***/
3939
3940 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3941 Check if a text is encoded in a coding system of which
3942 encoder/decoder are written in CCL program. If it is, return
3943 CATEGORY_MASK_CCL, else return 0. */
3944
3945 static int
3946 detect_coding_ccl (coding, mask)
3947 struct coding_system *coding;
3948 int *mask;
3949 {
3950 unsigned char *src = coding->source, *src_base = src;
3951 unsigned char *src_end = coding->source + coding->src_bytes;
3952 int multibytep = coding->src_multibyte;
3953 int consumed_chars = 0;
3954 int found = 0;
3955 unsigned char *valids = CODING_CCL_VALIDS (coding);
3956 int head_ascii = coding->head_ascii;
3957 Lisp_Object attrs;
3958
3959 coding = &coding_categories[coding_category_ccl];
3960 attrs = CODING_ID_ATTRS (coding->id);
3961 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
3962 src += head_ascii;
3963
3964 while (1)
3965 {
3966 int c;
3967 ONE_MORE_BYTE (c);
3968 if (! valids[c])
3969 break;
3970 if (!found && valids[c] > 1)
3971 found = 1;
3972 }
3973 *mask &= ~CATEGORY_MASK_CCL;
3974 return 0;
3975
3976 no_more_source:
3977 if (!found)
3978 return 0;
3979 *mask &= CATEGORY_MASK_CCL;
3980 return 1;
3981 }
3982
3983 static void
3984 decode_coding_ccl (coding)
3985 struct coding_system *coding;
3986 {
3987 unsigned char *src = coding->source + coding->consumed;
3988 unsigned char *src_end = coding->source + coding->src_bytes;
3989 int *charbuf = coding->charbuf;
3990 int *charbuf_end = charbuf + coding->charbuf_size;
3991 int consumed_chars = 0;
3992 int multibytep = coding->src_multibyte;
3993 struct ccl_program ccl;
3994 int source_charbuf[1024];
3995 int source_byteidx[1024];
3996
3997 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
3998
3999 while (src < src_end)
4000 {
4001 unsigned char *p = src;
4002 int *source, *source_end;
4003 int i = 0;
4004
4005 if (multibytep)
4006 while (i < 1024 && p < src_end)
4007 {
4008 source_byteidx[i] = p - src;
4009 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4010 }
4011 else
4012 while (i < 1024 && p < src_end)
4013 source_charbuf[i++] = *p++;
4014
4015 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4016 ccl.last_block = 1;
4017
4018 source = source_charbuf;
4019 source_end = source + i;
4020 while (source < source_end)
4021 {
4022 ccl_driver (&ccl, source, charbuf,
4023 source_end - source, charbuf_end - charbuf);
4024 source += ccl.consumed;
4025 charbuf += ccl.produced;
4026 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4027 break;
4028 }
4029 if (source < source_end)
4030 src += source_byteidx[source - source_charbuf];
4031 else
4032 src = p;
4033 consumed_chars += source - source_charbuf;
4034
4035 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4036 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4037 break;
4038 }
4039
4040 switch (ccl.status)
4041 {
4042 case CCL_STAT_SUSPEND_BY_SRC:
4043 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4044 break;
4045 case CCL_STAT_SUSPEND_BY_DST:
4046 break;
4047 case CCL_STAT_QUIT:
4048 case CCL_STAT_INVALID_CMD:
4049 coding->result = CODING_RESULT_INTERRUPT;
4050 break;
4051 default:
4052 coding->result = CODING_RESULT_SUCCESS;
4053 break;
4054 }
4055 coding->consumed_char += consumed_chars;
4056 coding->consumed = src - coding->source;
4057 coding->charbuf_used = charbuf - coding->charbuf;
4058 }
4059
4060 static int
4061 encode_coding_ccl (coding)
4062 struct coding_system *coding;
4063 {
4064 struct ccl_program ccl;
4065 int multibytep = coding->dst_multibyte;
4066 int *charbuf = coding->charbuf;
4067 int *charbuf_end = charbuf + coding->charbuf_used;
4068 unsigned char *dst = coding->destination + coding->produced;
4069 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4070 unsigned char *adjusted_dst_end = dst_end - 1;
4071 int destination_charbuf[1024];
4072 int i, produced_chars = 0;
4073
4074 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4075
4076 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4077 ccl.dst_multibyte = coding->dst_multibyte;
4078
4079 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4080 {
4081 int dst_bytes = dst_end - dst;
4082 if (dst_bytes > 1024)
4083 dst_bytes = 1024;
4084
4085 ccl_driver (&ccl, charbuf, destination_charbuf,
4086 charbuf_end - charbuf, dst_bytes);
4087 charbuf += ccl.consumed;
4088 if (multibytep)
4089 for (i = 0; i < ccl.produced; i++)
4090 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4091 else
4092 {
4093 for (i = 0; i < ccl.produced; i++)
4094 *dst++ = destination_charbuf[i] & 0xFF;
4095 produced_chars += ccl.produced;
4096 }
4097 }
4098
4099 switch (ccl.status)
4100 {
4101 case CCL_STAT_SUSPEND_BY_SRC:
4102 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4103 break;
4104 case CCL_STAT_SUSPEND_BY_DST:
4105 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4106 break;
4107 case CCL_STAT_QUIT:
4108 case CCL_STAT_INVALID_CMD:
4109 coding->result = CODING_RESULT_INTERRUPT;
4110 break;
4111 default:
4112 coding->result = CODING_RESULT_SUCCESS;
4113 break;
4114 }
4115
4116 coding->produced_char += produced_chars;
4117 coding->produced = dst - coding->destination;
4118 return 0;
4119 }
4120
4121
4122 \f
4123 /*** 10, 11. no-conversion handlers ***/
4124
4125 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4126
4127 static void
4128 decode_coding_raw_text (coding)
4129 struct coding_system *coding;
4130 {
4131 coding->chars_at_source = 1;
4132 coding->consumed_char = 0;
4133 coding->consumed = 0;
4134 coding->result = CODING_RESULT_SUCCESS;
4135 }
4136
4137 static int
4138 encode_coding_raw_text (coding)
4139 struct coding_system *coding;
4140 {
4141 int multibytep = coding->dst_multibyte;
4142 int *charbuf = coding->charbuf;
4143 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4144 unsigned char *dst = coding->destination + coding->produced;
4145 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4146 int produced_chars = 0;
4147 int c;
4148
4149 if (multibytep)
4150 {
4151 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4152
4153 if (coding->src_multibyte)
4154 while (charbuf < charbuf_end)
4155 {
4156 ASSURE_DESTINATION (safe_room);
4157 c = *charbuf++;
4158 if (ASCII_CHAR_P (c))
4159 EMIT_ONE_ASCII_BYTE (c);
4160 else if (CHAR_BYTE8_P (c))
4161 {
4162 c = CHAR_TO_BYTE8 (c);
4163 EMIT_ONE_BYTE (c);
4164 }
4165 else
4166 {
4167 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4168
4169 CHAR_STRING_ADVANCE (c, p1);
4170 while (p0 < p1)
4171 EMIT_ONE_BYTE (*p0);
4172 }
4173 }
4174 else
4175 while (charbuf < charbuf_end)
4176 {
4177 ASSURE_DESTINATION (safe_room);
4178 c = *charbuf++;
4179 EMIT_ONE_BYTE (c);
4180 }
4181 }
4182 else
4183 {
4184 if (coding->src_multibyte)
4185 {
4186 int safe_room = MAX_MULTIBYTE_LENGTH;
4187
4188 while (charbuf < charbuf_end)
4189 {
4190 ASSURE_DESTINATION (safe_room);
4191 c = *charbuf++;
4192 if (ASCII_CHAR_P (c))
4193 *dst++ = c;
4194 else if (CHAR_BYTE8_P (c))
4195 *dst++ = CHAR_TO_BYTE8 (c);
4196 else
4197 CHAR_STRING_ADVANCE (c, dst);
4198 produced_chars++;
4199 }
4200 }
4201 else
4202 {
4203 ASSURE_DESTINATION (charbuf_end - charbuf);
4204 while (charbuf < charbuf_end && dst < dst_end)
4205 *dst++ = *charbuf++;
4206 produced_chars = dst - (coding->destination + coding->dst_bytes);
4207 }
4208 }
4209 coding->result = CODING_RESULT_SUCCESS;
4210 coding->produced_char += produced_chars;
4211 coding->produced = dst - coding->destination;
4212 return 0;
4213 }
4214
4215 static int
4216 detect_coding_charset (coding, mask)
4217 struct coding_system *coding;
4218 int *mask;
4219 {
4220 unsigned char *src = coding->source, *src_base = src;
4221 unsigned char *src_end = coding->source + coding->src_bytes;
4222 int multibytep = coding->src_multibyte;
4223 int consumed_chars = 0;
4224 Lisp_Object attrs, valids;
4225
4226 coding = &coding_categories[coding_category_charset];
4227 attrs = CODING_ID_ATTRS (coding->id);
4228 valids = AREF (attrs, coding_attr_charset_valids);
4229
4230 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4231 src += coding->head_ascii;
4232
4233 while (1)
4234 {
4235 int c;
4236
4237 ONE_MORE_BYTE (c);
4238 if (NILP (AREF (valids, c)))
4239 break;
4240 }
4241 *mask &= ~CATEGORY_MASK_CHARSET;
4242 return 0;
4243
4244 no_more_source:
4245 *mask &= CATEGORY_MASK_CHARSET;
4246 return 1;
4247 }
4248
4249 static void
4250 decode_coding_charset (coding)
4251 struct coding_system *coding;
4252 {
4253 unsigned char *src = coding->source + coding->consumed;
4254 unsigned char *src_end = coding->source + coding->src_bytes;
4255 unsigned char *src_base;
4256 int *charbuf = coding->charbuf;
4257 int *charbuf_end = charbuf + coding->charbuf_size;
4258 int consumed_chars = 0, consumed_chars_base;
4259 int multibytep = coding->src_multibyte;
4260 Lisp_Object attrs, eol_type, charset_list, valids;
4261
4262 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4263 valids = AREF (attrs, coding_attr_charset_valids);
4264
4265 while (1)
4266 {
4267 int c;
4268
4269 src_base = src;
4270 consumed_chars_base = consumed_chars;
4271
4272 if (charbuf >= charbuf_end)
4273 break;
4274
4275 ONE_MORE_BYTE (c);
4276 if (c == '\r')
4277 {
4278 if (EQ (eol_type, Qdos))
4279 {
4280 if (src < src_end
4281 && *src == '\n')
4282 ONE_MORE_BYTE (c);
4283 }
4284 else if (EQ (eol_type, Qmac))
4285 c = '\n';
4286 }
4287 else
4288 {
4289 Lisp_Object val;
4290 struct charset *charset;
4291 int c1;
4292
4293 val = AREF (valids, c);
4294 if (NILP (val))
4295 goto invalid_code;
4296 charset = CHARSET_FROM_ID (XFASTINT (val));
4297 if (CHARSET_DIMENSION (charset) > 1)
4298 {
4299 ONE_MORE_BYTE (c1);
4300 c = (c << 8) | c1;
4301 if (CHARSET_DIMENSION (charset) > 2)
4302 {
4303 ONE_MORE_BYTE (c1);
4304 c = (c << 8) | c1;
4305 if (CHARSET_DIMENSION (charset) > 3)
4306 {
4307 ONE_MORE_BYTE (c1);
4308 c = (c << 8) | c1;
4309 }
4310 }
4311 }
4312 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4313 if (c < 0)
4314 goto invalid_code;
4315 }
4316 *charbuf++ = c;
4317 continue;
4318
4319 invalid_code:
4320 src = src_base;
4321 consumed_chars = consumed_chars_base;
4322 ONE_MORE_BYTE (c);
4323 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4324 coding->errors++;
4325 }
4326
4327 no_more_source:
4328 coding->consumed_char += consumed_chars_base;
4329 coding->consumed = src_base - coding->source;
4330 coding->charbuf_used = charbuf - coding->charbuf;
4331 }
4332
4333 static int
4334 encode_coding_charset (coding)
4335 struct coding_system *coding;
4336 {
4337 int multibytep = coding->dst_multibyte;
4338 int *charbuf = coding->charbuf;
4339 int *charbuf_end = charbuf + coding->charbuf_used;
4340 unsigned char *dst = coding->destination + coding->produced;
4341 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4342 int safe_room = MAX_MULTIBYTE_LENGTH;
4343 int produced_chars = 0;
4344 Lisp_Object attrs, eol_type, charset_list;
4345 int ascii_compatible;
4346 int c;
4347
4348 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4349 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4350
4351 while (charbuf < charbuf_end)
4352 {
4353 struct charset *charset;
4354 unsigned code;
4355
4356 ASSURE_DESTINATION (safe_room);
4357 c = *charbuf++;
4358 if (ascii_compatible && ASCII_CHAR_P (c))
4359 EMIT_ONE_ASCII_BYTE (c);
4360 else
4361 {
4362 charset = char_charset (c, charset_list, &code);
4363 if (charset)
4364 {
4365 if (CHARSET_DIMENSION (charset) == 1)
4366 EMIT_ONE_BYTE (code);
4367 else if (CHARSET_DIMENSION (charset) == 2)
4368 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4369 else if (CHARSET_DIMENSION (charset) == 3)
4370 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4371 else
4372 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4373 (code >> 8) & 0xFF, code & 0xFF);
4374 }
4375 else
4376 EMIT_ONE_BYTE (coding->default_char);
4377 }
4378 }
4379
4380 coding->result = CODING_RESULT_SUCCESS;
4381 coding->produced_char += produced_chars;
4382 coding->produced = dst - coding->destination;
4383 return 0;
4384 }
4385
4386 \f
4387 /*** 7. C library functions ***/
4388
4389 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
4390 has a property `coding-system'. The value of this property is a
4391 vector of length 5 (called as coding-vector). Among elements of
4392 this vector, the first (element[0]) and the fifth (element[4])
4393 carry important information for decoding/encoding. Before
4394 decoding/encoding, this information should be set in fields of a
4395 structure of type `coding_system'.
4396
4397 A value of property `coding-system' can be a symbol of another
4398 subsidiary coding-system. In that case, Emacs gets coding-vector
4399 from that symbol.
4400
4401 `element[0]' contains information to be set in `coding->type'. The
4402 value and its meaning is as follows:
4403
4404 0 -- coding_type_emacs_mule
4405 1 -- coding_type_sjis
4406 2 -- coding_type_iso_2022
4407 3 -- coding_type_big5
4408 4 -- coding_type_ccl encoder/decoder written in CCL
4409 nil -- coding_type_no_conversion
4410 t -- coding_type_undecided (automatic conversion on decoding,
4411 no-conversion on encoding)
4412
4413 `element[4]' contains information to be set in `coding->flags' and
4414 `coding->spec'. The meaning varies by `coding->type'.
4415
4416 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4417 of length 32 (of which the first 13 sub-elements are used now).
4418 Meanings of these sub-elements are:
4419
4420 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4421 If the value is an integer of valid charset, the charset is
4422 assumed to be designated to graphic register N initially.
4423
4424 If the value is minus, it is a minus value of charset which
4425 reserves graphic register N, which means that the charset is
4426 not designated initially but should be designated to graphic
4427 register N just before encoding a character in that charset.
4428
4429 If the value is nil, graphic register N is never used on
4430 encoding.
4431
4432 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4433 Each value takes t or nil. See the section ISO2022 of
4434 `coding.h' for more information.
4435
4436 If `coding->type' is `coding_type_big5', element[4] is t to denote
4437 BIG5-ETen or nil to denote BIG5-HKU.
4438
4439 If `coding->type' takes the other value, element[4] is ignored.
4440
4441 Emacs Lisp's coding system also carries information about format of
4442 end-of-line in a value of property `eol-type'. If the value is
4443 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4444 it is not integer, it should be a vector of subsidiary coding
4445 systems of which property `eol-type' has one of above values.
4446
4447 */
4448
4449 /* Setup coding context CODING from information about CODING_SYSTEM.
4450 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4451 CODING_SYSTEM is invalid, signal an error. */
4452
4453 void
4454 setup_coding_system (coding_system, coding)
4455 Lisp_Object coding_system;
4456 struct coding_system *coding;
4457 {
4458 Lisp_Object attrs;
4459 Lisp_Object eol_type;
4460 Lisp_Object coding_type;
4461 Lisp_Object val;
4462
4463 if (NILP (coding_system))
4464 coding_system = Qno_conversion;
4465
4466 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4467
4468 attrs = CODING_ID_ATTRS (coding->id);
4469 eol_type = CODING_ID_EOL_TYPE (coding->id);
4470
4471 coding->mode = 0;
4472 coding->head_ascii = -1;
4473 coding->common_flags
4474 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4475
4476 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4477 coding->max_charset_id = XSTRING (val)->size - 1;
4478 coding->safe_charsets = (char *) XSTRING (val)->data;
4479 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4480
4481 coding_type = CODING_ATTR_TYPE (attrs);
4482 if (EQ (coding_type, Qundecided))
4483 {
4484 coding->detector = NULL;
4485 coding->decoder = decode_coding_raw_text;
4486 coding->encoder = encode_coding_raw_text;
4487 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4488 }
4489 else if (EQ (coding_type, Qiso_2022))
4490 {
4491 int i;
4492 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4493
4494 /* Invoke graphic register 0 to plane 0. */
4495 CODING_ISO_INVOCATION (coding, 0) = 0;
4496 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4497 CODING_ISO_INVOCATION (coding, 1)
4498 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4499 /* Setup the initial status of designation. */
4500 for (i = 0; i < 4; i++)
4501 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4502 /* Not single shifting initially. */
4503 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4504 /* Beginning of buffer should also be regarded as bol. */
4505 CODING_ISO_BOL (coding) = 1;
4506 coding->detector = detect_coding_iso_2022;
4507 coding->decoder = decode_coding_iso_2022;
4508 coding->encoder = encode_coding_iso_2022;
4509 if (flags & CODING_ISO_FLAG_SAFE)
4510 coding->mode |= CODING_MODE_SAFE_ENCODING;
4511 coding->common_flags
4512 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4513 | CODING_REQUIRE_FLUSHING_MASK);
4514 if (flags & CODING_ISO_FLAG_COMPOSITION)
4515 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4516 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4517 {
4518 setup_iso_safe_charsets (attrs);
4519 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4520 coding->max_charset_id = XSTRING (val)->size - 1;
4521 coding->safe_charsets = (char *) XSTRING (val)->data;
4522 }
4523 CODING_ISO_FLAGS (coding) = flags;
4524 }
4525 else if (EQ (coding_type, Qcharset))
4526 {
4527 coding->detector = detect_coding_charset;
4528 coding->decoder = decode_coding_charset;
4529 coding->encoder = encode_coding_charset;
4530 coding->common_flags
4531 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4532 }
4533 else if (EQ (coding_type, Qutf_8))
4534 {
4535 coding->detector = detect_coding_utf_8;
4536 coding->decoder = decode_coding_utf_8;
4537 coding->encoder = encode_coding_utf_8;
4538 coding->common_flags
4539 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4540 }
4541 else if (EQ (coding_type, Qutf_16))
4542 {
4543 val = AREF (attrs, coding_attr_utf_16_bom);
4544 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4545 : EQ (val, Qt) ? utf_16_with_bom
4546 : utf_16_without_bom);
4547 val = AREF (attrs, coding_attr_utf_16_endian);
4548 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4549 : utf_16_little_endian);
4550 CODING_UTF_16_SURROGATE (coding) = 0;
4551 coding->detector = detect_coding_utf_16;
4552 coding->decoder = decode_coding_utf_16;
4553 coding->encoder = encode_coding_utf_16;
4554 coding->common_flags
4555 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4556 }
4557 else if (EQ (coding_type, Qccl))
4558 {
4559 coding->detector = detect_coding_ccl;
4560 coding->decoder = decode_coding_ccl;
4561 coding->encoder = encode_coding_ccl;
4562 coding->common_flags
4563 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4564 | CODING_REQUIRE_FLUSHING_MASK);
4565 }
4566 else if (EQ (coding_type, Qemacs_mule))
4567 {
4568 coding->detector = detect_coding_emacs_mule;
4569 coding->decoder = decode_coding_emacs_mule;
4570 coding->encoder = encode_coding_emacs_mule;
4571 coding->common_flags
4572 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4573 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4574 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4575 {
4576 Lisp_Object tail, safe_charsets;
4577 int max_charset_id = 0;
4578
4579 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4580 tail = XCDR (tail))
4581 if (max_charset_id < XFASTINT (XCAR (tail)))
4582 max_charset_id = XFASTINT (XCAR (tail));
4583 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4584 make_number (255));
4585 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4586 tail = XCDR (tail))
4587 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4588 coding->max_charset_id = max_charset_id;
4589 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4590 }
4591 }
4592 else if (EQ (coding_type, Qshift_jis))
4593 {
4594 coding->detector = detect_coding_sjis;
4595 coding->decoder = decode_coding_sjis;
4596 coding->encoder = encode_coding_sjis;
4597 coding->common_flags
4598 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4599 }
4600 else if (EQ (coding_type, Qbig5))
4601 {
4602 coding->detector = detect_coding_big5;
4603 coding->decoder = decode_coding_big5;
4604 coding->encoder = encode_coding_big5;
4605 coding->common_flags
4606 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4607 }
4608 else /* EQ (coding_type, Qraw_text) */
4609 {
4610 coding->detector = NULL;
4611 coding->decoder = decode_coding_raw_text;
4612 coding->encoder = encode_coding_raw_text;
4613 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4614 }
4615
4616 return;
4617 }
4618
4619 /* Return raw-text or one of its subsidiaries that has the same
4620 eol_type as CODING-SYSTEM. */
4621
4622 Lisp_Object
4623 raw_text_coding_system (coding_system)
4624 Lisp_Object coding_system;
4625 {
4626 Lisp_Object spec, attrs;
4627 Lisp_Object eol_type, raw_text_eol_type;
4628
4629 spec = CODING_SYSTEM_SPEC (coding_system);
4630 attrs = AREF (spec, 0);
4631
4632 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4633 return coding_system;
4634
4635 eol_type = AREF (spec, 2);
4636 if (VECTORP (eol_type))
4637 return Qraw_text;
4638 spec = CODING_SYSTEM_SPEC (Qraw_text);
4639 raw_text_eol_type = AREF (spec, 2);
4640 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4641 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4642 : AREF (raw_text_eol_type, 2));
4643 }
4644
4645
4646 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4647 does, return one of the subsidiary that has the same eol-spec as
4648 PARENT. Otherwise, return CODING_SYSTEM. */
4649
4650 Lisp_Object
4651 coding_inherit_eol_type (coding_system, parent)
4652 Lisp_Object coding_system, parent;
4653 {
4654 Lisp_Object spec, attrs, eol_type;
4655
4656 spec = CODING_SYSTEM_SPEC (coding_system);
4657 attrs = AREF (spec, 0);
4658 eol_type = AREF (spec, 2);
4659 if (VECTORP (eol_type))
4660 {
4661 Lisp_Object parent_spec;
4662 Lisp_Object parent_eol_type;
4663
4664 parent_spec
4665 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4666 parent_eol_type = AREF (parent_spec, 2);
4667 if (EQ (parent_eol_type, Qunix))
4668 coding_system = AREF (eol_type, 0);
4669 else if (EQ (parent_eol_type, Qdos))
4670 coding_system = AREF (eol_type, 1);
4671 else if (EQ (parent_eol_type, Qmac))
4672 coding_system = AREF (eol_type, 2);
4673 }
4674 return coding_system;
4675 }
4676
4677 /* Emacs has a mechanism to automatically detect a coding system if it
4678 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4679 it's impossible to distinguish some coding systems accurately
4680 because they use the same range of codes. So, at first, coding
4681 systems are categorized into 7, those are:
4682
4683 o coding-category-emacs-mule
4684
4685 The category for a coding system which has the same code range
4686 as Emacs' internal format. Assigned the coding-system (Lisp
4687 symbol) `emacs-mule' by default.
4688
4689 o coding-category-sjis
4690
4691 The category for a coding system which has the same code range
4692 as SJIS. Assigned the coding-system (Lisp
4693 symbol) `japanese-shift-jis' by default.
4694
4695 o coding-category-iso-7
4696
4697 The category for a coding system which has the same code range
4698 as ISO2022 of 7-bit environment. This doesn't use any locking
4699 shift and single shift functions. This can encode/decode all
4700 charsets. Assigned the coding-system (Lisp symbol)
4701 `iso-2022-7bit' by default.
4702
4703 o coding-category-iso-7-tight
4704
4705 Same as coding-category-iso-7 except that this can
4706 encode/decode only the specified charsets.
4707
4708 o coding-category-iso-8-1
4709
4710 The category for a coding system which has the same code range
4711 as ISO2022 of 8-bit environment and graphic plane 1 used only
4712 for DIMENSION1 charset. This doesn't use any locking shift
4713 and single shift functions. Assigned the coding-system (Lisp
4714 symbol) `iso-latin-1' by default.
4715
4716 o coding-category-iso-8-2
4717
4718 The category for a coding system which has the same code range
4719 as ISO2022 of 8-bit environment and graphic plane 1 used only
4720 for DIMENSION2 charset. This doesn't use any locking shift
4721 and single shift functions. Assigned the coding-system (Lisp
4722 symbol) `japanese-iso-8bit' by default.
4723
4724 o coding-category-iso-7-else
4725
4726 The category for a coding system which has the same code range
4727 as ISO2022 of 7-bit environemnt but uses locking shift or
4728 single shift functions. Assigned the coding-system (Lisp
4729 symbol) `iso-2022-7bit-lock' by default.
4730
4731 o coding-category-iso-8-else
4732
4733 The category for a coding system which has the same code range
4734 as ISO2022 of 8-bit environemnt but uses locking shift or
4735 single shift functions. Assigned the coding-system (Lisp
4736 symbol) `iso-2022-8bit-ss2' by default.
4737
4738 o coding-category-big5
4739
4740 The category for a coding system which has the same code range
4741 as BIG5. Assigned the coding-system (Lisp symbol)
4742 `cn-big5' by default.
4743
4744 o coding-category-utf-8
4745
4746 The category for a coding system which has the same code range
4747 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4748 symbol) `utf-8' by default.
4749
4750 o coding-category-utf-16-be
4751
4752 The category for a coding system in which a text has an
4753 Unicode signature (cf. Unicode Standard) in the order of BIG
4754 endian at the head. Assigned the coding-system (Lisp symbol)
4755 `utf-16-be' by default.
4756
4757 o coding-category-utf-16-le
4758
4759 The category for a coding system in which a text has an
4760 Unicode signature (cf. Unicode Standard) in the order of
4761 LITTLE endian at the head. Assigned the coding-system (Lisp
4762 symbol) `utf-16-le' by default.
4763
4764 o coding-category-ccl
4765
4766 The category for a coding system of which encoder/decoder is
4767 written in CCL programs. The default value is nil, i.e., no
4768 coding system is assigned.
4769
4770 o coding-category-binary
4771
4772 The category for a coding system not categorized in any of the
4773 above. Assigned the coding-system (Lisp symbol)
4774 `no-conversion' by default.
4775
4776 Each of them is a Lisp symbol and the value is an actual
4777 `coding-system's (this is also a Lisp symbol) assigned by a user.
4778 What Emacs does actually is to detect a category of coding system.
4779 Then, it uses a `coding-system' assigned to it. If Emacs can't
4780 decide only one possible category, it selects a category of the
4781 highest priority. Priorities of categories are also specified by a
4782 user in a Lisp variable `coding-category-list'.
4783
4784 */
4785
4786 #define EOL_SEEN_NONE 0
4787 #define EOL_SEEN_LF 1
4788 #define EOL_SEEN_CR 2
4789 #define EOL_SEEN_CRLF 4
4790
4791 /* Detect how end-of-line of a text of length CODING->src_bytes
4792 pointed by CODING->source is encoded. Return one of
4793 EOL_SEEN_XXX. */
4794
4795 #define MAX_EOL_CHECK_COUNT 3
4796
4797 static int
4798 detect_eol (coding, source, src_bytes)
4799 struct coding_system *coding;
4800 unsigned char *source;
4801 EMACS_INT src_bytes;
4802 {
4803 Lisp_Object attrs, coding_type;
4804 unsigned char *src = source, *src_end = src + src_bytes;
4805 unsigned char c;
4806 int total = 0;
4807 int eol_seen = EOL_SEEN_NONE;
4808
4809 attrs = CODING_ID_ATTRS (coding->id);
4810 coding_type = CODING_ATTR_TYPE (attrs);
4811
4812 if (EQ (coding_type, Qccl))
4813 {
4814 int msb, lsb;
4815
4816 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4817 lsb = 1 - msb;
4818
4819 while (src + 1 < src_end)
4820 {
4821 c = src[lsb];
4822 if (src[msb] == 0 && (c == '\n' || c == '\r'))
4823 {
4824 int this_eol;
4825
4826 if (c == '\n')
4827 this_eol = EOL_SEEN_LF;
4828 else if (src + 3 >= src_end
4829 || src[msb + 2] != 0
4830 || src[lsb + 2] != '\n')
4831 this_eol = EOL_SEEN_CR;
4832 else
4833 this_eol = EOL_SEEN_CRLF;
4834
4835 if (eol_seen == EOL_SEEN_NONE)
4836 /* This is the first end-of-line. */
4837 eol_seen = this_eol;
4838 else if (eol_seen != this_eol)
4839 {
4840 /* The found type is different from what found before. */
4841 eol_seen = EOL_SEEN_LF;
4842 break;
4843 }
4844 if (++total == MAX_EOL_CHECK_COUNT)
4845 break;
4846 }
4847 src += 2;
4848 }
4849 }
4850 else
4851 {
4852 while (src < src_end)
4853 {
4854 c = *src++;
4855 if (c == '\n' || c == '\r')
4856 {
4857 int this_eol;
4858
4859 if (c == '\n')
4860 this_eol = EOL_SEEN_LF;
4861 else if (src >= src_end || *src != '\n')
4862 this_eol = EOL_SEEN_CR;
4863 else
4864 this_eol = EOL_SEEN_CRLF, src++;
4865
4866 if (eol_seen == EOL_SEEN_NONE)
4867 /* This is the first end-of-line. */
4868 eol_seen = this_eol;
4869 else if (eol_seen != this_eol)
4870 {
4871 /* The found type is different from what found before. */
4872 eol_seen = EOL_SEEN_LF;
4873 break;
4874 }
4875 if (++total == MAX_EOL_CHECK_COUNT)
4876 break;
4877 }
4878 }
4879 }
4880 return eol_seen;
4881 }
4882
4883
4884 static void
4885 adjust_coding_eol_type (coding, eol_seen)
4886 struct coding_system *coding;
4887 int eol_seen;
4888 {
4889 Lisp_Object eol_type;
4890
4891 eol_type = CODING_ID_EOL_TYPE (coding->id);
4892 if (eol_seen & EOL_SEEN_LF)
4893 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4894 else if (eol_type & EOL_SEEN_CRLF)
4895 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4896 else if (eol_type & EOL_SEEN_CR)
4897 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
4898 }
4899
4900 /* Detect how a text specified in CODING is encoded. If a coding
4901 system is detected, update fields of CODING by the detected coding
4902 system. */
4903
4904 void
4905 detect_coding (coding)
4906 struct coding_system *coding;
4907 {
4908 unsigned char *src, *src_end;
4909 Lisp_Object attrs, coding_type;
4910
4911 coding->consumed = coding->consumed_char = 0;
4912 coding->produced = coding->produced_char = 0;
4913 coding_set_source (coding);
4914
4915 src_end = coding->source + coding->src_bytes;
4916
4917 /* If we have not yet decided the text encoding type, detect it
4918 now. */
4919 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
4920 {
4921 int mask = CATEGORY_MASK_ANY;
4922 int c, i;
4923
4924 for (src = coding->source; src < src_end; src++)
4925 {
4926 c = *src;
4927 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
4928 || c == ISO_CODE_SI
4929 || c == ISO_CODE_SO)))
4930 break;
4931 }
4932 coding->head_ascii = src - (coding->source + coding->consumed);
4933
4934 if (coding->head_ascii < coding->src_bytes)
4935 {
4936 int detected = 0;
4937
4938 for (i = 0; i < coding_category_raw_text; i++)
4939 {
4940 enum coding_category category = coding_priorities[i];
4941 struct coding_system *this = coding_categories + category;
4942
4943 if (category >= coding_category_raw_text
4944 || detected & (1 << category))
4945 continue;
4946
4947 if (this->id < 0)
4948 {
4949 /* No coding system of this category is defined. */
4950 mask &= ~(1 << category);
4951 }
4952 else
4953 {
4954 detected |= detected_mask[category];
4955 if ((*(this->detector)) (coding, &mask))
4956 break;
4957 }
4958 }
4959 if (! mask)
4960 setup_coding_system (Qraw_text, coding);
4961 else if (mask != CATEGORY_MASK_ANY)
4962 for (i = 0; i < coding_category_raw_text; i++)
4963 {
4964 enum coding_category category = coding_priorities[i];
4965 struct coding_system *this = coding_categories + category;
4966
4967 if (mask & (1 << category))
4968 {
4969 setup_coding_system (CODING_ID_NAME (this->id), coding);
4970 break;
4971 }
4972 }
4973 }
4974 }
4975
4976 attrs = CODING_ID_ATTRS (coding->id);
4977 coding_type = CODING_ATTR_TYPE (attrs);
4978
4979 /* If we have not yet decided the EOL type, detect it now. But, the
4980 detection is impossible for a CCL based coding system, in which
4981 case, we detct the EOL type after decoding. */
4982 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
4983 && ! EQ (coding_type, Qccl))
4984 {
4985 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
4986
4987 if (eol_seen != EOL_SEEN_NONE)
4988 adjust_coding_eol_type (coding, eol_seen);
4989 }
4990 }
4991
4992
4993 static void
4994 decode_eol (coding)
4995 struct coding_system *coding;
4996 {
4997 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
4998 {
4999 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5000 unsigned char *pend = p + coding->produced;
5001 int eol_seen = EOL_SEEN_NONE;
5002
5003 for (; p < pend; p++)
5004 {
5005 if (*p == '\n')
5006 eol_seen |= EOL_SEEN_LF;
5007 else if (*p == '\r')
5008 {
5009 if (p + 1 < pend && *(p + 1) == '\n')
5010 {
5011 eol_seen |= EOL_SEEN_CRLF;
5012 p++;
5013 }
5014 else
5015 eol_seen |= EOL_SEEN_CR;
5016 }
5017 }
5018 if (eol_seen != EOL_SEEN_NONE)
5019 adjust_coding_eol_type (coding, eol_seen);
5020 }
5021
5022 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5023 {
5024 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5025 unsigned char *pend = p + coding->produced;
5026
5027 for (; p < pend; p++)
5028 if (*p == '\r')
5029 *p = '\n';
5030 }
5031 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5032 {
5033 unsigned char *p, *pbeg, *pend;
5034 Lisp_Object undo_list;
5035
5036 move_gap_both (coding->dst_pos + coding->produced_char,
5037 coding->dst_pos_byte + coding->produced);
5038 undo_list = current_buffer->undo_list;
5039 current_buffer->undo_list = Qt;
5040 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5041 current_buffer->undo_list = undo_list;
5042 pbeg = GPT_ADDR;
5043 pend = pbeg + coding->produced;
5044
5045 for (p = pend - 1; p >= pbeg; p--)
5046 if (*p == '\r')
5047 {
5048 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5049 pend--;
5050 }
5051 coding->produced_char -= coding->produced - (pend - pbeg);
5052 coding->produced = pend - pbeg;
5053 insert_from_gap (coding->produced_char, coding->produced);
5054 }
5055 }
5056
5057 static void
5058 translate_chars (coding, table)
5059 struct coding_system *coding;
5060 Lisp_Object table;
5061 {
5062 int *charbuf = coding->charbuf;
5063 int *charbuf_end = charbuf + coding->charbuf_used;
5064 int c;
5065
5066 if (coding->chars_at_source)
5067 return;
5068
5069 while (charbuf < charbuf_end)
5070 {
5071 c = *charbuf;
5072 if (c < 0)
5073 charbuf += c;
5074 else
5075 *charbuf++ = translate_char (table, c);
5076 }
5077 }
5078
5079 static int
5080 produce_chars (coding)
5081 struct coding_system *coding;
5082 {
5083 unsigned char *dst = coding->destination + coding->produced;
5084 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5085 int produced;
5086 int produced_chars = 0;
5087
5088 if (! coding->chars_at_source)
5089 {
5090 /* Characters are in coding->charbuf. */
5091 int *buf = coding->charbuf;
5092 int *buf_end = buf + coding->charbuf_used;
5093 unsigned char *adjusted_dst_end;
5094
5095 if (BUFFERP (coding->src_object)
5096 && EQ (coding->src_object, coding->dst_object))
5097 dst_end = coding->source + coding->consumed;
5098 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5099
5100 while (buf < buf_end)
5101 {
5102 int c = *buf++;
5103
5104 if (dst >= adjusted_dst_end)
5105 {
5106 dst = alloc_destination (coding,
5107 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5108 dst);
5109 dst_end = coding->destination + coding->dst_bytes;
5110 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5111 }
5112 if (c >= 0)
5113 {
5114 if (coding->dst_multibyte
5115 || ! CHAR_BYTE8_P (c))
5116 CHAR_STRING_ADVANCE (c, dst);
5117 else
5118 *dst++ = CHAR_TO_BYTE8 (c);
5119 produced_chars++;
5120 }
5121 else
5122 /* This is an annotation data. */
5123 buf -= c + 1;
5124 }
5125 }
5126 else
5127 {
5128 unsigned char *src = coding->source;
5129 unsigned char *src_end = src + coding->src_bytes;
5130 Lisp_Object eol_type;
5131
5132 eol_type = CODING_ID_EOL_TYPE (coding->id);
5133
5134 if (coding->src_multibyte != coding->dst_multibyte)
5135 {
5136 if (coding->src_multibyte)
5137 {
5138 int multibytep = 1;
5139 int consumed_chars;
5140
5141 while (1)
5142 {
5143 unsigned char *src_base = src;
5144 int c;
5145
5146 ONE_MORE_BYTE (c);
5147 if (c == '\r')
5148 {
5149 if (EQ (eol_type, Qdos))
5150 {
5151 if (src < src_end
5152 && *src == '\n')
5153 c = *src++;
5154 }
5155 else if (EQ (eol_type, Qmac))
5156 c = '\n';
5157 }
5158 if (dst == dst_end)
5159 {
5160 coding->consumed = src - coding->source;
5161
5162 if (EQ (coding->src_object, coding->dst_object))
5163 dst_end = src;
5164 if (dst == dst_end)
5165 {
5166 dst = alloc_destination (coding, src_end - src + 1,
5167 dst);
5168 dst_end = coding->destination + coding->dst_bytes;
5169 coding_set_source (coding);
5170 src = coding->source + coding->consumed;
5171 src_end = coding->source + coding->src_bytes;
5172 }
5173 }
5174 *dst++ = c;
5175 produced_chars++;
5176 }
5177 no_more_source:
5178 ;
5179 }
5180 else
5181 while (src < src_end)
5182 {
5183 int multibytep = 1;
5184 int c = *src++;
5185
5186 if (c == '\r')
5187 {
5188 if (EQ (eol_type, Qdos))
5189 {
5190 if (src < src_end
5191 && *src == '\n')
5192 c = *src++;
5193 }
5194 else if (EQ (eol_type, Qmac))
5195 c = '\n';
5196 }
5197 if (dst >= dst_end - 1)
5198 {
5199 coding->consumed = src - coding->source;
5200
5201 if (EQ (coding->src_object, coding->dst_object))
5202 dst_end = src;
5203 if (dst >= dst_end - 1)
5204 {
5205 dst = alloc_destination (coding, src_end - src + 2,
5206 dst);
5207 dst_end = coding->destination + coding->dst_bytes;
5208 coding_set_source (coding);
5209 src = coding->source + coding->consumed;
5210 src_end = coding->source + coding->src_bytes;
5211 }
5212 }
5213 EMIT_ONE_BYTE (c);
5214 }
5215 }
5216 else
5217 {
5218 if (!EQ (coding->src_object, coding->dst_object))
5219 {
5220 int require = coding->src_bytes - coding->dst_bytes;
5221
5222 if (require > 0)
5223 {
5224 EMACS_INT offset = src - coding->source;
5225
5226 dst = alloc_destination (coding, require, dst);
5227 coding_set_source (coding);
5228 src = coding->source + offset;
5229 src_end = coding->source + coding->src_bytes;
5230 }
5231 }
5232 produced_chars = coding->src_chars;
5233 while (src < src_end)
5234 {
5235 int c = *src++;
5236
5237 if (c == '\r')
5238 {
5239 if (EQ (eol_type, Qdos))
5240 {
5241 if (src < src_end
5242 && *src == '\n')
5243 c = *src++;
5244 produced_chars--;
5245 }
5246 else if (EQ (eol_type, Qmac))
5247 c = '\n';
5248 }
5249 *dst++ = c;
5250 }
5251 }
5252 coding->consumed = coding->src_bytes;
5253 coding->consumed_char = coding->src_chars;
5254 }
5255
5256 produced = dst - (coding->destination + coding->produced);
5257 if (BUFFERP (coding->dst_object))
5258 insert_from_gap (produced_chars, produced);
5259 coding->produced += produced;
5260 coding->produced_char += produced_chars;
5261 return produced_chars;
5262 }
5263
5264 /* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5265 or
5266 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5267 */
5268
5269 static INLINE void
5270 produce_composition (coding, charbuf)
5271 struct coding_system *coding;
5272 int *charbuf;
5273 {
5274 Lisp_Object buffer;
5275 int len;
5276 EMACS_INT pos;
5277 enum composition_method method;
5278 int cmp_len;
5279 Lisp_Object components;
5280
5281 buffer = coding->dst_object;
5282 len = -charbuf[0];
5283 pos = coding->dst_pos + charbuf[1];
5284 method = (enum composition_method) (charbuf[3]);
5285 cmp_len = charbuf[4];
5286
5287 if (method == COMPOSITION_RELATIVE)
5288 components = Qnil;
5289 else
5290 {
5291 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5292 int i;
5293
5294 len -= 5;
5295 charbuf += 5;
5296 for (i = 0; i < len; i++)
5297 args[i] = make_number (charbuf[i]);
5298 components = (method == COMPOSITION_WITH_ALTCHARS
5299 ? Fstring (len, args) : Fvector (len, args));
5300 }
5301 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5302 }
5303
5304 static int *
5305 save_composition_data (buf, buf_end, prop)
5306 int *buf, *buf_end;
5307 Lisp_Object prop;
5308 {
5309 enum composition_method method = COMPOSITION_METHOD (prop);
5310 int cmp_len = COMPOSITION_LENGTH (prop);
5311
5312 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5313 return NULL;
5314
5315 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5316 buf[2] = method;
5317 buf[3] = cmp_len;
5318
5319 if (method == COMPOSITION_RELATIVE)
5320 buf[0] = 4;
5321 else
5322 {
5323 Lisp_Object components;
5324 int len, i;
5325
5326 components = COMPOSITION_COMPONENTS (prop);
5327 if (VECTORP (components))
5328 {
5329 len = XVECTOR (components)->size;
5330 for (i = 0; i < len; i++)
5331 buf[4 + i] = XINT (AREF (components, i));
5332 }
5333 else if (STRINGP (components))
5334 {
5335 int i_byte;
5336
5337 len = XSTRING (components)->size;
5338 i = i_byte = 0;
5339 while (i < len)
5340 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5341 }
5342 else if (INTEGERP (components))
5343 {
5344 len = 1;
5345 buf[4] = XINT (components);
5346 }
5347 else if (CONSP (components))
5348 {
5349 for (len = 0; CONSP (components);
5350 len++, components = XCDR (components))
5351 buf[4 + len] = XINT (XCAR (components));
5352 }
5353 else
5354 abort ();
5355 buf[0] = 4 + len;
5356 }
5357 return (buf + buf[0]);
5358 }
5359
5360 #define CHARBUF_SIZE 0x4000
5361
5362 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5363 do { \
5364 int size = CHARBUF_SIZE;; \
5365 \
5366 coding->charbuf = NULL; \
5367 while (size > 1024) \
5368 { \
5369 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5370 if (coding->charbuf) \
5371 break; \
5372 size >>= 1; \
5373 } \
5374 if (! coding->charbuf) \
5375 { \
5376 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5377 return coding->result; \
5378 } \
5379 coding->charbuf_size = size; \
5380 } while (0)
5381
5382
5383 static void
5384 produce_annotation (coding)
5385 struct coding_system *coding;
5386 {
5387 int *charbuf = coding->charbuf;
5388 int *charbuf_end = charbuf + coding->charbuf_used;
5389
5390 while (charbuf < charbuf_end)
5391 {
5392 if (*charbuf >= 0)
5393 charbuf++;
5394 else
5395 {
5396 int len = -*charbuf;
5397 switch (charbuf[2])
5398 {
5399 case CODING_ANNOTATE_COMPOSITION_MASK:
5400 produce_composition (coding, charbuf);
5401 break;
5402 default:
5403 abort ();
5404 }
5405 charbuf += len;
5406 }
5407 }
5408 }
5409
5410 /* Decode the data at CODING->src_object into CODING->dst_object.
5411 CODING->src_object is a buffer, a string, or nil.
5412 CODING->dst_object is a buffer.
5413
5414 If CODING->src_object is a buffer, it must be the current buffer.
5415 In this case, if CODING->src_pos is positive, it is a position of
5416 the source text in the buffer, otherwise, the source text is in the
5417 gap area of the buffer, and CODING->src_pos specifies the offset of
5418 the text from GPT (which must be the same as PT). If this is the
5419 same buffer as CODING->dst_object, CODING->src_pos must be
5420 negative.
5421
5422 If CODING->src_object is a string, CODING->src_pos in an index to
5423 that string.
5424
5425 If CODING->src_object is nil, CODING->source must already point to
5426 the non-relocatable memory area. In this case, CODING->src_pos is
5427 an offset from CODING->source.
5428
5429 The decoded data is inserted at the current point of the buffer
5430 CODING->dst_object.
5431 */
5432
5433 static int
5434 decode_coding (coding)
5435 struct coding_system *coding;
5436 {
5437 Lisp_Object attrs;
5438
5439 if (BUFFERP (coding->src_object)
5440 && coding->src_pos > 0
5441 && coding->src_pos < GPT
5442 && coding->src_pos + coding->src_chars > GPT)
5443 move_gap_both (coding->src_pos, coding->src_pos_byte);
5444
5445 if (BUFFERP (coding->dst_object))
5446 {
5447 if (current_buffer != XBUFFER (coding->dst_object))
5448 set_buffer_internal (XBUFFER (coding->dst_object));
5449 if (GPT != PT)
5450 move_gap_both (PT, PT_BYTE);
5451 }
5452
5453 coding->consumed = coding->consumed_char = 0;
5454 coding->produced = coding->produced_char = 0;
5455 coding->chars_at_source = 0;
5456 coding->result = CODING_RESULT_SUCCESS;
5457 coding->errors = 0;
5458
5459 ALLOC_CONVERSION_WORK_AREA (coding);
5460
5461 attrs = CODING_ID_ATTRS (coding->id);
5462
5463 do
5464 {
5465 coding_set_source (coding);
5466 coding->annotated = 0;
5467 (*(coding->decoder)) (coding);
5468 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5469 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5470 coding_set_destination (coding);
5471 produce_chars (coding);
5472 if (coding->annotated)
5473 produce_annotation (coding);
5474 }
5475 while (coding->consumed < coding->src_bytes
5476 && ! coding->result);
5477
5478 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5479 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5480 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5481 decode_eol (coding);
5482
5483 coding->carryover_bytes = 0;
5484 if (coding->consumed < coding->src_bytes)
5485 {
5486 int nbytes = coding->src_bytes - coding->consumed;
5487 unsigned char *src;
5488
5489 coding_set_source (coding);
5490 coding_set_destination (coding);
5491 src = coding->source + coding->consumed;
5492
5493 if (coding->mode & CODING_MODE_LAST_BLOCK)
5494 {
5495 /* Flush out unprocessed data as binary chars. We are sure
5496 that the number of data is less than the size of
5497 coding->charbuf. */
5498 int *charbuf = coding->charbuf;
5499
5500 while (nbytes-- > 0)
5501 {
5502 int c = *src++;
5503 *charbuf++ = (c & 0x80 ? - c : c);
5504 }
5505 produce_chars (coding);
5506 }
5507 else
5508 {
5509 /* Record unprocessed bytes in coding->carryover. We are
5510 sure that the number of data is less than the size of
5511 coding->carryover. */
5512 unsigned char *p = coding->carryover;
5513
5514 coding->carryover_bytes = nbytes;
5515 while (nbytes-- > 0)
5516 *p++ = *src++;
5517 }
5518 coding->consumed = coding->src_bytes;
5519 }
5520
5521 return coding->result;
5522 }
5523
5524 static void
5525 consume_chars (coding)
5526 struct coding_system *coding;
5527 {
5528 int *buf = coding->charbuf;
5529 /* -1 is to compensate for CRLF. */
5530 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5531 unsigned char *src = coding->source + coding->consumed;
5532 int pos = coding->src_pos + coding->consumed_char;
5533 int end_pos = coding->src_pos + coding->src_chars;
5534 int multibytep = coding->src_multibyte;
5535 Lisp_Object eol_type;
5536 int c;
5537 int start, end, stop;
5538 Lisp_Object object, prop;
5539
5540 eol_type = CODING_ID_EOL_TYPE (coding->id);
5541 if (VECTORP (eol_type))
5542 eol_type = Qunix;
5543
5544 object = coding->src_object;
5545
5546 /* Note: composition handling is not yet implemented. */
5547 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5548
5549 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5550 && find_composition (pos, end_pos, &start, &end, &prop, object)
5551 && end <= end_pos
5552 && (start >= pos
5553 || (find_composition (end, end_pos, &start, &end, &prop, object)
5554 && end <= end_pos)))
5555 stop = start;
5556 else
5557 stop = end_pos;
5558
5559 while (buf < buf_end)
5560 {
5561 if (pos == stop)
5562 {
5563 int *p;
5564
5565 if (pos == end_pos)
5566 break;
5567 p = save_composition_data (buf, buf_end, prop);
5568 if (p == NULL)
5569 break;
5570 buf = p;
5571 if (find_composition (end, end_pos, &start, &end, &prop, object)
5572 && end <= end_pos)
5573 stop = start;
5574 else
5575 stop = end_pos;
5576 }
5577
5578 if (! multibytep)
5579 c = *src++;
5580 else
5581 c = STRING_CHAR_ADVANCE (src);
5582 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5583 c = '\n';
5584 if (! EQ (eol_type, Qunix))
5585 {
5586 if (c == '\n')
5587 {
5588 if (EQ (eol_type, Qdos))
5589 *buf++ = '\r';
5590 else
5591 c = '\r';
5592 }
5593 }
5594 *buf++ = c;
5595 pos++;
5596 }
5597
5598 coding->consumed = src - coding->source;
5599 coding->consumed_char = pos - coding->src_pos;
5600 coding->charbuf_used = buf - coding->charbuf;
5601 coding->chars_at_source = 0;
5602 }
5603
5604
5605 /* Encode the text at CODING->src_object into CODING->dst_object.
5606 CODING->src_object is a buffer or a string.
5607 CODING->dst_object is a buffer or nil.
5608
5609 If CODING->src_object is a buffer, it must be the current buffer.
5610 In this case, if CODING->src_pos is positive, it is a position of
5611 the source text in the buffer, otherwise. the source text is in the
5612 gap area of the buffer, and coding->src_pos specifies the offset of
5613 the text from GPT (which must be the same as PT). If this is the
5614 same buffer as CODING->dst_object, CODING->src_pos must be
5615 negative and CODING should not have `pre-write-conversion'.
5616
5617 If CODING->src_object is a string, CODING should not have
5618 `pre-write-conversion'.
5619
5620 If CODING->dst_object is a buffer, the encoded data is inserted at
5621 the current point of that buffer.
5622
5623 If CODING->dst_object is nil, the encoded data is placed at the
5624 memory area specified by CODING->destination. */
5625
5626 static int
5627 encode_coding (coding)
5628 struct coding_system *coding;
5629 {
5630 Lisp_Object attrs;
5631
5632 attrs = CODING_ID_ATTRS (coding->id);
5633
5634 if (BUFFERP (coding->dst_object))
5635 {
5636 set_buffer_internal (XBUFFER (coding->dst_object));
5637 coding->dst_multibyte
5638 = ! NILP (current_buffer->enable_multibyte_characters);
5639 }
5640
5641 coding->consumed = coding->consumed_char = 0;
5642 coding->produced = coding->produced_char = 0;
5643 coding->result = CODING_RESULT_SUCCESS;
5644 coding->errors = 0;
5645
5646 ALLOC_CONVERSION_WORK_AREA (coding);
5647
5648 do {
5649 coding_set_source (coding);
5650 consume_chars (coding);
5651
5652 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5653 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5654
5655 coding_set_destination (coding);
5656 (*(coding->encoder)) (coding);
5657 } while (coding->consumed_char < coding->src_chars);
5658
5659 if (BUFFERP (coding->dst_object))
5660 insert_from_gap (coding->produced_char, coding->produced);
5661
5662 return (coding->result);
5663 }
5664
5665 /* Work buffer */
5666
5667 /* List of currently used working buffer. */
5668 Lisp_Object Vcode_conversion_work_buf_list;
5669
5670 /* A working buffer used by the top level conversion. */
5671 Lisp_Object Vcode_conversion_reused_work_buf;
5672
5673
5674 /* Return a working buffer that can be freely used by the following
5675 code conversion. MULTIBYTEP specifies the multibyteness of the
5676 buffer. */
5677
5678 Lisp_Object
5679 make_conversion_work_buffer (multibytep)
5680 int multibytep;
5681 {
5682 struct buffer *current = current_buffer;
5683 Lisp_Object buf;
5684
5685 if (NILP (Vcode_conversion_work_buf_list))
5686 {
5687 if (NILP (Vcode_conversion_reused_work_buf))
5688 Vcode_conversion_reused_work_buf
5689 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5690 Vcode_conversion_work_buf_list
5691 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
5692 }
5693 else
5694 {
5695 int depth = Flength (Vcode_conversion_work_buf_list);
5696 char str[128];
5697
5698 sprintf (str, " *code-conversion-work*<%d>", depth);
5699 Vcode_conversion_work_buf_list
5700 = Fcons (Fget_buffer_create (build_string (str)),
5701 Vcode_conversion_work_buf_list);
5702 }
5703
5704 buf = XCAR (Vcode_conversion_work_buf_list);
5705 set_buffer_internal (XBUFFER (buf));
5706 current_buffer->undo_list = Qt;
5707 Ferase_buffer ();
5708 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5709 set_buffer_internal (current);
5710 return buf;
5711 }
5712
5713 static struct coding_system *saved_coding;
5714
5715 Lisp_Object
5716 code_conversion_restore (info)
5717 Lisp_Object info;
5718 {
5719 int depth = Flength (Vcode_conversion_work_buf_list);
5720 Lisp_Object buf;
5721
5722 if (depth > 0)
5723 {
5724 buf = XCAR (Vcode_conversion_work_buf_list);
5725 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5726 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5727 Fkill_buffer (buf);
5728 }
5729
5730 if (saved_coding->dst_object == Qt
5731 && saved_coding->destination)
5732 xfree (saved_coding->destination);
5733
5734 return save_excursion_restore (info);
5735 }
5736
5737
5738 int
5739 decode_coding_gap (coding, chars, bytes)
5740 struct coding_system *coding;
5741 EMACS_INT chars, bytes;
5742 {
5743 int count = specpdl_ptr - specpdl;
5744
5745 saved_coding = coding;
5746 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5747
5748 coding->src_object = Fcurrent_buffer ();
5749 coding->src_chars = chars;
5750 coding->src_bytes = bytes;
5751 coding->src_pos = -chars;
5752 coding->src_pos_byte = -bytes;
5753 coding->src_multibyte = chars < bytes;
5754 coding->dst_object = coding->src_object;
5755 coding->dst_pos = PT;
5756 coding->dst_pos_byte = PT_BYTE;
5757 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
5758
5759 if (CODING_REQUIRE_DETECTION (coding))
5760 detect_coding (coding);
5761
5762 decode_coding (coding);
5763
5764 unbind_to (count, Qnil);
5765 return coding->result;
5766 }
5767
5768 int
5769 encode_coding_gap (coding, chars, bytes)
5770 struct coding_system *coding;
5771 EMACS_INT chars, bytes;
5772 {
5773 int count = specpdl_ptr - specpdl;
5774 Lisp_Object buffer;
5775
5776 saved_coding = coding;
5777 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5778
5779 buffer = Fcurrent_buffer ();
5780 coding->src_object = buffer;
5781 coding->src_chars = chars;
5782 coding->src_bytes = bytes;
5783 coding->src_pos = -chars;
5784 coding->src_pos_byte = -bytes;
5785 coding->src_multibyte = chars < bytes;
5786 coding->dst_object = coding->src_object;
5787 coding->dst_pos = PT;
5788 coding->dst_pos_byte = PT_BYTE;
5789
5790 encode_coding (coding);
5791
5792 unbind_to (count, Qnil);
5793 return coding->result;
5794 }
5795
5796
5797 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5798 SRC_OBJECT into DST_OBJECT by coding context CODING.
5799
5800 SRC_OBJECT is a buffer, a string, or Qnil.
5801
5802 If it is a buffer, the text is at point of the buffer. FROM and TO
5803 are positions in the buffer.
5804
5805 If it is a string, the text is at the beginning of the string.
5806 FROM and TO are indices to the string.
5807
5808 If it is nil, the text is at coding->source. FROM and TO are
5809 indices to coding->source.
5810
5811 DST_OBJECT is a buffer, Qt, or Qnil.
5812
5813 If it is a buffer, the decoded text is inserted at point of the
5814 buffer. If the buffer is the same as SRC_OBJECT, the source text
5815 is deleted.
5816
5817 If it is Qt, a string is made from the decoded text, and
5818 set in CODING->dst_object.
5819
5820 If it is Qnil, the decoded text is stored at CODING->destination.
5821 The called must allocate CODING->dst_bytes bytes at
5822 CODING->destination by xmalloc. If the decoded text is longer than
5823 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5824 */
5825
5826 void
5827 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5828 dst_object)
5829 struct coding_system *coding;
5830 Lisp_Object src_object;
5831 EMACS_INT from, from_byte, to, to_byte;
5832 Lisp_Object dst_object;
5833 {
5834 int count = specpdl_ptr - specpdl;
5835 unsigned char *destination;
5836 EMACS_INT dst_bytes;
5837 EMACS_INT chars = to - from;
5838 EMACS_INT bytes = to_byte - from_byte;
5839 Lisp_Object attrs;
5840
5841 saved_coding = coding;
5842 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5843
5844 if (NILP (dst_object))
5845 {
5846 destination = coding->destination;
5847 dst_bytes = coding->dst_bytes;
5848 }
5849
5850 coding->src_object = src_object;
5851 coding->src_chars = chars;
5852 coding->src_bytes = bytes;
5853 coding->src_multibyte = chars < bytes;
5854
5855 if (STRINGP (src_object))
5856 {
5857 coding->src_pos = from;
5858 coding->src_pos_byte = from_byte;
5859 }
5860 else if (BUFFERP (src_object))
5861 {
5862 set_buffer_internal (XBUFFER (src_object));
5863 if (from != GPT)
5864 move_gap_both (from, from_byte);
5865 if (EQ (src_object, dst_object))
5866 {
5867 TEMP_SET_PT_BOTH (from, from_byte);
5868 del_range_both (from, from_byte, to, to_byte, 1);
5869 coding->src_pos = -chars;
5870 coding->src_pos_byte = -bytes;
5871 }
5872 else
5873 {
5874 coding->src_pos = from;
5875 coding->src_pos_byte = from_byte;
5876 }
5877 }
5878
5879 if (CODING_REQUIRE_DETECTION (coding))
5880 detect_coding (coding);
5881 attrs = CODING_ID_ATTRS (coding->id);
5882
5883 if (! NILP (CODING_ATTR_POST_READ (attrs))
5884 || EQ (dst_object, Qt))
5885 {
5886 coding->dst_object = make_conversion_work_buffer (1);
5887 coding->dst_pos = BEG;
5888 coding->dst_pos_byte = BEG_BYTE;
5889 coding->dst_multibyte = 1;
5890 }
5891 else if (BUFFERP (dst_object))
5892 {
5893 coding->dst_object = dst_object;
5894 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5895 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5896 coding->dst_multibyte
5897 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
5898 }
5899 else
5900 {
5901 coding->dst_object = Qnil;
5902 coding->dst_multibyte = 1;
5903 }
5904
5905 decode_coding (coding);
5906
5907 if (BUFFERP (coding->dst_object))
5908 set_buffer_internal (XBUFFER (coding->dst_object));
5909
5910 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5911 {
5912 struct gcpro gcpro1, gcpro2;
5913 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
5914 Lisp_Object val;
5915
5916 GCPRO2 (coding->src_object, coding->dst_object);
5917 val = call1 (CODING_ATTR_POST_READ (attrs),
5918 make_number (coding->produced_char));
5919 UNGCPRO;
5920 CHECK_NATNUM (val);
5921 coding->produced_char += Z - prev_Z;
5922 coding->produced += Z_BYTE - prev_Z_BYTE;
5923 }
5924
5925 if (EQ (dst_object, Qt))
5926 {
5927 coding->dst_object = Fbuffer_string ();
5928 }
5929 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
5930 {
5931 set_buffer_internal (XBUFFER (coding->dst_object));
5932 if (dst_bytes < coding->produced)
5933 {
5934 destination
5935 = (unsigned char *) xrealloc (destination, coding->produced);
5936 if (! destination)
5937 {
5938 coding->result = CODING_RESULT_INSUFFICIENT_DST;
5939 unbind_to (count, Qnil);
5940 return;
5941 }
5942 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
5943 move_gap_both (BEGV, BEGV_BYTE);
5944 bcopy (BEGV_ADDR, destination, coding->produced);
5945 coding->destination = destination;
5946 }
5947 }
5948
5949 unbind_to (count, Qnil);
5950 }
5951
5952
5953 void
5954 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5955 dst_object)
5956 struct coding_system *coding;
5957 Lisp_Object src_object;
5958 EMACS_INT from, from_byte, to, to_byte;
5959 Lisp_Object dst_object;
5960 {
5961 int count = specpdl_ptr - specpdl;
5962 EMACS_INT chars = to - from;
5963 EMACS_INT bytes = to_byte - from_byte;
5964 Lisp_Object attrs;
5965
5966 saved_coding = coding;
5967 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5968
5969 coding->src_object = src_object;
5970 coding->src_chars = chars;
5971 coding->src_bytes = bytes;
5972 coding->src_multibyte = chars < bytes;
5973
5974 attrs = CODING_ID_ATTRS (coding->id);
5975
5976 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5977 {
5978 Lisp_Object val;
5979
5980 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
5981 set_buffer_internal (XBUFFER (coding->src_object));
5982 if (STRINGP (src_object))
5983 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
5984 else if (BUFFERP (src_object))
5985 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
5986 else
5987 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
5988
5989 if (EQ (src_object, dst_object))
5990 {
5991 set_buffer_internal (XBUFFER (src_object));
5992 del_range_both (from, from_byte, to, to_byte, 1);
5993 set_buffer_internal (XBUFFER (coding->src_object));
5994 }
5995
5996 val = call2 (CODING_ATTR_PRE_WRITE (attrs),
5997 make_number (1), make_number (chars));
5998 CHECK_NATNUM (val);
5999 if (BEG != GPT)
6000 move_gap_both (BEG, BEG_BYTE);
6001 coding->src_chars = Z - BEG;
6002 coding->src_bytes = Z_BYTE - BEG_BYTE;
6003 coding->src_pos = BEG;
6004 coding->src_pos_byte = BEG_BYTE;
6005 coding->src_multibyte = Z < Z_BYTE;
6006 }
6007 else if (STRINGP (src_object))
6008 {
6009 coding->src_pos = from;
6010 coding->src_pos_byte = from_byte;
6011 }
6012 else if (BUFFERP (src_object))
6013 {
6014 set_buffer_internal (XBUFFER (src_object));
6015 if (from != GPT)
6016 move_gap_both (from, from_byte);
6017 if (EQ (src_object, dst_object))
6018 {
6019 del_range_both (from, from_byte, to, to_byte, 1);
6020 coding->src_pos = -chars;
6021 coding->src_pos_byte = -bytes;
6022 }
6023 else
6024 {
6025 coding->src_pos = from;
6026 coding->src_pos_byte = from_byte;
6027 }
6028 }
6029
6030 if (BUFFERP (dst_object))
6031 {
6032 coding->dst_object = dst_object;
6033 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6034 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6035 coding->dst_multibyte
6036 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6037 }
6038 else if (EQ (dst_object, Qt))
6039 {
6040 coding->dst_object = Qnil;
6041 coding->destination = (unsigned char *) xmalloc (coding->src_chars);
6042 coding->dst_bytes = coding->src_chars;
6043 coding->dst_multibyte = 0;
6044 }
6045 else
6046 {
6047 coding->dst_object = Qnil;
6048 coding->dst_multibyte = 0;
6049 }
6050
6051 encode_coding (coding);
6052
6053 if (EQ (dst_object, Qt))
6054 {
6055 if (BUFFERP (coding->dst_object))
6056 coding->dst_object = Fbuffer_string ();
6057 else
6058 {
6059 coding->dst_object
6060 = make_unibyte_string ((char *) coding->destination,
6061 coding->produced);
6062 xfree (coding->destination);
6063 }
6064 }
6065
6066 unbind_to (count, Qnil);
6067 }
6068
6069
6070 Lisp_Object
6071 preferred_coding_system ()
6072 {
6073 int id = coding_categories[coding_priorities[0]].id;
6074
6075 return CODING_ID_NAME (id);
6076 }
6077
6078 \f
6079 #ifdef emacs
6080 /*** 8. Emacs Lisp library functions ***/
6081
6082 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6083 doc: /* Return t if OBJECT is nil or a coding-system.
6084 See the documentation of `define-coding-system' for information
6085 about coding-system objects. */)
6086 (obj)
6087 Lisp_Object obj;
6088 {
6089 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6090 }
6091
6092 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6093 Sread_non_nil_coding_system, 1, 1, 0,
6094 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6095 (prompt)
6096 Lisp_Object prompt;
6097 {
6098 Lisp_Object val;
6099 do
6100 {
6101 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6102 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6103 }
6104 while (XSTRING (val)->size == 0);
6105 return (Fintern (val, Qnil));
6106 }
6107
6108 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6109 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6110 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6111 (prompt, default_coding_system)
6112 Lisp_Object prompt, default_coding_system;
6113 {
6114 Lisp_Object val;
6115 if (SYMBOLP (default_coding_system))
6116 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6117 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6118 Qt, Qnil, Qcoding_system_history,
6119 default_coding_system, Qnil);
6120 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6121 }
6122
6123 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6124 1, 1, 0,
6125 doc: /* Check validity of CODING-SYSTEM.
6126 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6127 It is valid if it is a symbol with a non-nil `coding-system' property.
6128 The value of property should be a vector of length 5. */)
6129 (coding_system)
6130 Lisp_Object coding_system;
6131 {
6132 CHECK_SYMBOL (coding_system);
6133 if (!NILP (Fcoding_system_p (coding_system)))
6134 return coding_system;
6135 while (1)
6136 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6137 }
6138
6139 \f
6140 Lisp_Object
6141 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6142 unsigned char *src;
6143 int src_bytes, highest;
6144 int multibytep;
6145 Lisp_Object coding_system;
6146 {
6147 unsigned char *src_end = src + src_bytes;
6148 int mask = CATEGORY_MASK_ANY;
6149 int detected = 0;
6150 int c, i;
6151 Lisp_Object attrs, eol_type;
6152 Lisp_Object val;
6153 struct coding_system coding;
6154
6155 if (NILP (coding_system))
6156 coding_system = Qundecided;
6157 setup_coding_system (coding_system, &coding);
6158 attrs = CODING_ID_ATTRS (coding.id);
6159 eol_type = CODING_ID_EOL_TYPE (coding.id);
6160
6161 coding.source = src;
6162 coding.src_bytes = src_bytes;
6163 coding.src_multibyte = multibytep;
6164 coding.consumed = 0;
6165
6166 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
6167 {
6168 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6169 }
6170 else
6171 {
6172 coding_system = Qnil;
6173 for (; src < src_end; src++)
6174 {
6175 c = *src;
6176 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6177 || c == ISO_CODE_SI
6178 || c == ISO_CODE_SO)))
6179 break;
6180 }
6181 coding.head_ascii = src - coding.source;
6182
6183 if (src < src_end)
6184 for (i = 0; i < coding_category_raw_text; i++)
6185 {
6186 enum coding_category category = coding_priorities[i];
6187 struct coding_system *this = coding_categories + category;
6188
6189 if (category >= coding_category_raw_text
6190 || detected & (1 << category))
6191 continue;
6192
6193 if (this->id < 0)
6194 {
6195 /* No coding system of this category is defined. */
6196 mask &= ~(1 << category);
6197 }
6198 else
6199 {
6200 detected |= detected_mask[category];
6201 if ((*(coding_categories[category].detector)) (&coding, &mask)
6202 && highest)
6203 {
6204 mask &= detected_mask[category];
6205 break;
6206 }
6207 }
6208 }
6209 }
6210
6211 if (!mask)
6212 val = Fcons (make_number (coding_category_raw_text), Qnil);
6213 else if (mask == CATEGORY_MASK_ANY)
6214 val = Fcons (make_number (coding_category_undecided), Qnil);
6215 else if (highest)
6216 {
6217 for (i = 0; i < coding_category_raw_text; i++)
6218 if (mask & (1 << coding_priorities[i]))
6219 {
6220 val = Fcons (make_number (coding_priorities[i]), Qnil);
6221 break;
6222 }
6223 }
6224 else
6225 {
6226 val = Qnil;
6227 for (i = coding_category_raw_text - 1; i >= 0; i--)
6228 if (mask & (1 << coding_priorities[i]))
6229 val = Fcons (make_number (coding_priorities[i]), val);
6230 }
6231
6232 {
6233 int one_byte_eol = -1, two_byte_eol = -1;
6234 Lisp_Object tail;
6235
6236 for (tail = val; CONSP (tail); tail = XCDR (tail))
6237 {
6238 struct coding_system *this
6239 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6240 : &coding);
6241 int this_eol;
6242
6243 attrs = CODING_ID_ATTRS (this->id);
6244 eol_type = CODING_ID_EOL_TYPE (this->id);
6245 XSETCAR (tail, CODING_ID_NAME (this->id));
6246 if (VECTORP (eol_type))
6247 {
6248 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6249 {
6250 if (two_byte_eol < 0)
6251 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6252 this_eol = two_byte_eol;
6253 }
6254 else
6255 {
6256 if (one_byte_eol < 0)
6257 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6258 this_eol = one_byte_eol;
6259 }
6260 if (this_eol == EOL_SEEN_LF)
6261 XSETCAR (tail, AREF (eol_type, 0));
6262 else if (this_eol == EOL_SEEN_CRLF)
6263 XSETCAR (tail, AREF (eol_type, 1));
6264 else if (this_eol == EOL_SEEN_CR)
6265 XSETCAR (tail, AREF (eol_type, 2));
6266 }
6267 }
6268 }
6269
6270 return (highest ? XCAR (val) : val);
6271 }
6272
6273
6274 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6275 2, 3, 0,
6276 doc: /* Detect coding system of the text in the region between START and END.
6277 Return a list of possible coding systems ordered by priority.
6278
6279 If only ASCII characters are found, it returns a list of single element
6280 `undecided' or its subsidiary coding system according to a detected
6281 end-of-line format.
6282
6283 If optional argument HIGHEST is non-nil, return the coding system of
6284 highest priority. */)
6285 (start, end, highest)
6286 Lisp_Object start, end, highest;
6287 {
6288 int from, to;
6289 int from_byte, to_byte;
6290
6291 CHECK_NUMBER_COERCE_MARKER (start);
6292 CHECK_NUMBER_COERCE_MARKER (end);
6293
6294 validate_region (&start, &end);
6295 from = XINT (start), to = XINT (end);
6296 from_byte = CHAR_TO_BYTE (from);
6297 to_byte = CHAR_TO_BYTE (to);
6298
6299 if (from < GPT && to >= GPT)
6300 move_gap_both (to, to_byte);
6301
6302 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6303 to_byte - from_byte,
6304 !NILP (highest),
6305 !NILP (current_buffer
6306 ->enable_multibyte_characters),
6307 Qnil);
6308 }
6309
6310 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6311 1, 2, 0,
6312 doc: /* Detect coding system of the text in STRING.
6313 Return a list of possible coding systems ordered by priority.
6314
6315 If only ASCII characters are found, it returns a list of single element
6316 `undecided' or its subsidiary coding system according to a detected
6317 end-of-line format.
6318
6319 If optional argument HIGHEST is non-nil, return the coding system of
6320 highest priority. */)
6321 (string, highest)
6322 Lisp_Object string, highest;
6323 {
6324 CHECK_STRING (string);
6325
6326 return detect_coding_system (XSTRING (string)->data,
6327 STRING_BYTES (XSTRING (string)),
6328 !NILP (highest),
6329 STRING_MULTIBYTE (string),
6330 Qnil);
6331 }
6332
6333
6334 static INLINE int
6335 char_encodable_p (c, attrs)
6336 int c;
6337 Lisp_Object attrs;
6338 {
6339 Lisp_Object tail;
6340 struct charset *charset;
6341
6342 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6343 CONSP (tail); tail = XCDR (tail))
6344 {
6345 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6346 if (CHAR_CHARSET_P (c, charset))
6347 break;
6348 }
6349 return (! NILP (tail));
6350 }
6351
6352
6353 /* Return a list of coding systems that safely encode the text between
6354 START and END. If EXCLUDE is non-nil, it is a list of coding
6355 systems not to check. The returned list doesn't contain any such
6356 coding systems. In any case, If the text contains only ASCII or is
6357 unibyte, return t. */
6358
6359 DEFUN ("find-coding-systems-region-internal",
6360 Ffind_coding_systems_region_internal,
6361 Sfind_coding_systems_region_internal, 2, 3, 0,
6362 doc: /* Internal use only. */)
6363 (start, end, exclude)
6364 Lisp_Object start, end, exclude;
6365 {
6366 Lisp_Object coding_attrs_list, safe_codings;
6367 EMACS_INT start_byte, end_byte;
6368 unsigned char *p, *pbeg, *pend;
6369 int c;
6370 Lisp_Object tail, elt;
6371
6372 if (STRINGP (start))
6373 {
6374 if (!STRING_MULTIBYTE (start)
6375 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6376 return Qt;
6377 start_byte = 0;
6378 end_byte = STRING_BYTES (XSTRING (start));
6379 }
6380 else
6381 {
6382 CHECK_NUMBER_COERCE_MARKER (start);
6383 CHECK_NUMBER_COERCE_MARKER (end);
6384 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6385 args_out_of_range (start, end);
6386 if (NILP (current_buffer->enable_multibyte_characters))
6387 return Qt;
6388 start_byte = CHAR_TO_BYTE (XINT (start));
6389 end_byte = CHAR_TO_BYTE (XINT (end));
6390 if (XINT (end) - XINT (start) == end_byte - start_byte)
6391 return Qt;
6392
6393 if (start < GPT && end > GPT)
6394 {
6395 if ((GPT - start) < (end - GPT))
6396 move_gap_both (start, start_byte);
6397 else
6398 move_gap_both (end, end_byte);
6399 }
6400 }
6401
6402 coding_attrs_list = Qnil;
6403 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6404 if (NILP (exclude)
6405 || NILP (Fmemq (XCAR (tail), exclude)))
6406 {
6407 Lisp_Object attrs;
6408
6409 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6410 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6411 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6412 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6413 }
6414
6415 if (STRINGP (start))
6416 p = pbeg = XSTRING (start)->data;
6417 else
6418 p = pbeg = BYTE_POS_ADDR (start_byte);
6419 pend = p + (end_byte - start_byte);
6420
6421 while (p < pend && ASCII_BYTE_P (*p)) p++;
6422 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6423
6424 while (p < pend)
6425 {
6426 if (ASCII_BYTE_P (*p))
6427 p++;
6428 else
6429 {
6430 c = STRING_CHAR_ADVANCE (p);
6431
6432 charset_map_loaded = 0;
6433 for (tail = coding_attrs_list; CONSP (tail);)
6434 {
6435 elt = XCAR (tail);
6436 if (NILP (elt))
6437 tail = XCDR (tail);
6438 else if (char_encodable_p (c, elt))
6439 tail = XCDR (tail);
6440 else if (CONSP (XCDR (tail)))
6441 {
6442 XSETCAR (tail, XCAR (XCDR (tail)));
6443 XSETCDR (tail, XCDR (XCDR (tail)));
6444 }
6445 else
6446 {
6447 XSETCAR (tail, Qnil);
6448 tail = XCDR (tail);
6449 }
6450 }
6451 if (charset_map_loaded)
6452 {
6453 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6454
6455 if (STRINGP (start))
6456 pbeg = XSTRING (start)->data;
6457 else
6458 pbeg = BYTE_POS_ADDR (start_byte);
6459 p = pbeg + p_offset;
6460 pend = pbeg + pend_offset;
6461 }
6462 }
6463 }
6464
6465 safe_codings = Qnil;
6466 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6467 if (! NILP (XCAR (tail)))
6468 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6469
6470 return safe_codings;
6471 }
6472
6473
6474 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6475 Scheck_coding_systems_region, 3, 3, 0,
6476 doc: /* Check if the region is encodable by coding systems.
6477
6478 START and END are buffer positions specifying the region.
6479 CODING-SYSTEM-LIST is a list of coding systems to check.
6480
6481 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6482 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6483 whole region, POS0, POS1, ... are buffer positions where non-encodable
6484 characters are found.
6485
6486 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6487 value is nil.
6488
6489 START may be a string. In that case, check if the string is
6490 encodable, and the value contains indices to the string instead of
6491 buffer positions. END is ignored. */)
6492 (start, end, coding_system_list)
6493 Lisp_Object start, end, coding_system_list;
6494 {
6495 Lisp_Object list;
6496 EMACS_INT start_byte, end_byte;
6497 int pos;
6498 unsigned char *p, *pbeg, *pend;
6499 int c;
6500 Lisp_Object tail, elt;
6501
6502 if (STRINGP (start))
6503 {
6504 if (!STRING_MULTIBYTE (start)
6505 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6506 return Qnil;
6507 start_byte = 0;
6508 end_byte = STRING_BYTES (XSTRING (start));
6509 pos = 0;
6510 }
6511 else
6512 {
6513 CHECK_NUMBER_COERCE_MARKER (start);
6514 CHECK_NUMBER_COERCE_MARKER (end);
6515 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6516 args_out_of_range (start, end);
6517 if (NILP (current_buffer->enable_multibyte_characters))
6518 return Qnil;
6519 start_byte = CHAR_TO_BYTE (XINT (start));
6520 end_byte = CHAR_TO_BYTE (XINT (end));
6521 if (XINT (end) - XINT (start) == end_byte - start_byte)
6522 return Qt;
6523
6524 if (start < GPT && end > GPT)
6525 {
6526 if ((GPT - start) < (end - GPT))
6527 move_gap_both (start, start_byte);
6528 else
6529 move_gap_both (end, end_byte);
6530 }
6531 pos = start;
6532 }
6533
6534 list = Qnil;
6535 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6536 {
6537 elt = XCAR (tail);
6538 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6539 Qnil)),
6540 list);
6541 }
6542
6543 if (STRINGP (start))
6544 p = pbeg = XSTRING (start)->data;
6545 else
6546 p = pbeg = BYTE_POS_ADDR (start_byte);
6547 pend = p + (end_byte - start_byte);
6548
6549 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6550 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6551
6552 while (p < pend)
6553 {
6554 if (ASCII_BYTE_P (*p))
6555 p++;
6556 else
6557 {
6558 c = STRING_CHAR_ADVANCE (p);
6559
6560 charset_map_loaded = 0;
6561 for (tail = list; CONSP (tail); tail = XCDR (tail))
6562 {
6563 elt = XCDR (XCAR (tail));
6564 if (! char_encodable_p (c, XCAR (elt)))
6565 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6566 }
6567 if (charset_map_loaded)
6568 {
6569 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6570
6571 if (STRINGP (start))
6572 pbeg = XSTRING (start)->data;
6573 else
6574 pbeg = BYTE_POS_ADDR (start_byte);
6575 p = pbeg + p_offset;
6576 pend = pbeg + pend_offset;
6577 }
6578 }
6579 pos++;
6580 }
6581
6582 tail = list;
6583 list = Qnil;
6584 for (; CONSP (tail); tail = XCDR (tail))
6585 {
6586 elt = XCAR (tail);
6587 if (CONSP (XCDR (XCDR (elt))))
6588 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6589 list);
6590 }
6591
6592 return list;
6593 }
6594
6595
6596
6597 Lisp_Object
6598 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6599 Lisp_Object start, end, coding_system, dst_object;
6600 int encodep, norecord;
6601 {
6602 struct coding_system coding;
6603 EMACS_INT from, from_byte, to, to_byte;
6604 Lisp_Object src_object;
6605
6606 CHECK_NUMBER_COERCE_MARKER (start);
6607 CHECK_NUMBER_COERCE_MARKER (end);
6608 if (NILP (coding_system))
6609 coding_system = Qno_conversion;
6610 else
6611 CHECK_CODING_SYSTEM (coding_system);
6612 src_object = Fcurrent_buffer ();
6613 if (NILP (dst_object))
6614 dst_object = src_object;
6615 else if (! EQ (dst_object, Qt))
6616 CHECK_BUFFER (dst_object);
6617
6618 validate_region (&start, &end);
6619 from = XFASTINT (start);
6620 from_byte = CHAR_TO_BYTE (from);
6621 to = XFASTINT (end);
6622 to_byte = CHAR_TO_BYTE (to);
6623
6624 setup_coding_system (coding_system, &coding);
6625 coding.mode |= CODING_MODE_LAST_BLOCK;
6626
6627 if (encodep)
6628 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6629 dst_object);
6630 else
6631 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6632 dst_object);
6633 if (! norecord)
6634 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6635
6636 if (coding.result != CODING_RESULT_SUCCESS)
6637 error ("Code conversion error: %d", coding.result);
6638
6639 return (BUFFERP (dst_object)
6640 ? make_number (coding.produced_char)
6641 : coding.dst_object);
6642 }
6643
6644
6645 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6646 3, 4, "r\nzCoding system: ",
6647 doc: /* Decode the current region from the specified coding system.
6648 When called from a program, takes four arguments:
6649 START, END, CODING-SYSTEM, and DESTINATION.
6650 START and END are buffer positions.
6651
6652 Optional 4th arguments DESTINATION specifies where the decoded text goes.
6653 If nil, the region between START and END is replace by the decoded text.
6654 If buffer, the decoded text is inserted in the buffer.
6655 If t, the decoded text is returned.
6656
6657 This function sets `last-coding-system-used' to the precise coding system
6658 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6659 not fully specified.)
6660 It returns the length of the decoded text. */)
6661 (start, end, coding_system, destination)
6662 Lisp_Object start, end, coding_system, destination;
6663 {
6664 return code_convert_region (start, end, coding_system, destination, 0, 0);
6665 }
6666
6667 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6668 3, 4, "r\nzCoding system: ",
6669 doc: /* Encode the current region by specified coding system.
6670 When called from a program, takes three arguments:
6671 START, END, and CODING-SYSTEM. START and END are buffer positions.
6672
6673 Optional 4th arguments DESTINATION specifies where the encoded text goes.
6674 If nil, the region between START and END is replace by the encoded text.
6675 If buffer, the encoded text is inserted in the buffer.
6676 If t, the encoded text is returned.
6677
6678 This function sets `last-coding-system-used' to the precise coding system
6679 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6680 not fully specified.)
6681 It returns the length of the encoded text. */)
6682 (start, end, coding_system, destination)
6683 Lisp_Object start, end, coding_system, destination;
6684 {
6685 return code_convert_region (start, end, coding_system, destination, 1, 0);
6686 }
6687
6688 Lisp_Object
6689 code_convert_string (string, coding_system, dst_object,
6690 encodep, nocopy, norecord)
6691 Lisp_Object string, coding_system, dst_object;
6692 int encodep, nocopy, norecord;
6693 {
6694 struct coding_system coding;
6695 EMACS_INT chars, bytes;
6696
6697 CHECK_STRING (string);
6698 if (NILP (coding_system))
6699 {
6700 if (! norecord)
6701 Vlast_coding_system_used = Qno_conversion;
6702 if (NILP (dst_object))
6703 return (nocopy ? Fcopy_sequence (string) : string);
6704 }
6705
6706 if (NILP (coding_system))
6707 coding_system = Qno_conversion;
6708 else
6709 CHECK_CODING_SYSTEM (coding_system);
6710 if (NILP (dst_object))
6711 dst_object = Qt;
6712 else if (! EQ (dst_object, Qt))
6713 CHECK_BUFFER (dst_object);
6714
6715 setup_coding_system (coding_system, &coding);
6716 coding.mode |= CODING_MODE_LAST_BLOCK;
6717 chars = XSTRING (string)->size;
6718 bytes = STRING_BYTES (XSTRING (string));
6719 if (encodep)
6720 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6721 else
6722 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6723 if (! norecord)
6724 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6725
6726 if (coding.result != CODING_RESULT_SUCCESS)
6727 error ("Code conversion error: %d", coding.result);
6728
6729 return (BUFFERP (dst_object)
6730 ? make_number (coding.produced_char)
6731 : coding.dst_object);
6732 }
6733
6734
6735 /* Encode or decode STRING according to CODING_SYSTEM.
6736 Do not set Vlast_coding_system_used.
6737
6738 This function is called only from macros DECODE_FILE and
6739 ENCODE_FILE, thus we ignore character composition. */
6740
6741 Lisp_Object
6742 code_convert_string_norecord (string, coding_system, encodep)
6743 Lisp_Object string, coding_system;
6744 int encodep;
6745 {
6746 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
6747 }
6748
6749
6750 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6751 2, 4, 0,
6752 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6753
6754 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6755 if the decoding operation is trivial.
6756
6757 Optional fourth arg BUFFER non-nil meant that the decoded text is
6758 inserted in BUFFER instead of returned as a astring. In this case,
6759 the return value is BUFFER.
6760
6761 This function sets `last-coding-system-used' to the precise coding system
6762 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6763 not fully specified. */)
6764 (string, coding_system, nocopy, buffer)
6765 Lisp_Object string, coding_system, nocopy, buffer;
6766 {
6767 return code_convert_string (string, coding_system, buffer,
6768 0, ! NILP (nocopy), 0);
6769 }
6770
6771 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6772 2, 4, 0,
6773 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6774
6775 Optional third arg NOCOPY non-nil means it is OK to return STRING
6776 itself if the encoding operation is trivial.
6777
6778 Optional fourth arg BUFFER non-nil meant that the encoded text is
6779 inserted in BUFFER instead of returned as a astring. In this case,
6780 the return value is BUFFER.
6781
6782 This function sets `last-coding-system-used' to the precise coding system
6783 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6784 not fully specified.) */)
6785 (string, coding_system, nocopy, buffer)
6786 Lisp_Object string, coding_system, nocopy, buffer;
6787 {
6788 return code_convert_string (string, coding_system, buffer,
6789 nocopy, ! NILP (nocopy), 1);
6790 }
6791
6792 \f
6793 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6794 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6795 Return the corresponding character. */)
6796 (code)
6797 Lisp_Object code;
6798 {
6799 Lisp_Object spec, attrs, val;
6800 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6801 int c;
6802
6803 CHECK_NATNUM (code);
6804 c = XFASTINT (code);
6805 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6806 attrs = AREF (spec, 0);
6807
6808 if (ASCII_BYTE_P (c)
6809 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6810 return code;
6811
6812 val = CODING_ATTR_CHARSET_LIST (attrs);
6813 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6814 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6815 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
6816
6817 if (c <= 0x7F)
6818 charset = charset_roman;
6819 else if (c >= 0xA0 && c < 0xDF)
6820 {
6821 charset = charset_kana;
6822 c -= 0x80;
6823 }
6824 else
6825 {
6826 int s1 = c >> 8, s2 = c & 0x7F;
6827
6828 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6829 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6830 error ("Invalid code: %d", code);
6831 SJIS_TO_JIS (c);
6832 charset = charset_kanji;
6833 }
6834 c = DECODE_CHAR (charset, c);
6835 if (c < 0)
6836 error ("Invalid code: %d", code);
6837 return make_number (c);
6838 }
6839
6840
6841 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6842 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6843 Return the corresponding code in SJIS. */)
6844 (ch)
6845 Lisp_Object ch;
6846 {
6847 Lisp_Object spec, attrs, charset_list;
6848 int c;
6849 struct charset *charset;
6850 unsigned code;
6851
6852 CHECK_CHARACTER (ch);
6853 c = XFASTINT (ch);
6854 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6855 attrs = AREF (spec, 0);
6856
6857 if (ASCII_CHAR_P (c)
6858 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6859 return ch;
6860
6861 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6862 charset = char_charset (c, charset_list, &code);
6863 if (code == CHARSET_INVALID_CODE (charset))
6864 error ("Can't encode by shift_jis encoding: %d", c);
6865 JIS_TO_SJIS (code);
6866
6867 return make_number (code);
6868 }
6869
6870 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6871 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6872 Return the corresponding character. */)
6873 (code)
6874 Lisp_Object code;
6875 {
6876 Lisp_Object spec, attrs, val;
6877 struct charset *charset_roman, *charset_big5, *charset;
6878 int c;
6879
6880 CHECK_NATNUM (code);
6881 c = XFASTINT (code);
6882 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6883 attrs = AREF (spec, 0);
6884
6885 if (ASCII_BYTE_P (c)
6886 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6887 return code;
6888
6889 val = CODING_ATTR_CHARSET_LIST (attrs);
6890 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6891 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6892
6893 if (c <= 0x7F)
6894 charset = charset_roman;
6895 else
6896 {
6897 int b1 = c >> 8, b2 = c & 0x7F;
6898 if (b1 < 0xA1 || b1 > 0xFE
6899 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6900 error ("Invalid code: %d", code);
6901 charset = charset_big5;
6902 }
6903 c = DECODE_CHAR (charset, (unsigned )c);
6904 if (c < 0)
6905 error ("Invalid code: %d", code);
6906 return make_number (c);
6907 }
6908
6909 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6910 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
6911 Return the corresponding character code in Big5. */)
6912 (ch)
6913 Lisp_Object ch;
6914 {
6915 Lisp_Object spec, attrs, charset_list;
6916 struct charset *charset;
6917 int c;
6918 unsigned code;
6919
6920 CHECK_CHARACTER (ch);
6921 c = XFASTINT (ch);
6922 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6923 attrs = AREF (spec, 0);
6924 if (ASCII_CHAR_P (c)
6925 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6926 return ch;
6927
6928 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6929 charset = char_charset (c, charset_list, &code);
6930 if (code == CHARSET_INVALID_CODE (charset))
6931 error ("Can't encode by Big5 encoding: %d", c);
6932
6933 return make_number (code);
6934 }
6935
6936 \f
6937 DEFUN ("set-terminal-coding-system-internal",
6938 Fset_terminal_coding_system_internal,
6939 Sset_terminal_coding_system_internal, 1, 1, 0,
6940 doc: /* Internal use only. */)
6941 (coding_system)
6942 Lisp_Object coding_system;
6943 {
6944 CHECK_SYMBOL (coding_system);
6945 setup_coding_system (Fcheck_coding_system (coding_system),
6946 &terminal_coding);
6947
6948 /* We had better not send unsafe characters to terminal. */
6949 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
6950 /* Characer composition should be disabled. */
6951 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6952 terminal_coding.src_multibyte = 1;
6953 terminal_coding.dst_multibyte = 0;
6954 return Qnil;
6955 }
6956
6957 DEFUN ("set-safe-terminal-coding-system-internal",
6958 Fset_safe_terminal_coding_system_internal,
6959 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
6960 doc: /* Internal use only. */)
6961 (coding_system)
6962 Lisp_Object coding_system;
6963 {
6964 CHECK_SYMBOL (coding_system);
6965 setup_coding_system (Fcheck_coding_system (coding_system),
6966 &safe_terminal_coding);
6967 /* Characer composition should be disabled. */
6968 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6969 safe_terminal_coding.src_multibyte = 1;
6970 safe_terminal_coding.dst_multibyte = 0;
6971 return Qnil;
6972 }
6973
6974 DEFUN ("terminal-coding-system",
6975 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6976 doc: /* Return coding system specified for terminal output. */)
6977 ()
6978 {
6979 return CODING_ID_NAME (terminal_coding.id);
6980 }
6981
6982 DEFUN ("set-keyboard-coding-system-internal",
6983 Fset_keyboard_coding_system_internal,
6984 Sset_keyboard_coding_system_internal, 1, 1, 0,
6985 doc: /* Internal use only. */)
6986 (coding_system)
6987 Lisp_Object coding_system;
6988 {
6989 CHECK_SYMBOL (coding_system);
6990 setup_coding_system (Fcheck_coding_system (coding_system),
6991 &keyboard_coding);
6992 /* Characer composition should be disabled. */
6993 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6994 return Qnil;
6995 }
6996
6997 DEFUN ("keyboard-coding-system",
6998 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6999 doc: /* Return coding system specified for decoding keyboard input. */)
7000 ()
7001 {
7002 return CODING_ID_NAME (keyboard_coding.id);
7003 }
7004
7005 \f
7006 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7007 Sfind_operation_coding_system, 1, MANY, 0,
7008 doc: /* Choose a coding system for an operation based on the target name.
7009 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7010 DECODING-SYSTEM is the coding system to use for decoding
7011 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7012 for encoding (in case OPERATION does encoding).
7013
7014 The first argument OPERATION specifies an I/O primitive:
7015 For file I/O, `insert-file-contents' or `write-region'.
7016 For process I/O, `call-process', `call-process-region', or `start-process'.
7017 For network I/O, `open-network-stream'.
7018
7019 The remaining arguments should be the same arguments that were passed
7020 to the primitive. Depending on which primitive, one of those arguments
7021 is selected as the TARGET. For example, if OPERATION does file I/O,
7022 whichever argument specifies the file name is TARGET.
7023
7024 TARGET has a meaning which depends on OPERATION:
7025 For file I/O, TARGET is a file name.
7026 For process I/O, TARGET is a process name.
7027 For network I/O, TARGET is a service name or a port number
7028
7029 This function looks up what specified for TARGET in,
7030 `file-coding-system-alist', `process-coding-system-alist',
7031 or `network-coding-system-alist' depending on OPERATION.
7032 They may specify a coding system, a cons of coding systems,
7033 or a function symbol to call.
7034 In the last case, we call the function with one argument,
7035 which is a list of all the arguments given to this function.
7036
7037 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7038 (nargs, args)
7039 int nargs;
7040 Lisp_Object *args;
7041 {
7042 Lisp_Object operation, target_idx, target, val;
7043 register Lisp_Object chain;
7044
7045 if (nargs < 2)
7046 error ("Too few arguments");
7047 operation = args[0];
7048 if (!SYMBOLP (operation)
7049 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7050 error ("Invalid first arguement");
7051 if (nargs < 1 + XINT (target_idx))
7052 error ("Too few arguments for operation: %s",
7053 XSYMBOL (operation)->name->data);
7054 target = args[XINT (target_idx) + 1];
7055 if (!(STRINGP (target)
7056 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7057 error ("Invalid %dth argument", XINT (target_idx) + 1);
7058
7059 chain = ((EQ (operation, Qinsert_file_contents)
7060 || EQ (operation, Qwrite_region))
7061 ? Vfile_coding_system_alist
7062 : (EQ (operation, Qopen_network_stream)
7063 ? Vnetwork_coding_system_alist
7064 : Vprocess_coding_system_alist));
7065 if (NILP (chain))
7066 return Qnil;
7067
7068 for (; CONSP (chain); chain = XCDR (chain))
7069 {
7070 Lisp_Object elt;
7071
7072 elt = XCAR (chain);
7073 if (CONSP (elt)
7074 && ((STRINGP (target)
7075 && STRINGP (XCAR (elt))
7076 && fast_string_match (XCAR (elt), target) >= 0)
7077 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7078 {
7079 val = XCDR (elt);
7080 /* Here, if VAL is both a valid coding system and a valid
7081 function symbol, we return VAL as a coding system. */
7082 if (CONSP (val))
7083 return val;
7084 if (! SYMBOLP (val))
7085 return Qnil;
7086 if (! NILP (Fcoding_system_p (val)))
7087 return Fcons (val, val);
7088 if (! NILP (Ffboundp (val)))
7089 {
7090 val = call1 (val, Flist (nargs, args));
7091 if (CONSP (val))
7092 return val;
7093 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7094 return Fcons (val, val);
7095 }
7096 return Qnil;
7097 }
7098 }
7099 return Qnil;
7100 }
7101
7102 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7103 Sset_coding_system_priority, 1, MANY, 0,
7104 doc: /* Put higher priority to coding systems of the arguments. */)
7105 (nargs, args)
7106 int nargs;
7107 Lisp_Object *args;
7108 {
7109 int i, j;
7110 int changed[coding_category_max];
7111 enum coding_category priorities[coding_category_max];
7112
7113 bzero (changed, sizeof changed);
7114
7115 for (i = j = 0; i < nargs; i++)
7116 {
7117 enum coding_category category;
7118 Lisp_Object spec, attrs;
7119
7120 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7121 attrs = AREF (spec, 0);
7122 category = XINT (CODING_ATTR_CATEGORY (attrs));
7123 if (changed[category])
7124 /* Ignore this coding system because a coding system of the
7125 same category already had a higher priority. */
7126 continue;
7127 changed[category] = 1;
7128 priorities[j++] = category;
7129 if (coding_categories[category].id >= 0
7130 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7131 setup_coding_system (args[i], &coding_categories[category]);
7132 }
7133
7134 /* Now we have decided top J priorities. Reflect the order of the
7135 original priorities to the remaining priorities. */
7136
7137 for (i = j, j = 0; i < coding_category_max; i++, j++)
7138 {
7139 while (j < coding_category_max
7140 && changed[coding_priorities[j]])
7141 j++;
7142 if (j == coding_category_max)
7143 abort ();
7144 priorities[i] = coding_priorities[j];
7145 }
7146
7147 bcopy (priorities, coding_priorities, sizeof priorities);
7148 return Qnil;
7149 }
7150
7151 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7152 Scoding_system_priority_list, 0, 1, 0,
7153 doc: /* Return a list of coding systems ordered by their priorities. */)
7154 (highestp)
7155 Lisp_Object highestp;
7156 {
7157 int i;
7158 Lisp_Object val;
7159
7160 for (i = 0, val = Qnil; i < coding_category_max; i++)
7161 {
7162 enum coding_category category = coding_priorities[i];
7163 int id = coding_categories[category].id;
7164 Lisp_Object attrs;
7165
7166 if (id < 0)
7167 continue;
7168 attrs = CODING_ID_ATTRS (id);
7169 if (! NILP (highestp))
7170 return CODING_ATTR_BASE_NAME (attrs);
7171 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7172 }
7173 return Fnreverse (val);
7174 }
7175
7176 static Lisp_Object
7177 make_subsidiaries (base)
7178 Lisp_Object base;
7179 {
7180 Lisp_Object subsidiaries;
7181 char *suffixes[] = { "-unix", "-dos", "-mac" };
7182 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7183 char *buf = (char *) alloca (base_name_len + 6);
7184 int i;
7185
7186 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7187 subsidiaries = Fmake_vector (make_number (3), Qnil);
7188 for (i = 0; i < 3; i++)
7189 {
7190 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7191 ASET (subsidiaries, i, intern (buf));
7192 }
7193 return subsidiaries;
7194 }
7195
7196
7197 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7198 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7199 doc: /* For internal use only. */)
7200 (nargs, args)
7201 int nargs;
7202 Lisp_Object *args;
7203 {
7204 Lisp_Object name;
7205 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7206 Lisp_Object attrs; /* Vector of attributes. */
7207 Lisp_Object eol_type;
7208 Lisp_Object aliases;
7209 Lisp_Object coding_type, charset_list, safe_charsets;
7210 enum coding_category category;
7211 Lisp_Object tail, val;
7212 int max_charset_id = 0;
7213 int i;
7214
7215 if (nargs < coding_arg_max)
7216 goto short_args;
7217
7218 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7219
7220 name = args[coding_arg_name];
7221 CHECK_SYMBOL (name);
7222 CODING_ATTR_BASE_NAME (attrs) = name;
7223
7224 val = args[coding_arg_mnemonic];
7225 if (! STRINGP (val))
7226 CHECK_CHARACTER (val);
7227 CODING_ATTR_MNEMONIC (attrs) = val;
7228
7229 coding_type = args[coding_arg_coding_type];
7230 CHECK_SYMBOL (coding_type);
7231 CODING_ATTR_TYPE (attrs) = coding_type;
7232
7233 charset_list = args[coding_arg_charset_list];
7234 if (SYMBOLP (charset_list))
7235 {
7236 if (EQ (charset_list, Qiso_2022))
7237 {
7238 if (! EQ (coding_type, Qiso_2022))
7239 error ("Invalid charset-list");
7240 charset_list = Viso_2022_charset_list;
7241 }
7242 else if (EQ (charset_list, Qemacs_mule))
7243 {
7244 if (! EQ (coding_type, Qemacs_mule))
7245 error ("Invalid charset-list");
7246 charset_list = Vemacs_mule_charset_list;
7247 }
7248 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7249 if (max_charset_id < XFASTINT (XCAR (tail)))
7250 max_charset_id = XFASTINT (XCAR (tail));
7251 }
7252 else
7253 {
7254 charset_list = Fcopy_sequence (charset_list);
7255 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7256 {
7257 struct charset *charset;
7258
7259 val = Fcar (tail);
7260 CHECK_CHARSET_GET_CHARSET (val, charset);
7261 if (EQ (coding_type, Qiso_2022)
7262 ? CHARSET_ISO_FINAL (charset) < 0
7263 : EQ (coding_type, Qemacs_mule)
7264 ? CHARSET_EMACS_MULE_ID (charset) < 0
7265 : 0)
7266 error ("Can't handle charset `%s'",
7267 XSYMBOL (CHARSET_NAME (charset))->name->data);
7268
7269 XCAR (tail) = make_number (charset->id);
7270 if (max_charset_id < charset->id)
7271 max_charset_id = charset->id;
7272 }
7273 }
7274 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7275
7276 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7277 make_number (255));
7278 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7279 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7280 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7281
7282 val = args[coding_arg_decode_translation_table];
7283 if (! NILP (val))
7284 CHECK_CHAR_TABLE (val);
7285 CODING_ATTR_DECODE_TBL (attrs) = val;
7286
7287 val = args[coding_arg_encode_translation_table];
7288 if (! NILP (val))
7289 CHECK_CHAR_TABLE (val);
7290 CODING_ATTR_ENCODE_TBL (attrs) = val;
7291
7292 val = args[coding_arg_post_read_conversion];
7293 CHECK_SYMBOL (val);
7294 CODING_ATTR_POST_READ (attrs) = val;
7295
7296 val = args[coding_arg_pre_write_conversion];
7297 CHECK_SYMBOL (val);
7298 CODING_ATTR_PRE_WRITE (attrs) = val;
7299
7300 val = args[coding_arg_default_char];
7301 if (NILP (val))
7302 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7303 else
7304 {
7305 CHECK_CHARACTER (val);
7306 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7307 }
7308
7309 val = args[coding_arg_plist];
7310 CHECK_LIST (val);
7311 CODING_ATTR_PLIST (attrs) = val;
7312
7313 if (EQ (coding_type, Qcharset))
7314 {
7315 val = Fmake_vector (make_number (256), Qnil);
7316
7317 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7318 {
7319 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7320 int idx = (CHARSET_DIMENSION (charset) - 1) * 4;
7321
7322 for (i = charset->code_space[idx];
7323 i <= charset->code_space[idx + 1]; i++)
7324 {
7325 if (NILP (AREF (val, i)))
7326 ASET (val, i, XCAR (tail));
7327 else
7328 error ("Charsets conflicts in the first byte");
7329 }
7330 }
7331 ASET (attrs, coding_attr_charset_valids, val);
7332 category = coding_category_charset;
7333 }
7334 else if (EQ (coding_type, Qccl))
7335 {
7336 Lisp_Object valids;
7337
7338 if (nargs < coding_arg_ccl_max)
7339 goto short_args;
7340
7341 val = args[coding_arg_ccl_decoder];
7342 CHECK_CCL_PROGRAM (val);
7343 if (VECTORP (val))
7344 val = Fcopy_sequence (val);
7345 ASET (attrs, coding_attr_ccl_decoder, val);
7346
7347 val = args[coding_arg_ccl_encoder];
7348 CHECK_CCL_PROGRAM (val);
7349 if (VECTORP (val))
7350 val = Fcopy_sequence (val);
7351 ASET (attrs, coding_attr_ccl_encoder, val);
7352
7353 val = args[coding_arg_ccl_valids];
7354 valids = Fmake_string (make_number (256), make_number (0));
7355 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7356 {
7357 val = Fcar (tail);
7358 if (INTEGERP (val))
7359 ASET (valids, XINT (val), 1);
7360 else
7361 {
7362 int from, to;
7363
7364 CHECK_CONS (val);
7365 CHECK_NUMBER (XCAR (val));
7366 CHECK_NUMBER (XCDR (val));
7367 from = XINT (XCAR (val));
7368 to = XINT (XCDR (val));
7369 for (i = from; i <= to; i++)
7370 ASET (valids, i, 1);
7371 }
7372 }
7373 ASET (attrs, coding_attr_ccl_valids, valids);
7374
7375 category = coding_category_ccl;
7376 }
7377 else if (EQ (coding_type, Qutf_16))
7378 {
7379 Lisp_Object bom, endian;
7380
7381 if (nargs < coding_arg_utf16_max)
7382 goto short_args;
7383
7384 bom = args[coding_arg_utf16_bom];
7385 if (! NILP (bom) && ! EQ (bom, Qt))
7386 {
7387 CHECK_CONS (bom);
7388 CHECK_CODING_SYSTEM (XCAR (bom));
7389 CHECK_CODING_SYSTEM (XCDR (bom));
7390 }
7391 ASET (attrs, coding_attr_utf_16_bom, bom);
7392
7393 endian = args[coding_arg_utf16_endian];
7394 ASET (attrs, coding_attr_utf_16_endian, endian);
7395
7396 category = (CONSP (bom)
7397 ? coding_category_utf_16_auto
7398 : NILP (bom)
7399 ? (NILP (endian)
7400 ? coding_category_utf_16_be_nosig
7401 : coding_category_utf_16_le_nosig)
7402 : (NILP (endian)
7403 ? coding_category_utf_16_be
7404 : coding_category_utf_16_le));
7405 }
7406 else if (EQ (coding_type, Qiso_2022))
7407 {
7408 Lisp_Object initial, reg_usage, request, flags;
7409 int i, id;
7410
7411 if (nargs < coding_arg_iso2022_max)
7412 goto short_args;
7413
7414 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7415 CHECK_VECTOR (initial);
7416 for (i = 0; i < 4; i++)
7417 {
7418 val = Faref (initial, make_number (i));
7419 if (! NILP (val))
7420 {
7421 CHECK_CHARSET_GET_ID (val, id);
7422 ASET (initial, i, make_number (id));
7423 }
7424 else
7425 ASET (initial, i, make_number (-1));
7426 }
7427
7428 reg_usage = args[coding_arg_iso2022_reg_usage];
7429 CHECK_CONS (reg_usage);
7430 CHECK_NATNUM (XCAR (reg_usage));
7431 CHECK_NATNUM (XCDR (reg_usage));
7432
7433 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7434 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
7435 {
7436 int id;
7437
7438 val = Fcar (tail);
7439 CHECK_CONS (val);
7440 CHECK_CHARSET_GET_ID (XCAR (val), id);
7441 CHECK_NATNUM (XCDR (val));
7442 if (XINT (XCDR (val)) >= 4)
7443 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7444 XCAR (val) = make_number (id);
7445 }
7446
7447 flags = args[coding_arg_iso2022_flags];
7448 CHECK_NATNUM (flags);
7449 i = XINT (flags);
7450 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7451 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7452
7453 ASET (attrs, coding_attr_iso_initial, initial);
7454 ASET (attrs, coding_attr_iso_usage, reg_usage);
7455 ASET (attrs, coding_attr_iso_request, request);
7456 ASET (attrs, coding_attr_iso_flags, flags);
7457 setup_iso_safe_charsets (attrs);
7458
7459 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7460 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7461 | CODING_ISO_FLAG_SINGLE_SHIFT))
7462 ? coding_category_iso_7_else
7463 : EQ (args[coding_arg_charset_list], Qiso_2022)
7464 ? coding_category_iso_7
7465 : coding_category_iso_7_tight);
7466 else
7467 {
7468 int id = XINT (AREF (initial, 1));
7469
7470 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7471 | CODING_ISO_FLAG_SINGLE_SHIFT))
7472 || EQ (args[coding_arg_charset_list], Qiso_2022)
7473 || id < 0)
7474 ? coding_category_iso_8_else
7475 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7476 ? coding_category_iso_8_1
7477 : coding_category_iso_8_2);
7478 }
7479 }
7480 else if (EQ (coding_type, Qemacs_mule))
7481 {
7482 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7483 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7484
7485 category = coding_category_emacs_mule;
7486 }
7487 else if (EQ (coding_type, Qshift_jis))
7488 {
7489
7490 struct charset *charset;
7491
7492 if (XINT (Flength (charset_list)) != 3)
7493 error ("There should be just three charsets");
7494
7495 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7496 if (CHARSET_DIMENSION (charset) != 1)
7497 error ("Dimension of charset %s is not one",
7498 XSYMBOL (CHARSET_NAME (charset))->name->data);
7499
7500 charset_list = XCDR (charset_list);
7501 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7502 if (CHARSET_DIMENSION (charset) != 1)
7503 error ("Dimension of charset %s is not one",
7504 XSYMBOL (CHARSET_NAME (charset))->name->data);
7505
7506 charset_list = XCDR (charset_list);
7507 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7508 if (CHARSET_DIMENSION (charset) != 2)
7509 error ("Dimension of charset %s is not two",
7510 XSYMBOL (CHARSET_NAME (charset))->name->data);
7511
7512 category = coding_category_sjis;
7513 Vsjis_coding_system = name;
7514 }
7515 else if (EQ (coding_type, Qbig5))
7516 {
7517 struct charset *charset;
7518
7519 if (XINT (Flength (charset_list)) != 2)
7520 error ("There should be just two charsets");
7521
7522 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7523 if (CHARSET_DIMENSION (charset) != 1)
7524 error ("Dimension of charset %s is not one",
7525 XSYMBOL (CHARSET_NAME (charset))->name->data);
7526
7527 charset_list = XCDR (charset_list);
7528 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7529 if (CHARSET_DIMENSION (charset) != 2)
7530 error ("Dimension of charset %s is not two",
7531 XSYMBOL (CHARSET_NAME (charset))->name->data);
7532
7533 category = coding_category_big5;
7534 Vbig5_coding_system = name;
7535 }
7536 else if (EQ (coding_type, Qraw_text))
7537 category = coding_category_raw_text;
7538 else if (EQ (coding_type, Qutf_8))
7539 category = coding_category_utf_8;
7540 else if (EQ (coding_type, Qundecided))
7541 category = coding_category_undecided;
7542 else
7543 error ("Invalid coding system type: %s",
7544 XSYMBOL (coding_type)->name->data);
7545
7546 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7547
7548 eol_type = args[coding_arg_eol_type];
7549 if (! NILP (eol_type)
7550 && ! EQ (eol_type, Qunix)
7551 && ! EQ (eol_type, Qdos)
7552 && ! EQ (eol_type, Qmac))
7553 error ("Invalid eol-type");
7554
7555 aliases = Fcons (name, Qnil);
7556
7557 if (NILP (eol_type))
7558 {
7559 eol_type = make_subsidiaries (name);
7560 for (i = 0; i < 3; i++)
7561 {
7562 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7563
7564 this_name = AREF (eol_type, i);
7565 this_aliases = Fcons (this_name, Qnil);
7566 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7567 this_spec = Fmake_vector (make_number (3), attrs);
7568 ASET (this_spec, 1, this_aliases);
7569 ASET (this_spec, 2, this_eol_type);
7570 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7571 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7572 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7573 Vcoding_system_alist);
7574 }
7575 }
7576
7577 spec_vec = Fmake_vector (make_number (3), attrs);
7578 ASET (spec_vec, 1, aliases);
7579 ASET (spec_vec, 2, eol_type);
7580
7581 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7582 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7583 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7584 Vcoding_system_alist);
7585
7586 {
7587 int id = coding_categories[category].id;
7588
7589 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7590 setup_coding_system (name, &coding_categories[category]);
7591 }
7592
7593 return Qnil;
7594
7595 short_args:
7596 return Fsignal (Qwrong_number_of_arguments,
7597 Fcons (intern ("define-coding-system-internal"),
7598 make_number (nargs)));
7599 }
7600
7601 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7602 Sdefine_coding_system_alias, 2, 2, 0,
7603 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7604 (alias, coding_system)
7605 Lisp_Object alias, coding_system;
7606 {
7607 Lisp_Object spec, aliases, eol_type;
7608
7609 CHECK_SYMBOL (alias);
7610 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7611 aliases = AREF (spec, 1);
7612 while (!NILP (XCDR (aliases)))
7613 aliases = XCDR (aliases);
7614 XCDR (aliases) = Fcons (alias, Qnil);
7615
7616 eol_type = AREF (spec, 2);
7617 if (VECTORP (eol_type))
7618 {
7619 Lisp_Object subsidiaries;
7620 int i;
7621
7622 subsidiaries = make_subsidiaries (alias);
7623 for (i = 0; i < 3; i++)
7624 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7625 AREF (eol_type, i));
7626
7627 ASET (spec, 2, subsidiaries);
7628 }
7629
7630 Fputhash (alias, spec, Vcoding_system_hash_table);
7631 Vcoding_system_alist = Fcons (Fcons (alias, Qnil), Vcoding_system_alist);
7632
7633 return Qnil;
7634 }
7635
7636 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7637 1, 1, 0,
7638 doc: /* Return the base of CODING-SYSTEM.
7639 Any alias or subsidiary coding systems are not base coding system. */)
7640 (coding_system)
7641 Lisp_Object coding_system;
7642 {
7643 Lisp_Object spec, attrs;
7644
7645 if (NILP (coding_system))
7646 return (Qno_conversion);
7647 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7648 attrs = AREF (spec, 0);
7649 return CODING_ATTR_BASE_NAME (attrs);
7650 }
7651
7652 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7653 1, 1, 0,
7654 doc: "Return the property list of CODING-SYSTEM.")
7655 (coding_system)
7656 Lisp_Object coding_system;
7657 {
7658 Lisp_Object spec, attrs;
7659
7660 if (NILP (coding_system))
7661 coding_system = Qno_conversion;
7662 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7663 attrs = AREF (spec, 0);
7664 return CODING_ATTR_PLIST (attrs);
7665 }
7666
7667
7668 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7669 1, 1, 0,
7670 doc: /* Return the list of aliases of CODING-SYSTEM.
7671 A base coding system is what made by `define-coding-system'.
7672 Any alias nor subsidiary coding systems are not base coding system. */)
7673 (coding_system)
7674 Lisp_Object coding_system;
7675 {
7676 Lisp_Object spec;
7677
7678 if (NILP (coding_system))
7679 coding_system = Qno_conversion;
7680 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7681 return AREF (spec, 2);
7682 }
7683
7684 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7685 Scoding_system_eol_type, 1, 1, 0,
7686 doc: /* Return eol-type of CODING-SYSTEM.
7687 An eol-type is integer 0, 1, 2, or a vector of coding systems.
7688
7689 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7690 and CR respectively.
7691
7692 A vector value indicates that a format of end-of-line should be
7693 detected automatically. Nth element of the vector is the subsidiary
7694 coding system whose eol-type is N. */)
7695 (coding_system)
7696 Lisp_Object coding_system;
7697 {
7698 Lisp_Object spec, eol_type;
7699 int n;
7700
7701 if (NILP (coding_system))
7702 coding_system = Qno_conversion;
7703 if (! CODING_SYSTEM_P (coding_system))
7704 return Qnil;
7705 spec = CODING_SYSTEM_SPEC (coding_system);
7706 eol_type = AREF (spec, 2);
7707 if (VECTORP (eol_type))
7708 return Fcopy_sequence (eol_type);
7709 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7710 return make_number (n);
7711 }
7712
7713 #endif /* emacs */
7714
7715 \f
7716 /*** 9. Post-amble ***/
7717
7718 void
7719 init_coding_once ()
7720 {
7721 int i;
7722
7723 for (i = 0; i < coding_category_max; i++)
7724 {
7725 coding_categories[i].id = -1;
7726 coding_priorities[i] = i;
7727 }
7728
7729 /* ISO2022 specific initialize routine. */
7730 for (i = 0; i < 0x20; i++)
7731 iso_code_class[i] = ISO_control_0;
7732 for (i = 0x21; i < 0x7F; i++)
7733 iso_code_class[i] = ISO_graphic_plane_0;
7734 for (i = 0x80; i < 0xA0; i++)
7735 iso_code_class[i] = ISO_control_1;
7736 for (i = 0xA1; i < 0xFF; i++)
7737 iso_code_class[i] = ISO_graphic_plane_1;
7738 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7739 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7740 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7741 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7742 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7743 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7744 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7745 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7746 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7747 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7748
7749 inhibit_pre_post_conversion = 0;
7750
7751 for (i = 0; i < 256; i++)
7752 {
7753 emacs_mule_bytes[i] = 1;
7754 }
7755 }
7756
7757 #ifdef emacs
7758
7759 void
7760 syms_of_coding ()
7761 {
7762 staticpro (&Vcoding_system_hash_table);
7763 Vcoding_system_hash_table = Fmakehash (Qeq);
7764
7765 staticpro (&Vsjis_coding_system);
7766 Vsjis_coding_system = Qnil;
7767
7768 staticpro (&Vbig5_coding_system);
7769 Vbig5_coding_system = Qnil;
7770
7771 staticpro (&Vcode_conversion_work_buf_list);
7772 Vcode_conversion_work_buf_list = Qnil;
7773
7774 staticpro (&Vcode_conversion_reused_work_buf);
7775 Vcode_conversion_reused_work_buf = Qnil;
7776
7777 DEFSYM (Qcharset, "charset");
7778 DEFSYM (Qtarget_idx, "target-idx");
7779 DEFSYM (Qcoding_system_history, "coding-system-history");
7780 Fset (Qcoding_system_history, Qnil);
7781
7782 /* Target FILENAME is the first argument. */
7783 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7784 /* Target FILENAME is the third argument. */
7785 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7786
7787 DEFSYM (Qcall_process, "call-process");
7788 /* Target PROGRAM is the first argument. */
7789 Fput (Qcall_process, Qtarget_idx, make_number (0));
7790
7791 DEFSYM (Qcall_process_region, "call-process-region");
7792 /* Target PROGRAM is the third argument. */
7793 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7794
7795 DEFSYM (Qstart_process, "start-process");
7796 /* Target PROGRAM is the third argument. */
7797 Fput (Qstart_process, Qtarget_idx, make_number (2));
7798
7799 DEFSYM (Qopen_network_stream, "open-network-stream");
7800 /* Target SERVICE is the fourth argument. */
7801 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7802
7803 DEFSYM (Qcoding_system, "coding-system");
7804 DEFSYM (Qcoding_aliases, "coding-aliases");
7805
7806 DEFSYM (Qeol_type, "eol-type");
7807 DEFSYM (Qunix, "unix");
7808 DEFSYM (Qdos, "dos");
7809 DEFSYM (Qmac, "mac");
7810
7811 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7812 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7813 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7814 DEFSYM (Qdefault_char, "default-char");
7815 DEFSYM (Qundecided, "undecided");
7816 DEFSYM (Qno_conversion, "no-conversion");
7817 DEFSYM (Qraw_text, "raw-text");
7818
7819 DEFSYM (Qiso_2022, "iso-2022");
7820
7821 DEFSYM (Qutf_8, "utf-8");
7822
7823 DEFSYM (Qutf_16, "utf-16");
7824 DEFSYM (Qutf_16_be, "utf-16-be");
7825 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7826 DEFSYM (Qutf_16_le, "utf-16-l3");
7827 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7828 DEFSYM (Qsignature, "signature");
7829 DEFSYM (Qendian, "endian");
7830 DEFSYM (Qbig, "big");
7831 DEFSYM (Qlittle, "little");
7832
7833 DEFSYM (Qshift_jis, "shift-jis");
7834 DEFSYM (Qbig5, "big5");
7835
7836 DEFSYM (Qcoding_system_p, "coding-system-p");
7837
7838 DEFSYM (Qcoding_system_error, "coding-system-error");
7839 Fput (Qcoding_system_error, Qerror_conditions,
7840 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7841 Fput (Qcoding_system_error, Qerror_message,
7842 build_string ("Invalid coding system"));
7843
7844 /* Intern this now in case it isn't already done.
7845 Setting this variable twice is harmless.
7846 But don't staticpro it here--that is done in alloc.c. */
7847 Qchar_table_extra_slots = intern ("char-table-extra-slots");
7848
7849 DEFSYM (Qtranslation_table, "translation-table");
7850 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7851 DEFSYM (Qtranslation_table_id, "translation-table-id");
7852 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7853 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
7854
7855 DEFSYM (Qchar_coding_system, "char-coding-system");
7856
7857 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2));
7858
7859 DEFSYM (Qvalid_codes, "valid-codes");
7860
7861 DEFSYM (Qemacs_mule, "emacs-mule");
7862
7863 Vcoding_category_table
7864 = Fmake_vector (make_number (coding_category_max), Qnil);
7865 staticpro (&Vcoding_category_table);
7866 /* Followings are target of code detection. */
7867 ASET (Vcoding_category_table, coding_category_iso_7,
7868 intern ("coding-category-iso-7"));
7869 ASET (Vcoding_category_table, coding_category_iso_7_tight,
7870 intern ("coding-category-iso-7-tight"));
7871 ASET (Vcoding_category_table, coding_category_iso_8_1,
7872 intern ("coding-category-iso-8-1"));
7873 ASET (Vcoding_category_table, coding_category_iso_8_2,
7874 intern ("coding-category-iso-8-2"));
7875 ASET (Vcoding_category_table, coding_category_iso_7_else,
7876 intern ("coding-category-iso-7-else"));
7877 ASET (Vcoding_category_table, coding_category_iso_8_else,
7878 intern ("coding-category-iso-8-else"));
7879 ASET (Vcoding_category_table, coding_category_utf_8,
7880 intern ("coding-category-utf-8"));
7881 ASET (Vcoding_category_table, coding_category_utf_16_be,
7882 intern ("coding-category-utf-16-be"));
7883 ASET (Vcoding_category_table, coding_category_utf_16_le,
7884 intern ("coding-category-utf-16-le"));
7885 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
7886 intern ("coding-category-utf-16-be-nosig"));
7887 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
7888 intern ("coding-category-utf-16-le-nosig"));
7889 ASET (Vcoding_category_table, coding_category_charset,
7890 intern ("coding-category-charset"));
7891 ASET (Vcoding_category_table, coding_category_sjis,
7892 intern ("coding-category-sjis"));
7893 ASET (Vcoding_category_table, coding_category_big5,
7894 intern ("coding-category-big5"));
7895 ASET (Vcoding_category_table, coding_category_ccl,
7896 intern ("coding-category-ccl"));
7897 ASET (Vcoding_category_table, coding_category_emacs_mule,
7898 intern ("coding-category-emacs-mule"));
7899 /* Followings are NOT target of code detection. */
7900 ASET (Vcoding_category_table, coding_category_raw_text,
7901 intern ("coding-category-raw-text"));
7902 ASET (Vcoding_category_table, coding_category_undecided,
7903 intern ("coding-category-undecided"));
7904
7905 defsubr (&Scoding_system_p);
7906 defsubr (&Sread_coding_system);
7907 defsubr (&Sread_non_nil_coding_system);
7908 defsubr (&Scheck_coding_system);
7909 defsubr (&Sdetect_coding_region);
7910 defsubr (&Sdetect_coding_string);
7911 defsubr (&Sfind_coding_systems_region_internal);
7912 defsubr (&Scheck_coding_systems_region);
7913 defsubr (&Sdecode_coding_region);
7914 defsubr (&Sencode_coding_region);
7915 defsubr (&Sdecode_coding_string);
7916 defsubr (&Sencode_coding_string);
7917 defsubr (&Sdecode_sjis_char);
7918 defsubr (&Sencode_sjis_char);
7919 defsubr (&Sdecode_big5_char);
7920 defsubr (&Sencode_big5_char);
7921 defsubr (&Sset_terminal_coding_system_internal);
7922 defsubr (&Sset_safe_terminal_coding_system_internal);
7923 defsubr (&Sterminal_coding_system);
7924 defsubr (&Sset_keyboard_coding_system_internal);
7925 defsubr (&Skeyboard_coding_system);
7926 defsubr (&Sfind_operation_coding_system);
7927 defsubr (&Sset_coding_system_priority);
7928 defsubr (&Sdefine_coding_system_internal);
7929 defsubr (&Sdefine_coding_system_alias);
7930 defsubr (&Scoding_system_base);
7931 defsubr (&Scoding_system_plist);
7932 defsubr (&Scoding_system_aliases);
7933 defsubr (&Scoding_system_eol_type);
7934 defsubr (&Scoding_system_priority_list);
7935
7936 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7937 doc: /* List of coding systems.
7938
7939 Do not alter the value of this variable manually. This variable should be
7940 updated by the functions `define-coding-system' and
7941 `define-coding-system-alias'. */);
7942 Vcoding_system_list = Qnil;
7943
7944 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7945 doc: /* Alist of coding system names.
7946 Each element is one element list of coding system name.
7947 This variable is given to `completing-read' as TABLE argument.
7948
7949 Do not alter the value of this variable manually. This variable should be
7950 updated by the functions `make-coding-system' and
7951 `define-coding-system-alias'. */);
7952 Vcoding_system_alist = Qnil;
7953
7954 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7955 doc: /* List of coding-categories (symbols) ordered by priority.
7956
7957 On detecting a coding system, Emacs tries code detection algorithms
7958 associated with each coding-category one by one in this order. When
7959 one algorithm agrees with a byte sequence of source text, the coding
7960 system bound to the corresponding coding-category is selected. */);
7961 {
7962 int i;
7963
7964 Vcoding_category_list = Qnil;
7965 for (i = coding_category_max - 1; i >= 0; i--)
7966 Vcoding_category_list
7967 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7968 Vcoding_category_list);
7969 }
7970
7971 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7972 doc: /* Specify the coding system for read operations.
7973 It is useful to bind this variable with `let', but do not set it globally.
7974 If the value is a coding system, it is used for decoding on read operation.
7975 If not, an appropriate element is used from one of the coding system alists:
7976 There are three such tables, `file-coding-system-alist',
7977 `process-coding-system-alist', and `network-coding-system-alist'. */);
7978 Vcoding_system_for_read = Qnil;
7979
7980 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7981 doc: /* Specify the coding system for write operations.
7982 Programs bind this variable with `let', but you should not set it globally.
7983 If the value is a coding system, it is used for encoding of output,
7984 when writing it to a file and when sending it to a file or subprocess.
7985
7986 If this does not specify a coding system, an appropriate element
7987 is used from one of the coding system alists:
7988 There are three such tables, `file-coding-system-alist',
7989 `process-coding-system-alist', and `network-coding-system-alist'.
7990 For output to files, if the above procedure does not specify a coding system,
7991 the value of `buffer-file-coding-system' is used. */);
7992 Vcoding_system_for_write = Qnil;
7993
7994 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7995 doc: /*
7996 Coding system used in the latest file or process I/O. */);
7997 Vlast_coding_system_used = Qnil;
7998
7999 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8000 doc: /*
8001 *Non-nil means always inhibit code conversion of end-of-line format.
8002 See info node `Coding Systems' and info node `Text and Binary' concerning
8003 such conversion. */);
8004 inhibit_eol_conversion = 0;
8005
8006 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8007 doc: /*
8008 Non-nil means process buffer inherits coding system of process output.
8009 Bind it to t if the process output is to be treated as if it were a file
8010 read from some filesystem. */);
8011 inherit_process_coding_system = 0;
8012
8013 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8014 doc: /*
8015 Alist to decide a coding system to use for a file I/O operation.
8016 The format is ((PATTERN . VAL) ...),
8017 where PATTERN is a regular expression matching a file name,
8018 VAL is a coding system, a cons of coding systems, or a function symbol.
8019 If VAL is a coding system, it is used for both decoding and encoding
8020 the file contents.
8021 If VAL is a cons of coding systems, the car part is used for decoding,
8022 and the cdr part is used for encoding.
8023 If VAL is a function symbol, the function must return a coding system
8024 or a cons of coding systems which are used as above. The function gets
8025 the arguments with which `find-operation-coding-systems' was called.
8026
8027 See also the function `find-operation-coding-system'
8028 and the variable `auto-coding-alist'. */);
8029 Vfile_coding_system_alist = Qnil;
8030
8031 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8032 doc: /*
8033 Alist to decide a coding system to use for a process I/O operation.
8034 The format is ((PATTERN . VAL) ...),
8035 where PATTERN is a regular expression matching a program name,
8036 VAL is a coding system, a cons of coding systems, or a function symbol.
8037 If VAL is a coding system, it is used for both decoding what received
8038 from the program and encoding what sent to the program.
8039 If VAL is a cons of coding systems, the car part is used for decoding,
8040 and the cdr part is used for encoding.
8041 If VAL is a function symbol, the function must return a coding system
8042 or a cons of coding systems which are used as above.
8043
8044 See also the function `find-operation-coding-system'. */);
8045 Vprocess_coding_system_alist = Qnil;
8046
8047 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8048 doc: /*
8049 Alist to decide a coding system to use for a network I/O operation.
8050 The format is ((PATTERN . VAL) ...),
8051 where PATTERN is a regular expression matching a network service name
8052 or is a port number to connect to,
8053 VAL is a coding system, a cons of coding systems, or a function symbol.
8054 If VAL is a coding system, it is used for both decoding what received
8055 from the network stream and encoding what sent to the network stream.
8056 If VAL is a cons of coding systems, the car part is used for decoding,
8057 and the cdr part is used for encoding.
8058 If VAL is a function symbol, the function must return a coding system
8059 or a cons of coding systems which are used as above.
8060
8061 See also the function `find-operation-coding-system'. */);
8062 Vnetwork_coding_system_alist = Qnil;
8063
8064 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8065 doc: /* Coding system to use with system messages.
8066 Also used for decoding keyboard input on X Window system. */);
8067 Vlocale_coding_system = Qnil;
8068
8069 /* The eol mnemonics are reset in startup.el system-dependently. */
8070 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8071 doc: /*
8072 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8073 eol_mnemonic_unix = build_string (":");
8074
8075 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8076 doc: /*
8077 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8078 eol_mnemonic_dos = build_string ("\\");
8079
8080 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8081 doc: /*
8082 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8083 eol_mnemonic_mac = build_string ("/");
8084
8085 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8086 doc: /*
8087 *String displayed in mode line when end-of-line format is not yet determined. */);
8088 eol_mnemonic_undecided = build_string (":");
8089
8090 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8091 doc: /*
8092 *Non-nil enables character translation while encoding and decoding. */);
8093 Venable_character_translation = Qt;
8094
8095 DEFVAR_LISP ("standard-translation-table-for-decode",
8096 &Vstandard_translation_table_for_decode,
8097 doc: /* Table for translating characters while decoding. */);
8098 Vstandard_translation_table_for_decode = Qnil;
8099
8100 DEFVAR_LISP ("standard-translation-table-for-encode",
8101 &Vstandard_translation_table_for_encode,
8102 doc: /* Table for translating characters while encoding. */);
8103 Vstandard_translation_table_for_encode = Qnil;
8104
8105 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8106 doc: /* Alist of charsets vs revision numbers.
8107 While encoding, if a charset (car part of an element) is found,
8108 designate it with the escape sequence identifying revision (cdr part
8109 of the element). */);
8110 Vcharset_revision_table = Qnil;
8111
8112 DEFVAR_LISP ("default-process-coding-system",
8113 &Vdefault_process_coding_system,
8114 doc: /* Cons of coding systems used for process I/O by default.
8115 The car part is used for decoding a process output,
8116 the cdr part is used for encoding a text to be sent to a process. */);
8117 Vdefault_process_coding_system = Qnil;
8118
8119 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8120 doc: /*
8121 Table of extra Latin codes in the range 128..159 (inclusive).
8122 This is a vector of length 256.
8123 If Nth element is non-nil, the existence of code N in a file
8124 \(or output of subprocess) doesn't prevent it to be detected as
8125 a coding system of ISO 2022 variant which has a flag
8126 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8127 or reading output of a subprocess.
8128 Only 128th through 159th elements has a meaning. */);
8129 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8130
8131 DEFVAR_LISP ("select-safe-coding-system-function",
8132 &Vselect_safe_coding_system_function,
8133 doc: /*
8134 Function to call to select safe coding system for encoding a text.
8135
8136 If set, this function is called to force a user to select a proper
8137 coding system which can encode the text in the case that a default
8138 coding system used in each operation can't encode the text.
8139
8140 The default value is `select-safe-coding-system' (which see). */);
8141 Vselect_safe_coding_system_function = Qnil;
8142
8143 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
8144 doc: /*
8145 Char-table containing safe coding systems of each characters.
8146 Each element doesn't include such generic coding systems that can
8147 encode any characters. They are in the first extra slot. */);
8148 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
8149
8150 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8151 &inhibit_iso_escape_detection,
8152 doc: /*
8153 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8154
8155 By default, on reading a file, Emacs tries to detect how the text is
8156 encoded. This code detection is sensitive to escape sequences. If
8157 the sequence is valid as ISO2022, the code is determined as one of
8158 the ISO2022 encodings, and the file is decoded by the corresponding
8159 coding system (e.g. `iso-2022-7bit').
8160
8161 However, there may be a case that you want to read escape sequences in
8162 a file as is. In such a case, you can set this variable to non-nil.
8163 Then, as the code detection ignores any escape sequences, no file is
8164 detected as encoded in some ISO2022 encoding. The result is that all
8165 escape sequences become visible in a buffer.
8166
8167 The default value is nil, and it is strongly recommended not to change
8168 it. That is because many Emacs Lisp source files that contain
8169 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8170 in Emacs's distribution, and they won't be decoded correctly on
8171 reading if you suppress escape sequence detection.
8172
8173 The other way to read escape sequences in a file without decoding is
8174 to explicitly specify some coding system that doesn't use ISO2022's
8175 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8176 inhibit_iso_escape_detection = 0;
8177
8178 {
8179 Lisp_Object args[coding_arg_max];
8180 Lisp_Object plist[14];
8181 int i;
8182
8183 for (i = 0; i < coding_arg_max; i++)
8184 args[i] = Qnil;
8185
8186 plist[0] = intern (":name");
8187 plist[1] = args[coding_arg_name] = Qno_conversion;
8188 plist[2] = intern (":mnemonic");
8189 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8190 plist[4] = intern (":coding-type");
8191 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8192 plist[6] = intern (":ascii-compatible-p");
8193 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8194 plist[8] = intern (":default-char");
8195 plist[9] = args[coding_arg_default_char] = make_number (0);
8196 plist[10] = intern (":docstring");
8197 plist[11] = build_string ("Do no conversion.\n\
8198 \n\
8199 When you visit a file with this coding, the file is read into a\n\
8200 unibyte buffer as is, thus each byte of a file is treated as a\n\
8201 character.");
8202 plist[12] = intern (":eol-type");
8203 plist[13] = args[coding_arg_eol_type] = Qunix;
8204 args[coding_arg_plist] = Flist (14, plist);
8205 Fdefine_coding_system_internal (coding_arg_max, args);
8206 }
8207
8208 setup_coding_system (Qno_conversion, &keyboard_coding);
8209 setup_coding_system (Qno_conversion, &terminal_coding);
8210 setup_coding_system (Qno_conversion, &safe_terminal_coding);
8211 }
8212
8213 char *
8214 emacs_strerror (error_number)
8215 int error_number;
8216 {
8217 char *str;
8218
8219 synchronize_system_messages_locale ();
8220 str = strerror (error_number);
8221
8222 if (! NILP (Vlocale_coding_system))
8223 {
8224 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8225 Vlocale_coding_system,
8226 0);
8227 str = (char *) XSTRING (dec)->data;
8228 }
8229
8230 return str;
8231 }
8232
8233 #endif /* emacs */