(generate_ascii_font): Fix return type.
[bpt/emacs.git] / src / coding.c
1 /* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9 This file is part of GNU Emacs.
10
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
14 any later version.
15
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
25
26 /*** TABLE OF CONTENTS ***
27
28 0. General comments
29 1. Preamble
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
41
42 */
43
44 /*** 0. General comments ***
45
46
47 CODING SYSTEM
48
49 Coding system is an object for a encoding mechanism that contains
50 information about how to convert byte sequence to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcharset_hash_table. The conversion from a
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
65 the mechanism of encoding. Here's a brief descrition about type.
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by code converter defined for each
75 character set.
76
77 o Old Emacs' internal format (emacs-mule)
78
79 The coding system adopted by an old versions of Emacs (20 and 21).
80
81 o ISO2022-base coding system
82
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
92 section 8.
93
94 o BIG5
95
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
101
102 o CCL
103
104 If a user wants to decode/encode a text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
108
109 o Raw-text
110
111 A coding system for a text containing raw eight-bit data. Emacs
112 treat each byte of source text as a character (except for
113 end-of-line conversion).
114
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
118
119
120 END-OF-LINE FORMAT
121
122 How end-of-line of a text is encoded depends on a system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
127
128 Since text characters encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
131
132 STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
138
139 */
140
141 /* COMMON MACROS */
142
143
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
157 #if 0
158 static int
159 detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
162 {
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
188 }
189 #endif
190
191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
197
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
202
203 Below is the template of these functions. */
204
205 #if 0
206 static void
207 decode_coding_XXXX (coding)
208 struct coding_system *coding;
209 {
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
243 }
244 #endif
245
246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
252
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
257
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
261
262 Below is a template of these functions. */
263 #if 0
264 static void
265 encode_coding_XXX (coding)
266 struct coding_system *coding;
267 {
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
285 }
286 #endif
287
288 \f
289 /*** 1. Preamble ***/
290
291 #include <config.h>
292 #include <stdio.h>
293
294 #include "lisp.h"
295 #include "buffer.h"
296 #include "character.h"
297 #include "charset.h"
298 #include "ccl.h"
299 #include "composite.h"
300 #include "coding.h"
301 #include "window.h"
302
303 Lisp_Object Vcoding_system_hash_table;
304
305 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
306 Lisp_Object Qunix, Qdos, Qmac;
307 Lisp_Object Qbuffer_file_coding_system;
308 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
309 Lisp_Object Qdefault_char;
310 Lisp_Object Qno_conversion, Qundecided;
311 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
312 Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
313 Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
314 Lisp_Object Qcoding_system_history;
315 Lisp_Object Qvalid_codes;
316
317 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
318 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
319 Lisp_Object Qstart_process, Qopen_network_stream;
320 Lisp_Object Qtarget_idx;
321
322 Lisp_Object Vselect_safe_coding_system_function;
323
324 /* Mnemonic string for each format of end-of-line. */
325 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
326 /* Mnemonic string to indicate format of end-of-line is not yet
327 decided. */
328 Lisp_Object eol_mnemonic_undecided;
329
330 #ifdef emacs
331
332 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
333
334 Lisp_Object Qcoding_system_p, Qcoding_system_error;
335
336 /* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338 Lisp_Object Qemacs_mule, Qraw_text;
339
340 /* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342 /* Coding-system for reading files and receiving data from process. */
343 Lisp_Object Vcoding_system_for_read;
344 /* Coding-system for writing files and sending data to process. */
345 Lisp_Object Vcoding_system_for_write;
346 /* Coding-system actually used in the latest I/O. */
347 Lisp_Object Vlast_coding_system_used;
348
349 /* A vector of length 256 which contains information about special
350 Latin codes (especially for dealing with Microsoft codes). */
351 Lisp_Object Vlatin_extra_code_table;
352
353 /* Flag to inhibit code conversion of end-of-line format. */
354 int inhibit_eol_conversion;
355
356 /* Flag to inhibit ISO2022 escape sequence detection. */
357 int inhibit_iso_escape_detection;
358
359 /* Flag to make buffer-file-coding-system inherit from process-coding. */
360 int inherit_process_coding_system;
361
362 /* Coding system to be used to encode text for terminal display. */
363 struct coding_system terminal_coding;
364
365 /* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367 struct coding_system safe_terminal_coding;
368
369 /* Coding system of what is sent from terminal keyboard. */
370 struct coding_system keyboard_coding;
371
372 Lisp_Object Vfile_coding_system_alist;
373 Lisp_Object Vprocess_coding_system_alist;
374 Lisp_Object Vnetwork_coding_system_alist;
375
376 Lisp_Object Vlocale_coding_system;
377
378 #endif /* emacs */
379
380 /* Flag to tell if we look up translation table on character code
381 conversion. */
382 Lisp_Object Venable_character_translation;
383 /* Standard translation table to look up on decoding (reading). */
384 Lisp_Object Vstandard_translation_table_for_decode;
385 /* Standard translation table to look up on encoding (writing). */
386 Lisp_Object Vstandard_translation_table_for_encode;
387
388 Lisp_Object Qtranslation_table;
389 Lisp_Object Qtranslation_table_id;
390 Lisp_Object Qtranslation_table_for_decode;
391 Lisp_Object Qtranslation_table_for_encode;
392
393 /* Alist of charsets vs revision number. */
394 static Lisp_Object Vcharset_revision_table;
395
396 /* Default coding systems used for process I/O. */
397 Lisp_Object Vdefault_process_coding_system;
398
399 /* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403 static int inhibit_pre_post_conversion;
404
405 /* Char-table containing safe coding systems of each character. */
406 Lisp_Object Vchar_coding_system_table;
407 Lisp_Object Qchar_coding_system;
408
409 /* Two special coding systems. */
410 Lisp_Object Vsjis_coding_system;
411 Lisp_Object Vbig5_coding_system;
412
413
414 static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
415 static void decode_coding_utf_8 P_ ((struct coding_system *));
416 static int encode_coding_utf_8 P_ ((struct coding_system *));
417
418 static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
419 static void decode_coding_utf_16 P_ ((struct coding_system *));
420 static int encode_coding_utf_16 P_ ((struct coding_system *));
421
422 static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
423 static void decode_coding_iso_2022 P_ ((struct coding_system *));
424 static int encode_coding_iso_2022 P_ ((struct coding_system *));
425
426 static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
427 static void decode_coding_emacs_mule P_ ((struct coding_system *));
428 static int encode_coding_emacs_mule P_ ((struct coding_system *));
429
430 static int detect_coding_sjis P_ ((struct coding_system *, int *));
431 static void decode_coding_sjis P_ ((struct coding_system *));
432 static int encode_coding_sjis P_ ((struct coding_system *));
433
434 static int detect_coding_big5 P_ ((struct coding_system *, int *));
435 static void decode_coding_big5 P_ ((struct coding_system *));
436 static int encode_coding_big5 P_ ((struct coding_system *));
437
438 static int detect_coding_ccl P_ ((struct coding_system *, int *));
439 static void decode_coding_ccl P_ ((struct coding_system *));
440 static int encode_coding_ccl P_ ((struct coding_system *));
441
442 static void decode_coding_raw_text P_ ((struct coding_system *));
443 static int encode_coding_raw_text P_ ((struct coding_system *));
444
445
446 /* ISO2022 section */
447
448 #define CODING_ISO_INITIAL(coding, reg) \
449 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
450 coding_attr_iso_initial), \
451 reg)))
452
453
454 #define CODING_ISO_REQUEST(coding, charset_id) \
455 ((charset_id <= (coding)->max_charset_id \
456 ? (coding)->safe_charsets[charset_id] \
457 : -1))
458
459
460 #define CODING_ISO_FLAGS(coding) \
461 ((coding)->spec.iso_2022.flags)
462 #define CODING_ISO_DESIGNATION(coding, reg) \
463 ((coding)->spec.iso_2022.current_designation[reg])
464 #define CODING_ISO_INVOCATION(coding, plane) \
465 ((coding)->spec.iso_2022.current_invocation[plane])
466 #define CODING_ISO_SINGLE_SHIFTING(coding) \
467 ((coding)->spec.iso_2022.single_shifting)
468 #define CODING_ISO_BOL(coding) \
469 ((coding)->spec.iso_2022.bol)
470 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
471 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
472
473 /* Control characters of ISO2022. */
474 /* code */ /* function */
475 #define ISO_CODE_LF 0x0A /* line-feed */
476 #define ISO_CODE_CR 0x0D /* carriage-return */
477 #define ISO_CODE_SO 0x0E /* shift-out */
478 #define ISO_CODE_SI 0x0F /* shift-in */
479 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
480 #define ISO_CODE_ESC 0x1B /* escape */
481 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
482 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
483 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
484
485 /* All code (1-byte) of ISO2022 is classified into one of the
486 followings. */
487 enum iso_code_class_type
488 {
489 ISO_control_0, /* Control codes in the range
490 0x00..0x1F and 0x7F, except for the
491 following 5 codes. */
492 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
493 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
494 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
495 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
496 ISO_escape, /* ISO_CODE_SO (0x1B) */
497 ISO_control_1, /* Control codes in the range
498 0x80..0x9F, except for the
499 following 3 codes. */
500 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
501 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
502 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
503 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
504 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
505 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
506 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
507 };
508
509 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
510 `iso-flags' attribute of an iso2022 coding system. */
511
512 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
513 instead of the correct short-form sequence (e.g. ESC $ A). */
514 #define CODING_ISO_FLAG_LONG_FORM 0x0001
515
516 /* If set, reset graphic planes and registers at end-of-line to the
517 initial state. */
518 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
519
520 /* If set, reset graphic planes and registers before any control
521 characters to the initial state. */
522 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
523
524 /* If set, encode by 7-bit environment. */
525 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
526
527 /* If set, use locking-shift function. */
528 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
529
530 /* If set, use single-shift function. Overwrite
531 CODING_ISO_FLAG_LOCKING_SHIFT. */
532 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
533
534 /* If set, use designation escape sequence. */
535 #define CODING_ISO_FLAG_DESIGNATION 0x0040
536
537 /* If set, produce revision number sequence. */
538 #define CODING_ISO_FLAG_REVISION 0x0080
539
540 /* If set, produce ISO6429's direction specifying sequence. */
541 #define CODING_ISO_FLAG_DIRECTION 0x0100
542
543 /* If set, assume designation states are reset at beginning of line on
544 output. */
545 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
546
547 /* If set, designation sequence should be placed at beginning of line
548 on output. */
549 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
550
551 /* If set, do not encode unsafe charactes on output. */
552 #define CODING_ISO_FLAG_SAFE 0x0800
553
554 /* If set, extra latin codes (128..159) are accepted as a valid code
555 on input. */
556 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
557
558 #define CODING_ISO_FLAG_COMPOSITION 0x2000
559
560 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
561
562 #define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
563
564 /* A character to be produced on output if encoding of the original
565 character is prohibited by CODING_ISO_FLAG_SAFE. */
566 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
567
568
569 /* UTF-16 section */
570 #define CODING_UTF_16_BOM(coding) \
571 ((coding)->spec.utf_16.bom)
572
573 #define CODING_UTF_16_ENDIAN(coding) \
574 ((coding)->spec.utf_16.endian)
575
576 #define CODING_UTF_16_SURROGATE(coding) \
577 ((coding)->spec.utf_16.surrogate)
578
579
580 /* CCL section */
581 #define CODING_CCL_DECODER(coding) \
582 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
583 #define CODING_CCL_ENCODER(coding) \
584 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
585 #define CODING_CCL_VALIDS(coding) \
586 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
587 ->data)
588
589 /* Index for each coding category in `coding_category_table' */
590
591 enum coding_category
592 {
593 coding_category_iso_7,
594 coding_category_iso_7_tight,
595 coding_category_iso_8_1,
596 coding_category_iso_8_2,
597 coding_category_iso_7_else,
598 coding_category_iso_8_else,
599 coding_category_utf_8,
600 coding_category_utf_16_auto,
601 coding_category_utf_16_be,
602 coding_category_utf_16_le,
603 coding_category_utf_16_be_nosig,
604 coding_category_utf_16_le_nosig,
605 coding_category_charset,
606 coding_category_sjis,
607 coding_category_big5,
608 coding_category_ccl,
609 coding_category_emacs_mule,
610 /* All above are targets of code detection. */
611 coding_category_raw_text,
612 coding_category_undecided,
613 coding_category_max
614 };
615
616 /* Definitions of flag bits used in detect_coding_XXXX. */
617 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
618 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
619 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
620 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
621 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
622 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
623 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
624 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
625 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
626 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
627 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
628 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
629 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
630 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
631 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
632 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
633
634 /* This value is returned if detect_coding_mask () find nothing other
635 than ASCII characters. */
636 #define CATEGORY_MASK_ANY \
637 (CATEGORY_MASK_ISO_7 \
638 | CATEGORY_MASK_ISO_7_TIGHT \
639 | CATEGORY_MASK_ISO_8_1 \
640 | CATEGORY_MASK_ISO_8_2 \
641 | CATEGORY_MASK_ISO_7_ELSE \
642 | CATEGORY_MASK_ISO_8_ELSE \
643 | CATEGORY_MASK_UTF_8 \
644 | CATEGORY_MASK_UTF_16_BE \
645 | CATEGORY_MASK_UTF_16_LE \
646 | CATEGORY_MASK_UTF_16_BE_NOSIG \
647 | CATEGORY_MASK_UTF_16_LE_NOSIG \
648 | CATEGORY_MASK_CHARSET \
649 | CATEGORY_MASK_SJIS \
650 | CATEGORY_MASK_BIG5 \
651 | CATEGORY_MASK_CCL \
652 | CATEGORY_MASK_EMACS_MULE)
653
654
655 #define CATEGORY_MASK_ISO_7BIT \
656 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
657
658 #define CATEGORY_MASK_ISO_8BIT \
659 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
660
661 #define CATEGORY_MASK_ISO_ELSE \
662 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
663
664 #define CATEGORY_MASK_ISO_ESCAPE \
665 (CATEGORY_MASK_ISO_7 \
666 | CATEGORY_MASK_ISO_7_TIGHT \
667 | CATEGORY_MASK_ISO_7_ELSE \
668 | CATEGORY_MASK_ISO_8_ELSE)
669
670 #define CATEGORY_MASK_ISO \
671 ( CATEGORY_MASK_ISO_7BIT \
672 | CATEGORY_MASK_ISO_8BIT \
673 | CATEGORY_MASK_ISO_ELSE)
674
675 #define CATEGORY_MASK_UTF_16 \
676 (CATEGORY_MASK_UTF_16_BE \
677 | CATEGORY_MASK_UTF_16_LE \
678 | CATEGORY_MASK_UTF_16_BE_NOSIG \
679 | CATEGORY_MASK_UTF_16_LE_NOSIG)
680
681
682 /* List of symbols `coding-category-xxx' ordered by priority. This
683 variable is exposed to Emacs Lisp. */
684 static Lisp_Object Vcoding_category_list;
685
686 /* Table of coding categories (Lisp symbols). This variable is for
687 internal use oly. */
688 static Lisp_Object Vcoding_category_table;
689
690 /* Table of coding-categories ordered by priority. */
691 static enum coding_category coding_priorities[coding_category_max];
692
693 /* Nth element is a coding context for the coding system bound to the
694 Nth coding category. */
695 static struct coding_system coding_categories[coding_category_max];
696
697 static int detected_mask[coding_category_raw_text] =
698 { CATEGORY_MASK_ISO,
699 CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_ISO,
702 CATEGORY_MASK_ISO,
703 CATEGORY_MASK_ISO,
704 CATEGORY_MASK_UTF_8,
705 CATEGORY_MASK_UTF_16,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_UTF_16,
708 CATEGORY_MASK_UTF_16,
709 CATEGORY_MASK_UTF_16,
710 CATEGORY_MASK_CHARSET,
711 CATEGORY_MASK_SJIS,
712 CATEGORY_MASK_BIG5,
713 CATEGORY_MASK_CCL,
714 CATEGORY_MASK_EMACS_MULE
715 };
716
717 /*** Commonly used macros and functions ***/
718
719 #ifndef min
720 #define min(a, b) ((a) < (b) ? (a) : (b))
721 #endif
722 #ifndef max
723 #define max(a, b) ((a) > (b) ? (a) : (b))
724 #endif
725
726 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
727 do { \
728 attrs = CODING_ID_ATTRS (coding->id); \
729 eol_type = CODING_ID_EOL_TYPE (coding->id); \
730 if (VECTORP (eol_type)) \
731 eol_type = Qunix; \
732 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
733 } while (0)
734
735
736 /* Safely get one byte from the source text pointed by SRC which ends
737 at SRC_END, and set C to that byte. If there are not enough bytes
738 in the source, it jumps to `no_more_source'. The caller
739 should declare and set these variables appropriately in advance:
740 src, src_end, multibytep
741 */
742
743 #define ONE_MORE_BYTE(c) \
744 do { \
745 if (src == src_end) \
746 { \
747 if (src_base < src) \
748 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
749 goto no_more_source; \
750 } \
751 c = *src++; \
752 if (multibytep && (c & 0x80)) \
753 { \
754 if ((c & 0xFE) != 0xC0) \
755 error ("Undecodable char found"); \
756 c = ((c & 1) << 6) | *src++; \
757 } \
758 consumed_chars++; \
759 } while (0)
760
761
762 #define ONE_MORE_BYTE_NO_CHECK(c) \
763 do { \
764 c = *src++; \
765 if (multibytep && (c & 0x80)) \
766 { \
767 if ((c & 0xFE) != 0xC0) \
768 error ("Undecodable char found"); \
769 c = ((c & 1) << 6) | *src++; \
770 } \
771 } while (0)
772
773
774 /* Store a byte C in the place pointed by DST and increment DST to the
775 next free point, and increment PRODUCED_CHARS. The caller should
776 assure that C is 0..127, and declare and set the variable `dst'
777 appropriately in advance.
778 */
779
780
781 #define EMIT_ONE_ASCII_BYTE(c) \
782 do { \
783 produced_chars++; \
784 *dst++ = (c); \
785 } while (0)
786
787
788 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
789
790 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
791 do { \
792 produced_chars += 2; \
793 *dst++ = (c1), *dst++ = (c2); \
794 } while (0)
795
796
797 /* Store a byte C in the place pointed by DST and increment DST to the
798 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
799 nonzero, store in an appropriate multibyte from. The caller should
800 declare and set the variables `dst' and `multibytep' appropriately
801 in advance. */
802
803 #define EMIT_ONE_BYTE(c) \
804 do { \
805 produced_chars++; \
806 if (multibytep) \
807 { \
808 int ch = (c); \
809 if (ch >= 0x80) \
810 ch = BYTE8_TO_CHAR (ch); \
811 CHAR_STRING_ADVANCE (ch, dst); \
812 } \
813 else \
814 *dst++ = (c); \
815 } while (0)
816
817
818 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
819
820 #define EMIT_TWO_BYTES(c1, c2) \
821 do { \
822 produced_chars += 2; \
823 if (multibytep) \
824 { \
825 int ch; \
826 \
827 ch = (c1); \
828 if (ch >= 0x80) \
829 ch = BYTE8_TO_CHAR (ch); \
830 CHAR_STRING_ADVANCE (ch, dst); \
831 ch = (c2); \
832 if (ch >= 0x80) \
833 ch = BYTE8_TO_CHAR (ch); \
834 CHAR_STRING_ADVANCE (ch, dst); \
835 } \
836 else \
837 { \
838 *dst++ = (c1); \
839 *dst++ = (c2); \
840 } \
841 } while (0)
842
843
844 #define EMIT_THREE_BYTES(c1, c2, c3) \
845 do { \
846 EMIT_ONE_BYTE (c1); \
847 EMIT_TWO_BYTES (c2, c3); \
848 } while (0)
849
850
851 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
852 do { \
853 EMIT_TWO_BYTES (c1, c2); \
854 EMIT_TWO_BYTES (c3, c4); \
855 } while (0)
856
857
858 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
859 do { \
860 charset_map_loaded = 0; \
861 c = DECODE_CHAR (charset, code); \
862 if (charset_map_loaded) \
863 { \
864 unsigned char *orig = coding->source; \
865 EMACS_INT offset; \
866 \
867 coding_set_source (coding); \
868 offset = coding->source - orig; \
869 src += offset; \
870 src_base += offset; \
871 src_end += offset; \
872 } \
873 } while (0)
874
875
876 #define ASSURE_DESTINATION(bytes) \
877 do { \
878 if (dst + (bytes) >= dst_end) \
879 { \
880 int more_bytes = charbuf_end - charbuf + (bytes); \
881 \
882 dst = alloc_destination (coding, more_bytes, dst); \
883 dst_end = coding->destination + coding->dst_bytes; \
884 } \
885 } while (0)
886
887
888
889 static void
890 coding_set_source (coding)
891 struct coding_system *coding;
892 {
893 if (BUFFERP (coding->src_object))
894 {
895 if (coding->src_pos < 0)
896 coding->source = GAP_END_ADDR + coding->src_pos_byte;
897 else
898 {
899 struct buffer *buf = XBUFFER (coding->src_object);
900 EMACS_INT beg_byte = BUF_BEG_BYTE (buf);
901 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
902 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
903
904 coding->source = beg_addr + coding->src_pos_byte - 1;
905 if (coding->src_pos_byte >= gpt_byte)
906 coding->source += BUF_GAP_SIZE (buf);
907 }
908 }
909 else if (STRINGP (coding->src_object))
910 {
911 coding->source = (XSTRING (coding->src_object)->data
912 + coding->src_pos_byte);
913 }
914 else
915 /* Otherwise, the source is C string and is never relocated
916 automatically. Thus we don't have to update anything. */
917 ;
918 }
919
920 static void
921 coding_set_destination (coding)
922 struct coding_system *coding;
923 {
924 if (BUFFERP (coding->dst_object))
925 {
926 /* We are sure that coding->dst_pos_byte is before the gap of the
927 buffer. */
928 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
929 + coding->dst_pos_byte - 1);
930 if (coding->src_pos < 0)
931 coding->dst_bytes = (GAP_END_ADDR
932 - (coding->src_bytes - coding->consumed)
933 - coding->destination);
934 else
935 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
936 - coding->destination);
937 }
938 else
939 /* Otherwise, the destination is C string and is never relocated
940 automatically. Thus we don't have to update anything. */
941 ;
942 }
943
944
945 static void
946 coding_alloc_by_realloc (coding, bytes)
947 struct coding_system *coding;
948 EMACS_INT bytes;
949 {
950 coding->destination = (unsigned char *) xrealloc (coding->destination,
951 coding->dst_bytes + bytes);
952 coding->dst_bytes += bytes;
953 }
954
955 static void
956 coding_alloc_by_making_gap (coding, bytes)
957 struct coding_system *coding;
958 EMACS_INT bytes;
959 {
960 if (BUFFERP (coding->dst_object)
961 && EQ (coding->src_object, coding->dst_object))
962 {
963 EMACS_INT add = coding->src_bytes - coding->consumed;
964
965 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
966 make_gap (bytes);
967 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
968 }
969 else
970 {
971 Lisp_Object this_buffer;
972
973 this_buffer = Fcurrent_buffer ();
974 set_buffer_internal (XBUFFER (coding->dst_object));
975 make_gap (bytes);
976 set_buffer_internal (XBUFFER (this_buffer));
977 }
978 }
979
980
981 static unsigned char *
982 alloc_destination (coding, nbytes, dst)
983 struct coding_system *coding;
984 int nbytes;
985 unsigned char *dst;
986 {
987 EMACS_INT offset = dst - coding->destination;
988
989 if (BUFFERP (coding->dst_object))
990 coding_alloc_by_making_gap (coding, nbytes);
991 else
992 coding_alloc_by_realloc (coding, nbytes);
993 coding->result = CODING_RESULT_SUCCESS;
994 coding_set_destination (coding);
995 dst = coding->destination + offset;
996 return dst;
997 }
998
999 \f
1000 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1001
1002
1003
1004 \f
1005 /*** 3. UTF-8 ***/
1006
1007 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1008 Check if a text is encoded in UTF-8. If it is, return
1009 CATEGORY_MASK_UTF_8, else return 0. */
1010
1011 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1012 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1013 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1014 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1015 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1016 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1017
1018 static int
1019 detect_coding_utf_8 (coding, mask)
1020 struct coding_system *coding;
1021 int *mask;
1022 {
1023 unsigned char *src = coding->source, *src_base = src;
1024 unsigned char *src_end = coding->source + coding->src_bytes;
1025 int multibytep = coding->src_multibyte;
1026 int consumed_chars = 0;
1027 int found = 0;
1028
1029 /* A coding system of this category is always ASCII compatible. */
1030 src += coding->head_ascii;
1031
1032 while (1)
1033 {
1034 int c, c1, c2, c3, c4;
1035
1036 ONE_MORE_BYTE (c);
1037 if (UTF_8_1_OCTET_P (c))
1038 continue;
1039 ONE_MORE_BYTE (c1);
1040 if (! UTF_8_EXTRA_OCTET_P (c1))
1041 break;
1042 if (UTF_8_2_OCTET_LEADING_P (c))
1043 {
1044 found++;
1045 continue;
1046 }
1047 ONE_MORE_BYTE (c2);
1048 if (! UTF_8_EXTRA_OCTET_P (c2))
1049 break;
1050 if (UTF_8_3_OCTET_LEADING_P (c))
1051 {
1052 found++;
1053 continue;
1054 }
1055 ONE_MORE_BYTE (c3);
1056 if (! UTF_8_EXTRA_OCTET_P (c3))
1057 break;
1058 if (UTF_8_4_OCTET_LEADING_P (c))
1059 {
1060 found++;
1061 continue;
1062 }
1063 ONE_MORE_BYTE (c4);
1064 if (! UTF_8_EXTRA_OCTET_P (c4))
1065 break;
1066 if (UTF_8_5_OCTET_LEADING_P (c))
1067 {
1068 found++;
1069 continue;
1070 }
1071 break;
1072 }
1073 *mask &= ~CATEGORY_MASK_UTF_8;
1074 return 0;
1075
1076 no_more_source:
1077 if (! found)
1078 return 0;
1079 *mask &= CATEGORY_MASK_UTF_8;
1080 return 1;
1081 }
1082
1083
1084 static void
1085 decode_coding_utf_8 (coding)
1086 struct coding_system *coding;
1087 {
1088 unsigned char *src = coding->source + coding->consumed;
1089 unsigned char *src_end = coding->source + coding->src_bytes;
1090 unsigned char *src_base;
1091 int *charbuf = coding->charbuf;
1092 int *charbuf_end = charbuf + coding->charbuf_size;
1093 int consumed_chars = 0, consumed_chars_base;
1094 int multibytep = coding->src_multibyte;
1095 Lisp_Object attr, eol_type, charset_list;
1096
1097 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1098
1099 while (1)
1100 {
1101 int c, c1, c2, c3, c4, c5;
1102
1103 src_base = src;
1104 consumed_chars_base = consumed_chars;
1105
1106 if (charbuf >= charbuf_end)
1107 break;
1108
1109 ONE_MORE_BYTE (c1);
1110 if (UTF_8_1_OCTET_P(c1))
1111 {
1112 c = c1;
1113 if (c == '\r')
1114 {
1115 if (EQ (eol_type, Qdos))
1116 {
1117 if (src == src_end)
1118 goto no_more_source;
1119 if (*src == '\n')
1120 ONE_MORE_BYTE (c);
1121 }
1122 else if (EQ (eol_type, Qmac))
1123 c = '\n';
1124 }
1125 }
1126 else
1127 {
1128 ONE_MORE_BYTE (c2);
1129 if (! UTF_8_EXTRA_OCTET_P (c2))
1130 goto invalid_code;
1131 if (UTF_8_2_OCTET_LEADING_P (c1))
1132 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1133 else
1134 {
1135 ONE_MORE_BYTE (c3);
1136 if (! UTF_8_EXTRA_OCTET_P (c3))
1137 goto invalid_code;
1138 if (UTF_8_3_OCTET_LEADING_P (c1))
1139 c = (((c1 & 0xF) << 12)
1140 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1141 else
1142 {
1143 ONE_MORE_BYTE (c4);
1144 if (! UTF_8_EXTRA_OCTET_P (c4))
1145 goto invalid_code;
1146 if (UTF_8_4_OCTET_LEADING_P (c1))
1147 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1148 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1149 else
1150 {
1151 ONE_MORE_BYTE (c5);
1152 if (! UTF_8_EXTRA_OCTET_P (c5))
1153 goto invalid_code;
1154 if (UTF_8_5_OCTET_LEADING_P (c1))
1155 {
1156 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1157 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1158 | (c5 & 0x3F));
1159 if (c > MAX_CHAR)
1160 goto invalid_code;
1161 }
1162 else
1163 goto invalid_code;
1164 }
1165 }
1166 }
1167 }
1168
1169 *charbuf++ = c;
1170 continue;
1171
1172 invalid_code:
1173 src = src_base;
1174 consumed_chars = consumed_chars_base;
1175 ONE_MORE_BYTE (c);
1176 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1177 coding->errors++;
1178 }
1179
1180 no_more_source:
1181 coding->consumed_char += consumed_chars_base;
1182 coding->consumed = src_base - coding->source;
1183 coding->charbuf_used = charbuf - coding->charbuf;
1184 }
1185
1186
1187 static int
1188 encode_coding_utf_8 (coding)
1189 struct coding_system *coding;
1190 {
1191 int multibytep = coding->dst_multibyte;
1192 int *charbuf = coding->charbuf;
1193 int *charbuf_end = charbuf + coding->charbuf_used;
1194 unsigned char *dst = coding->destination + coding->produced;
1195 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1196 int produced_chars = 0;
1197 int c;
1198
1199 if (multibytep)
1200 {
1201 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1202
1203 while (charbuf < charbuf_end)
1204 {
1205 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1206
1207 ASSURE_DESTINATION (safe_room);
1208 c = *charbuf++;
1209 CHAR_STRING_ADVANCE (c, pend);
1210 for (p = str; p < pend; p++)
1211 EMIT_ONE_BYTE (*p);
1212 }
1213 }
1214 else
1215 {
1216 int safe_room = MAX_MULTIBYTE_LENGTH;
1217
1218 while (charbuf < charbuf_end)
1219 {
1220 ASSURE_DESTINATION (safe_room);
1221 c = *charbuf++;
1222 dst += CHAR_STRING (c, dst);
1223 produced_chars++;
1224 }
1225 }
1226 coding->result = CODING_RESULT_SUCCESS;
1227 coding->produced_char += produced_chars;
1228 coding->produced = dst - coding->destination;
1229 return 0;
1230 }
1231
1232
1233 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1234 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1235 Little Endian (otherwise). If it is, return
1236 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1237 else return 0. */
1238
1239 #define UTF_16_HIGH_SURROGATE_P(val) \
1240 (((val) & 0xFC00) == 0xD800)
1241
1242 #define UTF_16_LOW_SURROGATE_P(val) \
1243 (((val) & 0xFC00) == 0xDC00)
1244
1245 #define UTF_16_INVALID_P(val) \
1246 (((val) == 0xFFFE) \
1247 || ((val) == 0xFFFF) \
1248 || UTF_16_LOW_SURROGATE_P (val))
1249
1250
1251 static int
1252 detect_coding_utf_16 (coding, mask)
1253 struct coding_system *coding;
1254 int *mask;
1255 {
1256 unsigned char *src = coding->source, *src_base = src;
1257 unsigned char *src_end = coding->source + coding->src_bytes;
1258 int multibytep = coding->src_multibyte;
1259 int consumed_chars = 0;
1260 int c1, c2;
1261
1262 ONE_MORE_BYTE (c1);
1263 ONE_MORE_BYTE (c2);
1264
1265 if ((c1 == 0xFF) && (c2 == 0xFE))
1266 {
1267 *mask &= CATEGORY_MASK_UTF_16_LE;
1268 return 1;
1269 }
1270 else if ((c1 == 0xFE) && (c2 == 0xFF))
1271 {
1272 *mask &= CATEGORY_MASK_UTF_16_BE;
1273 return 1;
1274 }
1275 no_more_source:
1276 return 0;
1277 }
1278
1279 static void
1280 decode_coding_utf_16 (coding)
1281 struct coding_system *coding;
1282 {
1283 unsigned char *src = coding->source + coding->consumed;
1284 unsigned char *src_end = coding->source + coding->src_bytes;
1285 unsigned char *src_base;
1286 int *charbuf = coding->charbuf;
1287 int *charbuf_end = charbuf + coding->charbuf_size;
1288 int consumed_chars = 0, consumed_chars_base;
1289 int multibytep = coding->src_multibyte;
1290 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1291 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1292 int surrogate = CODING_UTF_16_SURROGATE (coding);
1293 Lisp_Object attr, eol_type, charset_list;
1294
1295 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1296
1297 if (bom != utf_16_without_bom)
1298 {
1299 int c, c1, c2;
1300
1301 src_base = src;
1302 ONE_MORE_BYTE (c1);
1303 ONE_MORE_BYTE (c2);
1304 c = (c1 << 8) | c2;
1305 if (bom == utf_16_with_bom)
1306 {
1307 if (endian == utf_16_big_endian
1308 ? c != 0xFFFE : c != 0xFEFF)
1309 {
1310 /* We are sure that there's enouph room at CHARBUF. */
1311 *charbuf++ = c1;
1312 *charbuf++ = c2;
1313 coding->errors++;
1314 }
1315 }
1316 else
1317 {
1318 if (c == 0xFFFE)
1319 CODING_UTF_16_ENDIAN (coding)
1320 = endian = utf_16_big_endian;
1321 else if (c == 0xFEFF)
1322 CODING_UTF_16_ENDIAN (coding)
1323 = endian = utf_16_little_endian;
1324 else
1325 {
1326 CODING_UTF_16_ENDIAN (coding)
1327 = endian = utf_16_big_endian;
1328 src = src_base;
1329 }
1330 }
1331 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1332 }
1333
1334 while (1)
1335 {
1336 int c, c1, c2;
1337
1338 src_base = src;
1339 consumed_chars_base = consumed_chars;
1340
1341 if (charbuf + 2 >= charbuf_end)
1342 break;
1343
1344 ONE_MORE_BYTE (c1);
1345 ONE_MORE_BYTE (c2);
1346 c = (endian == utf_16_big_endian
1347 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1348 if (surrogate)
1349 {
1350 if (! UTF_16_LOW_SURROGATE_P (c))
1351 {
1352 if (endian == utf_16_big_endian)
1353 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1354 else
1355 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1356 *charbuf++ = c1;
1357 *charbuf++ = c2;
1358 coding->errors++;
1359 if (UTF_16_HIGH_SURROGATE_P (c))
1360 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1361 else
1362 *charbuf++ = c;
1363 }
1364 else
1365 {
1366 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1367 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1368 *charbuf++ = c;
1369 }
1370 }
1371 else
1372 {
1373 if (UTF_16_HIGH_SURROGATE_P (c))
1374 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1375 else
1376 *charbuf++ = c;
1377 }
1378 }
1379
1380 no_more_source:
1381 coding->consumed_char += consumed_chars_base;
1382 coding->consumed = src_base - coding->source;
1383 coding->charbuf_used = charbuf - coding->charbuf;
1384 }
1385
1386 static int
1387 encode_coding_utf_16 (coding)
1388 struct coding_system *coding;
1389 {
1390 int multibytep = coding->dst_multibyte;
1391 int *charbuf = coding->charbuf;
1392 int *charbuf_end = charbuf + coding->charbuf_used;
1393 unsigned char *dst = coding->destination + coding->produced;
1394 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1395 int safe_room = 8;
1396 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1397 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1398 int produced_chars = 0;
1399 Lisp_Object attrs, eol_type, charset_list;
1400 int c;
1401
1402 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1403
1404 if (bom == utf_16_with_bom)
1405 {
1406 ASSURE_DESTINATION (safe_room);
1407 if (big_endian)
1408 EMIT_TWO_BYTES (0xFF, 0xFE);
1409 else
1410 EMIT_TWO_BYTES (0xFE, 0xFF);
1411 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1412 }
1413
1414 while (charbuf < charbuf_end)
1415 {
1416 ASSURE_DESTINATION (safe_room);
1417 c = *charbuf++;
1418 if (c >= MAX_UNICODE_CHAR)
1419 c = coding->default_char;
1420
1421 if (c < 0x10000)
1422 {
1423 if (big_endian)
1424 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1425 else
1426 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1427 }
1428 else
1429 {
1430 int c1, c2;
1431
1432 c -= 0x10000;
1433 c1 = (c >> 10) + 0xD800;
1434 c2 = (c & 0x3FF) + 0xDC00;
1435 if (big_endian)
1436 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1437 else
1438 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1439 }
1440 }
1441 coding->result = CODING_RESULT_SUCCESS;
1442 coding->produced = dst - coding->destination;
1443 coding->produced_char += produced_chars;
1444 return 0;
1445 }
1446
1447 \f
1448 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1449
1450 /* Emacs' internal format for representation of multiple character
1451 sets is a kind of multi-byte encoding, i.e. characters are
1452 represented by variable-length sequences of one-byte codes.
1453
1454 ASCII characters and control characters (e.g. `tab', `newline') are
1455 represented by one-byte sequences which are their ASCII codes, in
1456 the range 0x00 through 0x7F.
1457
1458 8-bit characters of the range 0x80..0x9F are represented by
1459 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1460 code + 0x20).
1461
1462 8-bit characters of the range 0xA0..0xFF are represented by
1463 one-byte sequences which are their 8-bit code.
1464
1465 The other characters are represented by a sequence of `base
1466 leading-code', optional `extended leading-code', and one or two
1467 `position-code's. The length of the sequence is determined by the
1468 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1469 whereas extended leading-code and position-code take the range 0xA0
1470 through 0xFF. See `charset.h' for more details about leading-code
1471 and position-code.
1472
1473 --- CODE RANGE of Emacs' internal format ---
1474 character set range
1475 ------------- -----
1476 ascii 0x00..0x7F
1477 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1478 eight-bit-graphic 0xA0..0xBF
1479 ELSE 0x81..0x9D + [0xA0..0xFF]+
1480 ---------------------------------------------
1481
1482 As this is the internal character representation, the format is
1483 usually not used externally (i.e. in a file or in a data sent to a
1484 process). But, it is possible to have a text externally in this
1485 format (i.e. by encoding by the coding system `emacs-mule').
1486
1487 In that case, a sequence of one-byte codes has a slightly different
1488 form.
1489
1490 At first, all characters in eight-bit-control are represented by
1491 one-byte sequences which are their 8-bit code.
1492
1493 Next, character composition data are represented by the byte
1494 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1495 where,
1496 METHOD is 0xF0 plus one of composition method (enum
1497 composition_method),
1498
1499 BYTES is 0xA0 plus a byte length of this composition data,
1500
1501 CHARS is 0x20 plus a number of characters composed by this
1502 data,
1503
1504 COMPONENTs are characters of multibye form or composition
1505 rules encoded by two-byte of ASCII codes.
1506
1507 In addition, for backward compatibility, the following formats are
1508 also recognized as composition data on decoding.
1509
1510 0x80 MSEQ ...
1511 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1512
1513 Here,
1514 MSEQ is a multibyte form but in these special format:
1515 ASCII: 0xA0 ASCII_CODE+0x80,
1516 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1517 RULE is a one byte code of the range 0xA0..0xF0 that
1518 represents a composition rule.
1519 */
1520
1521 char emacs_mule_bytes[256];
1522
1523 /* Leading-code followed by extended leading-code. */
1524 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1525 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1526 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1527 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1528
1529
1530 int
1531 emacs_mule_char (coding, composition, nbytes, nchars)
1532 struct coding_system *coding;
1533 int composition;
1534 int *nbytes, *nchars;
1535 {
1536 unsigned char *src = coding->source + coding->consumed;
1537 unsigned char *src_end = coding->source + coding->src_bytes;
1538 int multibytep = coding->src_multibyte;
1539 unsigned char *src_base = src;
1540 struct charset *charset;
1541 unsigned code;
1542 int c;
1543 int consumed_chars = 0;
1544
1545 ONE_MORE_BYTE (c);
1546 if (composition)
1547 {
1548 c -= 0x20;
1549 if (c == 0x80)
1550 {
1551 ONE_MORE_BYTE (c);
1552 if (c < 0xA0)
1553 goto invalid_code;
1554 *nbytes = src - src_base;
1555 *nchars = consumed_chars;
1556 return (c - 0x80);
1557 }
1558 }
1559
1560 switch (emacs_mule_bytes[c])
1561 {
1562 case 2:
1563 if (! (charset = emacs_mule_charset[c]))
1564 goto invalid_code;
1565 ONE_MORE_BYTE (c);
1566 code = c & 0x7F;
1567 break;
1568
1569 case 3:
1570 if (c == LEADING_CODE_PRIVATE_11
1571 || c == LEADING_CODE_PRIVATE_12)
1572 {
1573 ONE_MORE_BYTE (c);
1574 if (! (charset = emacs_mule_charset[c]))
1575 goto invalid_code;
1576 ONE_MORE_BYTE (c);
1577 code = c & 0x7F;
1578 }
1579 else
1580 {
1581 if (! (charset = emacs_mule_charset[c]))
1582 goto invalid_code;
1583 ONE_MORE_BYTE (c);
1584 code = (c & 0x7F) << 7;
1585 ONE_MORE_BYTE (c);
1586 code |= c & 0x7F;
1587 }
1588 break;
1589
1590 case 4:
1591 if (! (charset = emacs_mule_charset[c]))
1592 goto invalid_code;
1593 ONE_MORE_BYTE (c);
1594 code = (c & 0x7F) << 7;
1595 ONE_MORE_BYTE (c);
1596 code |= c & 0x7F;
1597 break;
1598
1599 case 1:
1600 code = c;
1601 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1602 : code < 0xA0 ? charset_8_bit_control
1603 : charset_8_bit_graphic);
1604 break;
1605
1606 default:
1607 abort ();
1608 }
1609 c = DECODE_CHAR (charset, code);
1610 if (c < 0)
1611 goto invalid_code;
1612 *nbytes = src - src_base;
1613 *nchars = consumed_chars;
1614 return c;
1615
1616 no_more_source:
1617 return -2;
1618
1619 invalid_code:
1620 return -1;
1621 }
1622
1623
1624 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1625 Check if a text is encoded in `emacs-mule'. */
1626
1627 static int
1628 detect_coding_emacs_mule (coding, mask)
1629 struct coding_system *coding;
1630 int *mask;
1631 {
1632 unsigned char *src = coding->source, *src_base = src;
1633 unsigned char *src_end = coding->source + coding->src_bytes;
1634 int multibytep = coding->src_multibyte;
1635 int consumed_chars = 0;
1636 int c;
1637 int found = 0;
1638
1639 /* A coding system of this category is always ASCII compatible. */
1640 src += coding->head_ascii;
1641
1642 while (1)
1643 {
1644 ONE_MORE_BYTE (c);
1645
1646 if (c == 0x80)
1647 {
1648 /* Perhaps the start of composite character. We simple skip
1649 it because analyzing it is too heavy for detecting. But,
1650 at least, we check that the composite character
1651 constitues of more than 4 bytes. */
1652 unsigned char *src_base;
1653
1654 repeat:
1655 src_base = src;
1656 do
1657 {
1658 ONE_MORE_BYTE (c);
1659 }
1660 while (c >= 0xA0);
1661
1662 if (src - src_base <= 4)
1663 break;
1664 found = 1;
1665 if (c == 0x80)
1666 goto repeat;
1667 }
1668
1669 if (c < 0x80)
1670 {
1671 if (c < 0x20
1672 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1673 break;
1674 }
1675 else
1676 {
1677 unsigned char *src_base = src - 1;
1678
1679 do
1680 {
1681 ONE_MORE_BYTE (c);
1682 }
1683 while (c >= 0xA0);
1684 if (src - src_base != emacs_mule_bytes[*src_base])
1685 break;
1686 found = 1;
1687 }
1688 }
1689 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1690 return 0;
1691
1692 no_more_source:
1693 if (!found)
1694 return 0;
1695 *mask &= CATEGORY_MASK_EMACS_MULE;
1696 return 1;
1697 }
1698
1699
1700 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1701
1702 /* Decode a character represented as a component of composition
1703 sequence of Emacs 20/21 style at SRC. Set C to that character and
1704 update SRC to the head of next character (or an encoded composition
1705 rule). If SRC doesn't points a composition component, set C to -1.
1706 If SRC points an invalid byte sequence, global exit by a return
1707 value 0. */
1708
1709 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1710 if (1) \
1711 { \
1712 int c; \
1713 int nbytes, nchars; \
1714 \
1715 if (src == src_end) \
1716 break; \
1717 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \
1718 if (c < 0) \
1719 { \
1720 if (c == -2) \
1721 break; \
1722 goto invalid_code; \
1723 } \
1724 *buf++ = c; \
1725 src += nbytes; \
1726 consumed_chars += nchars; \
1727 } \
1728 else
1729
1730
1731 /* Decode a composition rule represented as a component of composition
1732 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC
1733 points an invalid byte sequence, set C to -1. */
1734
1735 #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \
1736 do { \
1737 int c, gref, nref; \
1738 \
1739 if (src < src_end) \
1740 goto invalid_code; \
1741 ONE_MORE_BYTE_NO_CHECK (c); \
1742 c -= 0xA0; \
1743 if (c < 0 || c >= 81) \
1744 goto invalid_code; \
1745 \
1746 gref = c / 9, nref = c % 9; \
1747 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1748 } while (0)
1749
1750
1751 #define ADD_COMPOSITION_DATA(buf, method, nchars) \
1752 do { \
1753 *buf++ = -5; \
1754 *buf++ = coding->produced_char + char_offset; \
1755 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1756 *buf++ = method; \
1757 *buf++ = nchars; \
1758 } while (0)
1759
1760
1761 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1762 do { \
1763 /* Emacs 21 style format. The first three bytes at SRC are \
1764 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1765 the byte length of this composition information, CHARS is the \
1766 number of characters composed by this composition. */ \
1767 enum composition_method method = c - 0xF0; \
1768 int consumed_chars_limit; \
1769 int nbytes, nchars; \
1770 \
1771 ONE_MORE_BYTE (c); \
1772 nbytes = c - 0xA0; \
1773 if (nbytes < 3) \
1774 goto invalid_code; \
1775 ONE_MORE_BYTE (c); \
1776 nchars = c - 0xA0; \
1777 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1778 consumed_chars_limit = consumed_chars_base + nbytes; \
1779 if (method != COMPOSITION_RELATIVE) \
1780 { \
1781 int i = 0; \
1782 while (consumed_chars < consumed_chars_limit) \
1783 { \
1784 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1785 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \
1786 else \
1787 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1788 } \
1789 if (consumed_chars < consumed_chars_limit) \
1790 goto invalid_code; \
1791 } \
1792 } while (0)
1793
1794
1795 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1796 do { \
1797 /* Emacs 20 style format for relative composition. */ \
1798 /* Store multibyte form of characters to be composed. */ \
1799 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1800 int *buf = components; \
1801 int i, j; \
1802 \
1803 src = src_base; \
1804 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1805 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1806 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1807 if (i < 2) \
1808 goto invalid_code; \
1809 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1810 for (j = 0; j < i; j++) \
1811 *charbuf++ = components[j]; \
1812 } while (0)
1813
1814
1815 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1816 do { \
1817 /* Emacs 20 style format for rule-base composition. */ \
1818 /* Store multibyte form of characters to be composed. */ \
1819 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1820 int *buf = components; \
1821 int i, j; \
1822 \
1823 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1824 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1825 { \
1826 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \
1827 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1828 } \
1829 if (i < 1 || (buf - components) % 2 == 0) \
1830 goto invalid_code; \
1831 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1832 goto no_more_source; \
1833 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1834 for (j = 0; j < i; j++) \
1835 *charbuf++ = components[j]; \
1836 for (j = 0; j < i; j += 2) \
1837 *charbuf++ = components[j]; \
1838 } while (0)
1839
1840
1841 static void
1842 decode_coding_emacs_mule (coding)
1843 struct coding_system *coding;
1844 {
1845 unsigned char *src = coding->source + coding->consumed;
1846 unsigned char *src_end = coding->source + coding->src_bytes;
1847 unsigned char *src_base;
1848 int *charbuf = coding->charbuf;
1849 int *charbuf_end = charbuf + coding->charbuf_size;
1850 int consumed_chars = 0, consumed_chars_base;
1851 int char_offset = 0;
1852 int multibytep = coding->src_multibyte;
1853 Lisp_Object attrs, eol_type, charset_list;
1854
1855 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1856
1857 while (1)
1858 {
1859 int c;
1860
1861 src_base = src;
1862 consumed_chars_base = consumed_chars;
1863
1864 if (charbuf >= charbuf_end)
1865 break;
1866
1867 ONE_MORE_BYTE (c);
1868
1869 if (c < 0x80)
1870 {
1871 if (c == '\r')
1872 {
1873 if (EQ (eol_type, Qdos))
1874 {
1875 if (src == src_end)
1876 goto no_more_source;
1877 if (*src == '\n')
1878 ONE_MORE_BYTE (c);
1879 }
1880 else if (EQ (eol_type, Qmac))
1881 c = '\n';
1882 }
1883 *charbuf++ = c;
1884 char_offset++;
1885 }
1886 else if (c == 0x80)
1887 {
1888 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1889 break;
1890 ONE_MORE_BYTE (c);
1891 if (c - 0xF0 >= COMPOSITION_RELATIVE
1892 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
1893 DECODE_EMACS_MULE_21_COMPOSITION (c);
1894 else if (c < 0xC0)
1895 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1896 else if (c == 0xFF)
1897 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1898 else
1899 goto invalid_code;
1900 }
1901 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1902 {
1903 int nbytes, nchars;
1904 src--;
1905 c = emacs_mule_char (coding, 0, &nbytes, &nchars);
1906 if (c < 0)
1907 {
1908 if (c == -2)
1909 break;
1910 goto invalid_code;
1911 }
1912 *charbuf++ = c;
1913 char_offset++;
1914 }
1915 continue;
1916
1917 invalid_code:
1918 src = src_base;
1919 consumed_chars = consumed_chars_base;
1920 ONE_MORE_BYTE (c);
1921 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1922 coding->errors++;
1923 }
1924
1925 no_more_source:
1926 coding->consumed_char += consumed_chars_base;
1927 coding->consumed = src_base - coding->source;
1928 coding->charbuf_used = charbuf - coding->charbuf;
1929 }
1930
1931
1932 #define EMACS_MULE_LEADING_CODES(id, codes) \
1933 do { \
1934 if (id < 0xA0) \
1935 codes[0] = id, codes[1] = 0; \
1936 else if (id < 0xE0) \
1937 codes[0] = 0x9A, codes[1] = id; \
1938 else if (id < 0xF0) \
1939 codes[0] = 0x9B, codes[1] = id; \
1940 else if (id < 0xF5) \
1941 codes[0] = 0x9C, codes[1] = id; \
1942 else \
1943 codes[0] = 0x9D, codes[1] = id; \
1944 } while (0);
1945
1946
1947 static int
1948 encode_coding_emacs_mule (coding)
1949 struct coding_system *coding;
1950 {
1951 int multibytep = coding->dst_multibyte;
1952 int *charbuf = coding->charbuf;
1953 int *charbuf_end = charbuf + coding->charbuf_used;
1954 unsigned char *dst = coding->destination + coding->produced;
1955 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1956 int safe_room = 8;
1957 int produced_chars = 0;
1958 Lisp_Object attrs, eol_type, charset_list;
1959 int c;
1960
1961 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1962
1963 while (charbuf < charbuf_end)
1964 {
1965 ASSURE_DESTINATION (safe_room);
1966 c = *charbuf++;
1967 if (ASCII_CHAR_P (c))
1968 EMIT_ONE_ASCII_BYTE (c);
1969 else
1970 {
1971 struct charset *charset;
1972 unsigned code;
1973 int dimension;
1974 int emacs_mule_id;
1975 unsigned char leading_codes[2];
1976
1977 charset = char_charset (c, charset_list, &code);
1978 if (! charset)
1979 {
1980 c = coding->default_char;
1981 if (ASCII_CHAR_P (c))
1982 {
1983 EMIT_ONE_ASCII_BYTE (c);
1984 continue;
1985 }
1986 charset = char_charset (c, charset_list, &code);
1987 }
1988 dimension = CHARSET_DIMENSION (charset);
1989 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
1990 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
1991 EMIT_ONE_BYTE (leading_codes[0]);
1992 if (leading_codes[1])
1993 EMIT_ONE_BYTE (leading_codes[1]);
1994 if (dimension == 1)
1995 EMIT_ONE_BYTE (code);
1996 else
1997 {
1998 EMIT_ONE_BYTE (code >> 8);
1999 EMIT_ONE_BYTE (code & 0xFF);
2000 }
2001 }
2002 }
2003 coding->result = CODING_RESULT_SUCCESS;
2004 coding->produced_char += produced_chars;
2005 coding->produced = dst - coding->destination;
2006 return 0;
2007 }
2008
2009 \f
2010 /*** 7. ISO2022 handlers ***/
2011
2012 /* The following note describes the coding system ISO2022 briefly.
2013 Since the intention of this note is to help understand the
2014 functions in this file, some parts are NOT ACCURATE or OVERLY
2015 SIMPLIFIED. For thorough understanding, please refer to the
2016 original document of ISO2022.
2017
2018 ISO2022 provides many mechanisms to encode several character sets
2019 in 7-bit and 8-bit environments. For 7-bite environments, all text
2020 is encoded using bytes less than 128. This may make the encoded
2021 text a little bit longer, but the text passes more easily through
2022 several gateways, some of which strip off MSB (Most Signigant Bit).
2023
2024 There are two kinds of character sets: control character set and
2025 graphic character set. The former contains control characters such
2026 as `newline' and `escape' to provide control functions (control
2027 functions are also provided by escape sequences). The latter
2028 contains graphic characters such as 'A' and '-'. Emacs recognizes
2029 two control character sets and many graphic character sets.
2030
2031 Graphic character sets are classified into one of the following
2032 four classes, according to the number of bytes (DIMENSION) and
2033 number of characters in one dimension (CHARS) of the set:
2034 - DIMENSION1_CHARS94
2035 - DIMENSION1_CHARS96
2036 - DIMENSION2_CHARS94
2037 - DIMENSION2_CHARS96
2038
2039 In addition, each character set is assigned an identification tag,
2040 unique for each set, called "final character" (denoted as <F>
2041 hereafter). The <F> of each character set is decided by ECMA(*)
2042 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2043 (0x30..0x3F are for private use only).
2044
2045 Note (*): ECMA = European Computer Manufacturers Association
2046
2047 Here are examples of graphic character set [NAME(<F>)]:
2048 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2049 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2050 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2051 o DIMENSION2_CHARS96 -- none for the moment
2052
2053 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2054 C0 [0x00..0x1F] -- control character plane 0
2055 GL [0x20..0x7F] -- graphic character plane 0
2056 C1 [0x80..0x9F] -- control character plane 1
2057 GR [0xA0..0xFF] -- graphic character plane 1
2058
2059 A control character set is directly designated and invoked to C0 or
2060 C1 by an escape sequence. The most common case is that:
2061 - ISO646's control character set is designated/invoked to C0, and
2062 - ISO6429's control character set is designated/invoked to C1,
2063 and usually these designations/invocations are omitted in encoded
2064 text. In a 7-bit environment, only C0 can be used, and a control
2065 character for C1 is encoded by an appropriate escape sequence to
2066 fit into the environment. All control characters for C1 are
2067 defined to have corresponding escape sequences.
2068
2069 A graphic character set is at first designated to one of four
2070 graphic registers (G0 through G3), then these graphic registers are
2071 invoked to GL or GR. These designations and invocations can be
2072 done independently. The most common case is that G0 is invoked to
2073 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2074 these invocations and designations are omitted in encoded text.
2075 In a 7-bit environment, only GL can be used.
2076
2077 When a graphic character set of CHARS94 is invoked to GL, codes
2078 0x20 and 0x7F of the GL area work as control characters SPACE and
2079 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2080 be used.
2081
2082 There are two ways of invocation: locking-shift and single-shift.
2083 With locking-shift, the invocation lasts until the next different
2084 invocation, whereas with single-shift, the invocation affects the
2085 following character only and doesn't affect the locking-shift
2086 state. Invocations are done by the following control characters or
2087 escape sequences:
2088
2089 ----------------------------------------------------------------------
2090 abbrev function cntrl escape seq description
2091 ----------------------------------------------------------------------
2092 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2093 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2094 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2095 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2096 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2097 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2098 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2099 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2100 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2101 ----------------------------------------------------------------------
2102 (*) These are not used by any known coding system.
2103
2104 Control characters for these functions are defined by macros
2105 ISO_CODE_XXX in `coding.h'.
2106
2107 Designations are done by the following escape sequences:
2108 ----------------------------------------------------------------------
2109 escape sequence description
2110 ----------------------------------------------------------------------
2111 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2112 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2113 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2114 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2115 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2116 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2117 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2118 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2119 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2120 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2121 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2122 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2123 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2124 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2125 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2126 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2127 ----------------------------------------------------------------------
2128
2129 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2130 of dimension 1, chars 94, and final character <F>, etc...
2131
2132 Note (*): Although these designations are not allowed in ISO2022,
2133 Emacs accepts them on decoding, and produces them on encoding
2134 CHARS96 character sets in a coding system which is characterized as
2135 7-bit environment, non-locking-shift, and non-single-shift.
2136
2137 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2138 '(' must be omitted. We refer to this as "short-form" hereafter.
2139
2140 Now you may notice that there are a lot of ways for encoding the
2141 same multilingual text in ISO2022. Actually, there exist many
2142 coding systems such as Compound Text (used in X11's inter client
2143 communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2144 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
2145 localized platforms), and all of these are variants of ISO2022.
2146
2147 In addition to the above, Emacs handles two more kinds of escape
2148 sequences: ISO6429's direction specification and Emacs' private
2149 sequence for specifying character composition.
2150
2151 ISO6429's direction specification takes the following form:
2152 o CSI ']' -- end of the current direction
2153 o CSI '0' ']' -- end of the current direction
2154 o CSI '1' ']' -- start of left-to-right text
2155 o CSI '2' ']' -- start of right-to-left text
2156 The control character CSI (0x9B: control sequence introducer) is
2157 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2158
2159 Character composition specification takes the following form:
2160 o ESC '0' -- start relative composition
2161 o ESC '1' -- end composition
2162 o ESC '2' -- start rule-base composition (*)
2163 o ESC '3' -- start relative composition with alternate chars (**)
2164 o ESC '4' -- start rule-base composition with alternate chars (**)
2165 Since these are not standard escape sequences of any ISO standard,
2166 the use of them for these meaning is restricted to Emacs only.
2167
2168 (*) This form is used only in Emacs 20.5 and the older versions,
2169 but the newer versions can safely decode it.
2170 (**) This form is used only in Emacs 21.1 and the newer versions,
2171 and the older versions can't decode it.
2172
2173 Here's a list of examples usages of these composition escape
2174 sequences (categorized by `enum composition_method').
2175
2176 COMPOSITION_RELATIVE:
2177 ESC 0 CHAR [ CHAR ] ESC 1
2178 COMPOSITOIN_WITH_RULE:
2179 ESC 2 CHAR [ RULE CHAR ] ESC 1
2180 COMPOSITION_WITH_ALTCHARS:
2181 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2182 COMPOSITION_WITH_RULE_ALTCHARS:
2183 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2184
2185 enum iso_code_class_type iso_code_class[256];
2186
2187 #define SAFE_CHARSET_P(coding, id) \
2188 ((id) <= (coding)->max_charset_id \
2189 && (coding)->safe_charsets[id] >= 0)
2190
2191
2192 #define SHIFT_OUT_OK(category) \
2193 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2194
2195 static void
2196 setup_iso_safe_charsets (Lisp_Object attrs)
2197 {
2198 Lisp_Object charset_list, safe_charsets;
2199 Lisp_Object request;
2200 Lisp_Object reg_usage;
2201 Lisp_Object tail;
2202 int reg94, reg96;
2203 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2204 int max_charset_id;
2205
2206 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2207 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2208 && ! EQ (charset_list, Viso_2022_charset_list))
2209 {
2210 CODING_ATTR_CHARSET_LIST (attrs)
2211 = charset_list = Viso_2022_charset_list;
2212 ASET (attrs, coding_attr_safe_charsets, Qnil);
2213 }
2214
2215 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2216 return;
2217
2218 max_charset_id = 0;
2219 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2220 {
2221 int id = XINT (XCAR (tail));
2222 if (max_charset_id < id)
2223 max_charset_id = id;
2224 }
2225
2226 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2227 make_number (255));
2228 request = AREF (attrs, coding_attr_iso_request);
2229 reg_usage = AREF (attrs, coding_attr_iso_usage);
2230 reg94 = XINT (XCAR (reg_usage));
2231 reg96 = XINT (XCDR (reg_usage));
2232
2233 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2234 {
2235 Lisp_Object id;
2236 Lisp_Object reg;
2237 struct charset *charset;
2238
2239 id = XCAR (tail);
2240 charset = CHARSET_FROM_ID (XINT (id));
2241 reg = Fcdr (Fassq (request, id));
2242 if (! NILP (reg))
2243 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2244 else if (charset->iso_chars_96)
2245 {
2246 if (reg96 < 4)
2247 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2248 }
2249 else
2250 {
2251 if (reg94 < 4)
2252 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2253 }
2254 }
2255 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2256 }
2257
2258
2259 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2260 Check if a text is encoded in ISO2022. If it is, returns an
2261 integer in which appropriate flag bits any of:
2262 CATEGORY_MASK_ISO_7
2263 CATEGORY_MASK_ISO_7_TIGHT
2264 CATEGORY_MASK_ISO_8_1
2265 CATEGORY_MASK_ISO_8_2
2266 CATEGORY_MASK_ISO_7_ELSE
2267 CATEGORY_MASK_ISO_8_ELSE
2268 are set. If a code which should never appear in ISO2022 is found,
2269 returns 0. */
2270
2271 static int
2272 detect_coding_iso_2022 (coding, mask)
2273 struct coding_system *coding;
2274 int *mask;
2275 {
2276 unsigned char *src = coding->source, *src_base = src;
2277 unsigned char *src_end = coding->source + coding->src_bytes;
2278 int multibytep = coding->src_multibyte;
2279 int mask_iso = CATEGORY_MASK_ISO;
2280 int mask_found = 0, mask_8bit_found = 0;
2281 int reg[4], shift_out = 0, single_shifting = 0;
2282 int id;
2283 int c, c1;
2284 int consumed_chars = 0;
2285 int i;
2286
2287 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2288 {
2289 struct coding_system *this = &(coding_categories[i]);
2290 Lisp_Object attrs, val;
2291
2292 attrs = CODING_ID_ATTRS (this->id);
2293 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2294 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2295 setup_iso_safe_charsets (attrs);
2296 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2297 this->max_charset_id = XSTRING (val)->size - 1;
2298 this->safe_charsets = (char *) XSTRING (val)->data;
2299 }
2300
2301 /* A coding system of this category is always ASCII compatible. */
2302 src += coding->head_ascii;
2303
2304 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2305 while (mask_iso && src < src_end)
2306 {
2307 ONE_MORE_BYTE (c);
2308 switch (c)
2309 {
2310 case ISO_CODE_ESC:
2311 if (inhibit_iso_escape_detection)
2312 break;
2313 single_shifting = 0;
2314 ONE_MORE_BYTE (c);
2315 if (c >= '(' && c <= '/')
2316 {
2317 /* Designation sequence for a charset of dimension 1. */
2318 ONE_MORE_BYTE (c1);
2319 if (c1 < ' ' || c1 >= 0x80
2320 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2321 /* Invalid designation sequence. Just ignore. */
2322 break;
2323 reg[(c - '(') % 4] = id;
2324 }
2325 else if (c == '$')
2326 {
2327 /* Designation sequence for a charset of dimension 2. */
2328 ONE_MORE_BYTE (c);
2329 if (c >= '@' && c <= 'B')
2330 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2331 reg[0] = id = iso_charset_table[1][0][c];
2332 else if (c >= '(' && c <= '/')
2333 {
2334 ONE_MORE_BYTE (c1);
2335 if (c1 < ' ' || c1 >= 0x80
2336 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2337 /* Invalid designation sequence. Just ignore. */
2338 break;
2339 reg[(c - '(') % 4] = id;
2340 }
2341 else
2342 /* Invalid designation sequence. Just ignore. */
2343 break;
2344 }
2345 else if (c == 'N' || c == 'O')
2346 {
2347 /* ESC <Fe> for SS2 or SS3. */
2348 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
2349 break;
2350 }
2351 else if (c >= '0' && c <= '4')
2352 {
2353 /* ESC <Fp> for start/end composition. */
2354 mask_found |= CATEGORY_MASK_ISO;
2355 break;
2356 }
2357 else
2358 {
2359 /* Invalid escape sequence. */
2360 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2361 break;
2362 }
2363
2364 /* We found a valid designation sequence for CHARSET. */
2365 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2366 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2367 id))
2368 mask_found |= CATEGORY_MASK_ISO_7;
2369 else
2370 mask_iso &= ~CATEGORY_MASK_ISO_7;
2371 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2372 id))
2373 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
2374 else
2375 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2376 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2377 id))
2378 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
2379 else
2380 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2381 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2382 id))
2383 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
2384 else
2385 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
2386 break;
2387
2388 case ISO_CODE_SO:
2389 if (inhibit_iso_escape_detection)
2390 break;
2391 single_shifting = 0;
2392 if (shift_out == 0
2393 && (reg[1] >= 0
2394 || SHIFT_OUT_OK (coding_category_iso_7_else)
2395 || SHIFT_OUT_OK (coding_category_iso_8_else)))
2396 {
2397 /* Locking shift out. */
2398 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2399 mask_found |= CATEGORY_MASK_ISO_ELSE;
2400 }
2401 break;
2402
2403 case ISO_CODE_SI:
2404 if (inhibit_iso_escape_detection)
2405 break;
2406 single_shifting = 0;
2407 if (shift_out == 1)
2408 {
2409 /* Locking shift in. */
2410 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2411 mask_found |= CATEGORY_MASK_ISO_ELSE;
2412 }
2413 break;
2414
2415 case ISO_CODE_CSI:
2416 single_shifting = 0;
2417 case ISO_CODE_SS2:
2418 case ISO_CODE_SS3:
2419 {
2420 int newmask = CATEGORY_MASK_ISO_8_ELSE;
2421
2422 if (inhibit_iso_escape_detection)
2423 break;
2424 if (c != ISO_CODE_CSI)
2425 {
2426 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2427 & CODING_ISO_FLAG_SINGLE_SHIFT)
2428 newmask |= CATEGORY_MASK_ISO_8_1;
2429 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2430 & CODING_ISO_FLAG_SINGLE_SHIFT)
2431 newmask |= CATEGORY_MASK_ISO_8_2;
2432 single_shifting = 1;
2433 }
2434 if (VECTORP (Vlatin_extra_code_table)
2435 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2436 {
2437 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2438 & CODING_ISO_FLAG_LATIN_EXTRA)
2439 newmask |= CATEGORY_MASK_ISO_8_1;
2440 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2441 & CODING_ISO_FLAG_LATIN_EXTRA)
2442 newmask |= CATEGORY_MASK_ISO_8_2;
2443 }
2444 mask_iso &= newmask;
2445 mask_found |= newmask;
2446 }
2447 break;
2448
2449 default:
2450 if (c < 0x80)
2451 {
2452 single_shifting = 0;
2453 break;
2454 }
2455 else if (c < 0xA0)
2456 {
2457 single_shifting = 0;
2458 mask_8bit_found = 1;
2459 if (VECTORP (Vlatin_extra_code_table)
2460 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2461 {
2462 int newmask = 0;
2463
2464 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2465 & CODING_ISO_FLAG_LATIN_EXTRA)
2466 newmask |= CATEGORY_MASK_ISO_8_1;
2467 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2468 & CODING_ISO_FLAG_LATIN_EXTRA)
2469 newmask |= CATEGORY_MASK_ISO_8_2;
2470 mask_iso &= newmask;
2471 mask_found |= newmask;
2472 }
2473 else
2474 return 0;
2475 }
2476 else
2477 {
2478 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2479 | CATEGORY_MASK_ISO_7_ELSE);
2480 mask_found |= CATEGORY_MASK_ISO_8_1;
2481 mask_8bit_found = 1;
2482 /* Check the length of succeeding codes of the range
2483 0xA0..0FF. If the byte length is odd, we exclude
2484 CATEGORY_MASK_ISO_8_2. We can check this only
2485 when we are not single shifting. */
2486 if (!single_shifting
2487 && mask_iso & CATEGORY_MASK_ISO_8_2)
2488 {
2489 int i = 1;
2490 while (src < src_end)
2491 {
2492 ONE_MORE_BYTE (c);
2493 if (c < 0xA0)
2494 break;
2495 i++;
2496 }
2497
2498 if (i & 1 && src < src_end)
2499 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
2500 else
2501 mask_found |= CATEGORY_MASK_ISO_8_2;
2502 }
2503 }
2504 break;
2505 }
2506 }
2507 no_more_source:
2508 if (!mask_iso)
2509 {
2510 *mask &= ~CATEGORY_MASK_ISO;
2511 return 0;
2512 }
2513 if (!mask_found)
2514 return 0;
2515 *mask &= mask_iso & mask_found;
2516 if (! mask_8bit_found)
2517 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2518 return 1;
2519 }
2520
2521
2522 /* Set designation state into CODING. */
2523 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2524 do { \
2525 int id, prev; \
2526 \
2527 if (final < '0' || final >= 128 \
2528 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2529 || !SAFE_CHARSET_P (coding, id)) \
2530 { \
2531 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2532 goto invalid_code; \
2533 } \
2534 prev = CODING_ISO_DESIGNATION (coding, reg); \
2535 CODING_ISO_DESIGNATION (coding, reg) = id; \
2536 /* If there was an invalid designation to REG previously, and this \
2537 designation is ASCII to REG, we should keep this designation \
2538 sequence. */ \
2539 if (prev == -2 && id == charset_ascii) \
2540 goto invalid_code; \
2541 } while (0)
2542
2543
2544 #define MAYBE_FINISH_COMPOSITION() \
2545 do { \
2546 int i; \
2547 if (composition_state == COMPOSING_NO) \
2548 break; \
2549 /* It is assured that we have enough room for producing \
2550 characters stored in the table `components'. */ \
2551 if (charbuf + component_idx > charbuf_end) \
2552 goto no_more_source; \
2553 composition_state = COMPOSING_NO; \
2554 if (method == COMPOSITION_RELATIVE \
2555 || method == COMPOSITION_WITH_ALTCHARS) \
2556 { \
2557 for (i = 0; i < component_idx; i++) \
2558 *charbuf++ = components[i]; \
2559 char_offset += component_idx; \
2560 } \
2561 else \
2562 { \
2563 for (i = 0; i < component_idx; i += 2) \
2564 *charbuf++ = components[i]; \
2565 char_offset += (component_idx / 2) + 1; \
2566 } \
2567 } while (0)
2568
2569
2570 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2571 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2572 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2573 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2574 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2575 */
2576
2577 #define DECODE_COMPOSITION_START(c1) \
2578 do { \
2579 if (c1 == '0' \
2580 && composition_state == COMPOSING_COMPONENT_CHAR) \
2581 { \
2582 component_len = component_idx; \
2583 composition_state = COMPOSING_CHAR; \
2584 } \
2585 else \
2586 { \
2587 unsigned char *p; \
2588 \
2589 MAYBE_FINISH_COMPOSITION (); \
2590 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2591 goto no_more_source; \
2592 for (p = src; p < src_end - 1; p++) \
2593 if (*p == ISO_CODE_ESC && p[1] == '1') \
2594 break; \
2595 if (p == src_end - 1) \
2596 { \
2597 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2598 goto invalid_code; \
2599 goto no_more_source; \
2600 } \
2601 \
2602 /* This is surely the start of a composition. */ \
2603 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2604 : c1 == '2' ? COMPOSITION_WITH_RULE \
2605 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2606 : COMPOSITION_WITH_RULE_ALTCHARS); \
2607 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2608 : COMPOSING_COMPONENT_CHAR); \
2609 component_idx = component_len = 0; \
2610 } \
2611 } while (0)
2612
2613
2614 /* Handle compositoin end sequence ESC 1. */
2615
2616 #define DECODE_COMPOSITION_END() \
2617 do { \
2618 int nchars = (component_len > 0 ? component_idx - component_len \
2619 : method == COMPOSITION_RELATIVE ? component_idx \
2620 : (component_idx + 1) / 2); \
2621 int i; \
2622 int *saved_charbuf = charbuf; \
2623 \
2624 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2625 if (method != COMPOSITION_RELATIVE) \
2626 { \
2627 if (component_len == 0) \
2628 for (i = 0; i < component_idx; i++) \
2629 *charbuf++ = components[i]; \
2630 else \
2631 for (i = 0; i < component_len; i++) \
2632 *charbuf++ = components[i]; \
2633 *saved_charbuf = saved_charbuf - charbuf; \
2634 } \
2635 if (method == COMPOSITION_WITH_RULE) \
2636 for (i = 0; i < component_idx; i += 2, char_offset++) \
2637 *charbuf++ = components[i]; \
2638 else \
2639 for (i = component_len; i < component_idx; i++, char_offset++) \
2640 *charbuf++ = components[i]; \
2641 coding->annotated = 1; \
2642 composition_state = COMPOSING_NO; \
2643 } while (0)
2644
2645
2646 /* Decode a composition rule from the byte C1 (and maybe one more byte
2647 from SRC) and store one encoded composition rule in
2648 coding->cmp_data. */
2649
2650 #define DECODE_COMPOSITION_RULE(c1) \
2651 do { \
2652 (c1) -= 32; \
2653 if (c1 < 81) /* old format (before ver.21) */ \
2654 { \
2655 int gref = (c1) / 9; \
2656 int nref = (c1) % 9; \
2657 if (gref == 4) gref = 10; \
2658 if (nref == 4) nref = 10; \
2659 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2660 } \
2661 else if (c1 < 93) /* new format (after ver.21) */ \
2662 { \
2663 ONE_MORE_BYTE (c2); \
2664 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2665 } \
2666 else \
2667 c1 = 0; \
2668 } while (0)
2669
2670
2671 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2672
2673 static void
2674 decode_coding_iso_2022 (coding)
2675 struct coding_system *coding;
2676 {
2677 unsigned char *src = coding->source + coding->consumed;
2678 unsigned char *src_end = coding->source + coding->src_bytes;
2679 unsigned char *src_base;
2680 int *charbuf = coding->charbuf;
2681 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2682 int consumed_chars = 0, consumed_chars_base;
2683 int char_offset = 0;
2684 int multibytep = coding->src_multibyte;
2685 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2686 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2687 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2688 struct charset *charset;
2689 int c;
2690 /* For handling composition sequence. */
2691 #define COMPOSING_NO 0
2692 #define COMPOSING_CHAR 1
2693 #define COMPOSING_RULE 2
2694 #define COMPOSING_COMPONENT_CHAR 3
2695 #define COMPOSING_COMPONENT_RULE 4
2696
2697 int composition_state = COMPOSING_NO;
2698 enum composition_method method;
2699 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2700 int component_idx;
2701 int component_len;
2702 Lisp_Object attrs, eol_type, charset_list;
2703
2704 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2705 setup_iso_safe_charsets (attrs);
2706
2707 while (1)
2708 {
2709 int c1, c2;
2710
2711 src_base = src;
2712 consumed_chars_base = consumed_chars;
2713
2714 if (charbuf >= charbuf_end)
2715 break;
2716
2717 ONE_MORE_BYTE (c1);
2718
2719 /* We produce no character or one character. */
2720 switch (iso_code_class [c1])
2721 {
2722 case ISO_0x20_or_0x7F:
2723 if (composition_state != COMPOSING_NO)
2724 {
2725 if (composition_state == COMPOSING_RULE
2726 || composition_state == COMPOSING_COMPONENT_RULE)
2727 {
2728 DECODE_COMPOSITION_RULE (c1);
2729 components[component_idx++] = c1;
2730 composition_state--;
2731 continue;
2732 }
2733 else if (method == COMPOSITION_WITH_RULE)
2734 composition_state = COMPOSING_RULE;
2735 else if (method == COMPOSITION_WITH_RULE_ALTCHARS
2736 && composition_state == COMPOSING_COMPONENT_CHAR)
2737 composition_state = COMPOSING_COMPONENT_CHAR;
2738 }
2739 if (charset_id_0 < 0
2740 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2741 {
2742 /* This is SPACE or DEL. */
2743 charset = CHARSET_FROM_ID (charset_ascii);
2744 break;
2745 }
2746 /* This is a graphic character, we fall down ... */
2747
2748 case ISO_graphic_plane_0:
2749 if (composition_state == COMPOSING_RULE)
2750 {
2751 DECODE_COMPOSITION_RULE (c1);
2752 components[component_idx++] = c1;
2753 composition_state = COMPOSING_CHAR;
2754 }
2755 charset = CHARSET_FROM_ID (charset_id_0);
2756 break;
2757
2758 case ISO_0xA0_or_0xFF:
2759 if (charset_id_1 < 0
2760 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2761 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2762 goto invalid_code;
2763 /* This is a graphic character, we fall down ... */
2764
2765 case ISO_graphic_plane_1:
2766 if (charset_id_1 < 0)
2767 goto invalid_code;
2768 charset = CHARSET_FROM_ID (charset_id_1);
2769 break;
2770
2771 case ISO_carriage_return:
2772 if (c1 == '\r')
2773 {
2774 if (EQ (eol_type, Qdos))
2775 {
2776 if (src == src_end)
2777 goto no_more_source;
2778 if (*src == '\n')
2779 ONE_MORE_BYTE (c1);
2780 }
2781 else if (EQ (eol_type, Qmac))
2782 c1 = '\n';
2783 }
2784 /* fall through */
2785
2786 case ISO_control_0:
2787 MAYBE_FINISH_COMPOSITION ();
2788 charset = CHARSET_FROM_ID (charset_ascii);
2789 break;
2790
2791 case ISO_control_1:
2792 MAYBE_FINISH_COMPOSITION ();
2793 goto invalid_code;
2794
2795 case ISO_shift_out:
2796 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2797 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2798 goto invalid_code;
2799 CODING_ISO_INVOCATION (coding, 0) = 1;
2800 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2801 continue;
2802
2803 case ISO_shift_in:
2804 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2805 goto invalid_code;
2806 CODING_ISO_INVOCATION (coding, 0) = 0;
2807 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2808 continue;
2809
2810 case ISO_single_shift_2_7:
2811 case ISO_single_shift_2:
2812 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2813 goto invalid_code;
2814 /* SS2 is handled as an escape sequence of ESC 'N' */
2815 c1 = 'N';
2816 goto label_escape_sequence;
2817
2818 case ISO_single_shift_3:
2819 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2820 goto invalid_code;
2821 /* SS2 is handled as an escape sequence of ESC 'O' */
2822 c1 = 'O';
2823 goto label_escape_sequence;
2824
2825 case ISO_control_sequence_introducer:
2826 /* CSI is handled as an escape sequence of ESC '[' ... */
2827 c1 = '[';
2828 goto label_escape_sequence;
2829
2830 case ISO_escape:
2831 ONE_MORE_BYTE (c1);
2832 label_escape_sequence:
2833 /* Escape sequences handled here are invocation,
2834 designation, direction specification, and character
2835 composition specification. */
2836 switch (c1)
2837 {
2838 case '&': /* revision of following character set */
2839 ONE_MORE_BYTE (c1);
2840 if (!(c1 >= '@' && c1 <= '~'))
2841 goto invalid_code;
2842 ONE_MORE_BYTE (c1);
2843 if (c1 != ISO_CODE_ESC)
2844 goto invalid_code;
2845 ONE_MORE_BYTE (c1);
2846 goto label_escape_sequence;
2847
2848 case '$': /* designation of 2-byte character set */
2849 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2850 goto invalid_code;
2851 ONE_MORE_BYTE (c1);
2852 if (c1 >= '@' && c1 <= 'B')
2853 { /* designation of JISX0208.1978, GB2312.1980,
2854 or JISX0208.1980 */
2855 DECODE_DESIGNATION (0, 2, 0, c1);
2856 }
2857 else if (c1 >= 0x28 && c1 <= 0x2B)
2858 { /* designation of DIMENSION2_CHARS94 character set */
2859 ONE_MORE_BYTE (c2);
2860 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2861 }
2862 else if (c1 >= 0x2C && c1 <= 0x2F)
2863 { /* designation of DIMENSION2_CHARS96 character set */
2864 ONE_MORE_BYTE (c2);
2865 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2866 }
2867 else
2868 goto invalid_code;
2869 /* We must update these variables now. */
2870 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2871 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2872 continue;
2873
2874 case 'n': /* invocation of locking-shift-2 */
2875 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2876 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2877 goto invalid_code;
2878 CODING_ISO_INVOCATION (coding, 0) = 2;
2879 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2880 continue;
2881
2882 case 'o': /* invocation of locking-shift-3 */
2883 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2884 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2885 goto invalid_code;
2886 CODING_ISO_INVOCATION (coding, 0) = 3;
2887 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2888 continue;
2889
2890 case 'N': /* invocation of single-shift-2 */
2891 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2892 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2893 goto invalid_code;
2894 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
2895 ONE_MORE_BYTE (c1);
2896 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2897 goto invalid_code;
2898 break;
2899
2900 case 'O': /* invocation of single-shift-3 */
2901 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2902 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2903 goto invalid_code;
2904 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
2905 ONE_MORE_BYTE (c1);
2906 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2907 goto invalid_code;
2908 break;
2909
2910 case '0': case '2': case '3': case '4': /* start composition */
2911 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2912 goto invalid_code;
2913 DECODE_COMPOSITION_START (c1);
2914 continue;
2915
2916 case '1': /* end composition */
2917 if (composition_state == COMPOSING_NO)
2918 goto invalid_code;
2919 DECODE_COMPOSITION_END ();
2920 continue;
2921
2922 case '[': /* specification of direction */
2923 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2924 goto invalid_code;
2925 /* For the moment, nested direction is not supported.
2926 So, `coding->mode & CODING_MODE_DIRECTION' zero means
2927 left-to-right, and nozero means right-to-left. */
2928 ONE_MORE_BYTE (c1);
2929 switch (c1)
2930 {
2931 case ']': /* end of the current direction */
2932 coding->mode &= ~CODING_MODE_DIRECTION;
2933
2934 case '0': /* end of the current direction */
2935 case '1': /* start of left-to-right direction */
2936 ONE_MORE_BYTE (c1);
2937 if (c1 == ']')
2938 coding->mode &= ~CODING_MODE_DIRECTION;
2939 else
2940 goto invalid_code;
2941 break;
2942
2943 case '2': /* start of right-to-left direction */
2944 ONE_MORE_BYTE (c1);
2945 if (c1 == ']')
2946 coding->mode |= CODING_MODE_DIRECTION;
2947 else
2948 goto invalid_code;
2949 break;
2950
2951 default:
2952 goto invalid_code;
2953 }
2954 continue;
2955
2956 default:
2957 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2958 goto invalid_code;
2959 if (c1 >= 0x28 && c1 <= 0x2B)
2960 { /* designation of DIMENSION1_CHARS94 character set */
2961 ONE_MORE_BYTE (c2);
2962 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
2963 }
2964 else if (c1 >= 0x2C && c1 <= 0x2F)
2965 { /* designation of DIMENSION1_CHARS96 character set */
2966 ONE_MORE_BYTE (c2);
2967 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
2968 }
2969 else
2970 goto invalid_code;
2971 /* We must update these variables now. */
2972 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2973 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2974 continue;
2975 }
2976 }
2977
2978 /* Now we know CHARSET and 1st position code C1 of a character.
2979 Produce a decoded character while getting 2nd position code
2980 C2 if necessary. */
2981 c1 &= 0x7F;
2982 if (CHARSET_DIMENSION (charset) > 1)
2983 {
2984 ONE_MORE_BYTE (c2);
2985 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
2986 /* C2 is not in a valid range. */
2987 goto invalid_code;
2988 c1 = (c1 << 8) | (c2 & 0x7F);
2989 if (CHARSET_DIMENSION (charset) > 2)
2990 {
2991 ONE_MORE_BYTE (c2);
2992 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
2993 /* C2 is not in a valid range. */
2994 goto invalid_code;
2995 c1 = (c1 << 8) | (c2 & 0x7F);
2996 }
2997 }
2998
2999 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3000 if (c < 0)
3001 {
3002 MAYBE_FINISH_COMPOSITION ();
3003 for (; src_base < src; src_base++, char_offset++)
3004 {
3005 if (ASCII_BYTE_P (*src_base))
3006 *charbuf++ = *src_base;
3007 else
3008 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3009 }
3010 }
3011 else if (composition_state == COMPOSING_NO)
3012 {
3013 *charbuf++ = c;
3014 char_offset++;
3015 }
3016 else
3017 components[component_idx++] = c;
3018 continue;
3019
3020 invalid_code:
3021 MAYBE_FINISH_COMPOSITION ();
3022 src = src_base;
3023 consumed_chars = consumed_chars_base;
3024 ONE_MORE_BYTE (c);
3025 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3026 coding->errors++;
3027 }
3028
3029 no_more_source:
3030 coding->consumed_char += consumed_chars_base;
3031 coding->consumed = src_base - coding->source;
3032 coding->charbuf_used = charbuf - coding->charbuf;
3033 }
3034
3035
3036 /* ISO2022 encoding stuff. */
3037
3038 /*
3039 It is not enough to say just "ISO2022" on encoding, we have to
3040 specify more details. In Emacs, each coding system of ISO2022
3041 variant has the following specifications:
3042 1. Initial designation to G0 thru G3.
3043 2. Allows short-form designation?
3044 3. ASCII should be designated to G0 before control characters?
3045 4. ASCII should be designated to G0 at end of line?
3046 5. 7-bit environment or 8-bit environment?
3047 6. Use locking-shift?
3048 7. Use Single-shift?
3049 And the following two are only for Japanese:
3050 8. Use ASCII in place of JIS0201-1976-Roman?
3051 9. Use JISX0208-1983 in place of JISX0208-1978?
3052 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3053 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3054 details.
3055 */
3056
3057 /* Produce codes (escape sequence) for designating CHARSET to graphic
3058 register REG at DST, and increment DST. If <final-char> of CHARSET is
3059 '@', 'A', or 'B' and the coding system CODING allows, produce
3060 designation sequence of short-form. */
3061
3062 #define ENCODE_DESIGNATION(charset, reg, coding) \
3063 do { \
3064 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3065 char *intermediate_char_94 = "()*+"; \
3066 char *intermediate_char_96 = ",-./"; \
3067 int revision = -1; \
3068 int c; \
3069 \
3070 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3071 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3072 \
3073 if (revision >= 0) \
3074 { \
3075 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3076 EMIT_ONE_BYTE ('@' + revision); \
3077 } \
3078 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3079 if (CHARSET_DIMENSION (charset) == 1) \
3080 { \
3081 if (! CHARSET_ISO_CHARS_96 (charset)) \
3082 c = intermediate_char_94[reg]; \
3083 else \
3084 c = intermediate_char_96[reg]; \
3085 EMIT_ONE_ASCII_BYTE (c); \
3086 } \
3087 else \
3088 { \
3089 EMIT_ONE_ASCII_BYTE ('$'); \
3090 if (! CHARSET_ISO_CHARS_96 (charset)) \
3091 { \
3092 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3093 || reg != 0 \
3094 || final_char < '@' || final_char > 'B') \
3095 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3096 } \
3097 else \
3098 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3099 } \
3100 EMIT_ONE_ASCII_BYTE (final_char); \
3101 \
3102 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3103 } while (0)
3104
3105
3106 /* The following two macros produce codes (control character or escape
3107 sequence) for ISO2022 single-shift functions (single-shift-2 and
3108 single-shift-3). */
3109
3110 #define ENCODE_SINGLE_SHIFT_2 \
3111 do { \
3112 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3113 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3114 else \
3115 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3116 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3117 } while (0)
3118
3119
3120 #define ENCODE_SINGLE_SHIFT_3 \
3121 do { \
3122 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3123 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3124 else \
3125 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3126 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3127 } while (0)
3128
3129
3130 /* The following four macros produce codes (control character or
3131 escape sequence) for ISO2022 locking-shift functions (shift-in,
3132 shift-out, locking-shift-2, and locking-shift-3). */
3133
3134 #define ENCODE_SHIFT_IN \
3135 do { \
3136 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3137 CODING_ISO_INVOCATION (coding, 0) = 0; \
3138 } while (0)
3139
3140
3141 #define ENCODE_SHIFT_OUT \
3142 do { \
3143 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3144 CODING_ISO_INVOCATION (coding, 0) = 1; \
3145 } while (0)
3146
3147
3148 #define ENCODE_LOCKING_SHIFT_2 \
3149 do { \
3150 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3151 CODING_ISO_INVOCATION (coding, 0) = 2; \
3152 } while (0)
3153
3154
3155 #define ENCODE_LOCKING_SHIFT_3 \
3156 do { \
3157 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3158 CODING_ISO_INVOCATION (coding, 0) = 3; \
3159 } while (0)
3160
3161
3162 /* Produce codes for a DIMENSION1 character whose character set is
3163 CHARSET and whose position-code is C1. Designation and invocation
3164 sequences are also produced in advance if necessary. */
3165
3166 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3167 do { \
3168 int id = CHARSET_ID (charset); \
3169 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3170 { \
3171 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3172 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3173 else \
3174 EMIT_ONE_BYTE (c1 | 0x80); \
3175 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3176 break; \
3177 } \
3178 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3179 { \
3180 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3181 break; \
3182 } \
3183 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3184 { \
3185 EMIT_ONE_BYTE (c1 | 0x80); \
3186 break; \
3187 } \
3188 else \
3189 /* Since CHARSET is not yet invoked to any graphic planes, we \
3190 must invoke it, or, at first, designate it to some graphic \
3191 register. Then repeat the loop to actually produce the \
3192 character. */ \
3193 dst = encode_invocation_designation (charset, coding, dst, \
3194 &produced_chars); \
3195 } while (1)
3196
3197
3198 /* Produce codes for a DIMENSION2 character whose character set is
3199 CHARSET and whose position-codes are C1 and C2. Designation and
3200 invocation codes are also produced in advance if necessary. */
3201
3202 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3203 do { \
3204 int id = CHARSET_ID (charset); \
3205 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3206 { \
3207 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3208 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3209 else \
3210 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3211 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3212 break; \
3213 } \
3214 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3215 { \
3216 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3217 break; \
3218 } \
3219 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3220 { \
3221 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3222 break; \
3223 } \
3224 else \
3225 /* Since CHARSET is not yet invoked to any graphic planes, we \
3226 must invoke it, or, at first, designate it to some graphic \
3227 register. Then repeat the loop to actually produce the \
3228 character. */ \
3229 dst = encode_invocation_designation (charset, coding, dst, \
3230 &produced_chars); \
3231 } while (1)
3232
3233
3234 #define ENCODE_ISO_CHARACTER(charset, c) \
3235 do { \
3236 int code = ENCODE_CHAR ((charset),(c)); \
3237 \
3238 if (CHARSET_DIMENSION (charset) == 1) \
3239 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3240 else \
3241 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3242 } while (0)
3243
3244
3245 /* Produce designation and invocation codes at a place pointed by DST
3246 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3247 Return new DST. */
3248
3249 unsigned char *
3250 encode_invocation_designation (charset, coding, dst, p_nchars)
3251 struct charset *charset;
3252 struct coding_system *coding;
3253 unsigned char *dst;
3254 int *p_nchars;
3255 {
3256 int multibytep = coding->dst_multibyte;
3257 int produced_chars = *p_nchars;
3258 int reg; /* graphic register number */
3259 int id = CHARSET_ID (charset);
3260
3261 /* At first, check designations. */
3262 for (reg = 0; reg < 4; reg++)
3263 if (id == CODING_ISO_DESIGNATION (coding, reg))
3264 break;
3265
3266 if (reg >= 4)
3267 {
3268 /* CHARSET is not yet designated to any graphic registers. */
3269 /* At first check the requested designation. */
3270 reg = CODING_ISO_REQUEST (coding, id);
3271 if (reg < 0)
3272 /* Since CHARSET requests no special designation, designate it
3273 to graphic register 0. */
3274 reg = 0;
3275
3276 ENCODE_DESIGNATION (charset, reg, coding);
3277 }
3278
3279 if (CODING_ISO_INVOCATION (coding, 0) != reg
3280 && CODING_ISO_INVOCATION (coding, 1) != reg)
3281 {
3282 /* Since the graphic register REG is not invoked to any graphic
3283 planes, invoke it to graphic plane 0. */
3284 switch (reg)
3285 {
3286 case 0: /* graphic register 0 */
3287 ENCODE_SHIFT_IN;
3288 break;
3289
3290 case 1: /* graphic register 1 */
3291 ENCODE_SHIFT_OUT;
3292 break;
3293
3294 case 2: /* graphic register 2 */
3295 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3296 ENCODE_SINGLE_SHIFT_2;
3297 else
3298 ENCODE_LOCKING_SHIFT_2;
3299 break;
3300
3301 case 3: /* graphic register 3 */
3302 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3303 ENCODE_SINGLE_SHIFT_3;
3304 else
3305 ENCODE_LOCKING_SHIFT_3;
3306 break;
3307 }
3308 }
3309
3310 *p_nchars = produced_chars;
3311 return dst;
3312 }
3313
3314 /* The following three macros produce codes for indicating direction
3315 of text. */
3316 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
3317 do { \
3318 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3319 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
3320 else \
3321 EMIT_ONE_BYTE (ISO_CODE_CSI); \
3322 } while (0)
3323
3324
3325 #define ENCODE_DIRECTION_R2L() \
3326 do { \
3327 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3328 EMIT_TWO_ASCII_BYTES ('2', ']'); \
3329 } while (0)
3330
3331
3332 #define ENCODE_DIRECTION_L2R() \
3333 do { \
3334 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3335 EMIT_TWO_ASCII_BYTES ('0', ']'); \
3336 } while (0)
3337
3338
3339 /* Produce codes for designation and invocation to reset the graphic
3340 planes and registers to initial state. */
3341 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3342 do { \
3343 int reg; \
3344 struct charset *charset; \
3345 \
3346 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3347 ENCODE_SHIFT_IN; \
3348 for (reg = 0; reg < 4; reg++) \
3349 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3350 && (CODING_ISO_DESIGNATION (coding, reg) \
3351 != CODING_ISO_INITIAL (coding, reg))) \
3352 { \
3353 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3354 ENCODE_DESIGNATION (charset, reg, coding); \
3355 } \
3356 } while (0)
3357
3358
3359 /* Produce designation sequences of charsets in the line started from
3360 SRC to a place pointed by DST, and return updated DST.
3361
3362 If the current block ends before any end-of-line, we may fail to
3363 find all the necessary designations. */
3364
3365 static unsigned char *
3366 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3367 struct coding_system *coding;
3368 int *charbuf, *charbuf_end;
3369 unsigned char *dst;
3370 {
3371 struct charset *charset;
3372 /* Table of charsets to be designated to each graphic register. */
3373 int r[4];
3374 int c, found = 0, reg;
3375 int produced_chars = 0;
3376 int multibytep = coding->dst_multibyte;
3377 Lisp_Object attrs;
3378 Lisp_Object charset_list;
3379
3380 attrs = CODING_ID_ATTRS (coding->id);
3381 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3382 if (EQ (charset_list, Qiso_2022))
3383 charset_list = Viso_2022_charset_list;
3384
3385 for (reg = 0; reg < 4; reg++)
3386 r[reg] = -1;
3387
3388 while (found < 4)
3389 {
3390 int id;
3391
3392 c = *charbuf++;
3393 if (c == '\n')
3394 break;
3395 charset = char_charset (c, charset_list, NULL);
3396 id = CHARSET_ID (charset);
3397 reg = CODING_ISO_REQUEST (coding, id);
3398 if (reg >= 0 && r[reg] < 0)
3399 {
3400 found++;
3401 r[reg] = id;
3402 }
3403 }
3404
3405 if (found)
3406 {
3407 for (reg = 0; reg < 4; reg++)
3408 if (r[reg] >= 0
3409 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3410 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3411 }
3412
3413 return dst;
3414 }
3415
3416 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3417
3418 static int
3419 encode_coding_iso_2022 (coding)
3420 struct coding_system *coding;
3421 {
3422 int multibytep = coding->dst_multibyte;
3423 int *charbuf = coding->charbuf;
3424 int *charbuf_end = charbuf + coding->charbuf_used;
3425 unsigned char *dst = coding->destination + coding->produced;
3426 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3427 int safe_room = 16;
3428 int bol_designation
3429 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3430 && CODING_ISO_BOL (coding));
3431 int produced_chars = 0;
3432 Lisp_Object attrs, eol_type, charset_list;
3433 int ascii_compatible;
3434 int c;
3435
3436 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3437
3438 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3439
3440 while (charbuf < charbuf_end)
3441 {
3442 ASSURE_DESTINATION (safe_room);
3443
3444 if (bol_designation)
3445 {
3446 unsigned char *dst_prev = dst;
3447
3448 /* We have to produce designation sequences if any now. */
3449 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3450 bol_designation = 0;
3451 /* We are sure that designation sequences are all ASCII bytes. */
3452 produced_chars += dst - dst_prev;
3453 }
3454
3455 c = *charbuf++;
3456
3457 /* Now encode the character C. */
3458 if (c < 0x20 || c == 0x7F)
3459 {
3460 if (c == '\n'
3461 || (c == '\r' && EQ (eol_type, Qmac)))
3462 {
3463 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3464 ENCODE_RESET_PLANE_AND_REGISTER ();
3465 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3466 {
3467 int i;
3468
3469 for (i = 0; i < 4; i++)
3470 CODING_ISO_DESIGNATION (coding, i)
3471 = CODING_ISO_INITIAL (coding, i);
3472 }
3473 bol_designation
3474 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3475 }
3476 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3477 ENCODE_RESET_PLANE_AND_REGISTER ();
3478 EMIT_ONE_ASCII_BYTE (c);
3479 }
3480 else if (ASCII_CHAR_P (c))
3481 {
3482 if (ascii_compatible)
3483 EMIT_ONE_ASCII_BYTE (c);
3484 else
3485 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii), c);
3486 }
3487 else
3488 {
3489 struct charset *charset = char_charset (c, charset_list, NULL);
3490
3491 if (!charset)
3492 {
3493 c = coding->default_char;
3494 charset = char_charset (c, charset_list, NULL);
3495 }
3496 ENCODE_ISO_CHARACTER (charset, c);
3497 }
3498 }
3499
3500 if (coding->mode & CODING_MODE_LAST_BLOCK
3501 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3502 {
3503 ASSURE_DESTINATION (safe_room);
3504 ENCODE_RESET_PLANE_AND_REGISTER ();
3505 }
3506 coding->result = CODING_RESULT_SUCCESS;
3507 CODING_ISO_BOL (coding) = bol_designation;
3508 coding->produced_char += produced_chars;
3509 coding->produced = dst - coding->destination;
3510 return 0;
3511 }
3512
3513 \f
3514 /*** 8,9. SJIS and BIG5 handlers ***/
3515
3516 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3517 quite widely. So, for the moment, Emacs supports them in the bare
3518 C code. But, in the future, they may be supported only by CCL. */
3519
3520 /* SJIS is a coding system encoding three character sets: ASCII, right
3521 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3522 as is. A character of charset katakana-jisx0201 is encoded by
3523 "position-code + 0x80". A character of charset japanese-jisx0208
3524 is encoded in 2-byte but two position-codes are divided and shifted
3525 so that it fit in the range below.
3526
3527 --- CODE RANGE of SJIS ---
3528 (character set) (range)
3529 ASCII 0x00 .. 0x7F
3530 KATAKANA-JISX0201 0xA0 .. 0xDF
3531 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3532 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3533 -------------------------------
3534
3535 */
3536
3537 /* BIG5 is a coding system encoding two character sets: ASCII and
3538 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3539 character set and is encoded in two-byte.
3540
3541 --- CODE RANGE of BIG5 ---
3542 (character set) (range)
3543 ASCII 0x00 .. 0x7F
3544 Big5 (1st byte) 0xA1 .. 0xFE
3545 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3546 --------------------------
3547
3548 */
3549
3550 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3551 Check if a text is encoded in SJIS. If it is, return
3552 CATEGORY_MASK_SJIS, else return 0. */
3553
3554 static int
3555 detect_coding_sjis (coding, mask)
3556 struct coding_system *coding;
3557 int *mask;
3558 {
3559 unsigned char *src = coding->source, *src_base = src;
3560 unsigned char *src_end = coding->source + coding->src_bytes;
3561 int multibytep = coding->src_multibyte;
3562 int consumed_chars = 0;
3563 int found = 0;
3564 int c;
3565
3566 /* A coding system of this category is always ASCII compatible. */
3567 src += coding->head_ascii;
3568
3569 while (1)
3570 {
3571 ONE_MORE_BYTE (c);
3572 if (c < 0x80)
3573 continue;
3574 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3575 {
3576 ONE_MORE_BYTE (c);
3577 if (c < 0x40 || c == 0x7F || c > 0xFC)
3578 break;
3579 found = 1;
3580 }
3581 else if (c >= 0xA0 && c < 0xE0)
3582 found = 1;
3583 else
3584 break;
3585 }
3586 *mask &= ~CATEGORY_MASK_SJIS;
3587 return 0;
3588
3589 no_more_source:
3590 if (!found)
3591 return 0;
3592 *mask &= CATEGORY_MASK_SJIS;
3593 return 1;
3594 }
3595
3596 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3597 Check if a text is encoded in BIG5. If it is, return
3598 CATEGORY_MASK_BIG5, else return 0. */
3599
3600 static int
3601 detect_coding_big5 (coding, mask)
3602 struct coding_system *coding;
3603 int *mask;
3604 {
3605 unsigned char *src = coding->source, *src_base = src;
3606 unsigned char *src_end = coding->source + coding->src_bytes;
3607 int multibytep = coding->src_multibyte;
3608 int consumed_chars = 0;
3609 int found = 0;
3610 int c;
3611
3612 /* A coding system of this category is always ASCII compatible. */
3613 src += coding->head_ascii;
3614
3615 while (1)
3616 {
3617 ONE_MORE_BYTE (c);
3618 if (c < 0x80)
3619 continue;
3620 if (c >= 0xA1)
3621 {
3622 ONE_MORE_BYTE (c);
3623 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3624 return 0;
3625 found = 1;
3626 }
3627 else
3628 break;
3629 }
3630 *mask &= ~CATEGORY_MASK_BIG5;
3631 return 0;
3632
3633 no_more_source:
3634 if (!found)
3635 return 0;
3636 *mask &= CATEGORY_MASK_BIG5;
3637 return 1;
3638 }
3639
3640 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3641 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3642
3643 static void
3644 decode_coding_sjis (coding)
3645 struct coding_system *coding;
3646 {
3647 unsigned char *src = coding->source + coding->consumed;
3648 unsigned char *src_end = coding->source + coding->src_bytes;
3649 unsigned char *src_base;
3650 int *charbuf = coding->charbuf;
3651 int *charbuf_end = charbuf + coding->charbuf_size;
3652 int consumed_chars = 0, consumed_chars_base;
3653 int multibytep = coding->src_multibyte;
3654 struct charset *charset_roman, *charset_kanji, *charset_kana;
3655 Lisp_Object attrs, eol_type, charset_list, val;
3656
3657 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3658
3659 val = charset_list;
3660 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3661 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3662 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
3663
3664 while (1)
3665 {
3666 int c, c1;
3667
3668 src_base = src;
3669 consumed_chars_base = consumed_chars;
3670
3671 if (charbuf >= charbuf_end)
3672 break;
3673
3674 ONE_MORE_BYTE (c);
3675
3676 if (c == '\r')
3677 {
3678 if (EQ (eol_type, Qdos))
3679 {
3680 if (src == src_end)
3681 goto no_more_source;
3682 if (*src == '\n')
3683 ONE_MORE_BYTE (c);
3684 }
3685 else if (EQ (eol_type, Qmac))
3686 c = '\n';
3687 }
3688 else
3689 {
3690 struct charset *charset;
3691
3692 if (c < 0x80)
3693 charset = charset_roman;
3694 else
3695 {
3696 if (c >= 0xF0)
3697 goto invalid_code;
3698 if (c < 0xA0 || c >= 0xE0)
3699 {
3700 /* SJIS -> JISX0208 */
3701 ONE_MORE_BYTE (c1);
3702 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3703 goto invalid_code;
3704 c = (c << 8) | c1;
3705 SJIS_TO_JIS (c);
3706 charset = charset_kanji;
3707 }
3708 else
3709 /* SJIS -> JISX0201-Kana */
3710 charset = charset_kana;
3711 }
3712 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3713 }
3714 *charbuf++ = c;
3715 continue;
3716
3717 invalid_code:
3718 src = src_base;
3719 consumed_chars = consumed_chars_base;
3720 ONE_MORE_BYTE (c);
3721 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3722 coding->errors++;
3723 }
3724
3725 no_more_source:
3726 coding->consumed_char += consumed_chars_base;
3727 coding->consumed = src_base - coding->source;
3728 coding->charbuf_used = charbuf - coding->charbuf;
3729 }
3730
3731 static void
3732 decode_coding_big5 (coding)
3733 struct coding_system *coding;
3734 {
3735 unsigned char *src = coding->source + coding->consumed;
3736 unsigned char *src_end = coding->source + coding->src_bytes;
3737 unsigned char *src_base;
3738 int *charbuf = coding->charbuf;
3739 int *charbuf_end = charbuf + coding->charbuf_size;
3740 int consumed_chars = 0, consumed_chars_base;
3741 int multibytep = coding->src_multibyte;
3742 struct charset *charset_roman, *charset_big5;
3743 Lisp_Object attrs, eol_type, charset_list, val;
3744
3745 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3746 val = charset_list;
3747 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3748 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3749
3750 while (1)
3751 {
3752 int c, c1;
3753
3754 src_base = src;
3755 consumed_chars_base = consumed_chars;
3756
3757 if (charbuf >= charbuf_end)
3758 break;
3759
3760 ONE_MORE_BYTE (c);
3761
3762 if (c == '\r')
3763 {
3764 if (EQ (eol_type, Qdos))
3765 {
3766 if (src == src_end)
3767 goto no_more_source;
3768 if (*src == '\n')
3769 ONE_MORE_BYTE (c);
3770 }
3771 else if (EQ (eol_type, Qmac))
3772 c = '\n';
3773 }
3774 else
3775 {
3776 struct charset *charset;
3777 if (c < 0x80)
3778 charset = charset_roman;
3779 else
3780 {
3781 /* BIG5 -> Big5 */
3782 if (c < 0xA1 || c > 0xFE)
3783 goto invalid_code;
3784 ONE_MORE_BYTE (c1);
3785 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3786 goto invalid_code;
3787 c = c << 8 | c1;
3788 charset = charset_big5;
3789 }
3790 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3791 }
3792
3793 *charbuf++ = c;
3794 continue;
3795
3796 invalid_code:
3797 src = src_base;
3798 consumed_chars = consumed_chars_base;
3799 ONE_MORE_BYTE (c);
3800 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3801 coding->errors++;
3802 }
3803
3804 no_more_source:
3805 coding->consumed_char += consumed_chars_base;
3806 coding->consumed = src_base - coding->source;
3807 coding->charbuf_used = charbuf - coding->charbuf;
3808 }
3809
3810 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3811 This function can encode charsets `ascii', `katakana-jisx0201',
3812 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3813 are sure that all these charsets are registered as official charset
3814 (i.e. do not have extended leading-codes). Characters of other
3815 charsets are produced without any encoding. If SJIS_P is 1, encode
3816 SJIS text, else encode BIG5 text. */
3817
3818 static int
3819 encode_coding_sjis (coding)
3820 struct coding_system *coding;
3821 {
3822 int multibytep = coding->dst_multibyte;
3823 int *charbuf = coding->charbuf;
3824 int *charbuf_end = charbuf + coding->charbuf_used;
3825 unsigned char *dst = coding->destination + coding->produced;
3826 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3827 int safe_room = 4;
3828 int produced_chars = 0;
3829 Lisp_Object attrs, eol_type, charset_list, val;
3830 int ascii_compatible;
3831 struct charset *charset_roman, *charset_kanji, *charset_kana;
3832 int c;
3833
3834 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3835 val = charset_list;
3836 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3837 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3838 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3839
3840 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3841
3842 while (charbuf < charbuf_end)
3843 {
3844 ASSURE_DESTINATION (safe_room);
3845 c = *charbuf++;
3846 /* Now encode the character C. */
3847 if (ASCII_CHAR_P (c) && ascii_compatible)
3848 EMIT_ONE_ASCII_BYTE (c);
3849 else
3850 {
3851 unsigned code;
3852 struct charset *charset = char_charset (c, charset_list, &code);
3853
3854 if (!charset)
3855 {
3856 c = coding->default_char;
3857 charset = char_charset (c, charset_list, &code);
3858 }
3859 if (code == CHARSET_INVALID_CODE (charset))
3860 abort ();
3861 if (charset == charset_kanji)
3862 {
3863 int c1, c2;
3864 JIS_TO_SJIS (code);
3865 c1 = code >> 8, c2 = code & 0xFF;
3866 EMIT_TWO_BYTES (c1, c2);
3867 }
3868 else if (charset == charset_kana)
3869 EMIT_ONE_BYTE (code | 0x80);
3870 else
3871 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3872 }
3873 }
3874 coding->result = CODING_RESULT_SUCCESS;
3875 coding->produced_char += produced_chars;
3876 coding->produced = dst - coding->destination;
3877 return 0;
3878 }
3879
3880 static int
3881 encode_coding_big5 (coding)
3882 struct coding_system *coding;
3883 {
3884 int multibytep = coding->dst_multibyte;
3885 int *charbuf = coding->charbuf;
3886 int *charbuf_end = charbuf + coding->charbuf_used;
3887 unsigned char *dst = coding->destination + coding->produced;
3888 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3889 int safe_room = 4;
3890 int produced_chars = 0;
3891 Lisp_Object attrs, eol_type, charset_list, val;
3892 int ascii_compatible;
3893 struct charset *charset_roman, *charset_big5;
3894 int c;
3895
3896 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3897 val = charset_list;
3898 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3899 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3900 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3901
3902 while (charbuf < charbuf_end)
3903 {
3904 ASSURE_DESTINATION (safe_room);
3905 c = *charbuf++;
3906 /* Now encode the character C. */
3907 if (ASCII_CHAR_P (c) && ascii_compatible)
3908 EMIT_ONE_ASCII_BYTE (c);
3909 else
3910 {
3911 unsigned code;
3912 struct charset *charset = char_charset (c, charset_list, &code);
3913
3914 if (! charset)
3915 {
3916 c = coding->default_char;
3917 charset = char_charset (c, charset_list, &code);
3918 }
3919 if (code == CHARSET_INVALID_CODE (charset))
3920 abort ();
3921 if (charset == charset_big5)
3922 {
3923 int c1, c2;
3924
3925 c1 = code >> 8, c2 = code & 0xFF;
3926 EMIT_TWO_BYTES (c1, c2);
3927 }
3928 else
3929 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3930 }
3931 }
3932 coding->result = CODING_RESULT_SUCCESS;
3933 coding->produced_char += produced_chars;
3934 coding->produced = dst - coding->destination;
3935 return 0;
3936 }
3937
3938 \f
3939 /*** 10. CCL handlers ***/
3940
3941 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3942 Check if a text is encoded in a coding system of which
3943 encoder/decoder are written in CCL program. If it is, return
3944 CATEGORY_MASK_CCL, else return 0. */
3945
3946 static int
3947 detect_coding_ccl (coding, mask)
3948 struct coding_system *coding;
3949 int *mask;
3950 {
3951 unsigned char *src = coding->source, *src_base = src;
3952 unsigned char *src_end = coding->source + coding->src_bytes;
3953 int multibytep = coding->src_multibyte;
3954 int consumed_chars = 0;
3955 int found = 0;
3956 unsigned char *valids = CODING_CCL_VALIDS (coding);
3957 int head_ascii = coding->head_ascii;
3958 Lisp_Object attrs;
3959
3960 coding = &coding_categories[coding_category_ccl];
3961 attrs = CODING_ID_ATTRS (coding->id);
3962 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
3963 src += head_ascii;
3964
3965 while (1)
3966 {
3967 int c;
3968 ONE_MORE_BYTE (c);
3969 if (! valids[c])
3970 break;
3971 if (!found && valids[c] > 1)
3972 found = 1;
3973 }
3974 *mask &= ~CATEGORY_MASK_CCL;
3975 return 0;
3976
3977 no_more_source:
3978 if (!found)
3979 return 0;
3980 *mask &= CATEGORY_MASK_CCL;
3981 return 1;
3982 }
3983
3984 static void
3985 decode_coding_ccl (coding)
3986 struct coding_system *coding;
3987 {
3988 unsigned char *src = coding->source + coding->consumed;
3989 unsigned char *src_end = coding->source + coding->src_bytes;
3990 int *charbuf = coding->charbuf;
3991 int *charbuf_end = charbuf + coding->charbuf_size;
3992 int consumed_chars = 0;
3993 int multibytep = coding->src_multibyte;
3994 struct ccl_program ccl;
3995 int source_charbuf[1024];
3996 int source_byteidx[1024];
3997
3998 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
3999
4000 while (src < src_end)
4001 {
4002 unsigned char *p = src;
4003 int *source, *source_end;
4004 int i = 0;
4005
4006 if (multibytep)
4007 while (i < 1024 && p < src_end)
4008 {
4009 source_byteidx[i] = p - src;
4010 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4011 }
4012 else
4013 while (i < 1024 && p < src_end)
4014 source_charbuf[i++] = *p++;
4015
4016 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4017 ccl.last_block = 1;
4018
4019 source = source_charbuf;
4020 source_end = source + i;
4021 while (source < source_end)
4022 {
4023 ccl_driver (&ccl, source, charbuf,
4024 source_end - source, charbuf_end - charbuf);
4025 source += ccl.consumed;
4026 charbuf += ccl.produced;
4027 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4028 break;
4029 }
4030 if (source < source_end)
4031 src += source_byteidx[source - source_charbuf];
4032 else
4033 src = p;
4034 consumed_chars += source - source_charbuf;
4035
4036 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4037 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4038 break;
4039 }
4040
4041 switch (ccl.status)
4042 {
4043 case CCL_STAT_SUSPEND_BY_SRC:
4044 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4045 break;
4046 case CCL_STAT_SUSPEND_BY_DST:
4047 break;
4048 case CCL_STAT_QUIT:
4049 case CCL_STAT_INVALID_CMD:
4050 coding->result = CODING_RESULT_INTERRUPT;
4051 break;
4052 default:
4053 coding->result = CODING_RESULT_SUCCESS;
4054 break;
4055 }
4056 coding->consumed_char += consumed_chars;
4057 coding->consumed = src - coding->source;
4058 coding->charbuf_used = charbuf - coding->charbuf;
4059 }
4060
4061 static int
4062 encode_coding_ccl (coding)
4063 struct coding_system *coding;
4064 {
4065 struct ccl_program ccl;
4066 int multibytep = coding->dst_multibyte;
4067 int *charbuf = coding->charbuf;
4068 int *charbuf_end = charbuf + coding->charbuf_used;
4069 unsigned char *dst = coding->destination + coding->produced;
4070 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4071 unsigned char *adjusted_dst_end = dst_end - 1;
4072 int destination_charbuf[1024];
4073 int i, produced_chars = 0;
4074
4075 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4076
4077 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4078 ccl.dst_multibyte = coding->dst_multibyte;
4079
4080 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4081 {
4082 int dst_bytes = dst_end - dst;
4083 if (dst_bytes > 1024)
4084 dst_bytes = 1024;
4085
4086 ccl_driver (&ccl, charbuf, destination_charbuf,
4087 charbuf_end - charbuf, dst_bytes);
4088 charbuf += ccl.consumed;
4089 if (multibytep)
4090 for (i = 0; i < ccl.produced; i++)
4091 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4092 else
4093 {
4094 for (i = 0; i < ccl.produced; i++)
4095 *dst++ = destination_charbuf[i] & 0xFF;
4096 produced_chars += ccl.produced;
4097 }
4098 }
4099
4100 switch (ccl.status)
4101 {
4102 case CCL_STAT_SUSPEND_BY_SRC:
4103 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4104 break;
4105 case CCL_STAT_SUSPEND_BY_DST:
4106 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4107 break;
4108 case CCL_STAT_QUIT:
4109 case CCL_STAT_INVALID_CMD:
4110 coding->result = CODING_RESULT_INTERRUPT;
4111 break;
4112 default:
4113 coding->result = CODING_RESULT_SUCCESS;
4114 break;
4115 }
4116
4117 coding->produced_char += produced_chars;
4118 coding->produced = dst - coding->destination;
4119 return 0;
4120 }
4121
4122
4123 \f
4124 /*** 10, 11. no-conversion handlers ***/
4125
4126 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4127
4128 static void
4129 decode_coding_raw_text (coding)
4130 struct coding_system *coding;
4131 {
4132 coding->chars_at_source = 1;
4133 coding->consumed_char = 0;
4134 coding->consumed = 0;
4135 coding->result = CODING_RESULT_SUCCESS;
4136 }
4137
4138 static int
4139 encode_coding_raw_text (coding)
4140 struct coding_system *coding;
4141 {
4142 int multibytep = coding->dst_multibyte;
4143 int *charbuf = coding->charbuf;
4144 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4145 unsigned char *dst = coding->destination + coding->produced;
4146 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4147 int produced_chars = 0;
4148 int c;
4149
4150 if (multibytep)
4151 {
4152 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4153
4154 if (coding->src_multibyte)
4155 while (charbuf < charbuf_end)
4156 {
4157 ASSURE_DESTINATION (safe_room);
4158 c = *charbuf++;
4159 if (ASCII_CHAR_P (c))
4160 EMIT_ONE_ASCII_BYTE (c);
4161 else if (CHAR_BYTE8_P (c))
4162 {
4163 c = CHAR_TO_BYTE8 (c);
4164 EMIT_ONE_BYTE (c);
4165 }
4166 else
4167 {
4168 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4169
4170 CHAR_STRING_ADVANCE (c, p1);
4171 while (p0 < p1)
4172 EMIT_ONE_BYTE (*p0);
4173 }
4174 }
4175 else
4176 while (charbuf < charbuf_end)
4177 {
4178 ASSURE_DESTINATION (safe_room);
4179 c = *charbuf++;
4180 EMIT_ONE_BYTE (c);
4181 }
4182 }
4183 else
4184 {
4185 if (coding->src_multibyte)
4186 {
4187 int safe_room = MAX_MULTIBYTE_LENGTH;
4188
4189 while (charbuf < charbuf_end)
4190 {
4191 ASSURE_DESTINATION (safe_room);
4192 c = *charbuf++;
4193 if (ASCII_CHAR_P (c))
4194 *dst++ = c;
4195 else if (CHAR_BYTE8_P (c))
4196 *dst++ = CHAR_TO_BYTE8 (c);
4197 else
4198 CHAR_STRING_ADVANCE (c, dst);
4199 produced_chars++;
4200 }
4201 }
4202 else
4203 {
4204 ASSURE_DESTINATION (charbuf_end - charbuf);
4205 while (charbuf < charbuf_end && dst < dst_end)
4206 *dst++ = *charbuf++;
4207 produced_chars = dst - (coding->destination + coding->dst_bytes);
4208 }
4209 }
4210 coding->result = CODING_RESULT_SUCCESS;
4211 coding->produced_char += produced_chars;
4212 coding->produced = dst - coding->destination;
4213 return 0;
4214 }
4215
4216 static int
4217 detect_coding_charset (coding, mask)
4218 struct coding_system *coding;
4219 int *mask;
4220 {
4221 unsigned char *src = coding->source, *src_base = src;
4222 unsigned char *src_end = coding->source + coding->src_bytes;
4223 int multibytep = coding->src_multibyte;
4224 int consumed_chars = 0;
4225 Lisp_Object attrs, valids;
4226
4227 coding = &coding_categories[coding_category_charset];
4228 attrs = CODING_ID_ATTRS (coding->id);
4229 valids = AREF (attrs, coding_attr_charset_valids);
4230
4231 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4232 src += coding->head_ascii;
4233
4234 while (1)
4235 {
4236 int c;
4237
4238 ONE_MORE_BYTE (c);
4239 if (NILP (AREF (valids, c)))
4240 break;
4241 }
4242 *mask &= ~CATEGORY_MASK_CHARSET;
4243 return 0;
4244
4245 no_more_source:
4246 *mask &= CATEGORY_MASK_CHARSET;
4247 return 1;
4248 }
4249
4250 static void
4251 decode_coding_charset (coding)
4252 struct coding_system *coding;
4253 {
4254 unsigned char *src = coding->source + coding->consumed;
4255 unsigned char *src_end = coding->source + coding->src_bytes;
4256 unsigned char *src_base;
4257 int *charbuf = coding->charbuf;
4258 int *charbuf_end = charbuf + coding->charbuf_size;
4259 int consumed_chars = 0, consumed_chars_base;
4260 int multibytep = coding->src_multibyte;
4261 Lisp_Object attrs, eol_type, charset_list, valids;
4262
4263 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4264 valids = AREF (attrs, coding_attr_charset_valids);
4265
4266 while (1)
4267 {
4268 int c;
4269
4270 src_base = src;
4271 consumed_chars_base = consumed_chars;
4272
4273 if (charbuf >= charbuf_end)
4274 break;
4275
4276 ONE_MORE_BYTE (c);
4277 if (c == '\r')
4278 {
4279 if (EQ (eol_type, Qdos))
4280 {
4281 if (src < src_end
4282 && *src == '\n')
4283 ONE_MORE_BYTE (c);
4284 }
4285 else if (EQ (eol_type, Qmac))
4286 c = '\n';
4287 }
4288 else
4289 {
4290 Lisp_Object val;
4291 struct charset *charset;
4292 int c1;
4293
4294 val = AREF (valids, c);
4295 if (NILP (val))
4296 goto invalid_code;
4297 charset = CHARSET_FROM_ID (XFASTINT (val));
4298 if (CHARSET_DIMENSION (charset) > 1)
4299 {
4300 ONE_MORE_BYTE (c1);
4301 c = (c << 8) | c1;
4302 if (CHARSET_DIMENSION (charset) > 2)
4303 {
4304 ONE_MORE_BYTE (c1);
4305 c = (c << 8) | c1;
4306 if (CHARSET_DIMENSION (charset) > 3)
4307 {
4308 ONE_MORE_BYTE (c1);
4309 c = (c << 8) | c1;
4310 }
4311 }
4312 }
4313 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4314 if (c < 0)
4315 goto invalid_code;
4316 }
4317 *charbuf++ = c;
4318 continue;
4319
4320 invalid_code:
4321 src = src_base;
4322 consumed_chars = consumed_chars_base;
4323 ONE_MORE_BYTE (c);
4324 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4325 coding->errors++;
4326 }
4327
4328 no_more_source:
4329 coding->consumed_char += consumed_chars_base;
4330 coding->consumed = src_base - coding->source;
4331 coding->charbuf_used = charbuf - coding->charbuf;
4332 }
4333
4334 static int
4335 encode_coding_charset (coding)
4336 struct coding_system *coding;
4337 {
4338 int multibytep = coding->dst_multibyte;
4339 int *charbuf = coding->charbuf;
4340 int *charbuf_end = charbuf + coding->charbuf_used;
4341 unsigned char *dst = coding->destination + coding->produced;
4342 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4343 int safe_room = MAX_MULTIBYTE_LENGTH;
4344 int produced_chars = 0;
4345 struct charset *charset;
4346 Lisp_Object attrs, eol_type, charset_list;
4347 int ascii_compatible;
4348 int c;
4349
4350 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4351 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4352
4353 while (charbuf < charbuf_end)
4354 {
4355 struct charset *charset;
4356 unsigned code;
4357
4358 ASSURE_DESTINATION (safe_room);
4359 c = *charbuf++;
4360 if (ascii_compatible && ASCII_CHAR_P (c))
4361 EMIT_ONE_ASCII_BYTE (c);
4362 else
4363 {
4364 charset = char_charset (c, charset_list, &code);
4365 if (charset)
4366 {
4367 if (CHARSET_DIMENSION (charset) == 1)
4368 EMIT_ONE_BYTE (code);
4369 else if (CHARSET_DIMENSION (charset) == 2)
4370 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4371 else if (CHARSET_DIMENSION (charset) == 3)
4372 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4373 else
4374 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4375 (code >> 8) & 0xFF, code & 0xFF);
4376 }
4377 else
4378 EMIT_ONE_BYTE (coding->default_char);
4379 }
4380 }
4381
4382 coding->result = CODING_RESULT_SUCCESS;
4383 coding->produced_char += produced_chars;
4384 coding->produced = dst - coding->destination;
4385 return 0;
4386 }
4387
4388 \f
4389 /*** 7. C library functions ***/
4390
4391 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
4392 has a property `coding-system'. The value of this property is a
4393 vector of length 5 (called as coding-vector). Among elements of
4394 this vector, the first (element[0]) and the fifth (element[4])
4395 carry important information for decoding/encoding. Before
4396 decoding/encoding, this information should be set in fields of a
4397 structure of type `coding_system'.
4398
4399 A value of property `coding-system' can be a symbol of another
4400 subsidiary coding-system. In that case, Emacs gets coding-vector
4401 from that symbol.
4402
4403 `element[0]' contains information to be set in `coding->type'. The
4404 value and its meaning is as follows:
4405
4406 0 -- coding_type_emacs_mule
4407 1 -- coding_type_sjis
4408 2 -- coding_type_iso_2022
4409 3 -- coding_type_big5
4410 4 -- coding_type_ccl encoder/decoder written in CCL
4411 nil -- coding_type_no_conversion
4412 t -- coding_type_undecided (automatic conversion on decoding,
4413 no-conversion on encoding)
4414
4415 `element[4]' contains information to be set in `coding->flags' and
4416 `coding->spec'. The meaning varies by `coding->type'.
4417
4418 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4419 of length 32 (of which the first 13 sub-elements are used now).
4420 Meanings of these sub-elements are:
4421
4422 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4423 If the value is an integer of valid charset, the charset is
4424 assumed to be designated to graphic register N initially.
4425
4426 If the value is minus, it is a minus value of charset which
4427 reserves graphic register N, which means that the charset is
4428 not designated initially but should be designated to graphic
4429 register N just before encoding a character in that charset.
4430
4431 If the value is nil, graphic register N is never used on
4432 encoding.
4433
4434 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4435 Each value takes t or nil. See the section ISO2022 of
4436 `coding.h' for more information.
4437
4438 If `coding->type' is `coding_type_big5', element[4] is t to denote
4439 BIG5-ETen or nil to denote BIG5-HKU.
4440
4441 If `coding->type' takes the other value, element[4] is ignored.
4442
4443 Emacs Lisp's coding system also carries information about format of
4444 end-of-line in a value of property `eol-type'. If the value is
4445 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4446 it is not integer, it should be a vector of subsidiary coding
4447 systems of which property `eol-type' has one of above values.
4448
4449 */
4450
4451 /* Setup coding context CODING from information about CODING_SYSTEM.
4452 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4453 CODING_SYSTEM is invalid, signal an error. */
4454
4455 void
4456 setup_coding_system (coding_system, coding)
4457 Lisp_Object coding_system;
4458 struct coding_system *coding;
4459 {
4460 Lisp_Object attrs;
4461 Lisp_Object eol_type;
4462 Lisp_Object coding_type;
4463 Lisp_Object val;
4464
4465 if (NILP (coding_system))
4466 coding_system = Qno_conversion;
4467
4468 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4469
4470 attrs = CODING_ID_ATTRS (coding->id);
4471 eol_type = CODING_ID_EOL_TYPE (coding->id);
4472
4473 coding->mode = 0;
4474 coding->head_ascii = -1;
4475 coding->common_flags
4476 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4477
4478 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4479 coding->max_charset_id = XSTRING (val)->size - 1;
4480 coding->safe_charsets = (char *) XSTRING (val)->data;
4481 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4482
4483 coding_type = CODING_ATTR_TYPE (attrs);
4484 if (EQ (coding_type, Qundecided))
4485 {
4486 coding->detector = NULL;
4487 coding->decoder = decode_coding_raw_text;
4488 coding->encoder = encode_coding_raw_text;
4489 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4490 }
4491 else if (EQ (coding_type, Qiso_2022))
4492 {
4493 int i;
4494 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4495
4496 /* Invoke graphic register 0 to plane 0. */
4497 CODING_ISO_INVOCATION (coding, 0) = 0;
4498 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4499 CODING_ISO_INVOCATION (coding, 1)
4500 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4501 /* Setup the initial status of designation. */
4502 for (i = 0; i < 4; i++)
4503 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4504 /* Not single shifting initially. */
4505 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4506 /* Beginning of buffer should also be regarded as bol. */
4507 CODING_ISO_BOL (coding) = 1;
4508 coding->detector = detect_coding_iso_2022;
4509 coding->decoder = decode_coding_iso_2022;
4510 coding->encoder = encode_coding_iso_2022;
4511 if (flags & CODING_ISO_FLAG_SAFE)
4512 coding->mode |= CODING_MODE_SAFE_ENCODING;
4513 coding->common_flags
4514 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4515 | CODING_REQUIRE_FLUSHING_MASK);
4516 if (flags & CODING_ISO_FLAG_COMPOSITION)
4517 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4518 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4519 {
4520 setup_iso_safe_charsets (attrs);
4521 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4522 coding->max_charset_id = XSTRING (val)->size - 1;
4523 coding->safe_charsets = (char *) XSTRING (val)->data;
4524 }
4525 CODING_ISO_FLAGS (coding) = flags;
4526 }
4527 else if (EQ (coding_type, Qcharset))
4528 {
4529 coding->detector = detect_coding_charset;
4530 coding->decoder = decode_coding_charset;
4531 coding->encoder = encode_coding_charset;
4532 coding->common_flags
4533 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4534 }
4535 else if (EQ (coding_type, Qutf_8))
4536 {
4537 coding->detector = detect_coding_utf_8;
4538 coding->decoder = decode_coding_utf_8;
4539 coding->encoder = encode_coding_utf_8;
4540 coding->common_flags
4541 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4542 }
4543 else if (EQ (coding_type, Qutf_16))
4544 {
4545 val = AREF (attrs, coding_attr_utf_16_bom);
4546 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4547 : EQ (val, Qt) ? utf_16_with_bom
4548 : utf_16_without_bom);
4549 val = AREF (attrs, coding_attr_utf_16_endian);
4550 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4551 : utf_16_little_endian);
4552 CODING_UTF_16_SURROGATE (coding) = 0;
4553 coding->detector = detect_coding_utf_16;
4554 coding->decoder = decode_coding_utf_16;
4555 coding->encoder = encode_coding_utf_16;
4556 coding->common_flags
4557 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4558 }
4559 else if (EQ (coding_type, Qccl))
4560 {
4561 coding->detector = detect_coding_ccl;
4562 coding->decoder = decode_coding_ccl;
4563 coding->encoder = encode_coding_ccl;
4564 coding->common_flags
4565 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4566 | CODING_REQUIRE_FLUSHING_MASK);
4567 }
4568 else if (EQ (coding_type, Qemacs_mule))
4569 {
4570 coding->detector = detect_coding_emacs_mule;
4571 coding->decoder = decode_coding_emacs_mule;
4572 coding->encoder = encode_coding_emacs_mule;
4573 coding->common_flags
4574 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4575 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4576 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4577 {
4578 Lisp_Object tail, safe_charsets;
4579 int max_charset_id = 0;
4580
4581 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4582 tail = XCDR (tail))
4583 if (max_charset_id < XFASTINT (XCAR (tail)))
4584 max_charset_id = XFASTINT (XCAR (tail));
4585 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4586 make_number (255));
4587 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4588 tail = XCDR (tail))
4589 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4590 coding->max_charset_id = max_charset_id;
4591 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4592 }
4593 }
4594 else if (EQ (coding_type, Qshift_jis))
4595 {
4596 coding->detector = detect_coding_sjis;
4597 coding->decoder = decode_coding_sjis;
4598 coding->encoder = encode_coding_sjis;
4599 coding->common_flags
4600 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4601 }
4602 else if (EQ (coding_type, Qbig5))
4603 {
4604 coding->detector = detect_coding_big5;
4605 coding->decoder = decode_coding_big5;
4606 coding->encoder = encode_coding_big5;
4607 coding->common_flags
4608 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4609 }
4610 else /* EQ (coding_type, Qraw_text) */
4611 {
4612 coding->detector = NULL;
4613 coding->decoder = decode_coding_raw_text;
4614 coding->encoder = encode_coding_raw_text;
4615 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4616 }
4617
4618 return;
4619 }
4620
4621 /* Return raw-text or one of its subsidiaries that has the same
4622 eol_type as CODING-SYSTEM. */
4623
4624 Lisp_Object
4625 raw_text_coding_system (coding_system)
4626 Lisp_Object coding_system;
4627 {
4628 Lisp_Object spec, attrs;
4629 Lisp_Object eol_type, raw_text_eol_type;
4630
4631 spec = CODING_SYSTEM_SPEC (coding_system);
4632 attrs = AREF (spec, 0);
4633
4634 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4635 return coding_system;
4636
4637 eol_type = AREF (spec, 2);
4638 if (VECTORP (eol_type))
4639 return Qraw_text;
4640 spec = CODING_SYSTEM_SPEC (Qraw_text);
4641 raw_text_eol_type = AREF (spec, 2);
4642 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4643 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4644 : AREF (raw_text_eol_type, 2));
4645 }
4646
4647
4648 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4649 does, return one of the subsidiary that has the same eol-spec as
4650 PARENT. Otherwise, return CODING_SYSTEM. */
4651
4652 Lisp_Object
4653 coding_inherit_eol_type (coding_system, parent)
4654 {
4655 Lisp_Object spec, attrs, eol_type;
4656
4657 spec = CODING_SYSTEM_SPEC (coding_system);
4658 attrs = AREF (spec, 0);
4659 eol_type = AREF (spec, 2);
4660 if (VECTORP (eol_type))
4661 {
4662 Lisp_Object parent_spec;
4663 Lisp_Object parent_eol_type;
4664
4665 parent_spec
4666 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4667 parent_eol_type = AREF (parent_spec, 2);
4668 if (EQ (parent_eol_type, Qunix))
4669 coding_system = AREF (eol_type, 0);
4670 else if (EQ (parent_eol_type, Qdos))
4671 coding_system = AREF (eol_type, 1);
4672 else if (EQ (parent_eol_type, Qmac))
4673 coding_system = AREF (eol_type, 2);
4674 }
4675 return coding_system;
4676 }
4677
4678 /* Emacs has a mechanism to automatically detect a coding system if it
4679 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4680 it's impossible to distinguish some coding systems accurately
4681 because they use the same range of codes. So, at first, coding
4682 systems are categorized into 7, those are:
4683
4684 o coding-category-emacs-mule
4685
4686 The category for a coding system which has the same code range
4687 as Emacs' internal format. Assigned the coding-system (Lisp
4688 symbol) `emacs-mule' by default.
4689
4690 o coding-category-sjis
4691
4692 The category for a coding system which has the same code range
4693 as SJIS. Assigned the coding-system (Lisp
4694 symbol) `japanese-shift-jis' by default.
4695
4696 o coding-category-iso-7
4697
4698 The category for a coding system which has the same code range
4699 as ISO2022 of 7-bit environment. This doesn't use any locking
4700 shift and single shift functions. This can encode/decode all
4701 charsets. Assigned the coding-system (Lisp symbol)
4702 `iso-2022-7bit' by default.
4703
4704 o coding-category-iso-7-tight
4705
4706 Same as coding-category-iso-7 except that this can
4707 encode/decode only the specified charsets.
4708
4709 o coding-category-iso-8-1
4710
4711 The category for a coding system which has the same code range
4712 as ISO2022 of 8-bit environment and graphic plane 1 used only
4713 for DIMENSION1 charset. This doesn't use any locking shift
4714 and single shift functions. Assigned the coding-system (Lisp
4715 symbol) `iso-latin-1' by default.
4716
4717 o coding-category-iso-8-2
4718
4719 The category for a coding system which has the same code range
4720 as ISO2022 of 8-bit environment and graphic plane 1 used only
4721 for DIMENSION2 charset. This doesn't use any locking shift
4722 and single shift functions. Assigned the coding-system (Lisp
4723 symbol) `japanese-iso-8bit' by default.
4724
4725 o coding-category-iso-7-else
4726
4727 The category for a coding system which has the same code range
4728 as ISO2022 of 7-bit environemnt but uses locking shift or
4729 single shift functions. Assigned the coding-system (Lisp
4730 symbol) `iso-2022-7bit-lock' by default.
4731
4732 o coding-category-iso-8-else
4733
4734 The category for a coding system which has the same code range
4735 as ISO2022 of 8-bit environemnt but uses locking shift or
4736 single shift functions. Assigned the coding-system (Lisp
4737 symbol) `iso-2022-8bit-ss2' by default.
4738
4739 o coding-category-big5
4740
4741 The category for a coding system which has the same code range
4742 as BIG5. Assigned the coding-system (Lisp symbol)
4743 `cn-big5' by default.
4744
4745 o coding-category-utf-8
4746
4747 The category for a coding system which has the same code range
4748 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4749 symbol) `utf-8' by default.
4750
4751 o coding-category-utf-16-be
4752
4753 The category for a coding system in which a text has an
4754 Unicode signature (cf. Unicode Standard) in the order of BIG
4755 endian at the head. Assigned the coding-system (Lisp symbol)
4756 `utf-16-be' by default.
4757
4758 o coding-category-utf-16-le
4759
4760 The category for a coding system in which a text has an
4761 Unicode signature (cf. Unicode Standard) in the order of
4762 LITTLE endian at the head. Assigned the coding-system (Lisp
4763 symbol) `utf-16-le' by default.
4764
4765 o coding-category-ccl
4766
4767 The category for a coding system of which encoder/decoder is
4768 written in CCL programs. The default value is nil, i.e., no
4769 coding system is assigned.
4770
4771 o coding-category-binary
4772
4773 The category for a coding system not categorized in any of the
4774 above. Assigned the coding-system (Lisp symbol)
4775 `no-conversion' by default.
4776
4777 Each of them is a Lisp symbol and the value is an actual
4778 `coding-system's (this is also a Lisp symbol) assigned by a user.
4779 What Emacs does actually is to detect a category of coding system.
4780 Then, it uses a `coding-system' assigned to it. If Emacs can't
4781 decide only one possible category, it selects a category of the
4782 highest priority. Priorities of categories are also specified by a
4783 user in a Lisp variable `coding-category-list'.
4784
4785 */
4786
4787 #define EOL_SEEN_NONE 0
4788 #define EOL_SEEN_LF 1
4789 #define EOL_SEEN_CR 2
4790 #define EOL_SEEN_CRLF 4
4791
4792 /* Detect how end-of-line of a text of length CODING->src_bytes
4793 pointed by CODING->source is encoded. Return one of
4794 EOL_SEEN_XXX. */
4795
4796 #define MAX_EOL_CHECK_COUNT 3
4797
4798 static int
4799 detect_eol (coding, source, src_bytes)
4800 struct coding_system *coding;
4801 unsigned char *source;
4802 EMACS_INT src_bytes;
4803 {
4804 Lisp_Object attrs, coding_type;
4805 unsigned char *src = source, *src_end = src + src_bytes;
4806 unsigned char c;
4807 int total = 0;
4808 int eol_seen = EOL_SEEN_NONE;
4809
4810 attrs = CODING_ID_ATTRS (coding->id);
4811 coding_type = CODING_ATTR_TYPE (attrs);
4812
4813 if (EQ (coding_type, Qccl))
4814 {
4815 int msb, lsb;
4816
4817 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4818 lsb = 1 - msb;
4819
4820 while (src + 1 < src_end)
4821 {
4822 c = src[lsb];
4823 if (src[msb] == 0 && (c == '\n' || c == '\r'))
4824 {
4825 int this_eol;
4826
4827 if (c == '\n')
4828 this_eol = EOL_SEEN_LF;
4829 else if (src + 3 >= src_end
4830 || src[msb + 2] != 0
4831 || src[lsb + 2] != '\n')
4832 this_eol = EOL_SEEN_CR;
4833 else
4834 this_eol = EOL_SEEN_CRLF;
4835
4836 if (eol_seen == EOL_SEEN_NONE)
4837 /* This is the first end-of-line. */
4838 eol_seen = this_eol;
4839 else if (eol_seen != this_eol)
4840 {
4841 /* The found type is different from what found before. */
4842 eol_seen = EOL_SEEN_LF;
4843 break;
4844 }
4845 if (++total == MAX_EOL_CHECK_COUNT)
4846 break;
4847 }
4848 src += 2;
4849 }
4850 }
4851 else
4852 {
4853 while (src < src_end)
4854 {
4855 c = *src++;
4856 if (c == '\n' || c == '\r')
4857 {
4858 int this_eol;
4859
4860 if (c == '\n')
4861 this_eol = EOL_SEEN_LF;
4862 else if (src >= src_end || *src != '\n')
4863 this_eol = EOL_SEEN_CR;
4864 else
4865 this_eol = EOL_SEEN_CRLF, src++;
4866
4867 if (eol_seen == EOL_SEEN_NONE)
4868 /* This is the first end-of-line. */
4869 eol_seen = this_eol;
4870 else if (eol_seen != this_eol)
4871 {
4872 /* The found type is different from what found before. */
4873 eol_seen = EOL_SEEN_LF;
4874 break;
4875 }
4876 if (++total == MAX_EOL_CHECK_COUNT)
4877 break;
4878 }
4879 }
4880 }
4881 return eol_seen;
4882 }
4883
4884
4885 static void
4886 adjust_coding_eol_type (coding, eol_seen)
4887 struct coding_system *coding;
4888 int eol_seen;
4889 {
4890 Lisp_Object eol_type;
4891
4892 eol_type = CODING_ID_EOL_TYPE (coding->id);
4893 if (eol_seen & EOL_SEEN_LF)
4894 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4895 else if (eol_type & EOL_SEEN_CRLF)
4896 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4897 else if (eol_type & EOL_SEEN_CR)
4898 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
4899 }
4900
4901 /* Detect how a text specified in CODING is encoded. If a coding
4902 system is detected, update fields of CODING by the detected coding
4903 system. */
4904
4905 void
4906 detect_coding (coding)
4907 struct coding_system *coding;
4908 {
4909 unsigned char *src, *src_end;
4910 Lisp_Object attrs, coding_type;
4911
4912 coding->consumed = coding->consumed_char = 0;
4913 coding->produced = coding->produced_char = 0;
4914 coding_set_source (coding);
4915
4916 src_end = coding->source + coding->src_bytes;
4917
4918 /* If we have not yet decided the text encoding type, detect it
4919 now. */
4920 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
4921 {
4922 int mask = CATEGORY_MASK_ANY;
4923 int c, i;
4924
4925 for (src = coding->source; src < src_end; src++)
4926 {
4927 c = *src;
4928 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
4929 || c == ISO_CODE_SI
4930 || c == ISO_CODE_SO)))
4931 break;
4932 }
4933 coding->head_ascii = src - (coding->source + coding->consumed);
4934
4935 if (coding->head_ascii < coding->src_bytes)
4936 {
4937 int detected = 0;
4938
4939 for (i = 0; i < coding_category_raw_text; i++)
4940 {
4941 enum coding_category category = coding_priorities[i];
4942 struct coding_system *this = coding_categories + category;
4943
4944 if (category >= coding_category_raw_text
4945 || detected & (1 << category))
4946 continue;
4947
4948 if (this->id < 0)
4949 {
4950 /* No coding system of this category is defined. */
4951 mask &= ~(1 << category);
4952 }
4953 else
4954 {
4955 detected |= detected_mask[category];
4956 if ((*(this->detector)) (coding, &mask))
4957 break;
4958 }
4959 }
4960 if (! mask)
4961 setup_coding_system (Qraw_text, coding);
4962 else if (mask != CATEGORY_MASK_ANY)
4963 for (i = 0; i < coding_category_raw_text; i++)
4964 {
4965 enum coding_category category = coding_priorities[i];
4966 struct coding_system *this = coding_categories + category;
4967
4968 if (mask & (1 << category))
4969 {
4970 setup_coding_system (CODING_ID_NAME (this->id), coding);
4971 break;
4972 }
4973 }
4974 }
4975 }
4976
4977 attrs = CODING_ID_ATTRS (coding->id);
4978 coding_type = CODING_ATTR_TYPE (attrs);
4979
4980 /* If we have not yet decided the EOL type, detect it now. But, the
4981 detection is impossible for a CCL based coding system, in which
4982 case, we detct the EOL type after decoding. */
4983 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
4984 && ! EQ (coding_type, Qccl))
4985 {
4986 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
4987
4988 if (eol_seen != EOL_SEEN_NONE)
4989 adjust_coding_eol_type (coding, eol_seen);
4990 }
4991 }
4992
4993
4994 static void
4995 decode_eol (coding)
4996 struct coding_system *coding;
4997 {
4998 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
4999 {
5000 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5001 unsigned char *pend = p + coding->produced;
5002 int eol_seen = EOL_SEEN_NONE;
5003
5004 for (; p < pend; p++)
5005 {
5006 if (*p == '\n')
5007 eol_seen |= EOL_SEEN_LF;
5008 else if (*p == '\r')
5009 {
5010 if (p + 1 < pend && *(p + 1) == '\n')
5011 {
5012 eol_seen |= EOL_SEEN_CRLF;
5013 p++;
5014 }
5015 else
5016 eol_seen |= EOL_SEEN_CR;
5017 }
5018 }
5019 if (eol_seen != EOL_SEEN_NONE)
5020 adjust_coding_eol_type (coding, eol_seen);
5021 }
5022
5023 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5024 {
5025 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5026 unsigned char *pend = p + coding->produced;
5027
5028 for (; p < pend; p++)
5029 if (*p == '\r')
5030 *p = '\n';
5031 }
5032 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5033 {
5034 unsigned char *p, *pbeg, *pend;
5035 Lisp_Object undo_list;
5036
5037 move_gap_both (coding->dst_pos + coding->produced_char,
5038 coding->dst_pos_byte + coding->produced);
5039 undo_list = current_buffer->undo_list;
5040 current_buffer->undo_list = Qt;
5041 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5042 current_buffer->undo_list = undo_list;
5043 pbeg = GPT_ADDR;
5044 pend = pbeg + coding->produced;
5045
5046 for (p = pend - 1; p >= pbeg; p--)
5047 if (*p == '\r')
5048 {
5049 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5050 pend--;
5051 }
5052 coding->produced_char -= coding->produced - (pend - pbeg);
5053 coding->produced = pend - pbeg;
5054 insert_from_gap (coding->produced_char, coding->produced);
5055 }
5056 }
5057
5058 static void
5059 translate_chars (coding, table)
5060 struct coding_system *coding;
5061 Lisp_Object table;
5062 {
5063 int *charbuf = coding->charbuf;
5064 int *charbuf_end = charbuf + coding->charbuf_used;
5065 int c;
5066
5067 if (coding->chars_at_source)
5068 return;
5069
5070 while (charbuf < charbuf_end)
5071 {
5072 c = *charbuf;
5073 if (c < 0)
5074 charbuf += c;
5075 else
5076 *charbuf++ = translate_char (table, c);
5077 }
5078 }
5079
5080 static int
5081 produce_chars (coding)
5082 struct coding_system *coding;
5083 {
5084 unsigned char *dst = coding->destination + coding->produced;
5085 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5086 int produced;
5087 int produced_chars = 0;
5088
5089 if (! coding->chars_at_source)
5090 {
5091 /* Characters are in coding->charbuf. */
5092 int *buf = coding->charbuf;
5093 int *buf_end = buf + coding->charbuf_used;
5094 unsigned char *adjusted_dst_end;
5095
5096 if (BUFFERP (coding->src_object)
5097 && EQ (coding->src_object, coding->dst_object))
5098 dst_end = coding->source + coding->consumed;
5099 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5100
5101 while (buf < buf_end)
5102 {
5103 int c = *buf++;
5104
5105 if (dst >= adjusted_dst_end)
5106 {
5107 dst = alloc_destination (coding,
5108 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5109 dst);
5110 dst_end = coding->destination + coding->dst_bytes;
5111 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5112 }
5113 if (c >= 0)
5114 {
5115 if (coding->dst_multibyte
5116 || ! CHAR_BYTE8_P (c))
5117 CHAR_STRING_ADVANCE (c, dst);
5118 else
5119 *dst++ = CHAR_TO_BYTE8 (c);
5120 produced_chars++;
5121 }
5122 else
5123 /* This is an annotation data. */
5124 buf -= c + 1;
5125 }
5126 }
5127 else
5128 {
5129 unsigned char *src = coding->source;
5130 unsigned char *src_end = src + coding->src_bytes;
5131 Lisp_Object eol_type;
5132
5133 eol_type = CODING_ID_EOL_TYPE (coding->id);
5134
5135 if (coding->src_multibyte != coding->dst_multibyte)
5136 {
5137 if (coding->src_multibyte)
5138 {
5139 int multibytep = 1;
5140 int consumed_chars;
5141
5142 while (1)
5143 {
5144 unsigned char *src_base = src;
5145 int c;
5146
5147 ONE_MORE_BYTE (c);
5148 if (c == '\r')
5149 {
5150 if (EQ (eol_type, Qdos))
5151 {
5152 if (src < src_end
5153 && *src == '\n')
5154 c = *src++;
5155 }
5156 else if (EQ (eol_type, Qmac))
5157 c = '\n';
5158 }
5159 if (dst == dst_end)
5160 {
5161 coding->consumed = src - coding->source;
5162
5163 if (EQ (coding->src_object, coding->dst_object))
5164 dst_end = src;
5165 if (dst == dst_end)
5166 {
5167 dst = alloc_destination (coding, src_end - src + 1,
5168 dst);
5169 dst_end = coding->destination + coding->dst_bytes;
5170 coding_set_source (coding);
5171 src = coding->source + coding->consumed;
5172 src_end = coding->source + coding->src_bytes;
5173 }
5174 }
5175 *dst++ = c;
5176 produced_chars++;
5177 }
5178 no_more_source:
5179 ;
5180 }
5181 else
5182 while (src < src_end)
5183 {
5184 int multibytep = 1;
5185 int c = *src++;
5186
5187 if (c == '\r')
5188 {
5189 if (EQ (eol_type, Qdos))
5190 {
5191 if (src < src_end
5192 && *src == '\n')
5193 c = *src++;
5194 }
5195 else if (EQ (eol_type, Qmac))
5196 c = '\n';
5197 }
5198 if (dst >= dst_end - 1)
5199 {
5200 coding->consumed = src - coding->source;
5201
5202 if (EQ (coding->src_object, coding->dst_object))
5203 dst_end = src;
5204 if (dst >= dst_end - 1)
5205 {
5206 dst = alloc_destination (coding, src_end - src + 2,
5207 dst);
5208 dst_end = coding->destination + coding->dst_bytes;
5209 coding_set_source (coding);
5210 src = coding->source + coding->consumed;
5211 src_end = coding->source + coding->src_bytes;
5212 }
5213 }
5214 EMIT_ONE_BYTE (c);
5215 }
5216 }
5217 else
5218 {
5219 if (!EQ (coding->src_object, coding->dst_object))
5220 {
5221 int require = coding->src_bytes - coding->dst_bytes;
5222
5223 if (require > 0)
5224 {
5225 EMACS_INT offset = src - coding->source;
5226
5227 dst = alloc_destination (coding, require, dst);
5228 coding_set_source (coding);
5229 src = coding->source + offset;
5230 src_end = coding->source + coding->src_bytes;
5231 }
5232 }
5233 produced_chars = coding->src_chars;
5234 while (src < src_end)
5235 {
5236 int c = *src++;
5237
5238 if (c == '\r')
5239 {
5240 if (EQ (eol_type, Qdos))
5241 {
5242 if (src < src_end
5243 && *src == '\n')
5244 c = *src++;
5245 produced_chars--;
5246 }
5247 else if (EQ (eol_type, Qmac))
5248 c = '\n';
5249 }
5250 *dst++ = c;
5251 }
5252 }
5253 coding->consumed = coding->src_bytes;
5254 coding->consumed_char = coding->src_chars;
5255 }
5256
5257 produced = dst - (coding->destination + coding->produced);
5258 if (BUFFERP (coding->dst_object))
5259 insert_from_gap (produced_chars, produced);
5260 coding->produced += produced;
5261 coding->produced_char += produced_chars;
5262 return produced_chars;
5263 }
5264
5265 /* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5266 or
5267 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5268 */
5269
5270 static INLINE void
5271 produce_composition (coding, charbuf)
5272 struct coding_system *coding;
5273 int *charbuf;
5274 {
5275 Lisp_Object buffer;
5276 int len;
5277 EMACS_INT pos;
5278 enum composition_method method;
5279 int cmp_len;
5280 Lisp_Object components;
5281
5282 buffer = coding->dst_object;
5283 len = -charbuf[0];
5284 pos = coding->dst_pos + charbuf[1];
5285 method = (enum composition_method) (charbuf[3]);
5286 cmp_len = charbuf[4];
5287
5288 if (method == COMPOSITION_RELATIVE)
5289 components = Qnil;
5290 else
5291 {
5292 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5293 int i;
5294
5295 len -= 5;
5296 charbuf += 5;
5297 for (i = 0; i < len; i++)
5298 args[i] = make_number (charbuf[i]);
5299 components = (method == COMPOSITION_WITH_ALTCHARS
5300 ? Fstring (len, args) : Fvector (len, args));
5301 }
5302 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5303 }
5304
5305 static int *
5306 save_composition_data (buf, buf_end, prop)
5307 int *buf, *buf_end;
5308 Lisp_Object prop;
5309 {
5310 enum composition_method method = COMPOSITION_METHOD (prop);
5311 int cmp_len = COMPOSITION_LENGTH (prop);
5312
5313 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5314 return NULL;
5315
5316 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5317 buf[2] = method;
5318 buf[3] = cmp_len;
5319
5320 if (method == COMPOSITION_RELATIVE)
5321 buf[0] = 4;
5322 else
5323 {
5324 Lisp_Object components;
5325 int len, i;
5326
5327 components = COMPOSITION_COMPONENTS (prop);
5328 if (VECTORP (components))
5329 {
5330 len = XVECTOR (components)->size;
5331 for (i = 0; i < len; i++)
5332 buf[4 + i] = XINT (AREF (components, i));
5333 }
5334 else if (STRINGP (components))
5335 {
5336 int i_byte;
5337
5338 len = XSTRING (components)->size;
5339 i = i_byte = 0;
5340 while (i < len)
5341 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5342 }
5343 else if (INTEGERP (components))
5344 {
5345 len = 1;
5346 buf[4] = XINT (components);
5347 }
5348 else if (CONSP (components))
5349 {
5350 for (len = 0; CONSP (components);
5351 len++, components = XCDR (components))
5352 buf[4 + len] = XINT (XCAR (components));
5353 }
5354 else
5355 abort ();
5356 buf[0] = 4 + len;
5357 }
5358 return (buf + buf[0]);
5359 }
5360
5361 #define CHARBUF_SIZE 0x4000
5362
5363 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5364 do { \
5365 int size = CHARBUF_SIZE;; \
5366 \
5367 coding->charbuf = NULL; \
5368 while (size > 1024) \
5369 { \
5370 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5371 if (coding->charbuf) \
5372 break; \
5373 size >>= 1; \
5374 } \
5375 if (! coding->charbuf) \
5376 { \
5377 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5378 return coding->result; \
5379 } \
5380 coding->charbuf_size = size; \
5381 } while (0)
5382
5383
5384 static void
5385 produce_annotation (coding)
5386 struct coding_system *coding;
5387 {
5388 int *charbuf = coding->charbuf;
5389 int *charbuf_end = charbuf + coding->charbuf_used;
5390
5391 while (charbuf < charbuf_end)
5392 {
5393 if (*charbuf >= 0)
5394 charbuf++;
5395 else
5396 {
5397 int len = -*charbuf;
5398 switch (charbuf[2])
5399 {
5400 case CODING_ANNOTATE_COMPOSITION_MASK:
5401 produce_composition (coding, charbuf);
5402 break;
5403 default:
5404 abort ();
5405 }
5406 charbuf += len;
5407 }
5408 }
5409 }
5410
5411 /* Decode the data at CODING->src_object into CODING->dst_object.
5412 CODING->src_object is a buffer, a string, or nil.
5413 CODING->dst_object is a buffer.
5414
5415 If CODING->src_object is a buffer, it must be the current buffer.
5416 In this case, if CODING->src_pos is positive, it is a position of
5417 the source text in the buffer, otherwise, the source text is in the
5418 gap area of the buffer, and CODING->src_pos specifies the offset of
5419 the text from GPT (which must be the same as PT). If this is the
5420 same buffer as CODING->dst_object, CODING->src_pos must be
5421 negative.
5422
5423 If CODING->src_object is a string, CODING->src_pos in an index to
5424 that string.
5425
5426 If CODING->src_object is nil, CODING->source must already point to
5427 the non-relocatable memory area. In this case, CODING->src_pos is
5428 an offset from CODING->source.
5429
5430 The decoded data is inserted at the current point of the buffer
5431 CODING->dst_object.
5432 */
5433
5434 static int
5435 decode_coding (coding)
5436 struct coding_system *coding;
5437 {
5438 Lisp_Object attrs;
5439
5440 if (BUFFERP (coding->src_object)
5441 && coding->src_pos > 0
5442 && coding->src_pos < GPT
5443 && coding->src_pos + coding->src_chars > GPT)
5444 move_gap_both (coding->src_pos, coding->src_pos_byte);
5445
5446 if (BUFFERP (coding->dst_object))
5447 {
5448 if (current_buffer != XBUFFER (coding->dst_object))
5449 set_buffer_internal (XBUFFER (coding->dst_object));
5450 if (GPT != PT)
5451 move_gap_both (PT, PT_BYTE);
5452 }
5453
5454 coding->consumed = coding->consumed_char = 0;
5455 coding->produced = coding->produced_char = 0;
5456 coding->chars_at_source = 0;
5457 coding->result = CODING_RESULT_SUCCESS;
5458 coding->errors = 0;
5459
5460 ALLOC_CONVERSION_WORK_AREA (coding);
5461
5462 attrs = CODING_ID_ATTRS (coding->id);
5463
5464 do
5465 {
5466 coding_set_source (coding);
5467 coding->annotated = 0;
5468 (*(coding->decoder)) (coding);
5469 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5470 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5471 coding_set_destination (coding);
5472 produce_chars (coding);
5473 if (coding->annotated)
5474 produce_annotation (coding);
5475 }
5476 while (coding->consumed < coding->src_bytes
5477 && ! coding->result);
5478
5479 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5480 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5481 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5482 decode_eol (coding);
5483
5484 coding->carryover_bytes = 0;
5485 if (coding->consumed < coding->src_bytes)
5486 {
5487 int nbytes = coding->src_bytes - coding->consumed;
5488 unsigned char *src;
5489
5490 coding_set_source (coding);
5491 coding_set_destination (coding);
5492 src = coding->source + coding->consumed;
5493
5494 if (coding->mode & CODING_MODE_LAST_BLOCK)
5495 {
5496 /* Flush out unprocessed data as binary chars. We are sure
5497 that the number of data is less than the size of
5498 coding->charbuf. */
5499 int *charbuf = coding->charbuf;
5500
5501 while (nbytes-- > 0)
5502 {
5503 int c = *src++;
5504 *charbuf++ = (c & 0x80 ? - c : c);
5505 }
5506 produce_chars (coding);
5507 }
5508 else
5509 {
5510 /* Record unprocessed bytes in coding->carryover. We are
5511 sure that the number of data is less than the size of
5512 coding->carryover. */
5513 unsigned char *p = coding->carryover;
5514
5515 coding->carryover_bytes = nbytes;
5516 while (nbytes-- > 0)
5517 *p++ = *src++;
5518 }
5519 coding->consumed = coding->src_bytes;
5520 }
5521
5522 return coding->result;
5523 }
5524
5525 static void
5526 consume_chars (coding)
5527 struct coding_system *coding;
5528 {
5529 int *buf = coding->charbuf;
5530 /* -1 is to compensate for CRLF. */
5531 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5532 unsigned char *src = coding->source + coding->consumed;
5533 int pos = coding->src_pos + coding->consumed_char;
5534 int end_pos = coding->src_pos + coding->src_chars;
5535 int multibytep = coding->src_multibyte;
5536 Lisp_Object eol_type;
5537 int c;
5538 int start, end, stop;
5539 Lisp_Object object, prop;
5540
5541 eol_type = CODING_ID_EOL_TYPE (coding->id);
5542 if (VECTORP (eol_type))
5543 eol_type = Qunix;
5544
5545 object = coding->src_object;
5546
5547 /* Note: composition handling is not yet implemented. */
5548 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5549
5550 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5551 && find_composition (pos, end_pos, &start, &end, &prop, object)
5552 && end <= end_pos
5553 && (start >= pos
5554 || (find_composition (end, end_pos, &start, &end, &prop, object)
5555 && end <= end_pos)))
5556 stop = start;
5557 else
5558 stop = end_pos;
5559
5560 while (buf < buf_end)
5561 {
5562 if (pos == stop)
5563 {
5564 int *p;
5565
5566 if (pos == end_pos)
5567 break;
5568 p = save_composition_data (buf, buf_end, prop);
5569 if (p == NULL)
5570 break;
5571 buf = p;
5572 if (find_composition (end, end_pos, &start, &end, &prop, object)
5573 && end <= end_pos)
5574 stop = start;
5575 else
5576 stop = end_pos;
5577 }
5578
5579 if (! multibytep)
5580 c = *src++;
5581 else
5582 c = STRING_CHAR_ADVANCE (src);
5583 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5584 c = '\n';
5585 if (! EQ (eol_type, Qunix))
5586 {
5587 if (c == '\n')
5588 {
5589 if (EQ (eol_type, Qdos))
5590 *buf++ = '\r';
5591 else
5592 c = '\r';
5593 }
5594 }
5595 *buf++ = c;
5596 pos++;
5597 }
5598
5599 coding->consumed = src - coding->source;
5600 coding->consumed_char = pos - coding->src_pos;
5601 coding->charbuf_used = buf - coding->charbuf;
5602 coding->chars_at_source = 0;
5603 }
5604
5605
5606 /* Encode the text at CODING->src_object into CODING->dst_object.
5607 CODING->src_object is a buffer or a string.
5608 CODING->dst_object is a buffer or nil.
5609
5610 If CODING->src_object is a buffer, it must be the current buffer.
5611 In this case, if CODING->src_pos is positive, it is a position of
5612 the source text in the buffer, otherwise. the source text is in the
5613 gap area of the buffer, and coding->src_pos specifies the offset of
5614 the text from GPT (which must be the same as PT). If this is the
5615 same buffer as CODING->dst_object, CODING->src_pos must be
5616 negative and CODING should not have `pre-write-conversion'.
5617
5618 If CODING->src_object is a string, CODING should not have
5619 `pre-write-conversion'.
5620
5621 If CODING->dst_object is a buffer, the encoded data is inserted at
5622 the current point of that buffer.
5623
5624 If CODING->dst_object is nil, the encoded data is placed at the
5625 memory area specified by CODING->destination. */
5626
5627 static int
5628 encode_coding (coding)
5629 struct coding_system *coding;
5630 {
5631 Lisp_Object attrs;
5632
5633 attrs = CODING_ID_ATTRS (coding->id);
5634
5635 if (BUFFERP (coding->dst_object))
5636 {
5637 set_buffer_internal (XBUFFER (coding->dst_object));
5638 coding->dst_multibyte
5639 = ! NILP (current_buffer->enable_multibyte_characters);
5640 }
5641
5642 coding->consumed = coding->consumed_char = 0;
5643 coding->produced = coding->produced_char = 0;
5644 coding->result = CODING_RESULT_SUCCESS;
5645 coding->errors = 0;
5646
5647 ALLOC_CONVERSION_WORK_AREA (coding);
5648
5649 do {
5650 coding_set_source (coding);
5651 consume_chars (coding);
5652
5653 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5654 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5655
5656 coding_set_destination (coding);
5657 (*(coding->encoder)) (coding);
5658 } while (coding->consumed_char < coding->src_chars);
5659
5660 if (BUFFERP (coding->dst_object))
5661 insert_from_gap (coding->produced_char, coding->produced);
5662
5663 return (coding->result);
5664 }
5665
5666 /* Work buffer */
5667
5668 /* List of currently used working buffer. */
5669 Lisp_Object Vcode_conversion_work_buf_list;
5670
5671 /* A working buffer used by the top level conversion. */
5672 Lisp_Object Vcode_conversion_reused_work_buf;
5673
5674
5675 /* Return a working buffer that can be freely used by the following
5676 code conversion. MULTIBYTEP specifies the multibyteness of the
5677 buffer. */
5678
5679 Lisp_Object
5680 make_conversion_work_buffer (multibytep)
5681 int multibytep;
5682 {
5683 struct buffer *current = current_buffer;
5684 Lisp_Object buf;
5685
5686 if (NILP (Vcode_conversion_work_buf_list))
5687 {
5688 if (NILP (Vcode_conversion_reused_work_buf))
5689 Vcode_conversion_reused_work_buf
5690 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5691 Vcode_conversion_work_buf_list
5692 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
5693 }
5694 else
5695 {
5696 int depth = Flength (Vcode_conversion_work_buf_list);
5697 char str[128];
5698
5699 sprintf (str, " *code-conversion-work*<%d>", depth);
5700 Vcode_conversion_work_buf_list
5701 = Fcons (Fget_buffer_create (build_string (str)),
5702 Vcode_conversion_work_buf_list);
5703 }
5704
5705 buf = XCAR (Vcode_conversion_work_buf_list);
5706 set_buffer_internal (XBUFFER (buf));
5707 current_buffer->undo_list = Qt;
5708 Ferase_buffer ();
5709 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5710 set_buffer_internal (current);
5711 return buf;
5712 }
5713
5714 static struct coding_system *saved_coding;
5715
5716 Lisp_Object
5717 code_conversion_restore (info)
5718 Lisp_Object info;
5719 {
5720 int depth = Flength (Vcode_conversion_work_buf_list);
5721 Lisp_Object buf;
5722
5723 if (depth > 0)
5724 {
5725 buf = XCAR (Vcode_conversion_work_buf_list);
5726 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5727 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5728 Fkill_buffer (buf);
5729 }
5730
5731 if (saved_coding->dst_object == Qt
5732 && saved_coding->destination)
5733 xfree (saved_coding->destination);
5734
5735 return save_excursion_restore (info);
5736 }
5737
5738
5739 int
5740 decode_coding_gap (coding, chars, bytes)
5741 struct coding_system *coding;
5742 EMACS_INT chars, bytes;
5743 {
5744 int count = specpdl_ptr - specpdl;
5745
5746 saved_coding = coding;
5747 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5748
5749 coding->src_object = Fcurrent_buffer ();
5750 coding->src_chars = chars;
5751 coding->src_bytes = bytes;
5752 coding->src_pos = -chars;
5753 coding->src_pos_byte = -bytes;
5754 coding->src_multibyte = chars < bytes;
5755 coding->dst_object = coding->src_object;
5756 coding->dst_pos = PT;
5757 coding->dst_pos_byte = PT_BYTE;
5758 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
5759
5760 if (CODING_REQUIRE_DETECTION (coding))
5761 detect_coding (coding);
5762
5763 decode_coding (coding);
5764
5765 unbind_to (count, Qnil);
5766 return coding->result;
5767 }
5768
5769 int
5770 encode_coding_gap (coding, chars, bytes)
5771 struct coding_system *coding;
5772 EMACS_INT chars, bytes;
5773 {
5774 int count = specpdl_ptr - specpdl;
5775 Lisp_Object buffer;
5776
5777 saved_coding = coding;
5778 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5779
5780 buffer = Fcurrent_buffer ();
5781 coding->src_object = buffer;
5782 coding->src_chars = chars;
5783 coding->src_bytes = bytes;
5784 coding->src_pos = -chars;
5785 coding->src_pos_byte = -bytes;
5786 coding->src_multibyte = chars < bytes;
5787 coding->dst_object = coding->src_object;
5788 coding->dst_pos = PT;
5789 coding->dst_pos_byte = PT_BYTE;
5790
5791 encode_coding (coding);
5792
5793 unbind_to (count, Qnil);
5794 return coding->result;
5795 }
5796
5797
5798 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5799 SRC_OBJECT into DST_OBJECT by coding context CODING.
5800
5801 SRC_OBJECT is a buffer, a string, or Qnil.
5802
5803 If it is a buffer, the text is at point of the buffer. FROM and TO
5804 are positions in the buffer.
5805
5806 If it is a string, the text is at the beginning of the string.
5807 FROM and TO are indices to the string.
5808
5809 If it is nil, the text is at coding->source. FROM and TO are
5810 indices to coding->source.
5811
5812 DST_OBJECT is a buffer, Qt, or Qnil.
5813
5814 If it is a buffer, the decoded text is inserted at point of the
5815 buffer. If the buffer is the same as SRC_OBJECT, the source text
5816 is deleted.
5817
5818 If it is Qt, a string is made from the decoded text, and
5819 set in CODING->dst_object.
5820
5821 If it is Qnil, the decoded text is stored at CODING->destination.
5822 The called must allocate CODING->dst_bytes bytes at
5823 CODING->destination by xmalloc. If the decoded text is longer than
5824 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5825 */
5826
5827 void
5828 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5829 dst_object)
5830 struct coding_system *coding;
5831 Lisp_Object src_object;
5832 EMACS_INT from, from_byte, to, to_byte;
5833 Lisp_Object dst_object;
5834 {
5835 int count = specpdl_ptr - specpdl;
5836 unsigned char *destination;
5837 EMACS_INT dst_bytes;
5838 EMACS_INT chars = to - from;
5839 EMACS_INT bytes = to_byte - from_byte;
5840 Lisp_Object attrs;
5841
5842 saved_coding = coding;
5843 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5844
5845 if (NILP (dst_object))
5846 {
5847 destination = coding->destination;
5848 dst_bytes = coding->dst_bytes;
5849 }
5850
5851 coding->src_object = src_object;
5852 coding->src_chars = chars;
5853 coding->src_bytes = bytes;
5854 coding->src_multibyte = chars < bytes;
5855
5856 if (STRINGP (src_object))
5857 {
5858 coding->src_pos = from;
5859 coding->src_pos_byte = from_byte;
5860 }
5861 else if (BUFFERP (src_object))
5862 {
5863 set_buffer_internal (XBUFFER (src_object));
5864 if (from != GPT)
5865 move_gap_both (from, from_byte);
5866 if (EQ (src_object, dst_object))
5867 {
5868 TEMP_SET_PT_BOTH (from, from_byte);
5869 del_range_both (from, from_byte, to, to_byte, 1);
5870 coding->src_pos = -chars;
5871 coding->src_pos_byte = -bytes;
5872 }
5873 else
5874 {
5875 coding->src_pos = from;
5876 coding->src_pos_byte = from_byte;
5877 }
5878 }
5879
5880 if (CODING_REQUIRE_DETECTION (coding))
5881 detect_coding (coding);
5882 attrs = CODING_ID_ATTRS (coding->id);
5883
5884 if (! NILP (CODING_ATTR_POST_READ (attrs))
5885 || EQ (dst_object, Qt))
5886 {
5887 coding->dst_object = make_conversion_work_buffer (1);
5888 coding->dst_pos = BEG;
5889 coding->dst_pos_byte = BEG_BYTE;
5890 coding->dst_multibyte = 1;
5891 }
5892 else if (BUFFERP (dst_object))
5893 {
5894 coding->dst_object = dst_object;
5895 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5896 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5897 coding->dst_multibyte
5898 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
5899 }
5900 else
5901 {
5902 coding->dst_object = Qnil;
5903 coding->dst_multibyte = 1;
5904 }
5905
5906 decode_coding (coding);
5907
5908 if (BUFFERP (coding->dst_object))
5909 set_buffer_internal (XBUFFER (coding->dst_object));
5910
5911 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5912 {
5913 struct gcpro gcpro1, gcpro2;
5914 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
5915 Lisp_Object val;
5916
5917 GCPRO2 (coding->src_object, coding->dst_object);
5918 val = call1 (CODING_ATTR_POST_READ (attrs),
5919 make_number (coding->produced_char));
5920 UNGCPRO;
5921 CHECK_NATNUM (val);
5922 coding->produced_char += Z - prev_Z;
5923 coding->produced += Z_BYTE - prev_Z_BYTE;
5924 }
5925
5926 if (EQ (dst_object, Qt))
5927 {
5928 coding->dst_object = Fbuffer_string ();
5929 }
5930 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
5931 {
5932 set_buffer_internal (XBUFFER (coding->dst_object));
5933 if (dst_bytes < coding->produced)
5934 {
5935 destination
5936 = (unsigned char *) xrealloc (destination, coding->produced);
5937 if (! destination)
5938 {
5939 coding->result = CODING_RESULT_INSUFFICIENT_DST;
5940 unbind_to (count, Qnil);
5941 return;
5942 }
5943 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
5944 move_gap_both (BEGV, BEGV_BYTE);
5945 bcopy (BEGV_ADDR, destination, coding->produced);
5946 coding->destination = destination;
5947 }
5948 }
5949
5950 unbind_to (count, Qnil);
5951 }
5952
5953
5954 void
5955 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5956 dst_object)
5957 struct coding_system *coding;
5958 Lisp_Object src_object;
5959 EMACS_INT from, from_byte, to, to_byte;
5960 Lisp_Object dst_object;
5961 {
5962 int count = specpdl_ptr - specpdl;
5963 EMACS_INT chars = to - from;
5964 EMACS_INT bytes = to_byte - from_byte;
5965 Lisp_Object attrs;
5966
5967 saved_coding = coding;
5968 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5969
5970 coding->src_object = src_object;
5971 coding->src_chars = chars;
5972 coding->src_bytes = bytes;
5973 coding->src_multibyte = chars < bytes;
5974
5975 attrs = CODING_ID_ATTRS (coding->id);
5976
5977 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5978 {
5979 Lisp_Object val;
5980
5981 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
5982 set_buffer_internal (XBUFFER (coding->src_object));
5983 if (STRINGP (src_object))
5984 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
5985 else if (BUFFERP (src_object))
5986 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
5987 else
5988 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
5989
5990 if (EQ (src_object, dst_object))
5991 {
5992 set_buffer_internal (XBUFFER (src_object));
5993 del_range_both (from, from_byte, to, to_byte, 1);
5994 set_buffer_internal (XBUFFER (coding->src_object));
5995 }
5996
5997 val = call2 (CODING_ATTR_PRE_WRITE (attrs),
5998 make_number (1), make_number (chars));
5999 CHECK_NATNUM (val);
6000 if (BEG != GPT)
6001 move_gap_both (BEG, BEG_BYTE);
6002 coding->src_chars = Z - BEG;
6003 coding->src_bytes = Z_BYTE - BEG_BYTE;
6004 coding->src_pos = BEG;
6005 coding->src_pos_byte = BEG_BYTE;
6006 coding->src_multibyte = Z < Z_BYTE;
6007 }
6008 else if (STRINGP (src_object))
6009 {
6010 coding->src_pos = from;
6011 coding->src_pos_byte = from_byte;
6012 }
6013 else if (BUFFERP (src_object))
6014 {
6015 set_buffer_internal (XBUFFER (src_object));
6016 if (from != GPT)
6017 move_gap_both (from, from_byte);
6018 if (EQ (src_object, dst_object))
6019 {
6020 del_range_both (from, from_byte, to, to_byte, 1);
6021 coding->src_pos = -chars;
6022 coding->src_pos_byte = -bytes;
6023 }
6024 else
6025 {
6026 coding->src_pos = from;
6027 coding->src_pos_byte = from_byte;
6028 }
6029 }
6030
6031 if (BUFFERP (dst_object))
6032 {
6033 coding->dst_object = dst_object;
6034 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6035 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6036 coding->dst_multibyte
6037 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6038 }
6039 else if (EQ (dst_object, Qt))
6040 {
6041 coding->dst_object = Qnil;
6042 coding->destination = (unsigned char *) xmalloc (coding->src_chars);
6043 coding->dst_bytes = coding->src_chars;
6044 coding->dst_multibyte = 0;
6045 }
6046 else
6047 {
6048 coding->dst_object = Qnil;
6049 coding->dst_multibyte = 0;
6050 }
6051
6052 encode_coding (coding);
6053
6054 if (EQ (dst_object, Qt))
6055 {
6056 if (BUFFERP (coding->dst_object))
6057 coding->dst_object = Fbuffer_string ();
6058 else
6059 {
6060 coding->dst_object
6061 = make_unibyte_string ((char *) coding->destination,
6062 coding->produced);
6063 xfree (coding->destination);
6064 }
6065 }
6066
6067 unbind_to (count, Qnil);
6068 }
6069
6070
6071 Lisp_Object
6072 preferred_coding_system ()
6073 {
6074 int id = coding_categories[coding_priorities[0]].id;
6075
6076 return CODING_ID_NAME (id);
6077 }
6078
6079 \f
6080 #ifdef emacs
6081 /*** 8. Emacs Lisp library functions ***/
6082
6083 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6084 doc: /* Return t if OBJECT is nil or a coding-system.
6085 See the documentation of `define-coding-system' for information
6086 about coding-system objects. */)
6087 (obj)
6088 Lisp_Object obj;
6089 {
6090 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6091 }
6092
6093 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6094 Sread_non_nil_coding_system, 1, 1, 0,
6095 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6096 (prompt)
6097 Lisp_Object prompt;
6098 {
6099 Lisp_Object val;
6100 do
6101 {
6102 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6103 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6104 }
6105 while (XSTRING (val)->size == 0);
6106 return (Fintern (val, Qnil));
6107 }
6108
6109 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6110 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6111 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6112 (prompt, default_coding_system)
6113 Lisp_Object prompt, default_coding_system;
6114 {
6115 Lisp_Object val;
6116 if (SYMBOLP (default_coding_system))
6117 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6118 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6119 Qt, Qnil, Qcoding_system_history,
6120 default_coding_system, Qnil);
6121 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6122 }
6123
6124 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6125 1, 1, 0,
6126 doc: /* Check validity of CODING-SYSTEM.
6127 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6128 It is valid if it is a symbol with a non-nil `coding-system' property.
6129 The value of property should be a vector of length 5. */)
6130 (coding_system)
6131 Lisp_Object coding_system;
6132 {
6133 CHECK_SYMBOL (coding_system);
6134 if (!NILP (Fcoding_system_p (coding_system)))
6135 return coding_system;
6136 while (1)
6137 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6138 }
6139
6140 \f
6141 Lisp_Object
6142 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6143 unsigned char *src;
6144 int src_bytes, highest;
6145 int multibytep;
6146 Lisp_Object coding_system;
6147 {
6148 unsigned char *src_end = src + src_bytes;
6149 int mask = CATEGORY_MASK_ANY;
6150 int detected = 0;
6151 int c, i;
6152 Lisp_Object attrs, eol_type;
6153 Lisp_Object val;
6154 struct coding_system coding;
6155
6156 if (NILP (coding_system))
6157 coding_system = Qundecided;
6158 setup_coding_system (coding_system, &coding);
6159 attrs = CODING_ID_ATTRS (coding.id);
6160 eol_type = CODING_ID_EOL_TYPE (coding.id);
6161
6162 coding.source = src;
6163 coding.src_bytes = src_bytes;
6164 coding.src_multibyte = multibytep;
6165 coding.consumed = 0;
6166
6167 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
6168 {
6169 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6170 }
6171 else
6172 {
6173 coding_system = Qnil;
6174 for (; src < src_end; src++)
6175 {
6176 c = *src;
6177 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6178 || c == ISO_CODE_SI
6179 || c == ISO_CODE_SO)))
6180 break;
6181 }
6182 coding.head_ascii = src - coding.source;
6183
6184 if (src < src_end)
6185 for (i = 0; i < coding_category_raw_text; i++)
6186 {
6187 enum coding_category category = coding_priorities[i];
6188 struct coding_system *this = coding_categories + category;
6189
6190 if (category >= coding_category_raw_text
6191 || detected & (1 << category))
6192 continue;
6193
6194 if (this->id < 0)
6195 {
6196 /* No coding system of this category is defined. */
6197 mask &= ~(1 << category);
6198 }
6199 else
6200 {
6201 detected |= detected_mask[category];
6202 if ((*(coding_categories[category].detector)) (&coding, &mask)
6203 && highest)
6204 {
6205 mask &= detected_mask[category];
6206 break;
6207 }
6208 }
6209 }
6210 }
6211
6212 if (!mask)
6213 val = Fcons (make_number (coding_category_raw_text), Qnil);
6214 else if (mask == CATEGORY_MASK_ANY)
6215 val = Fcons (make_number (coding_category_undecided), Qnil);
6216 else if (highest)
6217 {
6218 for (i = 0; i < coding_category_raw_text; i++)
6219 if (mask & (1 << coding_priorities[i]))
6220 {
6221 val = Fcons (make_number (coding_priorities[i]), Qnil);
6222 break;
6223 }
6224 }
6225 else
6226 {
6227 val = Qnil;
6228 for (i = coding_category_raw_text - 1; i >= 0; i--)
6229 if (mask & (1 << coding_priorities[i]))
6230 val = Fcons (make_number (coding_priorities[i]), val);
6231 }
6232
6233 {
6234 int one_byte_eol = -1, two_byte_eol = -1;
6235 Lisp_Object tail;
6236
6237 for (tail = val; CONSP (tail); tail = XCDR (tail))
6238 {
6239 struct coding_system *this
6240 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6241 : &coding);
6242 int this_eol;
6243
6244 attrs = CODING_ID_ATTRS (this->id);
6245 eol_type = CODING_ID_EOL_TYPE (this->id);
6246 XSETCAR (tail, CODING_ID_NAME (this->id));
6247 if (VECTORP (eol_type))
6248 {
6249 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6250 {
6251 if (two_byte_eol < 0)
6252 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6253 this_eol = two_byte_eol;
6254 }
6255 else
6256 {
6257 if (one_byte_eol < 0)
6258 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6259 this_eol = one_byte_eol;
6260 }
6261 if (this_eol == EOL_SEEN_LF)
6262 XSETCAR (tail, AREF (eol_type, 0));
6263 else if (this_eol == EOL_SEEN_CRLF)
6264 XSETCAR (tail, AREF (eol_type, 1));
6265 else if (this_eol == EOL_SEEN_CR)
6266 XSETCAR (tail, AREF (eol_type, 2));
6267 }
6268 }
6269 }
6270
6271 return (highest ? XCAR (val) : val);
6272 }
6273
6274
6275 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6276 2, 3, 0,
6277 doc: /* Detect coding system of the text in the region between START and END.
6278 Return a list of possible coding systems ordered by priority.
6279
6280 If only ASCII characters are found, it returns a list of single element
6281 `undecided' or its subsidiary coding system according to a detected
6282 end-of-line format.
6283
6284 If optional argument HIGHEST is non-nil, return the coding system of
6285 highest priority. */)
6286 (start, end, highest)
6287 Lisp_Object start, end, highest;
6288 {
6289 int from, to;
6290 int from_byte, to_byte;
6291
6292 CHECK_NUMBER_COERCE_MARKER (start);
6293 CHECK_NUMBER_COERCE_MARKER (end);
6294
6295 validate_region (&start, &end);
6296 from = XINT (start), to = XINT (end);
6297 from_byte = CHAR_TO_BYTE (from);
6298 to_byte = CHAR_TO_BYTE (to);
6299
6300 if (from < GPT && to >= GPT)
6301 move_gap_both (to, to_byte);
6302
6303 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6304 to_byte - from_byte,
6305 !NILP (highest),
6306 !NILP (current_buffer
6307 ->enable_multibyte_characters),
6308 Qnil);
6309 }
6310
6311 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6312 1, 2, 0,
6313 doc: /* Detect coding system of the text in STRING.
6314 Return a list of possible coding systems ordered by priority.
6315
6316 If only ASCII characters are found, it returns a list of single element
6317 `undecided' or its subsidiary coding system according to a detected
6318 end-of-line format.
6319
6320 If optional argument HIGHEST is non-nil, return the coding system of
6321 highest priority. */)
6322 (string, highest)
6323 Lisp_Object string, highest;
6324 {
6325 CHECK_STRING (string);
6326
6327 return detect_coding_system (XSTRING (string)->data,
6328 STRING_BYTES (XSTRING (string)),
6329 !NILP (highest),
6330 STRING_MULTIBYTE (string),
6331 Qnil);
6332 }
6333
6334
6335 static INLINE int
6336 char_encodable_p (c, attrs)
6337 int c;
6338 Lisp_Object attrs;
6339 {
6340 Lisp_Object tail;
6341 struct charset *charset;
6342
6343 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6344 CONSP (tail); tail = XCDR (tail))
6345 {
6346 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6347 if (CHAR_CHARSET_P (c, charset))
6348 break;
6349 }
6350 return (! NILP (tail));
6351 }
6352
6353
6354 /* Return a list of coding systems that safely encode the text between
6355 START and END. If EXCLUDE is non-nil, it is a list of coding
6356 systems not to check. The returned list doesn't contain any such
6357 coding systems. In any case, If the text contains only ASCII or is
6358 unibyte, return t. */
6359
6360 DEFUN ("find-coding-systems-region-internal",
6361 Ffind_coding_systems_region_internal,
6362 Sfind_coding_systems_region_internal, 2, 3, 0,
6363 doc: /* Internal use only. */)
6364 (start, end, exclude)
6365 Lisp_Object start, end, exclude;
6366 {
6367 Lisp_Object coding_attrs_list, safe_codings;
6368 EMACS_INT start_byte, end_byte;
6369 unsigned char *p, *pbeg, *pend;
6370 int c;
6371 Lisp_Object tail, elt;
6372
6373 if (STRINGP (start))
6374 {
6375 if (!STRING_MULTIBYTE (start)
6376 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6377 return Qt;
6378 start_byte = 0;
6379 end_byte = STRING_BYTES (XSTRING (start));
6380 }
6381 else
6382 {
6383 CHECK_NUMBER_COERCE_MARKER (start);
6384 CHECK_NUMBER_COERCE_MARKER (end);
6385 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6386 args_out_of_range (start, end);
6387 if (NILP (current_buffer->enable_multibyte_characters))
6388 return Qt;
6389 start_byte = CHAR_TO_BYTE (XINT (start));
6390 end_byte = CHAR_TO_BYTE (XINT (end));
6391 if (XINT (end) - XINT (start) == end_byte - start_byte)
6392 return Qt;
6393
6394 if (start < GPT && end > GPT)
6395 {
6396 if ((GPT - start) < (end - GPT))
6397 move_gap_both (start, start_byte);
6398 else
6399 move_gap_both (end, end_byte);
6400 }
6401 }
6402
6403 coding_attrs_list = Qnil;
6404 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6405 if (NILP (exclude)
6406 || NILP (Fmemq (XCAR (tail), exclude)))
6407 {
6408 Lisp_Object attrs;
6409
6410 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6411 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6412 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6413 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6414 }
6415
6416 if (STRINGP (start))
6417 p = pbeg = XSTRING (start)->data;
6418 else
6419 p = pbeg = BYTE_POS_ADDR (start_byte);
6420 pend = p + (end_byte - start_byte);
6421
6422 while (p < pend && ASCII_BYTE_P (*p)) p++;
6423 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6424
6425 while (p < pend)
6426 {
6427 if (ASCII_BYTE_P (*p))
6428 p++;
6429 else
6430 {
6431 c = STRING_CHAR_ADVANCE (p);
6432
6433 charset_map_loaded = 0;
6434 for (tail = coding_attrs_list; CONSP (tail);)
6435 {
6436 elt = XCAR (tail);
6437 if (NILP (elt))
6438 tail = XCDR (tail);
6439 else if (char_encodable_p (c, elt))
6440 tail = XCDR (tail);
6441 else if (CONSP (XCDR (tail)))
6442 {
6443 XSETCAR (tail, XCAR (XCDR (tail)));
6444 XSETCDR (tail, XCDR (XCDR (tail)));
6445 }
6446 else
6447 {
6448 XSETCAR (tail, Qnil);
6449 tail = XCDR (tail);
6450 }
6451 }
6452 if (charset_map_loaded)
6453 {
6454 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6455
6456 if (STRINGP (start))
6457 pbeg = XSTRING (start)->data;
6458 else
6459 pbeg = BYTE_POS_ADDR (start_byte);
6460 p = pbeg + p_offset;
6461 pend = pbeg + pend_offset;
6462 }
6463 }
6464 }
6465
6466 safe_codings = Qnil;
6467 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6468 if (! NILP (XCAR (tail)))
6469 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6470
6471 return safe_codings;
6472 }
6473
6474
6475 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6476 Scheck_coding_systems_region, 3, 3, 0,
6477 doc: /* Check if the region is encodable by coding systems.
6478
6479 START and END are buffer positions specifying the region.
6480 CODING-SYSTEM-LIST is a list of coding systems to check.
6481
6482 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6483 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6484 whole region, POS0, POS1, ... are buffer positions where non-encodable
6485 characters are found.
6486
6487 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6488 value is nil.
6489
6490 START may be a string. In that case, check if the string is
6491 encodable, and the value contains indices to the string instead of
6492 buffer positions. END is ignored. */)
6493 (start, end, coding_system_list)
6494 Lisp_Object start, end, coding_system_list;
6495 {
6496 Lisp_Object list;
6497 EMACS_INT start_byte, end_byte;
6498 int pos;
6499 unsigned char *p, *pbeg, *pend;
6500 int c;
6501 Lisp_Object tail, elt;
6502
6503 if (STRINGP (start))
6504 {
6505 if (!STRING_MULTIBYTE (start)
6506 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6507 return Qnil;
6508 start_byte = 0;
6509 end_byte = STRING_BYTES (XSTRING (start));
6510 pos = 0;
6511 }
6512 else
6513 {
6514 CHECK_NUMBER_COERCE_MARKER (start);
6515 CHECK_NUMBER_COERCE_MARKER (end);
6516 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6517 args_out_of_range (start, end);
6518 if (NILP (current_buffer->enable_multibyte_characters))
6519 return Qnil;
6520 start_byte = CHAR_TO_BYTE (XINT (start));
6521 end_byte = CHAR_TO_BYTE (XINT (end));
6522 if (XINT (end) - XINT (start) == end_byte - start_byte)
6523 return Qt;
6524
6525 if (start < GPT && end > GPT)
6526 {
6527 if ((GPT - start) < (end - GPT))
6528 move_gap_both (start, start_byte);
6529 else
6530 move_gap_both (end, end_byte);
6531 }
6532 pos = start;
6533 }
6534
6535 list = Qnil;
6536 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6537 {
6538 elt = XCAR (tail);
6539 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6540 Qnil)),
6541 list);
6542 }
6543
6544 if (STRINGP (start))
6545 p = pbeg = XSTRING (start)->data;
6546 else
6547 p = pbeg = BYTE_POS_ADDR (start_byte);
6548 pend = p + (end_byte - start_byte);
6549
6550 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6551 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6552
6553 while (p < pend)
6554 {
6555 if (ASCII_BYTE_P (*p))
6556 p++;
6557 else
6558 {
6559 c = STRING_CHAR_ADVANCE (p);
6560
6561 charset_map_loaded = 0;
6562 for (tail = list; CONSP (tail); tail = XCDR (tail))
6563 {
6564 elt = XCDR (XCAR (tail));
6565 if (! char_encodable_p (c, XCAR (elt)))
6566 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6567 }
6568 if (charset_map_loaded)
6569 {
6570 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6571
6572 if (STRINGP (start))
6573 pbeg = XSTRING (start)->data;
6574 else
6575 pbeg = BYTE_POS_ADDR (start_byte);
6576 p = pbeg + p_offset;
6577 pend = pbeg + pend_offset;
6578 }
6579 }
6580 pos++;
6581 }
6582
6583 tail = list;
6584 list = Qnil;
6585 for (; CONSP (tail); tail = XCDR (tail))
6586 {
6587 elt = XCAR (tail);
6588 if (CONSP (XCDR (XCDR (elt))))
6589 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6590 list);
6591 }
6592
6593 return list;
6594 }
6595
6596
6597
6598 Lisp_Object
6599 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6600 Lisp_Object start, end, coding_system, dst_object;
6601 int encodep, norecord;
6602 {
6603 struct coding_system coding;
6604 EMACS_INT from, from_byte, to, to_byte;
6605 Lisp_Object src_object;
6606
6607 CHECK_NUMBER_COERCE_MARKER (start);
6608 CHECK_NUMBER_COERCE_MARKER (end);
6609 if (NILP (coding_system))
6610 coding_system = Qno_conversion;
6611 else
6612 CHECK_CODING_SYSTEM (coding_system);
6613 src_object = Fcurrent_buffer ();
6614 if (NILP (dst_object))
6615 dst_object = src_object;
6616 else if (! EQ (dst_object, Qt))
6617 CHECK_BUFFER (dst_object);
6618
6619 validate_region (&start, &end);
6620 from = XFASTINT (start);
6621 from_byte = CHAR_TO_BYTE (from);
6622 to = XFASTINT (end);
6623 to_byte = CHAR_TO_BYTE (to);
6624
6625 setup_coding_system (coding_system, &coding);
6626 coding.mode |= CODING_MODE_LAST_BLOCK;
6627
6628 if (encodep)
6629 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6630 dst_object);
6631 else
6632 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6633 dst_object);
6634 if (! norecord)
6635 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6636
6637 if (coding.result != CODING_RESULT_SUCCESS)
6638 error ("Code conversion error: %d", coding.result);
6639
6640 return (BUFFERP (dst_object)
6641 ? make_number (coding.produced_char)
6642 : coding.dst_object);
6643 }
6644
6645
6646 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6647 3, 4, "r\nzCoding system: ",
6648 doc: /* Decode the current region from the specified coding system.
6649 When called from a program, takes four arguments:
6650 START, END, CODING-SYSTEM, and DESTINATION.
6651 START and END are buffer positions.
6652
6653 Optional 4th arguments DESTINATION specifies where the decoded text goes.
6654 If nil, the region between START and END is replace by the decoded text.
6655 If buffer, the decoded text is inserted in the buffer.
6656 If t, the decoded text is returned.
6657
6658 This function sets `last-coding-system-used' to the precise coding system
6659 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6660 not fully specified.)
6661 It returns the length of the decoded text. */)
6662 (start, end, coding_system, destination)
6663 Lisp_Object start, end, coding_system, destination;
6664 {
6665 return code_convert_region (start, end, coding_system, destination, 0, 0);
6666 }
6667
6668 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6669 3, 4, "r\nzCoding system: ",
6670 doc: /* Encode the current region by specified coding system.
6671 When called from a program, takes three arguments:
6672 START, END, and CODING-SYSTEM. START and END are buffer positions.
6673
6674 Optional 4th arguments DESTINATION specifies where the encoded text goes.
6675 If nil, the region between START and END is replace by the encoded text.
6676 If buffer, the encoded text is inserted in the buffer.
6677 If t, the encoded text is returned.
6678
6679 This function sets `last-coding-system-used' to the precise coding system
6680 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6681 not fully specified.)
6682 It returns the length of the encoded text. */)
6683 (start, end, coding_system, destination)
6684 Lisp_Object start, end, coding_system, destination;
6685 {
6686 return code_convert_region (start, end, coding_system, destination, 1, 0);
6687 }
6688
6689 Lisp_Object
6690 code_convert_string (string, coding_system, dst_object,
6691 encodep, nocopy, norecord)
6692 Lisp_Object string, coding_system, dst_object;
6693 int encodep, nocopy, norecord;
6694 {
6695 struct coding_system coding;
6696 EMACS_INT chars, bytes;
6697
6698 CHECK_STRING (string);
6699 if (NILP (coding_system))
6700 {
6701 if (! norecord)
6702 Vlast_coding_system_used = Qno_conversion;
6703 if (NILP (dst_object))
6704 return (nocopy ? Fcopy_sequence (string) : string);
6705 }
6706
6707 if (NILP (coding_system))
6708 coding_system = Qno_conversion;
6709 else
6710 CHECK_CODING_SYSTEM (coding_system);
6711 if (NILP (dst_object))
6712 dst_object = Qt;
6713 else if (! EQ (dst_object, Qt))
6714 CHECK_BUFFER (dst_object);
6715
6716 setup_coding_system (coding_system, &coding);
6717 coding.mode |= CODING_MODE_LAST_BLOCK;
6718 chars = XSTRING (string)->size;
6719 bytes = STRING_BYTES (XSTRING (string));
6720 if (encodep)
6721 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6722 else
6723 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6724 if (! norecord)
6725 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6726
6727 if (coding.result != CODING_RESULT_SUCCESS)
6728 error ("Code conversion error: %d", coding.result);
6729
6730 return (BUFFERP (dst_object)
6731 ? make_number (coding.produced_char)
6732 : coding.dst_object);
6733 }
6734
6735
6736 /* Encode or decode STRING according to CODING_SYSTEM.
6737 Do not set Vlast_coding_system_used.
6738
6739 This function is called only from macros DECODE_FILE and
6740 ENCODE_FILE, thus we ignore character composition. */
6741
6742 Lisp_Object
6743 code_convert_string_norecord (string, coding_system, encodep)
6744 Lisp_Object string, coding_system;
6745 int encodep;
6746 {
6747 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
6748 }
6749
6750
6751 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6752 2, 4, 0,
6753 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6754
6755 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6756 if the decoding operation is trivial.
6757
6758 Optional fourth arg BUFFER non-nil meant that the decoded text is
6759 inserted in BUFFER instead of returned as a astring. In this case,
6760 the return value is BUFFER.
6761
6762 This function sets `last-coding-system-used' to the precise coding system
6763 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6764 not fully specified. */)
6765 (string, coding_system, nocopy, buffer)
6766 Lisp_Object string, coding_system, nocopy, buffer;
6767 {
6768 return code_convert_string (string, coding_system, buffer,
6769 0, ! NILP (nocopy), 0);
6770 }
6771
6772 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6773 2, 4, 0,
6774 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6775
6776 Optional third arg NOCOPY non-nil means it is OK to return STRING
6777 itself if the encoding operation is trivial.
6778
6779 Optional fourth arg BUFFER non-nil meant that the encoded text is
6780 inserted in BUFFER instead of returned as a astring. In this case,
6781 the return value is BUFFER.
6782
6783 This function sets `last-coding-system-used' to the precise coding system
6784 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6785 not fully specified.) */)
6786 (string, coding_system, nocopy, buffer)
6787 Lisp_Object string, coding_system, nocopy, buffer;
6788 {
6789 return code_convert_string (string, coding_system, buffer,
6790 nocopy, ! NILP (nocopy), 1);
6791 }
6792
6793 \f
6794 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6795 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6796 Return the corresponding character. */)
6797 (code)
6798 Lisp_Object code;
6799 {
6800 Lisp_Object spec, attrs, val;
6801 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6802 int c;
6803
6804 CHECK_NATNUM (code);
6805 c = XFASTINT (code);
6806 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6807 attrs = AREF (spec, 0);
6808
6809 if (ASCII_BYTE_P (c)
6810 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6811 return code;
6812
6813 val = CODING_ATTR_CHARSET_LIST (attrs);
6814 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6815 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6816 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
6817
6818 if (c <= 0x7F)
6819 charset = charset_roman;
6820 else if (c >= 0xA0 && c < 0xDF)
6821 {
6822 charset = charset_kana;
6823 c -= 0x80;
6824 }
6825 else
6826 {
6827 int s1 = c >> 8, s2 = c & 0x7F;
6828
6829 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6830 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6831 error ("Invalid code: %d", code);
6832 SJIS_TO_JIS (c);
6833 charset = charset_kanji;
6834 }
6835 c = DECODE_CHAR (charset, c);
6836 if (c < 0)
6837 error ("Invalid code: %d", code);
6838 return make_number (c);
6839 }
6840
6841
6842 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6843 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6844 Return the corresponding code in SJIS. */)
6845 (ch)
6846 Lisp_Object ch;
6847 {
6848 Lisp_Object spec, attrs, charset_list;
6849 int c;
6850 struct charset *charset;
6851 unsigned code;
6852
6853 CHECK_CHARACTER (ch);
6854 c = XFASTINT (ch);
6855 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6856 attrs = AREF (spec, 0);
6857
6858 if (ASCII_CHAR_P (c)
6859 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6860 return ch;
6861
6862 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6863 charset = char_charset (c, charset_list, &code);
6864 if (code == CHARSET_INVALID_CODE (charset))
6865 error ("Can't encode by shift_jis encoding: %d", c);
6866 JIS_TO_SJIS (code);
6867
6868 return make_number (code);
6869 }
6870
6871 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6872 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6873 Return the corresponding character. */)
6874 (code)
6875 Lisp_Object code;
6876 {
6877 Lisp_Object spec, attrs, val;
6878 struct charset *charset_roman, *charset_big5, *charset;
6879 int c;
6880
6881 CHECK_NATNUM (code);
6882 c = XFASTINT (code);
6883 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6884 attrs = AREF (spec, 0);
6885
6886 if (ASCII_BYTE_P (c)
6887 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6888 return code;
6889
6890 val = CODING_ATTR_CHARSET_LIST (attrs);
6891 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6892 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6893
6894 if (c <= 0x7F)
6895 charset = charset_roman;
6896 else
6897 {
6898 int b1 = c >> 8, b2 = c & 0x7F;
6899 if (b1 < 0xA1 || b1 > 0xFE
6900 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6901 error ("Invalid code: %d", code);
6902 charset = charset_big5;
6903 }
6904 c = DECODE_CHAR (charset, (unsigned )c);
6905 if (c < 0)
6906 error ("Invalid code: %d", code);
6907 return make_number (c);
6908 }
6909
6910 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6911 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
6912 Return the corresponding character code in Big5. */)
6913 (ch)
6914 Lisp_Object ch;
6915 {
6916 Lisp_Object spec, attrs, charset_list;
6917 struct charset *charset;
6918 int c;
6919 unsigned code;
6920
6921 CHECK_CHARACTER (ch);
6922 c = XFASTINT (ch);
6923 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6924 attrs = AREF (spec, 0);
6925 if (ASCII_CHAR_P (c)
6926 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6927 return ch;
6928
6929 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6930 charset = char_charset (c, charset_list, &code);
6931 if (code == CHARSET_INVALID_CODE (charset))
6932 error ("Can't encode by Big5 encoding: %d", c);
6933
6934 return make_number (code);
6935 }
6936
6937 \f
6938 DEFUN ("set-terminal-coding-system-internal",
6939 Fset_terminal_coding_system_internal,
6940 Sset_terminal_coding_system_internal, 1, 1, 0,
6941 doc: /* Internal use only. */)
6942 (coding_system)
6943 {
6944 CHECK_SYMBOL (coding_system);
6945 setup_coding_system (Fcheck_coding_system (coding_system),
6946 &terminal_coding);
6947
6948 /* We had better not send unsafe characters to terminal. */
6949 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
6950 /* Characer composition should be disabled. */
6951 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6952 terminal_coding.src_multibyte = 1;
6953 terminal_coding.dst_multibyte = 0;
6954 return Qnil;
6955 }
6956
6957 DEFUN ("set-safe-terminal-coding-system-internal",
6958 Fset_safe_terminal_coding_system_internal,
6959 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
6960 doc: /* Internal use only. */)
6961 (coding_system)
6962 {
6963 CHECK_SYMBOL (coding_system);
6964 setup_coding_system (Fcheck_coding_system (coding_system),
6965 &safe_terminal_coding);
6966 /* Characer composition should be disabled. */
6967 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6968 safe_terminal_coding.src_multibyte = 1;
6969 safe_terminal_coding.dst_multibyte = 0;
6970 return Qnil;
6971 }
6972
6973 DEFUN ("terminal-coding-system",
6974 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6975 doc: /* Return coding system specified for terminal output. */)
6976 ()
6977 {
6978 return CODING_ID_NAME (terminal_coding.id);
6979 }
6980
6981 DEFUN ("set-keyboard-coding-system-internal",
6982 Fset_keyboard_coding_system_internal,
6983 Sset_keyboard_coding_system_internal, 1, 1, 0,
6984 doc: /* Internal use only. */)
6985 (coding_system)
6986 Lisp_Object coding_system;
6987 {
6988 CHECK_SYMBOL (coding_system);
6989 setup_coding_system (Fcheck_coding_system (coding_system),
6990 &keyboard_coding);
6991 /* Characer composition should be disabled. */
6992 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6993 return Qnil;
6994 }
6995
6996 DEFUN ("keyboard-coding-system",
6997 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6998 doc: /* Return coding system specified for decoding keyboard input. */)
6999 ()
7000 {
7001 return CODING_ID_NAME (keyboard_coding.id);
7002 }
7003
7004 \f
7005 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7006 Sfind_operation_coding_system, 1, MANY, 0,
7007 doc: /* Choose a coding system for an operation based on the target name.
7008 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7009 DECODING-SYSTEM is the coding system to use for decoding
7010 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7011 for encoding (in case OPERATION does encoding).
7012
7013 The first argument OPERATION specifies an I/O primitive:
7014 For file I/O, `insert-file-contents' or `write-region'.
7015 For process I/O, `call-process', `call-process-region', or `start-process'.
7016 For network I/O, `open-network-stream'.
7017
7018 The remaining arguments should be the same arguments that were passed
7019 to the primitive. Depending on which primitive, one of those arguments
7020 is selected as the TARGET. For example, if OPERATION does file I/O,
7021 whichever argument specifies the file name is TARGET.
7022
7023 TARGET has a meaning which depends on OPERATION:
7024 For file I/O, TARGET is a file name.
7025 For process I/O, TARGET is a process name.
7026 For network I/O, TARGET is a service name or a port number
7027
7028 This function looks up what specified for TARGET in,
7029 `file-coding-system-alist', `process-coding-system-alist',
7030 or `network-coding-system-alist' depending on OPERATION.
7031 They may specify a coding system, a cons of coding systems,
7032 or a function symbol to call.
7033 In the last case, we call the function with one argument,
7034 which is a list of all the arguments given to this function.
7035
7036 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7037 (nargs, args)
7038 int nargs;
7039 Lisp_Object *args;
7040 {
7041 Lisp_Object operation, target_idx, target, val;
7042 register Lisp_Object chain;
7043
7044 if (nargs < 2)
7045 error ("Too few arguments");
7046 operation = args[0];
7047 if (!SYMBOLP (operation)
7048 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7049 error ("Invalid first arguement");
7050 if (nargs < 1 + XINT (target_idx))
7051 error ("Too few arguments for operation: %s",
7052 XSYMBOL (operation)->name->data);
7053 target = args[XINT (target_idx) + 1];
7054 if (!(STRINGP (target)
7055 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7056 error ("Invalid %dth argument", XINT (target_idx) + 1);
7057
7058 chain = ((EQ (operation, Qinsert_file_contents)
7059 || EQ (operation, Qwrite_region))
7060 ? Vfile_coding_system_alist
7061 : (EQ (operation, Qopen_network_stream)
7062 ? Vnetwork_coding_system_alist
7063 : Vprocess_coding_system_alist));
7064 if (NILP (chain))
7065 return Qnil;
7066
7067 for (; CONSP (chain); chain = XCDR (chain))
7068 {
7069 Lisp_Object elt;
7070
7071 elt = XCAR (chain);
7072 if (CONSP (elt)
7073 && ((STRINGP (target)
7074 && STRINGP (XCAR (elt))
7075 && fast_string_match (XCAR (elt), target) >= 0)
7076 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7077 {
7078 val = XCDR (elt);
7079 /* Here, if VAL is both a valid coding system and a valid
7080 function symbol, we return VAL as a coding system. */
7081 if (CONSP (val))
7082 return val;
7083 if (! SYMBOLP (val))
7084 return Qnil;
7085 if (! NILP (Fcoding_system_p (val)))
7086 return Fcons (val, val);
7087 if (! NILP (Ffboundp (val)))
7088 {
7089 val = call1 (val, Flist (nargs, args));
7090 if (CONSP (val))
7091 return val;
7092 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7093 return Fcons (val, val);
7094 }
7095 return Qnil;
7096 }
7097 }
7098 return Qnil;
7099 }
7100
7101 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7102 Sset_coding_system_priority, 1, MANY, 0,
7103 doc: /* Put higher priority to coding systems of the arguments. */)
7104 (nargs, args)
7105 int nargs;
7106 Lisp_Object *args;
7107 {
7108 int i, j;
7109 int changed[coding_category_max];
7110 enum coding_category priorities[coding_category_max];
7111
7112 bzero (changed, sizeof changed);
7113
7114 for (i = j = 0; i < nargs; i++)
7115 {
7116 enum coding_category category;
7117 Lisp_Object spec, attrs;
7118
7119 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7120 attrs = AREF (spec, 0);
7121 category = XINT (CODING_ATTR_CATEGORY (attrs));
7122 if (changed[category])
7123 /* Ignore this coding system because a coding system of the
7124 same category already had a higher priority. */
7125 continue;
7126 changed[category] = 1;
7127 priorities[j++] = category;
7128 if (coding_categories[category].id >= 0
7129 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7130 setup_coding_system (args[i], &coding_categories[category]);
7131 }
7132
7133 /* Now we have decided top J priorities. Reflect the order of the
7134 original priorities to the remaining priorities. */
7135
7136 for (i = j, j = 0; i < coding_category_max; i++, j++)
7137 {
7138 while (j < coding_category_max
7139 && changed[coding_priorities[j]])
7140 j++;
7141 if (j == coding_category_max)
7142 abort ();
7143 priorities[i] = coding_priorities[j];
7144 }
7145
7146 bcopy (priorities, coding_priorities, sizeof priorities);
7147 return Qnil;
7148 }
7149
7150 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7151 Scoding_system_priority_list, 0, 1, 0,
7152 doc: /* Return a list of coding systems ordered by their priorities. */)
7153 (highestp)
7154 Lisp_Object highestp;
7155 {
7156 int i;
7157 Lisp_Object val;
7158
7159 for (i = 0, val = Qnil; i < coding_category_max; i++)
7160 {
7161 enum coding_category category = coding_priorities[i];
7162 int id = coding_categories[category].id;
7163 Lisp_Object attrs;
7164
7165 if (id < 0)
7166 continue;
7167 attrs = CODING_ID_ATTRS (id);
7168 if (! NILP (highestp))
7169 return CODING_ATTR_BASE_NAME (attrs);
7170 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7171 }
7172 return Fnreverse (val);
7173 }
7174
7175 static Lisp_Object
7176 make_subsidiaries (base)
7177 Lisp_Object base;
7178 {
7179 Lisp_Object subsidiaries;
7180 char *suffixes[] = { "-unix", "-dos", "-mac" };
7181 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7182 char *buf = (char *) alloca (base_name_len + 6);
7183 int i;
7184
7185 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7186 subsidiaries = Fmake_vector (make_number (3), Qnil);
7187 for (i = 0; i < 3; i++)
7188 {
7189 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7190 ASET (subsidiaries, i, intern (buf));
7191 }
7192 return subsidiaries;
7193 }
7194
7195
7196 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7197 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7198 doc: /* For internal use only. */)
7199 (nargs, args)
7200 int nargs;
7201 Lisp_Object *args;
7202 {
7203 Lisp_Object name;
7204 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7205 Lisp_Object attrs; /* Vector of attributes. */
7206 Lisp_Object eol_type;
7207 Lisp_Object aliases;
7208 Lisp_Object coding_type, charset_list, safe_charsets;
7209 enum coding_category category;
7210 Lisp_Object tail, val;
7211 int max_charset_id = 0;
7212 int i;
7213
7214 if (nargs < coding_arg_max)
7215 goto short_args;
7216
7217 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7218
7219 name = args[coding_arg_name];
7220 CHECK_SYMBOL (name);
7221 CODING_ATTR_BASE_NAME (attrs) = name;
7222
7223 val = args[coding_arg_mnemonic];
7224 if (! STRINGP (val))
7225 CHECK_CHARACTER (val);
7226 CODING_ATTR_MNEMONIC (attrs) = val;
7227
7228 coding_type = args[coding_arg_coding_type];
7229 CHECK_SYMBOL (coding_type);
7230 CODING_ATTR_TYPE (attrs) = coding_type;
7231
7232 charset_list = args[coding_arg_charset_list];
7233 if (SYMBOLP (charset_list))
7234 {
7235 if (EQ (charset_list, Qiso_2022))
7236 {
7237 if (! EQ (coding_type, Qiso_2022))
7238 error ("Invalid charset-list");
7239 charset_list = Viso_2022_charset_list;
7240 }
7241 else if (EQ (charset_list, Qemacs_mule))
7242 {
7243 if (! EQ (coding_type, Qemacs_mule))
7244 error ("Invalid charset-list");
7245 charset_list = Vemacs_mule_charset_list;
7246 }
7247 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7248 if (max_charset_id < XFASTINT (XCAR (tail)))
7249 max_charset_id = XFASTINT (XCAR (tail));
7250 }
7251 else
7252 {
7253 charset_list = Fcopy_sequence (charset_list);
7254 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7255 {
7256 struct charset *charset;
7257
7258 val = Fcar (tail);
7259 CHECK_CHARSET_GET_CHARSET (val, charset);
7260 if (EQ (coding_type, Qiso_2022)
7261 ? CHARSET_ISO_FINAL (charset) < 0
7262 : EQ (coding_type, Qemacs_mule)
7263 ? CHARSET_EMACS_MULE_ID (charset) < 0
7264 : 0)
7265 error ("Can't handle charset `%s'",
7266 XSYMBOL (CHARSET_NAME (charset))->name->data);
7267
7268 XCAR (tail) = make_number (charset->id);
7269 if (max_charset_id < charset->id)
7270 max_charset_id = charset->id;
7271 }
7272 }
7273 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7274
7275 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7276 make_number (255));
7277 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7278 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7279 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7280
7281 val = args[coding_arg_decode_translation_table];
7282 if (! NILP (val))
7283 CHECK_CHAR_TABLE (val);
7284 CODING_ATTR_DECODE_TBL (attrs) = val;
7285
7286 val = args[coding_arg_encode_translation_table];
7287 if (! NILP (val))
7288 CHECK_CHAR_TABLE (val);
7289 CODING_ATTR_ENCODE_TBL (attrs) = val;
7290
7291 val = args[coding_arg_post_read_conversion];
7292 CHECK_SYMBOL (val);
7293 CODING_ATTR_POST_READ (attrs) = val;
7294
7295 val = args[coding_arg_pre_write_conversion];
7296 CHECK_SYMBOL (val);
7297 CODING_ATTR_PRE_WRITE (attrs) = val;
7298
7299 val = args[coding_arg_default_char];
7300 if (NILP (val))
7301 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7302 else
7303 {
7304 CHECK_CHARACTER (val);
7305 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7306 }
7307
7308 val = args[coding_arg_plist];
7309 CHECK_LIST (val);
7310 CODING_ATTR_PLIST (attrs) = val;
7311
7312 if (EQ (coding_type, Qcharset))
7313 {
7314 val = Fmake_vector (make_number (256), Qnil);
7315
7316 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7317 {
7318 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7319
7320 for (i = charset->code_space[0]; i <= charset->code_space[1]; i++)
7321 if (NILP (AREF (val, i)))
7322 ASET (val, i, XCAR (tail));
7323 }
7324 ASET (attrs, coding_attr_charset_valids, val);
7325 category = coding_category_charset;
7326 }
7327 else if (EQ (coding_type, Qccl))
7328 {
7329 Lisp_Object valids;
7330
7331 if (nargs < coding_arg_ccl_max)
7332 goto short_args;
7333
7334 val = args[coding_arg_ccl_decoder];
7335 CHECK_CCL_PROGRAM (val);
7336 if (VECTORP (val))
7337 val = Fcopy_sequence (val);
7338 ASET (attrs, coding_attr_ccl_decoder, val);
7339
7340 val = args[coding_arg_ccl_encoder];
7341 CHECK_CCL_PROGRAM (val);
7342 if (VECTORP (val))
7343 val = Fcopy_sequence (val);
7344 ASET (attrs, coding_attr_ccl_encoder, val);
7345
7346 val = args[coding_arg_ccl_valids];
7347 valids = Fmake_string (make_number (256), make_number (0));
7348 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7349 {
7350 val = Fcar (tail);
7351 if (INTEGERP (val))
7352 ASET (valids, XINT (val), 1);
7353 else
7354 {
7355 int from, to;
7356
7357 CHECK_CONS (val);
7358 CHECK_NUMBER (XCAR (val));
7359 CHECK_NUMBER (XCDR (val));
7360 from = XINT (XCAR (val));
7361 to = XINT (XCDR (val));
7362 for (i = from; i <= to; i++)
7363 ASET (valids, i, 1);
7364 }
7365 }
7366 ASET (attrs, coding_attr_ccl_valids, valids);
7367
7368 category = coding_category_ccl;
7369 }
7370 else if (EQ (coding_type, Qutf_16))
7371 {
7372 Lisp_Object bom, endian;
7373
7374 if (nargs < coding_arg_utf16_max)
7375 goto short_args;
7376
7377 bom = args[coding_arg_utf16_bom];
7378 if (! NILP (bom) && ! EQ (bom, Qt))
7379 {
7380 CHECK_CONS (bom);
7381 CHECK_CODING_SYSTEM (XCAR (bom));
7382 CHECK_CODING_SYSTEM (XCDR (bom));
7383 }
7384 ASET (attrs, coding_attr_utf_16_bom, bom);
7385
7386 endian = args[coding_arg_utf16_endian];
7387 ASET (attrs, coding_attr_utf_16_endian, endian);
7388
7389 category = (CONSP (bom)
7390 ? coding_category_utf_16_auto
7391 : NILP (bom)
7392 ? (NILP (endian)
7393 ? coding_category_utf_16_be_nosig
7394 : coding_category_utf_16_le_nosig)
7395 : (NILP (endian)
7396 ? coding_category_utf_16_be
7397 : coding_category_utf_16_le));
7398 }
7399 else if (EQ (coding_type, Qiso_2022))
7400 {
7401 Lisp_Object initial, reg_usage, request, flags;
7402 struct charset *charset;
7403 int i, id;
7404
7405 if (nargs < coding_arg_iso2022_max)
7406 goto short_args;
7407
7408 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7409 CHECK_VECTOR (initial);
7410 for (i = 0; i < 4; i++)
7411 {
7412 val = Faref (initial, make_number (i));
7413 if (! NILP (val))
7414 {
7415 CHECK_CHARSET_GET_ID (val, id);
7416 ASET (initial, i, make_number (id));
7417 }
7418 else
7419 ASET (initial, i, make_number (-1));
7420 }
7421
7422 reg_usage = args[coding_arg_iso2022_reg_usage];
7423 CHECK_CONS (reg_usage);
7424 CHECK_NATNUM (XCAR (reg_usage));
7425 CHECK_NATNUM (XCDR (reg_usage));
7426
7427 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7428 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
7429 {
7430 int id;
7431
7432 val = Fcar (tail);
7433 CHECK_CONS (val);
7434 CHECK_CHARSET_GET_ID (XCAR (val), id);
7435 CHECK_NATNUM (XCDR (val));
7436 if (XINT (XCDR (val)) >= 4)
7437 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7438 XCAR (val) = make_number (id);
7439 }
7440
7441 flags = args[coding_arg_iso2022_flags];
7442 CHECK_NATNUM (flags);
7443 i = XINT (flags);
7444 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7445 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7446
7447 ASET (attrs, coding_attr_iso_initial, initial);
7448 ASET (attrs, coding_attr_iso_usage, reg_usage);
7449 ASET (attrs, coding_attr_iso_request, request);
7450 ASET (attrs, coding_attr_iso_flags, flags);
7451 setup_iso_safe_charsets (attrs);
7452
7453 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7454 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7455 | CODING_ISO_FLAG_SINGLE_SHIFT))
7456 ? coding_category_iso_7_else
7457 : EQ (args[coding_arg_charset_list], Qiso_2022)
7458 ? coding_category_iso_7
7459 : coding_category_iso_7_tight);
7460 else
7461 {
7462 int id = XINT (AREF (initial, 1));
7463
7464 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7465 | CODING_ISO_FLAG_SINGLE_SHIFT))
7466 || EQ (args[coding_arg_charset_list], Qiso_2022)
7467 || id < 0)
7468 ? coding_category_iso_8_else
7469 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7470 ? coding_category_iso_8_1
7471 : coding_category_iso_8_2);
7472 }
7473 }
7474 else if (EQ (coding_type, Qemacs_mule))
7475 {
7476 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7477 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7478
7479 category = coding_category_emacs_mule;
7480 }
7481 else if (EQ (coding_type, Qshift_jis))
7482 {
7483
7484 struct charset *charset;
7485
7486 if (XINT (Flength (charset_list)) != 3)
7487 error ("There should be just three charsets");
7488
7489 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7490 if (CHARSET_DIMENSION (charset) != 1)
7491 error ("Dimension of charset %s is not one",
7492 XSYMBOL (CHARSET_NAME (charset))->name->data);
7493
7494 charset_list = XCDR (charset_list);
7495 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7496 if (CHARSET_DIMENSION (charset) != 1)
7497 error ("Dimension of charset %s is not one",
7498 XSYMBOL (CHARSET_NAME (charset))->name->data);
7499
7500 charset_list = XCDR (charset_list);
7501 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7502 if (CHARSET_DIMENSION (charset) != 2)
7503 error ("Dimension of charset %s is not two",
7504 XSYMBOL (CHARSET_NAME (charset))->name->data);
7505
7506 category = coding_category_sjis;
7507 Vsjis_coding_system = name;
7508 }
7509 else if (EQ (coding_type, Qbig5))
7510 {
7511 struct charset *charset;
7512
7513 if (XINT (Flength (charset_list)) != 2)
7514 error ("There should be just two charsets");
7515
7516 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7517 if (CHARSET_DIMENSION (charset) != 1)
7518 error ("Dimension of charset %s is not one",
7519 XSYMBOL (CHARSET_NAME (charset))->name->data);
7520
7521 charset_list = XCDR (charset_list);
7522 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7523 if (CHARSET_DIMENSION (charset) != 2)
7524 error ("Dimension of charset %s is not two",
7525 XSYMBOL (CHARSET_NAME (charset))->name->data);
7526
7527 category = coding_category_big5;
7528 Vbig5_coding_system = name;
7529 }
7530 else if (EQ (coding_type, Qraw_text))
7531 category = coding_category_raw_text;
7532 else if (EQ (coding_type, Qutf_8))
7533 category = coding_category_utf_8;
7534 else if (EQ (coding_type, Qundecided))
7535 category = coding_category_undecided;
7536 else
7537 error ("Invalid coding system type: %s",
7538 XSYMBOL (coding_type)->name->data);
7539
7540 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7541
7542 eol_type = args[coding_arg_eol_type];
7543 if (! NILP (eol_type)
7544 && ! EQ (eol_type, Qunix)
7545 && ! EQ (eol_type, Qdos)
7546 && ! EQ (eol_type, Qmac))
7547 error ("Invalid eol-type");
7548
7549 aliases = Fcons (name, Qnil);
7550
7551 if (NILP (eol_type))
7552 {
7553 eol_type = make_subsidiaries (name);
7554 for (i = 0; i < 3; i++)
7555 {
7556 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7557
7558 this_name = AREF (eol_type, i);
7559 this_aliases = Fcons (this_name, Qnil);
7560 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7561 this_spec = Fmake_vector (make_number (3), attrs);
7562 ASET (this_spec, 1, this_aliases);
7563 ASET (this_spec, 2, this_eol_type);
7564 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7565 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7566 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7567 Vcoding_system_alist);
7568 }
7569 }
7570
7571 spec_vec = Fmake_vector (make_number (3), attrs);
7572 ASET (spec_vec, 1, aliases);
7573 ASET (spec_vec, 2, eol_type);
7574
7575 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7576 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7577 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7578 Vcoding_system_alist);
7579
7580 {
7581 int id = coding_categories[category].id;
7582
7583 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7584 setup_coding_system (name, &coding_categories[category]);
7585 }
7586
7587 return Qnil;
7588
7589 short_args:
7590 return Fsignal (Qwrong_number_of_arguments,
7591 Fcons (intern ("define-coding-system-internal"),
7592 make_number (nargs)));
7593 }
7594
7595 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7596 Sdefine_coding_system_alias, 2, 2, 0,
7597 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7598 (alias, coding_system)
7599 Lisp_Object alias, coding_system;
7600 {
7601 Lisp_Object spec, aliases, eol_type;
7602
7603 CHECK_SYMBOL (alias);
7604 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7605 aliases = AREF (spec, 1);
7606 while (!NILP (XCDR (aliases)))
7607 aliases = XCDR (aliases);
7608 XCDR (aliases) = Fcons (alias, Qnil);
7609
7610 eol_type = AREF (spec, 2);
7611 if (VECTORP (eol_type))
7612 {
7613 Lisp_Object subsidiaries;
7614 int i;
7615
7616 subsidiaries = make_subsidiaries (alias);
7617 for (i = 0; i < 3; i++)
7618 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7619 AREF (eol_type, i));
7620
7621 ASET (spec, 2, subsidiaries);
7622 }
7623
7624 Fputhash (alias, spec, Vcoding_system_hash_table);
7625 Vcoding_system_alist = Fcons (Fcons (alias, Qnil), Vcoding_system_alist);
7626
7627 return Qnil;
7628 }
7629
7630 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7631 1, 1, 0,
7632 doc: /* Return the base of CODING-SYSTEM.
7633 Any alias or subsidiary coding systems are not base coding system. */)
7634 (coding_system)
7635 Lisp_Object coding_system;
7636 {
7637 Lisp_Object spec, attrs;
7638
7639 if (NILP (coding_system))
7640 return (Qno_conversion);
7641 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7642 attrs = AREF (spec, 0);
7643 return CODING_ATTR_BASE_NAME (attrs);
7644 }
7645
7646 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7647 1, 1, 0,
7648 doc: "Return the property list of CODING-SYSTEM.")
7649 (coding_system)
7650 Lisp_Object coding_system;
7651 {
7652 Lisp_Object spec, attrs;
7653
7654 if (NILP (coding_system))
7655 coding_system = Qno_conversion;
7656 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7657 attrs = AREF (spec, 0);
7658 return CODING_ATTR_PLIST (attrs);
7659 }
7660
7661
7662 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7663 1, 1, 0,
7664 doc: /* Return the list of aliases of CODING-SYSTEM.
7665 A base coding system is what made by `define-coding-system'.
7666 Any alias nor subsidiary coding systems are not base coding system. */)
7667 (coding_system)
7668 Lisp_Object coding_system;
7669 {
7670 Lisp_Object spec;
7671
7672 if (NILP (coding_system))
7673 coding_system = Qno_conversion;
7674 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7675 return AREF (spec, 2);
7676 }
7677
7678 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7679 Scoding_system_eol_type, 1, 1, 0,
7680 doc: /* Return eol-type of CODING-SYSTEM.
7681 An eol-type is integer 0, 1, 2, or a vector of coding systems.
7682
7683 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7684 and CR respectively.
7685
7686 A vector value indicates that a format of end-of-line should be
7687 detected automatically. Nth element of the vector is the subsidiary
7688 coding system whose eol-type is N. */)
7689 (coding_system)
7690 Lisp_Object coding_system;
7691 {
7692 Lisp_Object spec, eol_type;
7693 int n;
7694
7695 if (NILP (coding_system))
7696 coding_system = Qno_conversion;
7697 if (! CODING_SYSTEM_P (coding_system))
7698 return Qnil;
7699 spec = CODING_SYSTEM_SPEC (coding_system);
7700 eol_type = AREF (spec, 2);
7701 if (VECTORP (eol_type))
7702 return Fcopy_sequence (eol_type);
7703 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7704 return make_number (n);
7705 }
7706
7707 #endif /* emacs */
7708
7709 \f
7710 /*** 9. Post-amble ***/
7711
7712 void
7713 init_coding_once ()
7714 {
7715 int i;
7716
7717 for (i = 0; i < coding_category_max; i++)
7718 {
7719 coding_categories[i].id = -1;
7720 coding_priorities[i] = i;
7721 }
7722
7723 /* ISO2022 specific initialize routine. */
7724 for (i = 0; i < 0x20; i++)
7725 iso_code_class[i] = ISO_control_0;
7726 for (i = 0x21; i < 0x7F; i++)
7727 iso_code_class[i] = ISO_graphic_plane_0;
7728 for (i = 0x80; i < 0xA0; i++)
7729 iso_code_class[i] = ISO_control_1;
7730 for (i = 0xA1; i < 0xFF; i++)
7731 iso_code_class[i] = ISO_graphic_plane_1;
7732 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7733 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7734 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7735 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7736 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7737 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7738 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7739 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7740 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7741 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7742
7743 inhibit_pre_post_conversion = 0;
7744
7745 for (i = 0; i < 256; i++)
7746 {
7747 emacs_mule_bytes[i] = 1;
7748 }
7749 }
7750
7751 #ifdef emacs
7752
7753 void
7754 syms_of_coding ()
7755 {
7756 staticpro (&Vcoding_system_hash_table);
7757 Vcoding_system_hash_table = Fmakehash (Qeq);
7758
7759 staticpro (&Vsjis_coding_system);
7760 Vsjis_coding_system = Qnil;
7761
7762 staticpro (&Vbig5_coding_system);
7763 Vbig5_coding_system = Qnil;
7764
7765 staticpro (&Vcode_conversion_work_buf_list);
7766 Vcode_conversion_work_buf_list = Qnil;
7767
7768 staticpro (&Vcode_conversion_reused_work_buf);
7769 Vcode_conversion_reused_work_buf = Qnil;
7770
7771 DEFSYM (Qcharset, "charset");
7772 DEFSYM (Qtarget_idx, "target-idx");
7773 DEFSYM (Qcoding_system_history, "coding-system-history");
7774 Fset (Qcoding_system_history, Qnil);
7775
7776 /* Target FILENAME is the first argument. */
7777 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7778 /* Target FILENAME is the third argument. */
7779 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7780
7781 DEFSYM (Qcall_process, "call-process");
7782 /* Target PROGRAM is the first argument. */
7783 Fput (Qcall_process, Qtarget_idx, make_number (0));
7784
7785 DEFSYM (Qcall_process_region, "call-process-region");
7786 /* Target PROGRAM is the third argument. */
7787 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7788
7789 DEFSYM (Qstart_process, "start-process");
7790 /* Target PROGRAM is the third argument. */
7791 Fput (Qstart_process, Qtarget_idx, make_number (2));
7792
7793 DEFSYM (Qopen_network_stream, "open-network-stream");
7794 /* Target SERVICE is the fourth argument. */
7795 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7796
7797 DEFSYM (Qcoding_system, "coding-system");
7798 DEFSYM (Qcoding_aliases, "coding-aliases");
7799
7800 DEFSYM (Qeol_type, "eol-type");
7801 DEFSYM (Qunix, "unix");
7802 DEFSYM (Qdos, "dos");
7803 DEFSYM (Qmac, "mac");
7804
7805 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7806 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7807 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7808 DEFSYM (Qdefault_char, "default-char");
7809 DEFSYM (Qundecided, "undecided");
7810 DEFSYM (Qno_conversion, "no-conversion");
7811 DEFSYM (Qraw_text, "raw-text");
7812
7813 DEFSYM (Qiso_2022, "iso-2022");
7814
7815 DEFSYM (Qutf_8, "utf-8");
7816
7817 DEFSYM (Qutf_16, "utf-16");
7818 DEFSYM (Qutf_16_be, "utf-16-be");
7819 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7820 DEFSYM (Qutf_16_le, "utf-16-l3");
7821 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7822 DEFSYM (Qsignature, "signature");
7823 DEFSYM (Qendian, "endian");
7824 DEFSYM (Qbig, "big");
7825 DEFSYM (Qlittle, "little");
7826
7827 DEFSYM (Qshift_jis, "shift-jis");
7828 DEFSYM (Qbig5, "big5");
7829
7830 DEFSYM (Qcoding_system_p, "coding-system-p");
7831
7832 DEFSYM (Qcoding_system_error, "coding-system-error");
7833 Fput (Qcoding_system_error, Qerror_conditions,
7834 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7835 Fput (Qcoding_system_error, Qerror_message,
7836 build_string ("Invalid coding system"));
7837
7838 /* Intern this now in case it isn't already done.
7839 Setting this variable twice is harmless.
7840 But don't staticpro it here--that is done in alloc.c. */
7841 Qchar_table_extra_slots = intern ("char-table-extra-slots");
7842
7843 DEFSYM (Qtranslation_table, "translation-table");
7844 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7845 DEFSYM (Qtranslation_table_id, "translation-table-id");
7846 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7847 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
7848
7849 DEFSYM (Qchar_coding_system, "char-coding-system");
7850
7851 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2));
7852
7853 DEFSYM (Qvalid_codes, "valid-codes");
7854
7855 DEFSYM (Qemacs_mule, "emacs-mule");
7856
7857 Vcoding_category_table
7858 = Fmake_vector (make_number (coding_category_max), Qnil);
7859 staticpro (&Vcoding_category_table);
7860 /* Followings are target of code detection. */
7861 ASET (Vcoding_category_table, coding_category_iso_7,
7862 intern ("coding-category-iso-7"));
7863 ASET (Vcoding_category_table, coding_category_iso_7_tight,
7864 intern ("coding-category-iso-7-tight"));
7865 ASET (Vcoding_category_table, coding_category_iso_8_1,
7866 intern ("coding-category-iso-8-1"));
7867 ASET (Vcoding_category_table, coding_category_iso_8_2,
7868 intern ("coding-category-iso-8-2"));
7869 ASET (Vcoding_category_table, coding_category_iso_7_else,
7870 intern ("coding-category-iso-7-else"));
7871 ASET (Vcoding_category_table, coding_category_iso_8_else,
7872 intern ("coding-category-iso-8-else"));
7873 ASET (Vcoding_category_table, coding_category_utf_8,
7874 intern ("coding-category-utf-8"));
7875 ASET (Vcoding_category_table, coding_category_utf_16_be,
7876 intern ("coding-category-utf-16-be"));
7877 ASET (Vcoding_category_table, coding_category_utf_16_le,
7878 intern ("coding-category-utf-16-le"));
7879 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
7880 intern ("coding-category-utf-16-be-nosig"));
7881 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
7882 intern ("coding-category-utf-16-le-nosig"));
7883 ASET (Vcoding_category_table, coding_category_charset,
7884 intern ("coding-category-charset"));
7885 ASET (Vcoding_category_table, coding_category_sjis,
7886 intern ("coding-category-sjis"));
7887 ASET (Vcoding_category_table, coding_category_big5,
7888 intern ("coding-category-big5"));
7889 ASET (Vcoding_category_table, coding_category_ccl,
7890 intern ("coding-category-ccl"));
7891 ASET (Vcoding_category_table, coding_category_emacs_mule,
7892 intern ("coding-category-emacs-mule"));
7893 /* Followings are NOT target of code detection. */
7894 ASET (Vcoding_category_table, coding_category_raw_text,
7895 intern ("coding-category-raw-text"));
7896 ASET (Vcoding_category_table, coding_category_undecided,
7897 intern ("coding-category-undecided"));
7898
7899 defsubr (&Scoding_system_p);
7900 defsubr (&Sread_coding_system);
7901 defsubr (&Sread_non_nil_coding_system);
7902 defsubr (&Scheck_coding_system);
7903 defsubr (&Sdetect_coding_region);
7904 defsubr (&Sdetect_coding_string);
7905 defsubr (&Sfind_coding_systems_region_internal);
7906 defsubr (&Scheck_coding_systems_region);
7907 defsubr (&Sdecode_coding_region);
7908 defsubr (&Sencode_coding_region);
7909 defsubr (&Sdecode_coding_string);
7910 defsubr (&Sencode_coding_string);
7911 defsubr (&Sdecode_sjis_char);
7912 defsubr (&Sencode_sjis_char);
7913 defsubr (&Sdecode_big5_char);
7914 defsubr (&Sencode_big5_char);
7915 defsubr (&Sset_terminal_coding_system_internal);
7916 defsubr (&Sset_safe_terminal_coding_system_internal);
7917 defsubr (&Sterminal_coding_system);
7918 defsubr (&Sset_keyboard_coding_system_internal);
7919 defsubr (&Skeyboard_coding_system);
7920 defsubr (&Sfind_operation_coding_system);
7921 defsubr (&Sset_coding_system_priority);
7922 defsubr (&Sdefine_coding_system_internal);
7923 defsubr (&Sdefine_coding_system_alias);
7924 defsubr (&Scoding_system_base);
7925 defsubr (&Scoding_system_plist);
7926 defsubr (&Scoding_system_aliases);
7927 defsubr (&Scoding_system_eol_type);
7928 defsubr (&Scoding_system_priority_list);
7929
7930 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7931 doc: /* List of coding systems.
7932
7933 Do not alter the value of this variable manually. This variable should be
7934 updated by the functions `define-coding-system' and
7935 `define-coding-system-alias'. */);
7936 Vcoding_system_list = Qnil;
7937
7938 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7939 doc: /* Alist of coding system names.
7940 Each element is one element list of coding system name.
7941 This variable is given to `completing-read' as TABLE argument.
7942
7943 Do not alter the value of this variable manually. This variable should be
7944 updated by the functions `make-coding-system' and
7945 `define-coding-system-alias'. */);
7946 Vcoding_system_alist = Qnil;
7947
7948 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7949 doc: /* List of coding-categories (symbols) ordered by priority.
7950
7951 On detecting a coding system, Emacs tries code detection algorithms
7952 associated with each coding-category one by one in this order. When
7953 one algorithm agrees with a byte sequence of source text, the coding
7954 system bound to the corresponding coding-category is selected. */);
7955 {
7956 int i;
7957
7958 Vcoding_category_list = Qnil;
7959 for (i = coding_category_max - 1; i >= 0; i--)
7960 Vcoding_category_list
7961 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7962 Vcoding_category_list);
7963 }
7964
7965 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7966 doc: /* Specify the coding system for read operations.
7967 It is useful to bind this variable with `let', but do not set it globally.
7968 If the value is a coding system, it is used for decoding on read operation.
7969 If not, an appropriate element is used from one of the coding system alists:
7970 There are three such tables, `file-coding-system-alist',
7971 `process-coding-system-alist', and `network-coding-system-alist'. */);
7972 Vcoding_system_for_read = Qnil;
7973
7974 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7975 doc: /* Specify the coding system for write operations.
7976 Programs bind this variable with `let', but you should not set it globally.
7977 If the value is a coding system, it is used for encoding of output,
7978 when writing it to a file and when sending it to a file or subprocess.
7979
7980 If this does not specify a coding system, an appropriate element
7981 is used from one of the coding system alists:
7982 There are three such tables, `file-coding-system-alist',
7983 `process-coding-system-alist', and `network-coding-system-alist'.
7984 For output to files, if the above procedure does not specify a coding system,
7985 the value of `buffer-file-coding-system' is used. */);
7986 Vcoding_system_for_write = Qnil;
7987
7988 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7989 doc: /*
7990 Coding system used in the latest file or process I/O. */);
7991 Vlast_coding_system_used = Qnil;
7992
7993 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7994 doc: /*
7995 *Non-nil means always inhibit code conversion of end-of-line format.
7996 See info node `Coding Systems' and info node `Text and Binary' concerning
7997 such conversion. */);
7998 inhibit_eol_conversion = 0;
7999
8000 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8001 doc: /*
8002 Non-nil means process buffer inherits coding system of process output.
8003 Bind it to t if the process output is to be treated as if it were a file
8004 read from some filesystem. */);
8005 inherit_process_coding_system = 0;
8006
8007 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8008 doc: /*
8009 Alist to decide a coding system to use for a file I/O operation.
8010 The format is ((PATTERN . VAL) ...),
8011 where PATTERN is a regular expression matching a file name,
8012 VAL is a coding system, a cons of coding systems, or a function symbol.
8013 If VAL is a coding system, it is used for both decoding and encoding
8014 the file contents.
8015 If VAL is a cons of coding systems, the car part is used for decoding,
8016 and the cdr part is used for encoding.
8017 If VAL is a function symbol, the function must return a coding system
8018 or a cons of coding systems which are used as above. The function gets
8019 the arguments with which `find-operation-coding-systems' was called.
8020
8021 See also the function `find-operation-coding-system'
8022 and the variable `auto-coding-alist'. */);
8023 Vfile_coding_system_alist = Qnil;
8024
8025 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8026 doc: /*
8027 Alist to decide a coding system to use for a process I/O operation.
8028 The format is ((PATTERN . VAL) ...),
8029 where PATTERN is a regular expression matching a program name,
8030 VAL is a coding system, a cons of coding systems, or a function symbol.
8031 If VAL is a coding system, it is used for both decoding what received
8032 from the program and encoding what sent to the program.
8033 If VAL is a cons of coding systems, the car part is used for decoding,
8034 and the cdr part is used for encoding.
8035 If VAL is a function symbol, the function must return a coding system
8036 or a cons of coding systems which are used as above.
8037
8038 See also the function `find-operation-coding-system'. */);
8039 Vprocess_coding_system_alist = Qnil;
8040
8041 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8042 doc: /*
8043 Alist to decide a coding system to use for a network I/O operation.
8044 The format is ((PATTERN . VAL) ...),
8045 where PATTERN is a regular expression matching a network service name
8046 or is a port number to connect to,
8047 VAL is a coding system, a cons of coding systems, or a function symbol.
8048 If VAL is a coding system, it is used for both decoding what received
8049 from the network stream and encoding what sent to the network stream.
8050 If VAL is a cons of coding systems, the car part is used for decoding,
8051 and the cdr part is used for encoding.
8052 If VAL is a function symbol, the function must return a coding system
8053 or a cons of coding systems which are used as above.
8054
8055 See also the function `find-operation-coding-system'. */);
8056 Vnetwork_coding_system_alist = Qnil;
8057
8058 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8059 doc: /* Coding system to use with system messages.
8060 Also used for decoding keyboard input on X Window system. */);
8061 Vlocale_coding_system = Qnil;
8062
8063 /* The eol mnemonics are reset in startup.el system-dependently. */
8064 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8065 doc: /*
8066 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8067 eol_mnemonic_unix = build_string (":");
8068
8069 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8070 doc: /*
8071 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8072 eol_mnemonic_dos = build_string ("\\");
8073
8074 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8075 doc: /*
8076 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8077 eol_mnemonic_mac = build_string ("/");
8078
8079 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8080 doc: /*
8081 *String displayed in mode line when end-of-line format is not yet determined. */);
8082 eol_mnemonic_undecided = build_string (":");
8083
8084 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8085 doc: /*
8086 *Non-nil enables character translation while encoding and decoding. */);
8087 Venable_character_translation = Qt;
8088
8089 DEFVAR_LISP ("standard-translation-table-for-decode",
8090 &Vstandard_translation_table_for_decode,
8091 doc: /* Table for translating characters while decoding. */);
8092 Vstandard_translation_table_for_decode = Qnil;
8093
8094 DEFVAR_LISP ("standard-translation-table-for-encode",
8095 &Vstandard_translation_table_for_encode,
8096 doc: /* Table for translating characters while encoding. */);
8097 Vstandard_translation_table_for_encode = Qnil;
8098
8099 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8100 doc: /* Alist of charsets vs revision numbers.
8101 While encoding, if a charset (car part of an element) is found,
8102 designate it with the escape sequence identifying revision (cdr part
8103 of the element). */);
8104 Vcharset_revision_table = Qnil;
8105
8106 DEFVAR_LISP ("default-process-coding-system",
8107 &Vdefault_process_coding_system,
8108 doc: /* Cons of coding systems used for process I/O by default.
8109 The car part is used for decoding a process output,
8110 the cdr part is used for encoding a text to be sent to a process. */);
8111 Vdefault_process_coding_system = Qnil;
8112
8113 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8114 doc: /*
8115 Table of extra Latin codes in the range 128..159 (inclusive).
8116 This is a vector of length 256.
8117 If Nth element is non-nil, the existence of code N in a file
8118 \(or output of subprocess) doesn't prevent it to be detected as
8119 a coding system of ISO 2022 variant which has a flag
8120 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8121 or reading output of a subprocess.
8122 Only 128th through 159th elements has a meaning. */);
8123 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8124
8125 DEFVAR_LISP ("select-safe-coding-system-function",
8126 &Vselect_safe_coding_system_function,
8127 doc: /*
8128 Function to call to select safe coding system for encoding a text.
8129
8130 If set, this function is called to force a user to select a proper
8131 coding system which can encode the text in the case that a default
8132 coding system used in each operation can't encode the text.
8133
8134 The default value is `select-safe-coding-system' (which see). */);
8135 Vselect_safe_coding_system_function = Qnil;
8136
8137 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
8138 doc: /*
8139 Char-table containing safe coding systems of each characters.
8140 Each element doesn't include such generic coding systems that can
8141 encode any characters. They are in the first extra slot. */);
8142 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
8143
8144 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8145 &inhibit_iso_escape_detection,
8146 doc: /*
8147 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8148
8149 By default, on reading a file, Emacs tries to detect how the text is
8150 encoded. This code detection is sensitive to escape sequences. If
8151 the sequence is valid as ISO2022, the code is determined as one of
8152 the ISO2022 encodings, and the file is decoded by the corresponding
8153 coding system (e.g. `iso-2022-7bit').
8154
8155 However, there may be a case that you want to read escape sequences in
8156 a file as is. In such a case, you can set this variable to non-nil.
8157 Then, as the code detection ignores any escape sequences, no file is
8158 detected as encoded in some ISO2022 encoding. The result is that all
8159 escape sequences become visible in a buffer.
8160
8161 The default value is nil, and it is strongly recommended not to change
8162 it. That is because many Emacs Lisp source files that contain
8163 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8164 in Emacs's distribution, and they won't be decoded correctly on
8165 reading if you suppress escape sequence detection.
8166
8167 The other way to read escape sequences in a file without decoding is
8168 to explicitly specify some coding system that doesn't use ISO2022's
8169 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8170 inhibit_iso_escape_detection = 0;
8171
8172 {
8173 Lisp_Object args[coding_arg_max];
8174 Lisp_Object plist[14];
8175 int i;
8176
8177 for (i = 0; i < coding_arg_max; i++)
8178 args[i] = Qnil;
8179
8180 plist[0] = intern (":name");
8181 plist[1] = args[coding_arg_name] = Qno_conversion;
8182 plist[2] = intern (":mnemonic");
8183 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8184 plist[4] = intern (":coding-type");
8185 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8186 plist[6] = intern (":ascii-compatible-p");
8187 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8188 plist[8] = intern (":default-char");
8189 plist[9] = args[coding_arg_default_char] = make_number (0);
8190 plist[10] = intern (":docstring");
8191 plist[11] = build_string ("Do no conversion.\n\
8192 \n\
8193 When you visit a file with this coding, the file is read into a\n\
8194 unibyte buffer as is, thus each byte of a file is treated as a\n\
8195 character.");
8196 plist[12] = intern (":eol-type");
8197 plist[13] = args[coding_arg_eol_type] = Qunix;
8198 args[coding_arg_plist] = Flist (14, plist);
8199 Fdefine_coding_system_internal (coding_arg_max, args);
8200 }
8201
8202 setup_coding_system (Qno_conversion, &keyboard_coding);
8203 setup_coding_system (Qno_conversion, &terminal_coding);
8204 setup_coding_system (Qno_conversion, &safe_terminal_coding);
8205 }
8206
8207 char *
8208 emacs_strerror (error_number)
8209 int error_number;
8210 {
8211 char *str;
8212
8213 synchronize_system_messages_locale ();
8214 str = strerror (error_number);
8215
8216 if (! NILP (Vlocale_coding_system))
8217 {
8218 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8219 Vlocale_coding_system,
8220 0);
8221 str = (char *) XSTRING (dec)->data;
8222 }
8223
8224 return str;
8225 }
8226
8227 #endif /* emacs */