(Fdefine_charset_internal): Fix bug for the case of re-defining a
[bpt/emacs.git] / src / coding.c
1 /* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9 This file is part of GNU Emacs.
10
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
14 any later version.
15
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
25
26 /*** TABLE OF CONTENTS ***
27
28 0. General comments
29 1. Preamble
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
41
42 */
43
44 /*** 0. General comments ***
45
46
47 CODING SYSTEM
48
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcharset_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
75 character set.
76
77 o Old Emacs internal format (emacs-mule)
78
79 The coding system adopted by old versions of Emacs (20 and 21).
80
81 o ISO2022-base coding system
82
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
92 section 8.
93
94 o BIG5
95
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
101
102 o CCL
103
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
108
109 o Raw-text
110
111 A coding system for a text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
114
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
118
119
120 END-OF-LINE FORMAT
121
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
127
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
131
132 STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
138
139 */
140
141 /* COMMON MACROS */
142
143
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
157 #if 0
158 static int
159 detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
162 {
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
188 }
189 #endif
190
191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
197
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
202
203 Below is the template of these functions. */
204
205 #if 0
206 static void
207 decode_coding_XXXX (coding)
208 struct coding_system *coding;
209 {
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
243 }
244 #endif
245
246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
252
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
257
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
261
262 Below is a template of these functions. */
263 #if 0
264 static void
265 encode_coding_XXX (coding)
266 struct coding_system *coding;
267 {
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
285 }
286 #endif
287
288 \f
289 /*** 1. Preamble ***/
290
291 #include <config.h>
292 #include <stdio.h>
293
294 #include "lisp.h"
295 #include "buffer.h"
296 #include "character.h"
297 #include "charset.h"
298 #include "ccl.h"
299 #include "composite.h"
300 #include "coding.h"
301 #include "window.h"
302
303 Lisp_Object Vcoding_system_hash_table;
304
305 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
306 Lisp_Object Qunix, Qdos, Qmac;
307 Lisp_Object Qbuffer_file_coding_system;
308 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
309 Lisp_Object Qdefault_char;
310 Lisp_Object Qno_conversion, Qundecided;
311 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
312 Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
313 Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
314 Lisp_Object Qcoding_system_history;
315 Lisp_Object Qvalid_codes;
316
317 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
318 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
319 Lisp_Object Qstart_process, Qopen_network_stream;
320 Lisp_Object Qtarget_idx;
321
322 Lisp_Object Vselect_safe_coding_system_function;
323
324 /* Mnemonic string for each format of end-of-line. */
325 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
326 /* Mnemonic string to indicate format of end-of-line is not yet
327 decided. */
328 Lisp_Object eol_mnemonic_undecided;
329
330 #ifdef emacs
331
332 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
333
334 Lisp_Object Qcoding_system_p, Qcoding_system_error;
335
336 /* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338 Lisp_Object Qemacs_mule, Qraw_text;
339
340 /* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342 /* Coding-system for reading files and receiving data from process. */
343 Lisp_Object Vcoding_system_for_read;
344 /* Coding-system for writing files and sending data to process. */
345 Lisp_Object Vcoding_system_for_write;
346 /* Coding-system actually used in the latest I/O. */
347 Lisp_Object Vlast_coding_system_used;
348
349 /* A vector of length 256 which contains information about special
350 Latin codes (especially for dealing with Microsoft codes). */
351 Lisp_Object Vlatin_extra_code_table;
352
353 /* Flag to inhibit code conversion of end-of-line format. */
354 int inhibit_eol_conversion;
355
356 /* Flag to inhibit ISO2022 escape sequence detection. */
357 int inhibit_iso_escape_detection;
358
359 /* Flag to make buffer-file-coding-system inherit from process-coding. */
360 int inherit_process_coding_system;
361
362 /* Coding system to be used to encode text for terminal display. */
363 struct coding_system terminal_coding;
364
365 /* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367 struct coding_system safe_terminal_coding;
368
369 /* Coding system of what is sent from terminal keyboard. */
370 struct coding_system keyboard_coding;
371
372 Lisp_Object Vfile_coding_system_alist;
373 Lisp_Object Vprocess_coding_system_alist;
374 Lisp_Object Vnetwork_coding_system_alist;
375
376 Lisp_Object Vlocale_coding_system;
377
378 #endif /* emacs */
379
380 /* Flag to tell if we look up translation table on character code
381 conversion. */
382 Lisp_Object Venable_character_translation;
383 /* Standard translation table to look up on decoding (reading). */
384 Lisp_Object Vstandard_translation_table_for_decode;
385 /* Standard translation table to look up on encoding (writing). */
386 Lisp_Object Vstandard_translation_table_for_encode;
387
388 Lisp_Object Qtranslation_table;
389 Lisp_Object Qtranslation_table_id;
390 Lisp_Object Qtranslation_table_for_decode;
391 Lisp_Object Qtranslation_table_for_encode;
392
393 /* Alist of charsets vs revision number. */
394 static Lisp_Object Vcharset_revision_table;
395
396 /* Default coding systems used for process I/O. */
397 Lisp_Object Vdefault_process_coding_system;
398
399 /* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403 static int inhibit_pre_post_conversion;
404
405 /* Two special coding systems. */
406 Lisp_Object Vsjis_coding_system;
407 Lisp_Object Vbig5_coding_system;
408
409
410 static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
411 static void decode_coding_utf_8 P_ ((struct coding_system *));
412 static int encode_coding_utf_8 P_ ((struct coding_system *));
413
414 static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
415 static void decode_coding_utf_16 P_ ((struct coding_system *));
416 static int encode_coding_utf_16 P_ ((struct coding_system *));
417
418 static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
419 static void decode_coding_iso_2022 P_ ((struct coding_system *));
420 static int encode_coding_iso_2022 P_ ((struct coding_system *));
421
422 static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
423 static void decode_coding_emacs_mule P_ ((struct coding_system *));
424 static int encode_coding_emacs_mule P_ ((struct coding_system *));
425
426 static int detect_coding_sjis P_ ((struct coding_system *, int *));
427 static void decode_coding_sjis P_ ((struct coding_system *));
428 static int encode_coding_sjis P_ ((struct coding_system *));
429
430 static int detect_coding_big5 P_ ((struct coding_system *, int *));
431 static void decode_coding_big5 P_ ((struct coding_system *));
432 static int encode_coding_big5 P_ ((struct coding_system *));
433
434 static int detect_coding_ccl P_ ((struct coding_system *, int *));
435 static void decode_coding_ccl P_ ((struct coding_system *));
436 static int encode_coding_ccl P_ ((struct coding_system *));
437
438 static void decode_coding_raw_text P_ ((struct coding_system *));
439 static int encode_coding_raw_text P_ ((struct coding_system *));
440
441
442 /* ISO2022 section */
443
444 #define CODING_ISO_INITIAL(coding, reg) \
445 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
446 coding_attr_iso_initial), \
447 reg)))
448
449
450 #define CODING_ISO_REQUEST(coding, charset_id) \
451 ((charset_id <= (coding)->max_charset_id \
452 ? (coding)->safe_charsets[charset_id] \
453 : -1))
454
455
456 #define CODING_ISO_FLAGS(coding) \
457 ((coding)->spec.iso_2022.flags)
458 #define CODING_ISO_DESIGNATION(coding, reg) \
459 ((coding)->spec.iso_2022.current_designation[reg])
460 #define CODING_ISO_INVOCATION(coding, plane) \
461 ((coding)->spec.iso_2022.current_invocation[plane])
462 #define CODING_ISO_SINGLE_SHIFTING(coding) \
463 ((coding)->spec.iso_2022.single_shifting)
464 #define CODING_ISO_BOL(coding) \
465 ((coding)->spec.iso_2022.bol)
466 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
467 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
468
469 /* Control characters of ISO2022. */
470 /* code */ /* function */
471 #define ISO_CODE_LF 0x0A /* line-feed */
472 #define ISO_CODE_CR 0x0D /* carriage-return */
473 #define ISO_CODE_SO 0x0E /* shift-out */
474 #define ISO_CODE_SI 0x0F /* shift-in */
475 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
476 #define ISO_CODE_ESC 0x1B /* escape */
477 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
478 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
479 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
480
481 /* All code (1-byte) of ISO2022 is classified into one of the
482 followings. */
483 enum iso_code_class_type
484 {
485 ISO_control_0, /* Control codes in the range
486 0x00..0x1F and 0x7F, except for the
487 following 5 codes. */
488 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
489 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
490 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
491 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
492 ISO_escape, /* ISO_CODE_SO (0x1B) */
493 ISO_control_1, /* Control codes in the range
494 0x80..0x9F, except for the
495 following 3 codes. */
496 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
497 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
498 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
499 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
500 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
501 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
502 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
503 };
504
505 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
506 `iso-flags' attribute of an iso2022 coding system. */
507
508 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
509 instead of the correct short-form sequence (e.g. ESC $ A). */
510 #define CODING_ISO_FLAG_LONG_FORM 0x0001
511
512 /* If set, reset graphic planes and registers at end-of-line to the
513 initial state. */
514 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
515
516 /* If set, reset graphic planes and registers before any control
517 characters to the initial state. */
518 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
519
520 /* If set, encode by 7-bit environment. */
521 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
522
523 /* If set, use locking-shift function. */
524 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
525
526 /* If set, use single-shift function. Overwrite
527 CODING_ISO_FLAG_LOCKING_SHIFT. */
528 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
529
530 /* If set, use designation escape sequence. */
531 #define CODING_ISO_FLAG_DESIGNATION 0x0040
532
533 /* If set, produce revision number sequence. */
534 #define CODING_ISO_FLAG_REVISION 0x0080
535
536 /* If set, produce ISO6429's direction specifying sequence. */
537 #define CODING_ISO_FLAG_DIRECTION 0x0100
538
539 /* If set, assume designation states are reset at beginning of line on
540 output. */
541 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
542
543 /* If set, designation sequence should be placed at beginning of line
544 on output. */
545 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
546
547 /* If set, do not encode unsafe charactes on output. */
548 #define CODING_ISO_FLAG_SAFE 0x0800
549
550 /* If set, extra latin codes (128..159) are accepted as a valid code
551 on input. */
552 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
553
554 #define CODING_ISO_FLAG_COMPOSITION 0x2000
555
556 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
557
558 #define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
559
560 /* A character to be produced on output if encoding of the original
561 character is prohibited by CODING_ISO_FLAG_SAFE. */
562 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
563
564
565 /* UTF-16 section */
566 #define CODING_UTF_16_BOM(coding) \
567 ((coding)->spec.utf_16.bom)
568
569 #define CODING_UTF_16_ENDIAN(coding) \
570 ((coding)->spec.utf_16.endian)
571
572 #define CODING_UTF_16_SURROGATE(coding) \
573 ((coding)->spec.utf_16.surrogate)
574
575
576 /* CCL section */
577 #define CODING_CCL_DECODER(coding) \
578 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
579 #define CODING_CCL_ENCODER(coding) \
580 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
581 #define CODING_CCL_VALIDS(coding) \
582 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
583 ->data)
584
585 /* Index for each coding category in `coding_category_table' */
586
587 enum coding_category
588 {
589 coding_category_iso_7,
590 coding_category_iso_7_tight,
591 coding_category_iso_8_1,
592 coding_category_iso_8_2,
593 coding_category_iso_7_else,
594 coding_category_iso_8_else,
595 coding_category_utf_8,
596 coding_category_utf_16_auto,
597 coding_category_utf_16_be,
598 coding_category_utf_16_le,
599 coding_category_utf_16_be_nosig,
600 coding_category_utf_16_le_nosig,
601 coding_category_charset,
602 coding_category_sjis,
603 coding_category_big5,
604 coding_category_ccl,
605 coding_category_emacs_mule,
606 /* All above are targets of code detection. */
607 coding_category_raw_text,
608 coding_category_undecided,
609 coding_category_max
610 };
611
612 /* Definitions of flag bits used in detect_coding_XXXX. */
613 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
614 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
615 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
616 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
617 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
618 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
619 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
620 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
621 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
622 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
623 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
624 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
625 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
626 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
627 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
628 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
629
630 /* This value is returned if detect_coding_mask () find nothing other
631 than ASCII characters. */
632 #define CATEGORY_MASK_ANY \
633 (CATEGORY_MASK_ISO_7 \
634 | CATEGORY_MASK_ISO_7_TIGHT \
635 | CATEGORY_MASK_ISO_8_1 \
636 | CATEGORY_MASK_ISO_8_2 \
637 | CATEGORY_MASK_ISO_7_ELSE \
638 | CATEGORY_MASK_ISO_8_ELSE \
639 | CATEGORY_MASK_UTF_8 \
640 | CATEGORY_MASK_UTF_16_BE \
641 | CATEGORY_MASK_UTF_16_LE \
642 | CATEGORY_MASK_UTF_16_BE_NOSIG \
643 | CATEGORY_MASK_UTF_16_LE_NOSIG \
644 | CATEGORY_MASK_CHARSET \
645 | CATEGORY_MASK_SJIS \
646 | CATEGORY_MASK_BIG5 \
647 | CATEGORY_MASK_CCL \
648 | CATEGORY_MASK_EMACS_MULE)
649
650
651 #define CATEGORY_MASK_ISO_7BIT \
652 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
653
654 #define CATEGORY_MASK_ISO_8BIT \
655 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
656
657 #define CATEGORY_MASK_ISO_ELSE \
658 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
659
660 #define CATEGORY_MASK_ISO_ESCAPE \
661 (CATEGORY_MASK_ISO_7 \
662 | CATEGORY_MASK_ISO_7_TIGHT \
663 | CATEGORY_MASK_ISO_7_ELSE \
664 | CATEGORY_MASK_ISO_8_ELSE)
665
666 #define CATEGORY_MASK_ISO \
667 ( CATEGORY_MASK_ISO_7BIT \
668 | CATEGORY_MASK_ISO_8BIT \
669 | CATEGORY_MASK_ISO_ELSE)
670
671 #define CATEGORY_MASK_UTF_16 \
672 (CATEGORY_MASK_UTF_16_BE \
673 | CATEGORY_MASK_UTF_16_LE \
674 | CATEGORY_MASK_UTF_16_BE_NOSIG \
675 | CATEGORY_MASK_UTF_16_LE_NOSIG)
676
677
678 /* List of symbols `coding-category-xxx' ordered by priority. This
679 variable is exposed to Emacs Lisp. */
680 static Lisp_Object Vcoding_category_list;
681
682 /* Table of coding categories (Lisp symbols). This variable is for
683 internal use oly. */
684 static Lisp_Object Vcoding_category_table;
685
686 /* Table of coding-categories ordered by priority. */
687 static enum coding_category coding_priorities[coding_category_max];
688
689 /* Nth element is a coding context for the coding system bound to the
690 Nth coding category. */
691 static struct coding_system coding_categories[coding_category_max];
692
693 static int detected_mask[coding_category_raw_text] =
694 { CATEGORY_MASK_ISO,
695 CATEGORY_MASK_ISO,
696 CATEGORY_MASK_ISO,
697 CATEGORY_MASK_ISO,
698 CATEGORY_MASK_ISO,
699 CATEGORY_MASK_ISO,
700 CATEGORY_MASK_UTF_8,
701 CATEGORY_MASK_UTF_16,
702 CATEGORY_MASK_UTF_16,
703 CATEGORY_MASK_UTF_16,
704 CATEGORY_MASK_UTF_16,
705 CATEGORY_MASK_UTF_16,
706 CATEGORY_MASK_CHARSET,
707 CATEGORY_MASK_SJIS,
708 CATEGORY_MASK_BIG5,
709 CATEGORY_MASK_CCL,
710 CATEGORY_MASK_EMACS_MULE
711 };
712
713 /*** Commonly used macros and functions ***/
714
715 #ifndef min
716 #define min(a, b) ((a) < (b) ? (a) : (b))
717 #endif
718 #ifndef max
719 #define max(a, b) ((a) > (b) ? (a) : (b))
720 #endif
721
722 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
723 do { \
724 attrs = CODING_ID_ATTRS (coding->id); \
725 eol_type = CODING_ID_EOL_TYPE (coding->id); \
726 if (VECTORP (eol_type)) \
727 eol_type = Qunix; \
728 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
729 } while (0)
730
731
732 /* Safely get one byte from the source text pointed by SRC which ends
733 at SRC_END, and set C to that byte. If there are not enough bytes
734 in the source, it jumps to `no_more_source'. The caller
735 should declare and set these variables appropriately in advance:
736 src, src_end, multibytep
737 */
738
739 #define ONE_MORE_BYTE(c) \
740 do { \
741 if (src == src_end) \
742 { \
743 if (src_base < src) \
744 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
745 goto no_more_source; \
746 } \
747 c = *src++; \
748 if (multibytep && (c & 0x80)) \
749 { \
750 if ((c & 0xFE) != 0xC0) \
751 error ("Undecodable char found"); \
752 c = ((c & 1) << 6) | *src++; \
753 } \
754 consumed_chars++; \
755 } while (0)
756
757
758 #define ONE_MORE_BYTE_NO_CHECK(c) \
759 do { \
760 c = *src++; \
761 if (multibytep && (c & 0x80)) \
762 { \
763 if ((c & 0xFE) != 0xC0) \
764 error ("Undecodable char found"); \
765 c = ((c & 1) << 6) | *src++; \
766 } \
767 } while (0)
768
769
770 /* Store a byte C in the place pointed by DST and increment DST to the
771 next free point, and increment PRODUCED_CHARS. The caller should
772 assure that C is 0..127, and declare and set the variable `dst'
773 appropriately in advance.
774 */
775
776
777 #define EMIT_ONE_ASCII_BYTE(c) \
778 do { \
779 produced_chars++; \
780 *dst++ = (c); \
781 } while (0)
782
783
784 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
785
786 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
787 do { \
788 produced_chars += 2; \
789 *dst++ = (c1), *dst++ = (c2); \
790 } while (0)
791
792
793 /* Store a byte C in the place pointed by DST and increment DST to the
794 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
795 nonzero, store in an appropriate multibyte from. The caller should
796 declare and set the variables `dst' and `multibytep' appropriately
797 in advance. */
798
799 #define EMIT_ONE_BYTE(c) \
800 do { \
801 produced_chars++; \
802 if (multibytep) \
803 { \
804 int ch = (c); \
805 if (ch >= 0x80) \
806 ch = BYTE8_TO_CHAR (ch); \
807 CHAR_STRING_ADVANCE (ch, dst); \
808 } \
809 else \
810 *dst++ = (c); \
811 } while (0)
812
813
814 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
815
816 #define EMIT_TWO_BYTES(c1, c2) \
817 do { \
818 produced_chars += 2; \
819 if (multibytep) \
820 { \
821 int ch; \
822 \
823 ch = (c1); \
824 if (ch >= 0x80) \
825 ch = BYTE8_TO_CHAR (ch); \
826 CHAR_STRING_ADVANCE (ch, dst); \
827 ch = (c2); \
828 if (ch >= 0x80) \
829 ch = BYTE8_TO_CHAR (ch); \
830 CHAR_STRING_ADVANCE (ch, dst); \
831 } \
832 else \
833 { \
834 *dst++ = (c1); \
835 *dst++ = (c2); \
836 } \
837 } while (0)
838
839
840 #define EMIT_THREE_BYTES(c1, c2, c3) \
841 do { \
842 EMIT_ONE_BYTE (c1); \
843 EMIT_TWO_BYTES (c2, c3); \
844 } while (0)
845
846
847 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
848 do { \
849 EMIT_TWO_BYTES (c1, c2); \
850 EMIT_TWO_BYTES (c3, c4); \
851 } while (0)
852
853
854 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
855 do { \
856 charset_map_loaded = 0; \
857 c = DECODE_CHAR (charset, code); \
858 if (charset_map_loaded) \
859 { \
860 unsigned char *orig = coding->source; \
861 EMACS_INT offset; \
862 \
863 coding_set_source (coding); \
864 offset = coding->source - orig; \
865 src += offset; \
866 src_base += offset; \
867 src_end += offset; \
868 } \
869 } while (0)
870
871
872 #define ASSURE_DESTINATION(bytes) \
873 do { \
874 if (dst + (bytes) >= dst_end) \
875 { \
876 int more_bytes = charbuf_end - charbuf + (bytes); \
877 \
878 dst = alloc_destination (coding, more_bytes, dst); \
879 dst_end = coding->destination + coding->dst_bytes; \
880 } \
881 } while (0)
882
883
884
885 static void
886 coding_set_source (coding)
887 struct coding_system *coding;
888 {
889 if (BUFFERP (coding->src_object))
890 {
891 if (coding->src_pos < 0)
892 coding->source = GAP_END_ADDR + coding->src_pos_byte;
893 else
894 {
895 struct buffer *buf = XBUFFER (coding->src_object);
896 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
897 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
898
899 coding->source = beg_addr + coding->src_pos_byte - 1;
900 if (coding->src_pos_byte >= gpt_byte)
901 coding->source += BUF_GAP_SIZE (buf);
902 }
903 }
904 else if (STRINGP (coding->src_object))
905 {
906 coding->source = (XSTRING (coding->src_object)->data
907 + coding->src_pos_byte);
908 }
909 else
910 /* Otherwise, the source is C string and is never relocated
911 automatically. Thus we don't have to update anything. */
912 ;
913 }
914
915 static void
916 coding_set_destination (coding)
917 struct coding_system *coding;
918 {
919 if (BUFFERP (coding->dst_object))
920 {
921 /* We are sure that coding->dst_pos_byte is before the gap of the
922 buffer. */
923 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
924 + coding->dst_pos_byte - 1);
925 if (coding->src_pos < 0)
926 coding->dst_bytes = (GAP_END_ADDR
927 - (coding->src_bytes - coding->consumed)
928 - coding->destination);
929 else
930 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
931 - coding->destination);
932 }
933 else
934 /* Otherwise, the destination is C string and is never relocated
935 automatically. Thus we don't have to update anything. */
936 ;
937 }
938
939
940 static void
941 coding_alloc_by_realloc (coding, bytes)
942 struct coding_system *coding;
943 EMACS_INT bytes;
944 {
945 coding->destination = (unsigned char *) xrealloc (coding->destination,
946 coding->dst_bytes + bytes);
947 coding->dst_bytes += bytes;
948 }
949
950 static void
951 coding_alloc_by_making_gap (coding, bytes)
952 struct coding_system *coding;
953 EMACS_INT bytes;
954 {
955 if (BUFFERP (coding->dst_object)
956 && EQ (coding->src_object, coding->dst_object))
957 {
958 EMACS_INT add = coding->src_bytes - coding->consumed;
959
960 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
961 make_gap (bytes);
962 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
963 }
964 else
965 {
966 Lisp_Object this_buffer;
967
968 this_buffer = Fcurrent_buffer ();
969 set_buffer_internal (XBUFFER (coding->dst_object));
970 make_gap (bytes);
971 set_buffer_internal (XBUFFER (this_buffer));
972 }
973 }
974
975
976 static unsigned char *
977 alloc_destination (coding, nbytes, dst)
978 struct coding_system *coding;
979 int nbytes;
980 unsigned char *dst;
981 {
982 EMACS_INT offset = dst - coding->destination;
983
984 if (BUFFERP (coding->dst_object))
985 coding_alloc_by_making_gap (coding, nbytes);
986 else
987 coding_alloc_by_realloc (coding, nbytes);
988 coding->result = CODING_RESULT_SUCCESS;
989 coding_set_destination (coding);
990 dst = coding->destination + offset;
991 return dst;
992 }
993
994 \f
995 /*** 2. Emacs' internal format (emacs-utf-8) ***/
996
997
998
999 \f
1000 /*** 3. UTF-8 ***/
1001
1002 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1003 Check if a text is encoded in UTF-8. If it is, return
1004 CATEGORY_MASK_UTF_8, else return 0. */
1005
1006 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1007 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1008 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1009 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1010 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1011 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1012
1013 static int
1014 detect_coding_utf_8 (coding, mask)
1015 struct coding_system *coding;
1016 int *mask;
1017 {
1018 unsigned char *src = coding->source, *src_base = src;
1019 unsigned char *src_end = coding->source + coding->src_bytes;
1020 int multibytep = coding->src_multibyte;
1021 int consumed_chars = 0;
1022 int found = 0;
1023
1024 /* A coding system of this category is always ASCII compatible. */
1025 src += coding->head_ascii;
1026
1027 while (1)
1028 {
1029 int c, c1, c2, c3, c4;
1030
1031 ONE_MORE_BYTE (c);
1032 if (UTF_8_1_OCTET_P (c))
1033 continue;
1034 ONE_MORE_BYTE (c1);
1035 if (! UTF_8_EXTRA_OCTET_P (c1))
1036 break;
1037 if (UTF_8_2_OCTET_LEADING_P (c))
1038 {
1039 found++;
1040 continue;
1041 }
1042 ONE_MORE_BYTE (c2);
1043 if (! UTF_8_EXTRA_OCTET_P (c2))
1044 break;
1045 if (UTF_8_3_OCTET_LEADING_P (c))
1046 {
1047 found++;
1048 continue;
1049 }
1050 ONE_MORE_BYTE (c3);
1051 if (! UTF_8_EXTRA_OCTET_P (c3))
1052 break;
1053 if (UTF_8_4_OCTET_LEADING_P (c))
1054 {
1055 found++;
1056 continue;
1057 }
1058 ONE_MORE_BYTE (c4);
1059 if (! UTF_8_EXTRA_OCTET_P (c4))
1060 break;
1061 if (UTF_8_5_OCTET_LEADING_P (c))
1062 {
1063 found++;
1064 continue;
1065 }
1066 break;
1067 }
1068 *mask &= ~CATEGORY_MASK_UTF_8;
1069 return 0;
1070
1071 no_more_source:
1072 if (! found)
1073 return 0;
1074 *mask &= CATEGORY_MASK_UTF_8;
1075 return 1;
1076 }
1077
1078
1079 static void
1080 decode_coding_utf_8 (coding)
1081 struct coding_system *coding;
1082 {
1083 unsigned char *src = coding->source + coding->consumed;
1084 unsigned char *src_end = coding->source + coding->src_bytes;
1085 unsigned char *src_base;
1086 int *charbuf = coding->charbuf;
1087 int *charbuf_end = charbuf + coding->charbuf_size;
1088 int consumed_chars = 0, consumed_chars_base;
1089 int multibytep = coding->src_multibyte;
1090 Lisp_Object attr, eol_type, charset_list;
1091
1092 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1093
1094 while (1)
1095 {
1096 int c, c1, c2, c3, c4, c5;
1097
1098 src_base = src;
1099 consumed_chars_base = consumed_chars;
1100
1101 if (charbuf >= charbuf_end)
1102 break;
1103
1104 ONE_MORE_BYTE (c1);
1105 if (UTF_8_1_OCTET_P(c1))
1106 {
1107 c = c1;
1108 if (c == '\r')
1109 {
1110 if (EQ (eol_type, Qdos))
1111 {
1112 if (src == src_end)
1113 goto no_more_source;
1114 if (*src == '\n')
1115 ONE_MORE_BYTE (c);
1116 }
1117 else if (EQ (eol_type, Qmac))
1118 c = '\n';
1119 }
1120 }
1121 else
1122 {
1123 ONE_MORE_BYTE (c2);
1124 if (! UTF_8_EXTRA_OCTET_P (c2))
1125 goto invalid_code;
1126 if (UTF_8_2_OCTET_LEADING_P (c1))
1127 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1128 else
1129 {
1130 ONE_MORE_BYTE (c3);
1131 if (! UTF_8_EXTRA_OCTET_P (c3))
1132 goto invalid_code;
1133 if (UTF_8_3_OCTET_LEADING_P (c1))
1134 c = (((c1 & 0xF) << 12)
1135 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1136 else
1137 {
1138 ONE_MORE_BYTE (c4);
1139 if (! UTF_8_EXTRA_OCTET_P (c4))
1140 goto invalid_code;
1141 if (UTF_8_4_OCTET_LEADING_P (c1))
1142 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1143 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1144 else
1145 {
1146 ONE_MORE_BYTE (c5);
1147 if (! UTF_8_EXTRA_OCTET_P (c5))
1148 goto invalid_code;
1149 if (UTF_8_5_OCTET_LEADING_P (c1))
1150 {
1151 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1152 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1153 | (c5 & 0x3F));
1154 if (c > MAX_CHAR)
1155 goto invalid_code;
1156 }
1157 else
1158 goto invalid_code;
1159 }
1160 }
1161 }
1162 }
1163
1164 *charbuf++ = c;
1165 continue;
1166
1167 invalid_code:
1168 src = src_base;
1169 consumed_chars = consumed_chars_base;
1170 ONE_MORE_BYTE (c);
1171 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1172 coding->errors++;
1173 }
1174
1175 no_more_source:
1176 coding->consumed_char += consumed_chars_base;
1177 coding->consumed = src_base - coding->source;
1178 coding->charbuf_used = charbuf - coding->charbuf;
1179 }
1180
1181
1182 static int
1183 encode_coding_utf_8 (coding)
1184 struct coding_system *coding;
1185 {
1186 int multibytep = coding->dst_multibyte;
1187 int *charbuf = coding->charbuf;
1188 int *charbuf_end = charbuf + coding->charbuf_used;
1189 unsigned char *dst = coding->destination + coding->produced;
1190 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1191 int produced_chars = 0;
1192 int c;
1193
1194 if (multibytep)
1195 {
1196 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1197
1198 while (charbuf < charbuf_end)
1199 {
1200 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1201
1202 ASSURE_DESTINATION (safe_room);
1203 c = *charbuf++;
1204 CHAR_STRING_ADVANCE (c, pend);
1205 for (p = str; p < pend; p++)
1206 EMIT_ONE_BYTE (*p);
1207 }
1208 }
1209 else
1210 {
1211 int safe_room = MAX_MULTIBYTE_LENGTH;
1212
1213 while (charbuf < charbuf_end)
1214 {
1215 ASSURE_DESTINATION (safe_room);
1216 c = *charbuf++;
1217 dst += CHAR_STRING (c, dst);
1218 produced_chars++;
1219 }
1220 }
1221 coding->result = CODING_RESULT_SUCCESS;
1222 coding->produced_char += produced_chars;
1223 coding->produced = dst - coding->destination;
1224 return 0;
1225 }
1226
1227
1228 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1229 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1230 Little Endian (otherwise). If it is, return
1231 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1232 else return 0. */
1233
1234 #define UTF_16_HIGH_SURROGATE_P(val) \
1235 (((val) & 0xFC00) == 0xD800)
1236
1237 #define UTF_16_LOW_SURROGATE_P(val) \
1238 (((val) & 0xFC00) == 0xDC00)
1239
1240 #define UTF_16_INVALID_P(val) \
1241 (((val) == 0xFFFE) \
1242 || ((val) == 0xFFFF) \
1243 || UTF_16_LOW_SURROGATE_P (val))
1244
1245
1246 static int
1247 detect_coding_utf_16 (coding, mask)
1248 struct coding_system *coding;
1249 int *mask;
1250 {
1251 unsigned char *src = coding->source, *src_base = src;
1252 unsigned char *src_end = coding->source + coding->src_bytes;
1253 int multibytep = coding->src_multibyte;
1254 int consumed_chars = 0;
1255 int c1, c2;
1256
1257 ONE_MORE_BYTE (c1);
1258 ONE_MORE_BYTE (c2);
1259
1260 if ((c1 == 0xFF) && (c2 == 0xFE))
1261 {
1262 *mask &= CATEGORY_MASK_UTF_16_LE;
1263 return 1;
1264 }
1265 else if ((c1 == 0xFE) && (c2 == 0xFF))
1266 {
1267 *mask &= CATEGORY_MASK_UTF_16_BE;
1268 return 1;
1269 }
1270 no_more_source:
1271 return 0;
1272 }
1273
1274 static void
1275 decode_coding_utf_16 (coding)
1276 struct coding_system *coding;
1277 {
1278 unsigned char *src = coding->source + coding->consumed;
1279 unsigned char *src_end = coding->source + coding->src_bytes;
1280 unsigned char *src_base;
1281 int *charbuf = coding->charbuf;
1282 int *charbuf_end = charbuf + coding->charbuf_size;
1283 int consumed_chars = 0, consumed_chars_base;
1284 int multibytep = coding->src_multibyte;
1285 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1286 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1287 int surrogate = CODING_UTF_16_SURROGATE (coding);
1288 Lisp_Object attr, eol_type, charset_list;
1289
1290 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1291
1292 if (bom != utf_16_without_bom)
1293 {
1294 int c, c1, c2;
1295
1296 src_base = src;
1297 ONE_MORE_BYTE (c1);
1298 ONE_MORE_BYTE (c2);
1299 c = (c1 << 8) | c2;
1300 if (bom == utf_16_with_bom)
1301 {
1302 if (endian == utf_16_big_endian
1303 ? c != 0xFFFE : c != 0xFEFF)
1304 {
1305 /* We are sure that there's enouph room at CHARBUF. */
1306 *charbuf++ = c1;
1307 *charbuf++ = c2;
1308 coding->errors++;
1309 }
1310 }
1311 else
1312 {
1313 if (c == 0xFFFE)
1314 CODING_UTF_16_ENDIAN (coding)
1315 = endian = utf_16_big_endian;
1316 else if (c == 0xFEFF)
1317 CODING_UTF_16_ENDIAN (coding)
1318 = endian = utf_16_little_endian;
1319 else
1320 {
1321 CODING_UTF_16_ENDIAN (coding)
1322 = endian = utf_16_big_endian;
1323 src = src_base;
1324 }
1325 }
1326 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1327 }
1328
1329 while (1)
1330 {
1331 int c, c1, c2;
1332
1333 src_base = src;
1334 consumed_chars_base = consumed_chars;
1335
1336 if (charbuf + 2 >= charbuf_end)
1337 break;
1338
1339 ONE_MORE_BYTE (c1);
1340 ONE_MORE_BYTE (c2);
1341 c = (endian == utf_16_big_endian
1342 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1343 if (surrogate)
1344 {
1345 if (! UTF_16_LOW_SURROGATE_P (c))
1346 {
1347 if (endian == utf_16_big_endian)
1348 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1349 else
1350 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1351 *charbuf++ = c1;
1352 *charbuf++ = c2;
1353 coding->errors++;
1354 if (UTF_16_HIGH_SURROGATE_P (c))
1355 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1356 else
1357 *charbuf++ = c;
1358 }
1359 else
1360 {
1361 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1362 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1363 *charbuf++ = c;
1364 }
1365 }
1366 else
1367 {
1368 if (UTF_16_HIGH_SURROGATE_P (c))
1369 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1370 else
1371 *charbuf++ = c;
1372 }
1373 }
1374
1375 no_more_source:
1376 coding->consumed_char += consumed_chars_base;
1377 coding->consumed = src_base - coding->source;
1378 coding->charbuf_used = charbuf - coding->charbuf;
1379 }
1380
1381 static int
1382 encode_coding_utf_16 (coding)
1383 struct coding_system *coding;
1384 {
1385 int multibytep = coding->dst_multibyte;
1386 int *charbuf = coding->charbuf;
1387 int *charbuf_end = charbuf + coding->charbuf_used;
1388 unsigned char *dst = coding->destination + coding->produced;
1389 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1390 int safe_room = 8;
1391 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1392 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1393 int produced_chars = 0;
1394 Lisp_Object attrs, eol_type, charset_list;
1395 int c;
1396
1397 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1398
1399 if (bom == utf_16_with_bom)
1400 {
1401 ASSURE_DESTINATION (safe_room);
1402 if (big_endian)
1403 EMIT_TWO_BYTES (0xFF, 0xFE);
1404 else
1405 EMIT_TWO_BYTES (0xFE, 0xFF);
1406 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1407 }
1408
1409 while (charbuf < charbuf_end)
1410 {
1411 ASSURE_DESTINATION (safe_room);
1412 c = *charbuf++;
1413 if (c >= MAX_UNICODE_CHAR)
1414 c = coding->default_char;
1415
1416 if (c < 0x10000)
1417 {
1418 if (big_endian)
1419 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1420 else
1421 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1422 }
1423 else
1424 {
1425 int c1, c2;
1426
1427 c -= 0x10000;
1428 c1 = (c >> 10) + 0xD800;
1429 c2 = (c & 0x3FF) + 0xDC00;
1430 if (big_endian)
1431 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1432 else
1433 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1434 }
1435 }
1436 coding->result = CODING_RESULT_SUCCESS;
1437 coding->produced = dst - coding->destination;
1438 coding->produced_char += produced_chars;
1439 return 0;
1440 }
1441
1442 \f
1443 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1444
1445 /* Emacs' internal format for representation of multiple character
1446 sets is a kind of multi-byte encoding, i.e. characters are
1447 represented by variable-length sequences of one-byte codes.
1448
1449 ASCII characters and control characters (e.g. `tab', `newline') are
1450 represented by one-byte sequences which are their ASCII codes, in
1451 the range 0x00 through 0x7F.
1452
1453 8-bit characters of the range 0x80..0x9F are represented by
1454 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1455 code + 0x20).
1456
1457 8-bit characters of the range 0xA0..0xFF are represented by
1458 one-byte sequences which are their 8-bit code.
1459
1460 The other characters are represented by a sequence of `base
1461 leading-code', optional `extended leading-code', and one or two
1462 `position-code's. The length of the sequence is determined by the
1463 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1464 whereas extended leading-code and position-code take the range 0xA0
1465 through 0xFF. See `charset.h' for more details about leading-code
1466 and position-code.
1467
1468 --- CODE RANGE of Emacs' internal format ---
1469 character set range
1470 ------------- -----
1471 ascii 0x00..0x7F
1472 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1473 eight-bit-graphic 0xA0..0xBF
1474 ELSE 0x81..0x9D + [0xA0..0xFF]+
1475 ---------------------------------------------
1476
1477 As this is the internal character representation, the format is
1478 usually not used externally (i.e. in a file or in a data sent to a
1479 process). But, it is possible to have a text externally in this
1480 format (i.e. by encoding by the coding system `emacs-mule').
1481
1482 In that case, a sequence of one-byte codes has a slightly different
1483 form.
1484
1485 At first, all characters in eight-bit-control are represented by
1486 one-byte sequences which are their 8-bit code.
1487
1488 Next, character composition data are represented by the byte
1489 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1490 where,
1491 METHOD is 0xF0 plus one of composition method (enum
1492 composition_method),
1493
1494 BYTES is 0xA0 plus a byte length of this composition data,
1495
1496 CHARS is 0x20 plus a number of characters composed by this
1497 data,
1498
1499 COMPONENTs are characters of multibye form or composition
1500 rules encoded by two-byte of ASCII codes.
1501
1502 In addition, for backward compatibility, the following formats are
1503 also recognized as composition data on decoding.
1504
1505 0x80 MSEQ ...
1506 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1507
1508 Here,
1509 MSEQ is a multibyte form but in these special format:
1510 ASCII: 0xA0 ASCII_CODE+0x80,
1511 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1512 RULE is a one byte code of the range 0xA0..0xF0 that
1513 represents a composition rule.
1514 */
1515
1516 char emacs_mule_bytes[256];
1517
1518 /* Leading-code followed by extended leading-code. */
1519 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1520 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1521 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1522 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1523
1524
1525 int
1526 emacs_mule_char (coding, composition, nbytes, nchars)
1527 struct coding_system *coding;
1528 int composition;
1529 int *nbytes, *nchars;
1530 {
1531 unsigned char *src = coding->source + coding->consumed;
1532 unsigned char *src_end = coding->source + coding->src_bytes;
1533 int multibytep = coding->src_multibyte;
1534 unsigned char *src_base = src;
1535 struct charset *charset;
1536 unsigned code;
1537 int c;
1538 int consumed_chars = 0;
1539
1540 ONE_MORE_BYTE (c);
1541 if (composition)
1542 {
1543 c -= 0x20;
1544 if (c == 0x80)
1545 {
1546 ONE_MORE_BYTE (c);
1547 if (c < 0xA0)
1548 goto invalid_code;
1549 *nbytes = src - src_base;
1550 *nchars = consumed_chars;
1551 return (c - 0x80);
1552 }
1553 }
1554
1555 switch (emacs_mule_bytes[c])
1556 {
1557 case 2:
1558 if (! (charset = emacs_mule_charset[c]))
1559 goto invalid_code;
1560 ONE_MORE_BYTE (c);
1561 code = c & 0x7F;
1562 break;
1563
1564 case 3:
1565 if (c == LEADING_CODE_PRIVATE_11
1566 || c == LEADING_CODE_PRIVATE_12)
1567 {
1568 ONE_MORE_BYTE (c);
1569 if (! (charset = emacs_mule_charset[c]))
1570 goto invalid_code;
1571 ONE_MORE_BYTE (c);
1572 code = c & 0x7F;
1573 }
1574 else
1575 {
1576 if (! (charset = emacs_mule_charset[c]))
1577 goto invalid_code;
1578 ONE_MORE_BYTE (c);
1579 code = (c & 0x7F) << 7;
1580 ONE_MORE_BYTE (c);
1581 code |= c & 0x7F;
1582 }
1583 break;
1584
1585 case 4:
1586 if (! (charset = emacs_mule_charset[c]))
1587 goto invalid_code;
1588 ONE_MORE_BYTE (c);
1589 code = (c & 0x7F) << 7;
1590 ONE_MORE_BYTE (c);
1591 code |= c & 0x7F;
1592 break;
1593
1594 case 1:
1595 code = c;
1596 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1597 : code < 0xA0 ? charset_8_bit_control
1598 : charset_8_bit_graphic);
1599 break;
1600
1601 default:
1602 abort ();
1603 }
1604 c = DECODE_CHAR (charset, code);
1605 if (c < 0)
1606 goto invalid_code;
1607 *nbytes = src - src_base;
1608 *nchars = consumed_chars;
1609 return c;
1610
1611 no_more_source:
1612 return -2;
1613
1614 invalid_code:
1615 return -1;
1616 }
1617
1618
1619 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1620 Check if a text is encoded in `emacs-mule'. */
1621
1622 static int
1623 detect_coding_emacs_mule (coding, mask)
1624 struct coding_system *coding;
1625 int *mask;
1626 {
1627 unsigned char *src = coding->source, *src_base = src;
1628 unsigned char *src_end = coding->source + coding->src_bytes;
1629 int multibytep = coding->src_multibyte;
1630 int consumed_chars = 0;
1631 int c;
1632 int found = 0;
1633
1634 /* A coding system of this category is always ASCII compatible. */
1635 src += coding->head_ascii;
1636
1637 while (1)
1638 {
1639 ONE_MORE_BYTE (c);
1640
1641 if (c == 0x80)
1642 {
1643 /* Perhaps the start of composite character. We simple skip
1644 it because analyzing it is too heavy for detecting. But,
1645 at least, we check that the composite character
1646 constitues of more than 4 bytes. */
1647 unsigned char *src_base;
1648
1649 repeat:
1650 src_base = src;
1651 do
1652 {
1653 ONE_MORE_BYTE (c);
1654 }
1655 while (c >= 0xA0);
1656
1657 if (src - src_base <= 4)
1658 break;
1659 found = 1;
1660 if (c == 0x80)
1661 goto repeat;
1662 }
1663
1664 if (c < 0x80)
1665 {
1666 if (c < 0x20
1667 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1668 break;
1669 }
1670 else
1671 {
1672 unsigned char *src_base = src - 1;
1673
1674 do
1675 {
1676 ONE_MORE_BYTE (c);
1677 }
1678 while (c >= 0xA0);
1679 if (src - src_base != emacs_mule_bytes[*src_base])
1680 break;
1681 found = 1;
1682 }
1683 }
1684 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1685 return 0;
1686
1687 no_more_source:
1688 if (!found)
1689 return 0;
1690 *mask &= CATEGORY_MASK_EMACS_MULE;
1691 return 1;
1692 }
1693
1694
1695 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1696
1697 /* Decode a character represented as a component of composition
1698 sequence of Emacs 20/21 style at SRC. Set C to that character and
1699 update SRC to the head of next character (or an encoded composition
1700 rule). If SRC doesn't points a composition component, set C to -1.
1701 If SRC points an invalid byte sequence, global exit by a return
1702 value 0. */
1703
1704 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1705 if (1) \
1706 { \
1707 int c; \
1708 int nbytes, nchars; \
1709 \
1710 if (src == src_end) \
1711 break; \
1712 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \
1713 if (c < 0) \
1714 { \
1715 if (c == -2) \
1716 break; \
1717 goto invalid_code; \
1718 } \
1719 *buf++ = c; \
1720 src += nbytes; \
1721 consumed_chars += nchars; \
1722 } \
1723 else
1724
1725
1726 /* Decode a composition rule represented as a component of composition
1727 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC
1728 points an invalid byte sequence, set C to -1. */
1729
1730 #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \
1731 do { \
1732 int c, gref, nref; \
1733 \
1734 if (src < src_end) \
1735 goto invalid_code; \
1736 ONE_MORE_BYTE_NO_CHECK (c); \
1737 c -= 0xA0; \
1738 if (c < 0 || c >= 81) \
1739 goto invalid_code; \
1740 \
1741 gref = c / 9, nref = c % 9; \
1742 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1743 } while (0)
1744
1745
1746 #define ADD_COMPOSITION_DATA(buf, method, nchars) \
1747 do { \
1748 *buf++ = -5; \
1749 *buf++ = coding->produced_char + char_offset; \
1750 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1751 *buf++ = method; \
1752 *buf++ = nchars; \
1753 } while (0)
1754
1755
1756 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1757 do { \
1758 /* Emacs 21 style format. The first three bytes at SRC are \
1759 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1760 the byte length of this composition information, CHARS is the \
1761 number of characters composed by this composition. */ \
1762 enum composition_method method = c - 0xF0; \
1763 int consumed_chars_limit; \
1764 int nbytes, nchars; \
1765 \
1766 ONE_MORE_BYTE (c); \
1767 nbytes = c - 0xA0; \
1768 if (nbytes < 3) \
1769 goto invalid_code; \
1770 ONE_MORE_BYTE (c); \
1771 nchars = c - 0xA0; \
1772 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1773 consumed_chars_limit = consumed_chars_base + nbytes; \
1774 if (method != COMPOSITION_RELATIVE) \
1775 { \
1776 int i = 0; \
1777 while (consumed_chars < consumed_chars_limit) \
1778 { \
1779 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1780 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \
1781 else \
1782 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1783 } \
1784 if (consumed_chars < consumed_chars_limit) \
1785 goto invalid_code; \
1786 } \
1787 } while (0)
1788
1789
1790 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1791 do { \
1792 /* Emacs 20 style format for relative composition. */ \
1793 /* Store multibyte form of characters to be composed. */ \
1794 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1795 int *buf = components; \
1796 int i, j; \
1797 \
1798 src = src_base; \
1799 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1800 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1801 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1802 if (i < 2) \
1803 goto invalid_code; \
1804 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1805 for (j = 0; j < i; j++) \
1806 *charbuf++ = components[j]; \
1807 } while (0)
1808
1809
1810 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1811 do { \
1812 /* Emacs 20 style format for rule-base composition. */ \
1813 /* Store multibyte form of characters to be composed. */ \
1814 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1815 int *buf = components; \
1816 int i, j; \
1817 \
1818 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1819 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1820 { \
1821 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \
1822 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1823 } \
1824 if (i < 1 || (buf - components) % 2 == 0) \
1825 goto invalid_code; \
1826 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1827 goto no_more_source; \
1828 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1829 for (j = 0; j < i; j++) \
1830 *charbuf++ = components[j]; \
1831 for (j = 0; j < i; j += 2) \
1832 *charbuf++ = components[j]; \
1833 } while (0)
1834
1835
1836 static void
1837 decode_coding_emacs_mule (coding)
1838 struct coding_system *coding;
1839 {
1840 unsigned char *src = coding->source + coding->consumed;
1841 unsigned char *src_end = coding->source + coding->src_bytes;
1842 unsigned char *src_base;
1843 int *charbuf = coding->charbuf;
1844 int *charbuf_end = charbuf + coding->charbuf_size;
1845 int consumed_chars = 0, consumed_chars_base;
1846 int char_offset = 0;
1847 int multibytep = coding->src_multibyte;
1848 Lisp_Object attrs, eol_type, charset_list;
1849
1850 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1851
1852 while (1)
1853 {
1854 int c;
1855
1856 src_base = src;
1857 consumed_chars_base = consumed_chars;
1858
1859 if (charbuf >= charbuf_end)
1860 break;
1861
1862 ONE_MORE_BYTE (c);
1863
1864 if (c < 0x80)
1865 {
1866 if (c == '\r')
1867 {
1868 if (EQ (eol_type, Qdos))
1869 {
1870 if (src == src_end)
1871 goto no_more_source;
1872 if (*src == '\n')
1873 ONE_MORE_BYTE (c);
1874 }
1875 else if (EQ (eol_type, Qmac))
1876 c = '\n';
1877 }
1878 *charbuf++ = c;
1879 char_offset++;
1880 }
1881 else if (c == 0x80)
1882 {
1883 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1884 break;
1885 ONE_MORE_BYTE (c);
1886 if (c - 0xF0 >= COMPOSITION_RELATIVE
1887 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
1888 DECODE_EMACS_MULE_21_COMPOSITION (c);
1889 else if (c < 0xC0)
1890 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1891 else if (c == 0xFF)
1892 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1893 else
1894 goto invalid_code;
1895 }
1896 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1897 {
1898 int nbytes, nchars;
1899 src--;
1900 c = emacs_mule_char (coding, 0, &nbytes, &nchars);
1901 if (c < 0)
1902 {
1903 if (c == -2)
1904 break;
1905 goto invalid_code;
1906 }
1907 *charbuf++ = c;
1908 char_offset++;
1909 }
1910 continue;
1911
1912 invalid_code:
1913 src = src_base;
1914 consumed_chars = consumed_chars_base;
1915 ONE_MORE_BYTE (c);
1916 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1917 coding->errors++;
1918 }
1919
1920 no_more_source:
1921 coding->consumed_char += consumed_chars_base;
1922 coding->consumed = src_base - coding->source;
1923 coding->charbuf_used = charbuf - coding->charbuf;
1924 }
1925
1926
1927 #define EMACS_MULE_LEADING_CODES(id, codes) \
1928 do { \
1929 if (id < 0xA0) \
1930 codes[0] = id, codes[1] = 0; \
1931 else if (id < 0xE0) \
1932 codes[0] = 0x9A, codes[1] = id; \
1933 else if (id < 0xF0) \
1934 codes[0] = 0x9B, codes[1] = id; \
1935 else if (id < 0xF5) \
1936 codes[0] = 0x9C, codes[1] = id; \
1937 else \
1938 codes[0] = 0x9D, codes[1] = id; \
1939 } while (0);
1940
1941
1942 static int
1943 encode_coding_emacs_mule (coding)
1944 struct coding_system *coding;
1945 {
1946 int multibytep = coding->dst_multibyte;
1947 int *charbuf = coding->charbuf;
1948 int *charbuf_end = charbuf + coding->charbuf_used;
1949 unsigned char *dst = coding->destination + coding->produced;
1950 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1951 int safe_room = 8;
1952 int produced_chars = 0;
1953 Lisp_Object attrs, eol_type, charset_list;
1954 int c;
1955
1956 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1957
1958 while (charbuf < charbuf_end)
1959 {
1960 ASSURE_DESTINATION (safe_room);
1961 c = *charbuf++;
1962 if (ASCII_CHAR_P (c))
1963 EMIT_ONE_ASCII_BYTE (c);
1964 else
1965 {
1966 struct charset *charset;
1967 unsigned code;
1968 int dimension;
1969 int emacs_mule_id;
1970 unsigned char leading_codes[2];
1971
1972 charset = char_charset (c, charset_list, &code);
1973 if (! charset)
1974 {
1975 c = coding->default_char;
1976 if (ASCII_CHAR_P (c))
1977 {
1978 EMIT_ONE_ASCII_BYTE (c);
1979 continue;
1980 }
1981 charset = char_charset (c, charset_list, &code);
1982 }
1983 dimension = CHARSET_DIMENSION (charset);
1984 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
1985 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
1986 EMIT_ONE_BYTE (leading_codes[0]);
1987 if (leading_codes[1])
1988 EMIT_ONE_BYTE (leading_codes[1]);
1989 if (dimension == 1)
1990 EMIT_ONE_BYTE (code);
1991 else
1992 {
1993 EMIT_ONE_BYTE (code >> 8);
1994 EMIT_ONE_BYTE (code & 0xFF);
1995 }
1996 }
1997 }
1998 coding->result = CODING_RESULT_SUCCESS;
1999 coding->produced_char += produced_chars;
2000 coding->produced = dst - coding->destination;
2001 return 0;
2002 }
2003
2004 \f
2005 /*** 7. ISO2022 handlers ***/
2006
2007 /* The following note describes the coding system ISO2022 briefly.
2008 Since the intention of this note is to help understand the
2009 functions in this file, some parts are NOT ACCURATE or OVERLY
2010 SIMPLIFIED. For thorough understanding, please refer to the
2011 original document of ISO2022.
2012
2013 ISO2022 provides many mechanisms to encode several character sets
2014 in 7-bit and 8-bit environments. For 7-bite environments, all text
2015 is encoded using bytes less than 128. This may make the encoded
2016 text a little bit longer, but the text passes more easily through
2017 several gateways, some of which strip off MSB (Most Signigant Bit).
2018
2019 There are two kinds of character sets: control character set and
2020 graphic character set. The former contains control characters such
2021 as `newline' and `escape' to provide control functions (control
2022 functions are also provided by escape sequences). The latter
2023 contains graphic characters such as 'A' and '-'. Emacs recognizes
2024 two control character sets and many graphic character sets.
2025
2026 Graphic character sets are classified into one of the following
2027 four classes, according to the number of bytes (DIMENSION) and
2028 number of characters in one dimension (CHARS) of the set:
2029 - DIMENSION1_CHARS94
2030 - DIMENSION1_CHARS96
2031 - DIMENSION2_CHARS94
2032 - DIMENSION2_CHARS96
2033
2034 In addition, each character set is assigned an identification tag,
2035 unique for each set, called "final character" (denoted as <F>
2036 hereafter). The <F> of each character set is decided by ECMA(*)
2037 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2038 (0x30..0x3F are for private use only).
2039
2040 Note (*): ECMA = European Computer Manufacturers Association
2041
2042 Here are examples of graphic character set [NAME(<F>)]:
2043 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2044 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2045 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2046 o DIMENSION2_CHARS96 -- none for the moment
2047
2048 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2049 C0 [0x00..0x1F] -- control character plane 0
2050 GL [0x20..0x7F] -- graphic character plane 0
2051 C1 [0x80..0x9F] -- control character plane 1
2052 GR [0xA0..0xFF] -- graphic character plane 1
2053
2054 A control character set is directly designated and invoked to C0 or
2055 C1 by an escape sequence. The most common case is that:
2056 - ISO646's control character set is designated/invoked to C0, and
2057 - ISO6429's control character set is designated/invoked to C1,
2058 and usually these designations/invocations are omitted in encoded
2059 text. In a 7-bit environment, only C0 can be used, and a control
2060 character for C1 is encoded by an appropriate escape sequence to
2061 fit into the environment. All control characters for C1 are
2062 defined to have corresponding escape sequences.
2063
2064 A graphic character set is at first designated to one of four
2065 graphic registers (G0 through G3), then these graphic registers are
2066 invoked to GL or GR. These designations and invocations can be
2067 done independently. The most common case is that G0 is invoked to
2068 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2069 these invocations and designations are omitted in encoded text.
2070 In a 7-bit environment, only GL can be used.
2071
2072 When a graphic character set of CHARS94 is invoked to GL, codes
2073 0x20 and 0x7F of the GL area work as control characters SPACE and
2074 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2075 be used.
2076
2077 There are two ways of invocation: locking-shift and single-shift.
2078 With locking-shift, the invocation lasts until the next different
2079 invocation, whereas with single-shift, the invocation affects the
2080 following character only and doesn't affect the locking-shift
2081 state. Invocations are done by the following control characters or
2082 escape sequences:
2083
2084 ----------------------------------------------------------------------
2085 abbrev function cntrl escape seq description
2086 ----------------------------------------------------------------------
2087 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2088 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2089 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2090 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2091 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2092 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2093 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2094 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2095 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2096 ----------------------------------------------------------------------
2097 (*) These are not used by any known coding system.
2098
2099 Control characters for these functions are defined by macros
2100 ISO_CODE_XXX in `coding.h'.
2101
2102 Designations are done by the following escape sequences:
2103 ----------------------------------------------------------------------
2104 escape sequence description
2105 ----------------------------------------------------------------------
2106 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2107 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2108 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2109 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2110 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2111 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2112 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2113 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2114 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2115 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2116 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2117 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2118 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2119 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2120 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2121 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2122 ----------------------------------------------------------------------
2123
2124 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2125 of dimension 1, chars 94, and final character <F>, etc...
2126
2127 Note (*): Although these designations are not allowed in ISO2022,
2128 Emacs accepts them on decoding, and produces them on encoding
2129 CHARS96 character sets in a coding system which is characterized as
2130 7-bit environment, non-locking-shift, and non-single-shift.
2131
2132 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2133 '(' must be omitted. We refer to this as "short-form" hereafter.
2134
2135 Now you may notice that there are a lot of ways for encoding the
2136 same multilingual text in ISO2022. Actually, there exist many
2137 coding systems such as Compound Text (used in X11's inter client
2138 communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2139 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
2140 localized platforms), and all of these are variants of ISO2022.
2141
2142 In addition to the above, Emacs handles two more kinds of escape
2143 sequences: ISO6429's direction specification and Emacs' private
2144 sequence for specifying character composition.
2145
2146 ISO6429's direction specification takes the following form:
2147 o CSI ']' -- end of the current direction
2148 o CSI '0' ']' -- end of the current direction
2149 o CSI '1' ']' -- start of left-to-right text
2150 o CSI '2' ']' -- start of right-to-left text
2151 The control character CSI (0x9B: control sequence introducer) is
2152 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2153
2154 Character composition specification takes the following form:
2155 o ESC '0' -- start relative composition
2156 o ESC '1' -- end composition
2157 o ESC '2' -- start rule-base composition (*)
2158 o ESC '3' -- start relative composition with alternate chars (**)
2159 o ESC '4' -- start rule-base composition with alternate chars (**)
2160 Since these are not standard escape sequences of any ISO standard,
2161 the use of them for these meaning is restricted to Emacs only.
2162
2163 (*) This form is used only in Emacs 20.5 and the older versions,
2164 but the newer versions can safely decode it.
2165 (**) This form is used only in Emacs 21.1 and the newer versions,
2166 and the older versions can't decode it.
2167
2168 Here's a list of examples usages of these composition escape
2169 sequences (categorized by `enum composition_method').
2170
2171 COMPOSITION_RELATIVE:
2172 ESC 0 CHAR [ CHAR ] ESC 1
2173 COMPOSITOIN_WITH_RULE:
2174 ESC 2 CHAR [ RULE CHAR ] ESC 1
2175 COMPOSITION_WITH_ALTCHARS:
2176 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2177 COMPOSITION_WITH_RULE_ALTCHARS:
2178 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2179
2180 enum iso_code_class_type iso_code_class[256];
2181
2182 #define SAFE_CHARSET_P(coding, id) \
2183 ((id) <= (coding)->max_charset_id \
2184 && (coding)->safe_charsets[id] >= 0)
2185
2186
2187 #define SHIFT_OUT_OK(category) \
2188 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2189
2190 static void
2191 setup_iso_safe_charsets (Lisp_Object attrs)
2192 {
2193 Lisp_Object charset_list, safe_charsets;
2194 Lisp_Object request;
2195 Lisp_Object reg_usage;
2196 Lisp_Object tail;
2197 int reg94, reg96;
2198 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2199 int max_charset_id;
2200
2201 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2202 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2203 && ! EQ (charset_list, Viso_2022_charset_list))
2204 {
2205 CODING_ATTR_CHARSET_LIST (attrs)
2206 = charset_list = Viso_2022_charset_list;
2207 ASET (attrs, coding_attr_safe_charsets, Qnil);
2208 }
2209
2210 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2211 return;
2212
2213 max_charset_id = 0;
2214 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2215 {
2216 int id = XINT (XCAR (tail));
2217 if (max_charset_id < id)
2218 max_charset_id = id;
2219 }
2220
2221 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2222 make_number (255));
2223 request = AREF (attrs, coding_attr_iso_request);
2224 reg_usage = AREF (attrs, coding_attr_iso_usage);
2225 reg94 = XINT (XCAR (reg_usage));
2226 reg96 = XINT (XCDR (reg_usage));
2227
2228 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2229 {
2230 Lisp_Object id;
2231 Lisp_Object reg;
2232 struct charset *charset;
2233
2234 id = XCAR (tail);
2235 charset = CHARSET_FROM_ID (XINT (id));
2236 reg = Fcdr (Fassq (request, id));
2237 if (! NILP (reg))
2238 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2239 else if (charset->iso_chars_96)
2240 {
2241 if (reg96 < 4)
2242 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2243 }
2244 else
2245 {
2246 if (reg94 < 4)
2247 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2248 }
2249 }
2250 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2251 }
2252
2253
2254 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2255 Check if a text is encoded in ISO2022. If it is, returns an
2256 integer in which appropriate flag bits any of:
2257 CATEGORY_MASK_ISO_7
2258 CATEGORY_MASK_ISO_7_TIGHT
2259 CATEGORY_MASK_ISO_8_1
2260 CATEGORY_MASK_ISO_8_2
2261 CATEGORY_MASK_ISO_7_ELSE
2262 CATEGORY_MASK_ISO_8_ELSE
2263 are set. If a code which should never appear in ISO2022 is found,
2264 returns 0. */
2265
2266 static int
2267 detect_coding_iso_2022 (coding, mask)
2268 struct coding_system *coding;
2269 int *mask;
2270 {
2271 unsigned char *src = coding->source, *src_base = src;
2272 unsigned char *src_end = coding->source + coding->src_bytes;
2273 int multibytep = coding->src_multibyte;
2274 int mask_iso = CATEGORY_MASK_ISO;
2275 int mask_found = 0, mask_8bit_found = 0;
2276 int reg[4], shift_out = 0, single_shifting = 0;
2277 int id;
2278 int c, c1;
2279 int consumed_chars = 0;
2280 int i;
2281
2282 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2283 {
2284 struct coding_system *this = &(coding_categories[i]);
2285 Lisp_Object attrs, val;
2286
2287 attrs = CODING_ID_ATTRS (this->id);
2288 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2289 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2290 setup_iso_safe_charsets (attrs);
2291 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2292 this->max_charset_id = XSTRING (val)->size - 1;
2293 this->safe_charsets = (char *) XSTRING (val)->data;
2294 }
2295
2296 /* A coding system of this category is always ASCII compatible. */
2297 src += coding->head_ascii;
2298
2299 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2300 while (mask_iso && src < src_end)
2301 {
2302 ONE_MORE_BYTE (c);
2303 switch (c)
2304 {
2305 case ISO_CODE_ESC:
2306 if (inhibit_iso_escape_detection)
2307 break;
2308 single_shifting = 0;
2309 ONE_MORE_BYTE (c);
2310 if (c >= '(' && c <= '/')
2311 {
2312 /* Designation sequence for a charset of dimension 1. */
2313 ONE_MORE_BYTE (c1);
2314 if (c1 < ' ' || c1 >= 0x80
2315 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2316 /* Invalid designation sequence. Just ignore. */
2317 break;
2318 reg[(c - '(') % 4] = id;
2319 }
2320 else if (c == '$')
2321 {
2322 /* Designation sequence for a charset of dimension 2. */
2323 ONE_MORE_BYTE (c);
2324 if (c >= '@' && c <= 'B')
2325 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2326 reg[0] = id = iso_charset_table[1][0][c];
2327 else if (c >= '(' && c <= '/')
2328 {
2329 ONE_MORE_BYTE (c1);
2330 if (c1 < ' ' || c1 >= 0x80
2331 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2332 /* Invalid designation sequence. Just ignore. */
2333 break;
2334 reg[(c - '(') % 4] = id;
2335 }
2336 else
2337 /* Invalid designation sequence. Just ignore. */
2338 break;
2339 }
2340 else if (c == 'N' || c == 'O')
2341 {
2342 /* ESC <Fe> for SS2 or SS3. */
2343 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
2344 break;
2345 }
2346 else if (c >= '0' && c <= '4')
2347 {
2348 /* ESC <Fp> for start/end composition. */
2349 mask_found |= CATEGORY_MASK_ISO;
2350 break;
2351 }
2352 else
2353 {
2354 /* Invalid escape sequence. */
2355 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2356 break;
2357 }
2358
2359 /* We found a valid designation sequence for CHARSET. */
2360 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2361 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2362 id))
2363 mask_found |= CATEGORY_MASK_ISO_7;
2364 else
2365 mask_iso &= ~CATEGORY_MASK_ISO_7;
2366 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2367 id))
2368 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
2369 else
2370 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2371 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2372 id))
2373 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
2374 else
2375 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2376 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2377 id))
2378 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
2379 else
2380 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
2381 break;
2382
2383 case ISO_CODE_SO:
2384 if (inhibit_iso_escape_detection)
2385 break;
2386 single_shifting = 0;
2387 if (shift_out == 0
2388 && (reg[1] >= 0
2389 || SHIFT_OUT_OK (coding_category_iso_7_else)
2390 || SHIFT_OUT_OK (coding_category_iso_8_else)))
2391 {
2392 /* Locking shift out. */
2393 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2394 mask_found |= CATEGORY_MASK_ISO_ELSE;
2395 }
2396 break;
2397
2398 case ISO_CODE_SI:
2399 if (inhibit_iso_escape_detection)
2400 break;
2401 single_shifting = 0;
2402 if (shift_out == 1)
2403 {
2404 /* Locking shift in. */
2405 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2406 mask_found |= CATEGORY_MASK_ISO_ELSE;
2407 }
2408 break;
2409
2410 case ISO_CODE_CSI:
2411 single_shifting = 0;
2412 case ISO_CODE_SS2:
2413 case ISO_CODE_SS3:
2414 {
2415 int newmask = CATEGORY_MASK_ISO_8_ELSE;
2416
2417 if (inhibit_iso_escape_detection)
2418 break;
2419 if (c != ISO_CODE_CSI)
2420 {
2421 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2422 & CODING_ISO_FLAG_SINGLE_SHIFT)
2423 newmask |= CATEGORY_MASK_ISO_8_1;
2424 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2425 & CODING_ISO_FLAG_SINGLE_SHIFT)
2426 newmask |= CATEGORY_MASK_ISO_8_2;
2427 single_shifting = 1;
2428 }
2429 if (VECTORP (Vlatin_extra_code_table)
2430 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2431 {
2432 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2433 & CODING_ISO_FLAG_LATIN_EXTRA)
2434 newmask |= CATEGORY_MASK_ISO_8_1;
2435 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2436 & CODING_ISO_FLAG_LATIN_EXTRA)
2437 newmask |= CATEGORY_MASK_ISO_8_2;
2438 }
2439 mask_iso &= newmask;
2440 mask_found |= newmask;
2441 }
2442 break;
2443
2444 default:
2445 if (c < 0x80)
2446 {
2447 single_shifting = 0;
2448 break;
2449 }
2450 else if (c < 0xA0)
2451 {
2452 single_shifting = 0;
2453 mask_8bit_found = 1;
2454 if (VECTORP (Vlatin_extra_code_table)
2455 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2456 {
2457 int newmask = 0;
2458
2459 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2460 & CODING_ISO_FLAG_LATIN_EXTRA)
2461 newmask |= CATEGORY_MASK_ISO_8_1;
2462 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2463 & CODING_ISO_FLAG_LATIN_EXTRA)
2464 newmask |= CATEGORY_MASK_ISO_8_2;
2465 mask_iso &= newmask;
2466 mask_found |= newmask;
2467 }
2468 else
2469 return 0;
2470 }
2471 else
2472 {
2473 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2474 | CATEGORY_MASK_ISO_7_ELSE);
2475 mask_found |= CATEGORY_MASK_ISO_8_1;
2476 mask_8bit_found = 1;
2477 /* Check the length of succeeding codes of the range
2478 0xA0..0FF. If the byte length is odd, we exclude
2479 CATEGORY_MASK_ISO_8_2. We can check this only
2480 when we are not single shifting. */
2481 if (!single_shifting
2482 && mask_iso & CATEGORY_MASK_ISO_8_2)
2483 {
2484 int i = 1;
2485 while (src < src_end)
2486 {
2487 ONE_MORE_BYTE (c);
2488 if (c < 0xA0)
2489 break;
2490 i++;
2491 }
2492
2493 if (i & 1 && src < src_end)
2494 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
2495 else
2496 mask_found |= CATEGORY_MASK_ISO_8_2;
2497 }
2498 }
2499 break;
2500 }
2501 }
2502 no_more_source:
2503 if (!mask_iso)
2504 {
2505 *mask &= ~CATEGORY_MASK_ISO;
2506 return 0;
2507 }
2508 if (!mask_found)
2509 return 0;
2510 *mask &= mask_iso & mask_found;
2511 if (! mask_8bit_found)
2512 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2513 return 1;
2514 }
2515
2516
2517 /* Set designation state into CODING. */
2518 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2519 do { \
2520 int id, prev; \
2521 \
2522 if (final < '0' || final >= 128 \
2523 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2524 || !SAFE_CHARSET_P (coding, id)) \
2525 { \
2526 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2527 goto invalid_code; \
2528 } \
2529 prev = CODING_ISO_DESIGNATION (coding, reg); \
2530 CODING_ISO_DESIGNATION (coding, reg) = id; \
2531 /* If there was an invalid designation to REG previously, and this \
2532 designation is ASCII to REG, we should keep this designation \
2533 sequence. */ \
2534 if (prev == -2 && id == charset_ascii) \
2535 goto invalid_code; \
2536 } while (0)
2537
2538
2539 #define MAYBE_FINISH_COMPOSITION() \
2540 do { \
2541 int i; \
2542 if (composition_state == COMPOSING_NO) \
2543 break; \
2544 /* It is assured that we have enough room for producing \
2545 characters stored in the table `components'. */ \
2546 if (charbuf + component_idx > charbuf_end) \
2547 goto no_more_source; \
2548 composition_state = COMPOSING_NO; \
2549 if (method == COMPOSITION_RELATIVE \
2550 || method == COMPOSITION_WITH_ALTCHARS) \
2551 { \
2552 for (i = 0; i < component_idx; i++) \
2553 *charbuf++ = components[i]; \
2554 char_offset += component_idx; \
2555 } \
2556 else \
2557 { \
2558 for (i = 0; i < component_idx; i += 2) \
2559 *charbuf++ = components[i]; \
2560 char_offset += (component_idx / 2) + 1; \
2561 } \
2562 } while (0)
2563
2564
2565 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2566 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2567 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2568 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2569 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2570 */
2571
2572 #define DECODE_COMPOSITION_START(c1) \
2573 do { \
2574 if (c1 == '0' \
2575 && composition_state == COMPOSING_COMPONENT_CHAR) \
2576 { \
2577 component_len = component_idx; \
2578 composition_state = COMPOSING_CHAR; \
2579 } \
2580 else \
2581 { \
2582 unsigned char *p; \
2583 \
2584 MAYBE_FINISH_COMPOSITION (); \
2585 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2586 goto no_more_source; \
2587 for (p = src; p < src_end - 1; p++) \
2588 if (*p == ISO_CODE_ESC && p[1] == '1') \
2589 break; \
2590 if (p == src_end - 1) \
2591 { \
2592 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2593 goto invalid_code; \
2594 goto no_more_source; \
2595 } \
2596 \
2597 /* This is surely the start of a composition. */ \
2598 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2599 : c1 == '2' ? COMPOSITION_WITH_RULE \
2600 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2601 : COMPOSITION_WITH_RULE_ALTCHARS); \
2602 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2603 : COMPOSING_COMPONENT_CHAR); \
2604 component_idx = component_len = 0; \
2605 } \
2606 } while (0)
2607
2608
2609 /* Handle compositoin end sequence ESC 1. */
2610
2611 #define DECODE_COMPOSITION_END() \
2612 do { \
2613 int nchars = (component_len > 0 ? component_idx - component_len \
2614 : method == COMPOSITION_RELATIVE ? component_idx \
2615 : (component_idx + 1) / 2); \
2616 int i; \
2617 int *saved_charbuf = charbuf; \
2618 \
2619 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2620 if (method != COMPOSITION_RELATIVE) \
2621 { \
2622 if (component_len == 0) \
2623 for (i = 0; i < component_idx; i++) \
2624 *charbuf++ = components[i]; \
2625 else \
2626 for (i = 0; i < component_len; i++) \
2627 *charbuf++ = components[i]; \
2628 *saved_charbuf = saved_charbuf - charbuf; \
2629 } \
2630 if (method == COMPOSITION_WITH_RULE) \
2631 for (i = 0; i < component_idx; i += 2, char_offset++) \
2632 *charbuf++ = components[i]; \
2633 else \
2634 for (i = component_len; i < component_idx; i++, char_offset++) \
2635 *charbuf++ = components[i]; \
2636 coding->annotated = 1; \
2637 composition_state = COMPOSING_NO; \
2638 } while (0)
2639
2640
2641 /* Decode a composition rule from the byte C1 (and maybe one more byte
2642 from SRC) and store one encoded composition rule in
2643 coding->cmp_data. */
2644
2645 #define DECODE_COMPOSITION_RULE(c1) \
2646 do { \
2647 (c1) -= 32; \
2648 if (c1 < 81) /* old format (before ver.21) */ \
2649 { \
2650 int gref = (c1) / 9; \
2651 int nref = (c1) % 9; \
2652 if (gref == 4) gref = 10; \
2653 if (nref == 4) nref = 10; \
2654 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2655 } \
2656 else if (c1 < 93) /* new format (after ver.21) */ \
2657 { \
2658 ONE_MORE_BYTE (c2); \
2659 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2660 } \
2661 else \
2662 c1 = 0; \
2663 } while (0)
2664
2665
2666 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2667
2668 static void
2669 decode_coding_iso_2022 (coding)
2670 struct coding_system *coding;
2671 {
2672 unsigned char *src = coding->source + coding->consumed;
2673 unsigned char *src_end = coding->source + coding->src_bytes;
2674 unsigned char *src_base;
2675 int *charbuf = coding->charbuf;
2676 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2677 int consumed_chars = 0, consumed_chars_base;
2678 int char_offset = 0;
2679 int multibytep = coding->src_multibyte;
2680 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2681 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2682 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2683 struct charset *charset;
2684 int c;
2685 /* For handling composition sequence. */
2686 #define COMPOSING_NO 0
2687 #define COMPOSING_CHAR 1
2688 #define COMPOSING_RULE 2
2689 #define COMPOSING_COMPONENT_CHAR 3
2690 #define COMPOSING_COMPONENT_RULE 4
2691
2692 int composition_state = COMPOSING_NO;
2693 enum composition_method method;
2694 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2695 int component_idx;
2696 int component_len;
2697 Lisp_Object attrs, eol_type, charset_list;
2698
2699 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2700 setup_iso_safe_charsets (attrs);
2701
2702 while (1)
2703 {
2704 int c1, c2;
2705
2706 src_base = src;
2707 consumed_chars_base = consumed_chars;
2708
2709 if (charbuf >= charbuf_end)
2710 break;
2711
2712 ONE_MORE_BYTE (c1);
2713
2714 /* We produce no character or one character. */
2715 switch (iso_code_class [c1])
2716 {
2717 case ISO_0x20_or_0x7F:
2718 if (composition_state != COMPOSING_NO)
2719 {
2720 if (composition_state == COMPOSING_RULE
2721 || composition_state == COMPOSING_COMPONENT_RULE)
2722 {
2723 DECODE_COMPOSITION_RULE (c1);
2724 components[component_idx++] = c1;
2725 composition_state--;
2726 continue;
2727 }
2728 else if (method == COMPOSITION_WITH_RULE)
2729 composition_state = COMPOSING_RULE;
2730 else if (method == COMPOSITION_WITH_RULE_ALTCHARS
2731 && composition_state == COMPOSING_COMPONENT_CHAR)
2732 composition_state = COMPOSING_COMPONENT_CHAR;
2733 }
2734 if (charset_id_0 < 0
2735 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2736 {
2737 /* This is SPACE or DEL. */
2738 charset = CHARSET_FROM_ID (charset_ascii);
2739 break;
2740 }
2741 /* This is a graphic character, we fall down ... */
2742
2743 case ISO_graphic_plane_0:
2744 if (composition_state == COMPOSING_RULE)
2745 {
2746 DECODE_COMPOSITION_RULE (c1);
2747 components[component_idx++] = c1;
2748 composition_state = COMPOSING_CHAR;
2749 }
2750 charset = CHARSET_FROM_ID (charset_id_0);
2751 break;
2752
2753 case ISO_0xA0_or_0xFF:
2754 if (charset_id_1 < 0
2755 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2756 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2757 goto invalid_code;
2758 /* This is a graphic character, we fall down ... */
2759
2760 case ISO_graphic_plane_1:
2761 if (charset_id_1 < 0)
2762 goto invalid_code;
2763 charset = CHARSET_FROM_ID (charset_id_1);
2764 break;
2765
2766 case ISO_carriage_return:
2767 if (c1 == '\r')
2768 {
2769 if (EQ (eol_type, Qdos))
2770 {
2771 if (src == src_end)
2772 goto no_more_source;
2773 if (*src == '\n')
2774 ONE_MORE_BYTE (c1);
2775 }
2776 else if (EQ (eol_type, Qmac))
2777 c1 = '\n';
2778 }
2779 /* fall through */
2780
2781 case ISO_control_0:
2782 MAYBE_FINISH_COMPOSITION ();
2783 charset = CHARSET_FROM_ID (charset_ascii);
2784 break;
2785
2786 case ISO_control_1:
2787 MAYBE_FINISH_COMPOSITION ();
2788 goto invalid_code;
2789
2790 case ISO_shift_out:
2791 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2792 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2793 goto invalid_code;
2794 CODING_ISO_INVOCATION (coding, 0) = 1;
2795 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2796 continue;
2797
2798 case ISO_shift_in:
2799 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2800 goto invalid_code;
2801 CODING_ISO_INVOCATION (coding, 0) = 0;
2802 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2803 continue;
2804
2805 case ISO_single_shift_2_7:
2806 case ISO_single_shift_2:
2807 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2808 goto invalid_code;
2809 /* SS2 is handled as an escape sequence of ESC 'N' */
2810 c1 = 'N';
2811 goto label_escape_sequence;
2812
2813 case ISO_single_shift_3:
2814 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2815 goto invalid_code;
2816 /* SS2 is handled as an escape sequence of ESC 'O' */
2817 c1 = 'O';
2818 goto label_escape_sequence;
2819
2820 case ISO_control_sequence_introducer:
2821 /* CSI is handled as an escape sequence of ESC '[' ... */
2822 c1 = '[';
2823 goto label_escape_sequence;
2824
2825 case ISO_escape:
2826 ONE_MORE_BYTE (c1);
2827 label_escape_sequence:
2828 /* Escape sequences handled here are invocation,
2829 designation, direction specification, and character
2830 composition specification. */
2831 switch (c1)
2832 {
2833 case '&': /* revision of following character set */
2834 ONE_MORE_BYTE (c1);
2835 if (!(c1 >= '@' && c1 <= '~'))
2836 goto invalid_code;
2837 ONE_MORE_BYTE (c1);
2838 if (c1 != ISO_CODE_ESC)
2839 goto invalid_code;
2840 ONE_MORE_BYTE (c1);
2841 goto label_escape_sequence;
2842
2843 case '$': /* designation of 2-byte character set */
2844 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2845 goto invalid_code;
2846 ONE_MORE_BYTE (c1);
2847 if (c1 >= '@' && c1 <= 'B')
2848 { /* designation of JISX0208.1978, GB2312.1980,
2849 or JISX0208.1980 */
2850 DECODE_DESIGNATION (0, 2, 0, c1);
2851 }
2852 else if (c1 >= 0x28 && c1 <= 0x2B)
2853 { /* designation of DIMENSION2_CHARS94 character set */
2854 ONE_MORE_BYTE (c2);
2855 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2856 }
2857 else if (c1 >= 0x2C && c1 <= 0x2F)
2858 { /* designation of DIMENSION2_CHARS96 character set */
2859 ONE_MORE_BYTE (c2);
2860 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2861 }
2862 else
2863 goto invalid_code;
2864 /* We must update these variables now. */
2865 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2866 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2867 continue;
2868
2869 case 'n': /* invocation of locking-shift-2 */
2870 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2871 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2872 goto invalid_code;
2873 CODING_ISO_INVOCATION (coding, 0) = 2;
2874 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2875 continue;
2876
2877 case 'o': /* invocation of locking-shift-3 */
2878 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2879 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2880 goto invalid_code;
2881 CODING_ISO_INVOCATION (coding, 0) = 3;
2882 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2883 continue;
2884
2885 case 'N': /* invocation of single-shift-2 */
2886 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2887 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2888 goto invalid_code;
2889 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
2890 ONE_MORE_BYTE (c1);
2891 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2892 goto invalid_code;
2893 break;
2894
2895 case 'O': /* invocation of single-shift-3 */
2896 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2897 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2898 goto invalid_code;
2899 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
2900 ONE_MORE_BYTE (c1);
2901 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2902 goto invalid_code;
2903 break;
2904
2905 case '0': case '2': case '3': case '4': /* start composition */
2906 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2907 goto invalid_code;
2908 DECODE_COMPOSITION_START (c1);
2909 continue;
2910
2911 case '1': /* end composition */
2912 if (composition_state == COMPOSING_NO)
2913 goto invalid_code;
2914 DECODE_COMPOSITION_END ();
2915 continue;
2916
2917 case '[': /* specification of direction */
2918 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2919 goto invalid_code;
2920 /* For the moment, nested direction is not supported.
2921 So, `coding->mode & CODING_MODE_DIRECTION' zero means
2922 left-to-right, and nozero means right-to-left. */
2923 ONE_MORE_BYTE (c1);
2924 switch (c1)
2925 {
2926 case ']': /* end of the current direction */
2927 coding->mode &= ~CODING_MODE_DIRECTION;
2928
2929 case '0': /* end of the current direction */
2930 case '1': /* start of left-to-right direction */
2931 ONE_MORE_BYTE (c1);
2932 if (c1 == ']')
2933 coding->mode &= ~CODING_MODE_DIRECTION;
2934 else
2935 goto invalid_code;
2936 break;
2937
2938 case '2': /* start of right-to-left direction */
2939 ONE_MORE_BYTE (c1);
2940 if (c1 == ']')
2941 coding->mode |= CODING_MODE_DIRECTION;
2942 else
2943 goto invalid_code;
2944 break;
2945
2946 default:
2947 goto invalid_code;
2948 }
2949 continue;
2950
2951 default:
2952 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2953 goto invalid_code;
2954 if (c1 >= 0x28 && c1 <= 0x2B)
2955 { /* designation of DIMENSION1_CHARS94 character set */
2956 ONE_MORE_BYTE (c2);
2957 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
2958 }
2959 else if (c1 >= 0x2C && c1 <= 0x2F)
2960 { /* designation of DIMENSION1_CHARS96 character set */
2961 ONE_MORE_BYTE (c2);
2962 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
2963 }
2964 else
2965 goto invalid_code;
2966 /* We must update these variables now. */
2967 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2968 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2969 continue;
2970 }
2971 }
2972
2973 /* Now we know CHARSET and 1st position code C1 of a character.
2974 Produce a decoded character while getting 2nd position code
2975 C2 if necessary. */
2976 c1 &= 0x7F;
2977 if (CHARSET_DIMENSION (charset) > 1)
2978 {
2979 ONE_MORE_BYTE (c2);
2980 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
2981 /* C2 is not in a valid range. */
2982 goto invalid_code;
2983 c1 = (c1 << 8) | (c2 & 0x7F);
2984 if (CHARSET_DIMENSION (charset) > 2)
2985 {
2986 ONE_MORE_BYTE (c2);
2987 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
2988 /* C2 is not in a valid range. */
2989 goto invalid_code;
2990 c1 = (c1 << 8) | (c2 & 0x7F);
2991 }
2992 }
2993
2994 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
2995 if (c < 0)
2996 {
2997 MAYBE_FINISH_COMPOSITION ();
2998 for (; src_base < src; src_base++, char_offset++)
2999 {
3000 if (ASCII_BYTE_P (*src_base))
3001 *charbuf++ = *src_base;
3002 else
3003 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3004 }
3005 }
3006 else if (composition_state == COMPOSING_NO)
3007 {
3008 *charbuf++ = c;
3009 char_offset++;
3010 }
3011 else
3012 components[component_idx++] = c;
3013 continue;
3014
3015 invalid_code:
3016 MAYBE_FINISH_COMPOSITION ();
3017 src = src_base;
3018 consumed_chars = consumed_chars_base;
3019 ONE_MORE_BYTE (c);
3020 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3021 coding->errors++;
3022 }
3023
3024 no_more_source:
3025 coding->consumed_char += consumed_chars_base;
3026 coding->consumed = src_base - coding->source;
3027 coding->charbuf_used = charbuf - coding->charbuf;
3028 }
3029
3030
3031 /* ISO2022 encoding stuff. */
3032
3033 /*
3034 It is not enough to say just "ISO2022" on encoding, we have to
3035 specify more details. In Emacs, each coding system of ISO2022
3036 variant has the following specifications:
3037 1. Initial designation to G0 thru G3.
3038 2. Allows short-form designation?
3039 3. ASCII should be designated to G0 before control characters?
3040 4. ASCII should be designated to G0 at end of line?
3041 5. 7-bit environment or 8-bit environment?
3042 6. Use locking-shift?
3043 7. Use Single-shift?
3044 And the following two are only for Japanese:
3045 8. Use ASCII in place of JIS0201-1976-Roman?
3046 9. Use JISX0208-1983 in place of JISX0208-1978?
3047 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3048 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3049 details.
3050 */
3051
3052 /* Produce codes (escape sequence) for designating CHARSET to graphic
3053 register REG at DST, and increment DST. If <final-char> of CHARSET is
3054 '@', 'A', or 'B' and the coding system CODING allows, produce
3055 designation sequence of short-form. */
3056
3057 #define ENCODE_DESIGNATION(charset, reg, coding) \
3058 do { \
3059 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3060 char *intermediate_char_94 = "()*+"; \
3061 char *intermediate_char_96 = ",-./"; \
3062 int revision = -1; \
3063 int c; \
3064 \
3065 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3066 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3067 \
3068 if (revision >= 0) \
3069 { \
3070 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3071 EMIT_ONE_BYTE ('@' + revision); \
3072 } \
3073 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3074 if (CHARSET_DIMENSION (charset) == 1) \
3075 { \
3076 if (! CHARSET_ISO_CHARS_96 (charset)) \
3077 c = intermediate_char_94[reg]; \
3078 else \
3079 c = intermediate_char_96[reg]; \
3080 EMIT_ONE_ASCII_BYTE (c); \
3081 } \
3082 else \
3083 { \
3084 EMIT_ONE_ASCII_BYTE ('$'); \
3085 if (! CHARSET_ISO_CHARS_96 (charset)) \
3086 { \
3087 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3088 || reg != 0 \
3089 || final_char < '@' || final_char > 'B') \
3090 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3091 } \
3092 else \
3093 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3094 } \
3095 EMIT_ONE_ASCII_BYTE (final_char); \
3096 \
3097 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3098 } while (0)
3099
3100
3101 /* The following two macros produce codes (control character or escape
3102 sequence) for ISO2022 single-shift functions (single-shift-2 and
3103 single-shift-3). */
3104
3105 #define ENCODE_SINGLE_SHIFT_2 \
3106 do { \
3107 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3108 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3109 else \
3110 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3111 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3112 } while (0)
3113
3114
3115 #define ENCODE_SINGLE_SHIFT_3 \
3116 do { \
3117 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3118 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3119 else \
3120 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3121 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3122 } while (0)
3123
3124
3125 /* The following four macros produce codes (control character or
3126 escape sequence) for ISO2022 locking-shift functions (shift-in,
3127 shift-out, locking-shift-2, and locking-shift-3). */
3128
3129 #define ENCODE_SHIFT_IN \
3130 do { \
3131 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3132 CODING_ISO_INVOCATION (coding, 0) = 0; \
3133 } while (0)
3134
3135
3136 #define ENCODE_SHIFT_OUT \
3137 do { \
3138 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3139 CODING_ISO_INVOCATION (coding, 0) = 1; \
3140 } while (0)
3141
3142
3143 #define ENCODE_LOCKING_SHIFT_2 \
3144 do { \
3145 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3146 CODING_ISO_INVOCATION (coding, 0) = 2; \
3147 } while (0)
3148
3149
3150 #define ENCODE_LOCKING_SHIFT_3 \
3151 do { \
3152 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3153 CODING_ISO_INVOCATION (coding, 0) = 3; \
3154 } while (0)
3155
3156
3157 /* Produce codes for a DIMENSION1 character whose character set is
3158 CHARSET and whose position-code is C1. Designation and invocation
3159 sequences are also produced in advance if necessary. */
3160
3161 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3162 do { \
3163 int id = CHARSET_ID (charset); \
3164 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3165 { \
3166 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3167 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3168 else \
3169 EMIT_ONE_BYTE (c1 | 0x80); \
3170 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3171 break; \
3172 } \
3173 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3174 { \
3175 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3176 break; \
3177 } \
3178 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3179 { \
3180 EMIT_ONE_BYTE (c1 | 0x80); \
3181 break; \
3182 } \
3183 else \
3184 /* Since CHARSET is not yet invoked to any graphic planes, we \
3185 must invoke it, or, at first, designate it to some graphic \
3186 register. Then repeat the loop to actually produce the \
3187 character. */ \
3188 dst = encode_invocation_designation (charset, coding, dst, \
3189 &produced_chars); \
3190 } while (1)
3191
3192
3193 /* Produce codes for a DIMENSION2 character whose character set is
3194 CHARSET and whose position-codes are C1 and C2. Designation and
3195 invocation codes are also produced in advance if necessary. */
3196
3197 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3198 do { \
3199 int id = CHARSET_ID (charset); \
3200 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3201 { \
3202 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3203 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3204 else \
3205 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3206 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3207 break; \
3208 } \
3209 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3210 { \
3211 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3212 break; \
3213 } \
3214 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3215 { \
3216 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3217 break; \
3218 } \
3219 else \
3220 /* Since CHARSET is not yet invoked to any graphic planes, we \
3221 must invoke it, or, at first, designate it to some graphic \
3222 register. Then repeat the loop to actually produce the \
3223 character. */ \
3224 dst = encode_invocation_designation (charset, coding, dst, \
3225 &produced_chars); \
3226 } while (1)
3227
3228
3229 #define ENCODE_ISO_CHARACTER(charset, c) \
3230 do { \
3231 int code = ENCODE_CHAR ((charset),(c)); \
3232 \
3233 if (CHARSET_DIMENSION (charset) == 1) \
3234 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3235 else \
3236 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3237 } while (0)
3238
3239
3240 /* Produce designation and invocation codes at a place pointed by DST
3241 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3242 Return new DST. */
3243
3244 unsigned char *
3245 encode_invocation_designation (charset, coding, dst, p_nchars)
3246 struct charset *charset;
3247 struct coding_system *coding;
3248 unsigned char *dst;
3249 int *p_nchars;
3250 {
3251 int multibytep = coding->dst_multibyte;
3252 int produced_chars = *p_nchars;
3253 int reg; /* graphic register number */
3254 int id = CHARSET_ID (charset);
3255
3256 /* At first, check designations. */
3257 for (reg = 0; reg < 4; reg++)
3258 if (id == CODING_ISO_DESIGNATION (coding, reg))
3259 break;
3260
3261 if (reg >= 4)
3262 {
3263 /* CHARSET is not yet designated to any graphic registers. */
3264 /* At first check the requested designation. */
3265 reg = CODING_ISO_REQUEST (coding, id);
3266 if (reg < 0)
3267 /* Since CHARSET requests no special designation, designate it
3268 to graphic register 0. */
3269 reg = 0;
3270
3271 ENCODE_DESIGNATION (charset, reg, coding);
3272 }
3273
3274 if (CODING_ISO_INVOCATION (coding, 0) != reg
3275 && CODING_ISO_INVOCATION (coding, 1) != reg)
3276 {
3277 /* Since the graphic register REG is not invoked to any graphic
3278 planes, invoke it to graphic plane 0. */
3279 switch (reg)
3280 {
3281 case 0: /* graphic register 0 */
3282 ENCODE_SHIFT_IN;
3283 break;
3284
3285 case 1: /* graphic register 1 */
3286 ENCODE_SHIFT_OUT;
3287 break;
3288
3289 case 2: /* graphic register 2 */
3290 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3291 ENCODE_SINGLE_SHIFT_2;
3292 else
3293 ENCODE_LOCKING_SHIFT_2;
3294 break;
3295
3296 case 3: /* graphic register 3 */
3297 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3298 ENCODE_SINGLE_SHIFT_3;
3299 else
3300 ENCODE_LOCKING_SHIFT_3;
3301 break;
3302 }
3303 }
3304
3305 *p_nchars = produced_chars;
3306 return dst;
3307 }
3308
3309 /* The following three macros produce codes for indicating direction
3310 of text. */
3311 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
3312 do { \
3313 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3314 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
3315 else \
3316 EMIT_ONE_BYTE (ISO_CODE_CSI); \
3317 } while (0)
3318
3319
3320 #define ENCODE_DIRECTION_R2L() \
3321 do { \
3322 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3323 EMIT_TWO_ASCII_BYTES ('2', ']'); \
3324 } while (0)
3325
3326
3327 #define ENCODE_DIRECTION_L2R() \
3328 do { \
3329 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3330 EMIT_TWO_ASCII_BYTES ('0', ']'); \
3331 } while (0)
3332
3333
3334 /* Produce codes for designation and invocation to reset the graphic
3335 planes and registers to initial state. */
3336 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3337 do { \
3338 int reg; \
3339 struct charset *charset; \
3340 \
3341 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3342 ENCODE_SHIFT_IN; \
3343 for (reg = 0; reg < 4; reg++) \
3344 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3345 && (CODING_ISO_DESIGNATION (coding, reg) \
3346 != CODING_ISO_INITIAL (coding, reg))) \
3347 { \
3348 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3349 ENCODE_DESIGNATION (charset, reg, coding); \
3350 } \
3351 } while (0)
3352
3353
3354 /* Produce designation sequences of charsets in the line started from
3355 SRC to a place pointed by DST, and return updated DST.
3356
3357 If the current block ends before any end-of-line, we may fail to
3358 find all the necessary designations. */
3359
3360 static unsigned char *
3361 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3362 struct coding_system *coding;
3363 int *charbuf, *charbuf_end;
3364 unsigned char *dst;
3365 {
3366 struct charset *charset;
3367 /* Table of charsets to be designated to each graphic register. */
3368 int r[4];
3369 int c, found = 0, reg;
3370 int produced_chars = 0;
3371 int multibytep = coding->dst_multibyte;
3372 Lisp_Object attrs;
3373 Lisp_Object charset_list;
3374
3375 attrs = CODING_ID_ATTRS (coding->id);
3376 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3377 if (EQ (charset_list, Qiso_2022))
3378 charset_list = Viso_2022_charset_list;
3379
3380 for (reg = 0; reg < 4; reg++)
3381 r[reg] = -1;
3382
3383 while (found < 4)
3384 {
3385 int id;
3386
3387 c = *charbuf++;
3388 if (c == '\n')
3389 break;
3390 charset = char_charset (c, charset_list, NULL);
3391 id = CHARSET_ID (charset);
3392 reg = CODING_ISO_REQUEST (coding, id);
3393 if (reg >= 0 && r[reg] < 0)
3394 {
3395 found++;
3396 r[reg] = id;
3397 }
3398 }
3399
3400 if (found)
3401 {
3402 for (reg = 0; reg < 4; reg++)
3403 if (r[reg] >= 0
3404 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3405 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3406 }
3407
3408 return dst;
3409 }
3410
3411 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3412
3413 static int
3414 encode_coding_iso_2022 (coding)
3415 struct coding_system *coding;
3416 {
3417 int multibytep = coding->dst_multibyte;
3418 int *charbuf = coding->charbuf;
3419 int *charbuf_end = charbuf + coding->charbuf_used;
3420 unsigned char *dst = coding->destination + coding->produced;
3421 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3422 int safe_room = 16;
3423 int bol_designation
3424 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3425 && CODING_ISO_BOL (coding));
3426 int produced_chars = 0;
3427 Lisp_Object attrs, eol_type, charset_list;
3428 int ascii_compatible;
3429 int c;
3430
3431 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3432 setup_iso_safe_charsets (attrs);
3433 coding->safe_charsets
3434 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3435
3436 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3437
3438 while (charbuf < charbuf_end)
3439 {
3440 ASSURE_DESTINATION (safe_room);
3441
3442 if (bol_designation)
3443 {
3444 unsigned char *dst_prev = dst;
3445
3446 /* We have to produce designation sequences if any now. */
3447 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3448 bol_designation = 0;
3449 /* We are sure that designation sequences are all ASCII bytes. */
3450 produced_chars += dst - dst_prev;
3451 }
3452
3453 c = *charbuf++;
3454
3455 /* Now encode the character C. */
3456 if (c < 0x20 || c == 0x7F)
3457 {
3458 if (c == '\n'
3459 || (c == '\r' && EQ (eol_type, Qmac)))
3460 {
3461 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3462 ENCODE_RESET_PLANE_AND_REGISTER ();
3463 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3464 {
3465 int i;
3466
3467 for (i = 0; i < 4; i++)
3468 CODING_ISO_DESIGNATION (coding, i)
3469 = CODING_ISO_INITIAL (coding, i);
3470 }
3471 bol_designation
3472 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3473 }
3474 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3475 ENCODE_RESET_PLANE_AND_REGISTER ();
3476 EMIT_ONE_ASCII_BYTE (c);
3477 }
3478 else if (ASCII_CHAR_P (c))
3479 {
3480 if (ascii_compatible)
3481 EMIT_ONE_ASCII_BYTE (c);
3482 else
3483 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii), c);
3484 }
3485 else
3486 {
3487 struct charset *charset = char_charset (c, charset_list, NULL);
3488
3489 if (!charset)
3490 {
3491 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3492 {
3493 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3494 charset = CHARSET_FROM_ID (charset_ascii);
3495 }
3496 else
3497 {
3498 c = coding->default_char;
3499 charset = char_charset (c, charset_list, NULL);
3500 }
3501 }
3502 ENCODE_ISO_CHARACTER (charset, c);
3503 }
3504 }
3505
3506 if (coding->mode & CODING_MODE_LAST_BLOCK
3507 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3508 {
3509 ASSURE_DESTINATION (safe_room);
3510 ENCODE_RESET_PLANE_AND_REGISTER ();
3511 }
3512 coding->result = CODING_RESULT_SUCCESS;
3513 CODING_ISO_BOL (coding) = bol_designation;
3514 coding->produced_char += produced_chars;
3515 coding->produced = dst - coding->destination;
3516 return 0;
3517 }
3518
3519 \f
3520 /*** 8,9. SJIS and BIG5 handlers ***/
3521
3522 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3523 quite widely. So, for the moment, Emacs supports them in the bare
3524 C code. But, in the future, they may be supported only by CCL. */
3525
3526 /* SJIS is a coding system encoding three character sets: ASCII, right
3527 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3528 as is. A character of charset katakana-jisx0201 is encoded by
3529 "position-code + 0x80". A character of charset japanese-jisx0208
3530 is encoded in 2-byte but two position-codes are divided and shifted
3531 so that it fit in the range below.
3532
3533 --- CODE RANGE of SJIS ---
3534 (character set) (range)
3535 ASCII 0x00 .. 0x7F
3536 KATAKANA-JISX0201 0xA0 .. 0xDF
3537 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3538 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3539 -------------------------------
3540
3541 */
3542
3543 /* BIG5 is a coding system encoding two character sets: ASCII and
3544 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3545 character set and is encoded in two-byte.
3546
3547 --- CODE RANGE of BIG5 ---
3548 (character set) (range)
3549 ASCII 0x00 .. 0x7F
3550 Big5 (1st byte) 0xA1 .. 0xFE
3551 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3552 --------------------------
3553
3554 */
3555
3556 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3557 Check if a text is encoded in SJIS. If it is, return
3558 CATEGORY_MASK_SJIS, else return 0. */
3559
3560 static int
3561 detect_coding_sjis (coding, mask)
3562 struct coding_system *coding;
3563 int *mask;
3564 {
3565 unsigned char *src = coding->source, *src_base = src;
3566 unsigned char *src_end = coding->source + coding->src_bytes;
3567 int multibytep = coding->src_multibyte;
3568 int consumed_chars = 0;
3569 int found = 0;
3570 int c;
3571
3572 /* A coding system of this category is always ASCII compatible. */
3573 src += coding->head_ascii;
3574
3575 while (1)
3576 {
3577 ONE_MORE_BYTE (c);
3578 if (c < 0x80)
3579 continue;
3580 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3581 {
3582 ONE_MORE_BYTE (c);
3583 if (c < 0x40 || c == 0x7F || c > 0xFC)
3584 break;
3585 found = 1;
3586 }
3587 else if (c >= 0xA0 && c < 0xE0)
3588 found = 1;
3589 else
3590 break;
3591 }
3592 *mask &= ~CATEGORY_MASK_SJIS;
3593 return 0;
3594
3595 no_more_source:
3596 if (!found)
3597 return 0;
3598 *mask &= CATEGORY_MASK_SJIS;
3599 return 1;
3600 }
3601
3602 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3603 Check if a text is encoded in BIG5. If it is, return
3604 CATEGORY_MASK_BIG5, else return 0. */
3605
3606 static int
3607 detect_coding_big5 (coding, mask)
3608 struct coding_system *coding;
3609 int *mask;
3610 {
3611 unsigned char *src = coding->source, *src_base = src;
3612 unsigned char *src_end = coding->source + coding->src_bytes;
3613 int multibytep = coding->src_multibyte;
3614 int consumed_chars = 0;
3615 int found = 0;
3616 int c;
3617
3618 /* A coding system of this category is always ASCII compatible. */
3619 src += coding->head_ascii;
3620
3621 while (1)
3622 {
3623 ONE_MORE_BYTE (c);
3624 if (c < 0x80)
3625 continue;
3626 if (c >= 0xA1)
3627 {
3628 ONE_MORE_BYTE (c);
3629 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3630 return 0;
3631 found = 1;
3632 }
3633 else
3634 break;
3635 }
3636 *mask &= ~CATEGORY_MASK_BIG5;
3637 return 0;
3638
3639 no_more_source:
3640 if (!found)
3641 return 0;
3642 *mask &= CATEGORY_MASK_BIG5;
3643 return 1;
3644 }
3645
3646 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3647 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3648
3649 static void
3650 decode_coding_sjis (coding)
3651 struct coding_system *coding;
3652 {
3653 unsigned char *src = coding->source + coding->consumed;
3654 unsigned char *src_end = coding->source + coding->src_bytes;
3655 unsigned char *src_base;
3656 int *charbuf = coding->charbuf;
3657 int *charbuf_end = charbuf + coding->charbuf_size;
3658 int consumed_chars = 0, consumed_chars_base;
3659 int multibytep = coding->src_multibyte;
3660 struct charset *charset_roman, *charset_kanji, *charset_kana;
3661 Lisp_Object attrs, eol_type, charset_list, val;
3662
3663 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3664
3665 val = charset_list;
3666 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3667 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3668 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
3669
3670 while (1)
3671 {
3672 int c, c1;
3673
3674 src_base = src;
3675 consumed_chars_base = consumed_chars;
3676
3677 if (charbuf >= charbuf_end)
3678 break;
3679
3680 ONE_MORE_BYTE (c);
3681
3682 if (c == '\r')
3683 {
3684 if (EQ (eol_type, Qdos))
3685 {
3686 if (src == src_end)
3687 goto no_more_source;
3688 if (*src == '\n')
3689 ONE_MORE_BYTE (c);
3690 }
3691 else if (EQ (eol_type, Qmac))
3692 c = '\n';
3693 }
3694 else
3695 {
3696 struct charset *charset;
3697
3698 if (c < 0x80)
3699 charset = charset_roman;
3700 else
3701 {
3702 if (c >= 0xF0)
3703 goto invalid_code;
3704 if (c < 0xA0 || c >= 0xE0)
3705 {
3706 /* SJIS -> JISX0208 */
3707 ONE_MORE_BYTE (c1);
3708 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3709 goto invalid_code;
3710 c = (c << 8) | c1;
3711 SJIS_TO_JIS (c);
3712 charset = charset_kanji;
3713 }
3714 else
3715 /* SJIS -> JISX0201-Kana */
3716 charset = charset_kana;
3717 }
3718 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3719 }
3720 *charbuf++ = c;
3721 continue;
3722
3723 invalid_code:
3724 src = src_base;
3725 consumed_chars = consumed_chars_base;
3726 ONE_MORE_BYTE (c);
3727 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3728 coding->errors++;
3729 }
3730
3731 no_more_source:
3732 coding->consumed_char += consumed_chars_base;
3733 coding->consumed = src_base - coding->source;
3734 coding->charbuf_used = charbuf - coding->charbuf;
3735 }
3736
3737 static void
3738 decode_coding_big5 (coding)
3739 struct coding_system *coding;
3740 {
3741 unsigned char *src = coding->source + coding->consumed;
3742 unsigned char *src_end = coding->source + coding->src_bytes;
3743 unsigned char *src_base;
3744 int *charbuf = coding->charbuf;
3745 int *charbuf_end = charbuf + coding->charbuf_size;
3746 int consumed_chars = 0, consumed_chars_base;
3747 int multibytep = coding->src_multibyte;
3748 struct charset *charset_roman, *charset_big5;
3749 Lisp_Object attrs, eol_type, charset_list, val;
3750
3751 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3752 val = charset_list;
3753 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3754 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3755
3756 while (1)
3757 {
3758 int c, c1;
3759
3760 src_base = src;
3761 consumed_chars_base = consumed_chars;
3762
3763 if (charbuf >= charbuf_end)
3764 break;
3765
3766 ONE_MORE_BYTE (c);
3767
3768 if (c == '\r')
3769 {
3770 if (EQ (eol_type, Qdos))
3771 {
3772 if (src == src_end)
3773 goto no_more_source;
3774 if (*src == '\n')
3775 ONE_MORE_BYTE (c);
3776 }
3777 else if (EQ (eol_type, Qmac))
3778 c = '\n';
3779 }
3780 else
3781 {
3782 struct charset *charset;
3783 if (c < 0x80)
3784 charset = charset_roman;
3785 else
3786 {
3787 /* BIG5 -> Big5 */
3788 if (c < 0xA1 || c > 0xFE)
3789 goto invalid_code;
3790 ONE_MORE_BYTE (c1);
3791 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3792 goto invalid_code;
3793 c = c << 8 | c1;
3794 charset = charset_big5;
3795 }
3796 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3797 }
3798
3799 *charbuf++ = c;
3800 continue;
3801
3802 invalid_code:
3803 src = src_base;
3804 consumed_chars = consumed_chars_base;
3805 ONE_MORE_BYTE (c);
3806 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3807 coding->errors++;
3808 }
3809
3810 no_more_source:
3811 coding->consumed_char += consumed_chars_base;
3812 coding->consumed = src_base - coding->source;
3813 coding->charbuf_used = charbuf - coding->charbuf;
3814 }
3815
3816 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3817 This function can encode charsets `ascii', `katakana-jisx0201',
3818 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3819 are sure that all these charsets are registered as official charset
3820 (i.e. do not have extended leading-codes). Characters of other
3821 charsets are produced without any encoding. If SJIS_P is 1, encode
3822 SJIS text, else encode BIG5 text. */
3823
3824 static int
3825 encode_coding_sjis (coding)
3826 struct coding_system *coding;
3827 {
3828 int multibytep = coding->dst_multibyte;
3829 int *charbuf = coding->charbuf;
3830 int *charbuf_end = charbuf + coding->charbuf_used;
3831 unsigned char *dst = coding->destination + coding->produced;
3832 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3833 int safe_room = 4;
3834 int produced_chars = 0;
3835 Lisp_Object attrs, eol_type, charset_list, val;
3836 int ascii_compatible;
3837 struct charset *charset_roman, *charset_kanji, *charset_kana;
3838 int c;
3839
3840 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3841 val = charset_list;
3842 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3843 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3844 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3845
3846 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3847
3848 while (charbuf < charbuf_end)
3849 {
3850 ASSURE_DESTINATION (safe_room);
3851 c = *charbuf++;
3852 /* Now encode the character C. */
3853 if (ASCII_CHAR_P (c) && ascii_compatible)
3854 EMIT_ONE_ASCII_BYTE (c);
3855 else
3856 {
3857 unsigned code;
3858 struct charset *charset = char_charset (c, charset_list, &code);
3859
3860 if (!charset)
3861 {
3862 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3863 {
3864 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3865 charset = CHARSET_FROM_ID (charset_ascii);
3866 }
3867 else
3868 {
3869 c = coding->default_char;
3870 charset = char_charset (c, charset_list, &code);
3871 }
3872 }
3873 if (code == CHARSET_INVALID_CODE (charset))
3874 abort ();
3875 if (charset == charset_kanji)
3876 {
3877 int c1, c2;
3878 JIS_TO_SJIS (code);
3879 c1 = code >> 8, c2 = code & 0xFF;
3880 EMIT_TWO_BYTES (c1, c2);
3881 }
3882 else if (charset == charset_kana)
3883 EMIT_ONE_BYTE (code | 0x80);
3884 else
3885 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3886 }
3887 }
3888 coding->result = CODING_RESULT_SUCCESS;
3889 coding->produced_char += produced_chars;
3890 coding->produced = dst - coding->destination;
3891 return 0;
3892 }
3893
3894 static int
3895 encode_coding_big5 (coding)
3896 struct coding_system *coding;
3897 {
3898 int multibytep = coding->dst_multibyte;
3899 int *charbuf = coding->charbuf;
3900 int *charbuf_end = charbuf + coding->charbuf_used;
3901 unsigned char *dst = coding->destination + coding->produced;
3902 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3903 int safe_room = 4;
3904 int produced_chars = 0;
3905 Lisp_Object attrs, eol_type, charset_list, val;
3906 int ascii_compatible;
3907 struct charset *charset_roman, *charset_big5;
3908 int c;
3909
3910 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3911 val = charset_list;
3912 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3913 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3914 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3915
3916 while (charbuf < charbuf_end)
3917 {
3918 ASSURE_DESTINATION (safe_room);
3919 c = *charbuf++;
3920 /* Now encode the character C. */
3921 if (ASCII_CHAR_P (c) && ascii_compatible)
3922 EMIT_ONE_ASCII_BYTE (c);
3923 else
3924 {
3925 unsigned code;
3926 struct charset *charset = char_charset (c, charset_list, &code);
3927
3928 if (! charset)
3929 {
3930 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3931 {
3932 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3933 charset = CHARSET_FROM_ID (charset_ascii);
3934 }
3935 else
3936 {
3937 c = coding->default_char;
3938 charset = char_charset (c, charset_list, &code);
3939 }
3940 }
3941 if (code == CHARSET_INVALID_CODE (charset))
3942 abort ();
3943 if (charset == charset_big5)
3944 {
3945 int c1, c2;
3946
3947 c1 = code >> 8, c2 = code & 0xFF;
3948 EMIT_TWO_BYTES (c1, c2);
3949 }
3950 else
3951 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3952 }
3953 }
3954 coding->result = CODING_RESULT_SUCCESS;
3955 coding->produced_char += produced_chars;
3956 coding->produced = dst - coding->destination;
3957 return 0;
3958 }
3959
3960 \f
3961 /*** 10. CCL handlers ***/
3962
3963 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3964 Check if a text is encoded in a coding system of which
3965 encoder/decoder are written in CCL program. If it is, return
3966 CATEGORY_MASK_CCL, else return 0. */
3967
3968 static int
3969 detect_coding_ccl (coding, mask)
3970 struct coding_system *coding;
3971 int *mask;
3972 {
3973 unsigned char *src = coding->source, *src_base = src;
3974 unsigned char *src_end = coding->source + coding->src_bytes;
3975 int multibytep = coding->src_multibyte;
3976 int consumed_chars = 0;
3977 int found = 0;
3978 unsigned char *valids = CODING_CCL_VALIDS (coding);
3979 int head_ascii = coding->head_ascii;
3980 Lisp_Object attrs;
3981
3982 coding = &coding_categories[coding_category_ccl];
3983 attrs = CODING_ID_ATTRS (coding->id);
3984 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
3985 src += head_ascii;
3986
3987 while (1)
3988 {
3989 int c;
3990 ONE_MORE_BYTE (c);
3991 if (! valids[c])
3992 break;
3993 if (!found && valids[c] > 1)
3994 found = 1;
3995 }
3996 *mask &= ~CATEGORY_MASK_CCL;
3997 return 0;
3998
3999 no_more_source:
4000 if (!found)
4001 return 0;
4002 *mask &= CATEGORY_MASK_CCL;
4003 return 1;
4004 }
4005
4006 static void
4007 decode_coding_ccl (coding)
4008 struct coding_system *coding;
4009 {
4010 unsigned char *src = coding->source + coding->consumed;
4011 unsigned char *src_end = coding->source + coding->src_bytes;
4012 int *charbuf = coding->charbuf;
4013 int *charbuf_end = charbuf + coding->charbuf_size;
4014 int consumed_chars = 0;
4015 int multibytep = coding->src_multibyte;
4016 struct ccl_program ccl;
4017 int source_charbuf[1024];
4018 int source_byteidx[1024];
4019
4020 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4021
4022 while (src < src_end)
4023 {
4024 unsigned char *p = src;
4025 int *source, *source_end;
4026 int i = 0;
4027
4028 if (multibytep)
4029 while (i < 1024 && p < src_end)
4030 {
4031 source_byteidx[i] = p - src;
4032 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4033 }
4034 else
4035 while (i < 1024 && p < src_end)
4036 source_charbuf[i++] = *p++;
4037
4038 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4039 ccl.last_block = 1;
4040
4041 source = source_charbuf;
4042 source_end = source + i;
4043 while (source < source_end)
4044 {
4045 ccl_driver (&ccl, source, charbuf,
4046 source_end - source, charbuf_end - charbuf);
4047 source += ccl.consumed;
4048 charbuf += ccl.produced;
4049 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4050 break;
4051 }
4052 if (source < source_end)
4053 src += source_byteidx[source - source_charbuf];
4054 else
4055 src = p;
4056 consumed_chars += source - source_charbuf;
4057
4058 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4059 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4060 break;
4061 }
4062
4063 switch (ccl.status)
4064 {
4065 case CCL_STAT_SUSPEND_BY_SRC:
4066 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4067 break;
4068 case CCL_STAT_SUSPEND_BY_DST:
4069 break;
4070 case CCL_STAT_QUIT:
4071 case CCL_STAT_INVALID_CMD:
4072 coding->result = CODING_RESULT_INTERRUPT;
4073 break;
4074 default:
4075 coding->result = CODING_RESULT_SUCCESS;
4076 break;
4077 }
4078 coding->consumed_char += consumed_chars;
4079 coding->consumed = src - coding->source;
4080 coding->charbuf_used = charbuf - coding->charbuf;
4081 }
4082
4083 static int
4084 encode_coding_ccl (coding)
4085 struct coding_system *coding;
4086 {
4087 struct ccl_program ccl;
4088 int multibytep = coding->dst_multibyte;
4089 int *charbuf = coding->charbuf;
4090 int *charbuf_end = charbuf + coding->charbuf_used;
4091 unsigned char *dst = coding->destination + coding->produced;
4092 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4093 unsigned char *adjusted_dst_end = dst_end - 1;
4094 int destination_charbuf[1024];
4095 int i, produced_chars = 0;
4096
4097 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4098
4099 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4100 ccl.dst_multibyte = coding->dst_multibyte;
4101
4102 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4103 {
4104 int dst_bytes = dst_end - dst;
4105 if (dst_bytes > 1024)
4106 dst_bytes = 1024;
4107
4108 ccl_driver (&ccl, charbuf, destination_charbuf,
4109 charbuf_end - charbuf, dst_bytes);
4110 charbuf += ccl.consumed;
4111 if (multibytep)
4112 for (i = 0; i < ccl.produced; i++)
4113 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4114 else
4115 {
4116 for (i = 0; i < ccl.produced; i++)
4117 *dst++ = destination_charbuf[i] & 0xFF;
4118 produced_chars += ccl.produced;
4119 }
4120 }
4121
4122 switch (ccl.status)
4123 {
4124 case CCL_STAT_SUSPEND_BY_SRC:
4125 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4126 break;
4127 case CCL_STAT_SUSPEND_BY_DST:
4128 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4129 break;
4130 case CCL_STAT_QUIT:
4131 case CCL_STAT_INVALID_CMD:
4132 coding->result = CODING_RESULT_INTERRUPT;
4133 break;
4134 default:
4135 coding->result = CODING_RESULT_SUCCESS;
4136 break;
4137 }
4138
4139 coding->produced_char += produced_chars;
4140 coding->produced = dst - coding->destination;
4141 return 0;
4142 }
4143
4144
4145 \f
4146 /*** 10, 11. no-conversion handlers ***/
4147
4148 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4149
4150 static void
4151 decode_coding_raw_text (coding)
4152 struct coding_system *coding;
4153 {
4154 coding->chars_at_source = 1;
4155 coding->consumed_char = 0;
4156 coding->consumed = 0;
4157 coding->result = CODING_RESULT_SUCCESS;
4158 }
4159
4160 static int
4161 encode_coding_raw_text (coding)
4162 struct coding_system *coding;
4163 {
4164 int multibytep = coding->dst_multibyte;
4165 int *charbuf = coding->charbuf;
4166 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4167 unsigned char *dst = coding->destination + coding->produced;
4168 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4169 int produced_chars = 0;
4170 int c;
4171
4172 if (multibytep)
4173 {
4174 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4175
4176 if (coding->src_multibyte)
4177 while (charbuf < charbuf_end)
4178 {
4179 ASSURE_DESTINATION (safe_room);
4180 c = *charbuf++;
4181 if (ASCII_CHAR_P (c))
4182 EMIT_ONE_ASCII_BYTE (c);
4183 else if (CHAR_BYTE8_P (c))
4184 {
4185 c = CHAR_TO_BYTE8 (c);
4186 EMIT_ONE_BYTE (c);
4187 }
4188 else
4189 {
4190 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4191
4192 CHAR_STRING_ADVANCE (c, p1);
4193 while (p0 < p1)
4194 EMIT_ONE_BYTE (*p0);
4195 }
4196 }
4197 else
4198 while (charbuf < charbuf_end)
4199 {
4200 ASSURE_DESTINATION (safe_room);
4201 c = *charbuf++;
4202 EMIT_ONE_BYTE (c);
4203 }
4204 }
4205 else
4206 {
4207 if (coding->src_multibyte)
4208 {
4209 int safe_room = MAX_MULTIBYTE_LENGTH;
4210
4211 while (charbuf < charbuf_end)
4212 {
4213 ASSURE_DESTINATION (safe_room);
4214 c = *charbuf++;
4215 if (ASCII_CHAR_P (c))
4216 *dst++ = c;
4217 else if (CHAR_BYTE8_P (c))
4218 *dst++ = CHAR_TO_BYTE8 (c);
4219 else
4220 CHAR_STRING_ADVANCE (c, dst);
4221 produced_chars++;
4222 }
4223 }
4224 else
4225 {
4226 ASSURE_DESTINATION (charbuf_end - charbuf);
4227 while (charbuf < charbuf_end && dst < dst_end)
4228 *dst++ = *charbuf++;
4229 produced_chars = dst - (coding->destination + coding->dst_bytes);
4230 }
4231 }
4232 coding->result = CODING_RESULT_SUCCESS;
4233 coding->produced_char += produced_chars;
4234 coding->produced = dst - coding->destination;
4235 return 0;
4236 }
4237
4238 static int
4239 detect_coding_charset (coding, mask)
4240 struct coding_system *coding;
4241 int *mask;
4242 {
4243 unsigned char *src = coding->source, *src_base = src;
4244 unsigned char *src_end = coding->source + coding->src_bytes;
4245 int multibytep = coding->src_multibyte;
4246 int consumed_chars = 0;
4247 Lisp_Object attrs, valids;
4248
4249 coding = &coding_categories[coding_category_charset];
4250 attrs = CODING_ID_ATTRS (coding->id);
4251 valids = AREF (attrs, coding_attr_charset_valids);
4252
4253 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4254 src += coding->head_ascii;
4255
4256 while (1)
4257 {
4258 int c;
4259
4260 ONE_MORE_BYTE (c);
4261 if (NILP (AREF (valids, c)))
4262 break;
4263 }
4264 *mask &= ~CATEGORY_MASK_CHARSET;
4265 return 0;
4266
4267 no_more_source:
4268 *mask &= CATEGORY_MASK_CHARSET;
4269 return 1;
4270 }
4271
4272 static void
4273 decode_coding_charset (coding)
4274 struct coding_system *coding;
4275 {
4276 unsigned char *src = coding->source + coding->consumed;
4277 unsigned char *src_end = coding->source + coding->src_bytes;
4278 unsigned char *src_base;
4279 int *charbuf = coding->charbuf;
4280 int *charbuf_end = charbuf + coding->charbuf_size;
4281 int consumed_chars = 0, consumed_chars_base;
4282 int multibytep = coding->src_multibyte;
4283 Lisp_Object attrs, eol_type, charset_list, valids;
4284
4285 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4286 valids = AREF (attrs, coding_attr_charset_valids);
4287
4288 while (1)
4289 {
4290 int c;
4291
4292 src_base = src;
4293 consumed_chars_base = consumed_chars;
4294
4295 if (charbuf >= charbuf_end)
4296 break;
4297
4298 ONE_MORE_BYTE (c);
4299 if (c == '\r')
4300 {
4301 if (EQ (eol_type, Qdos))
4302 {
4303 if (src < src_end
4304 && *src == '\n')
4305 ONE_MORE_BYTE (c);
4306 }
4307 else if (EQ (eol_type, Qmac))
4308 c = '\n';
4309 }
4310 else
4311 {
4312 Lisp_Object val;
4313 struct charset *charset;
4314 int c1;
4315
4316 val = AREF (valids, c);
4317 if (NILP (val))
4318 goto invalid_code;
4319 charset = CHARSET_FROM_ID (XFASTINT (val));
4320 if (CHARSET_DIMENSION (charset) > 1)
4321 {
4322 ONE_MORE_BYTE (c1);
4323 c = (c << 8) | c1;
4324 if (CHARSET_DIMENSION (charset) > 2)
4325 {
4326 ONE_MORE_BYTE (c1);
4327 c = (c << 8) | c1;
4328 if (CHARSET_DIMENSION (charset) > 3)
4329 {
4330 ONE_MORE_BYTE (c1);
4331 c = (c << 8) | c1;
4332 }
4333 }
4334 }
4335 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4336 if (c < 0)
4337 goto invalid_code;
4338 }
4339 *charbuf++ = c;
4340 continue;
4341
4342 invalid_code:
4343 src = src_base;
4344 consumed_chars = consumed_chars_base;
4345 ONE_MORE_BYTE (c);
4346 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4347 coding->errors++;
4348 }
4349
4350 no_more_source:
4351 coding->consumed_char += consumed_chars_base;
4352 coding->consumed = src_base - coding->source;
4353 coding->charbuf_used = charbuf - coding->charbuf;
4354 }
4355
4356 static int
4357 encode_coding_charset (coding)
4358 struct coding_system *coding;
4359 {
4360 int multibytep = coding->dst_multibyte;
4361 int *charbuf = coding->charbuf;
4362 int *charbuf_end = charbuf + coding->charbuf_used;
4363 unsigned char *dst = coding->destination + coding->produced;
4364 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4365 int safe_room = MAX_MULTIBYTE_LENGTH;
4366 int produced_chars = 0;
4367 Lisp_Object attrs, eol_type, charset_list;
4368 int ascii_compatible;
4369 int c;
4370
4371 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4372 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4373
4374 while (charbuf < charbuf_end)
4375 {
4376 struct charset *charset;
4377 unsigned code;
4378
4379 ASSURE_DESTINATION (safe_room);
4380 c = *charbuf++;
4381 if (ascii_compatible && ASCII_CHAR_P (c))
4382 EMIT_ONE_ASCII_BYTE (c);
4383 else
4384 {
4385 charset = char_charset (c, charset_list, &code);
4386 if (charset)
4387 {
4388 if (CHARSET_DIMENSION (charset) == 1)
4389 EMIT_ONE_BYTE (code);
4390 else if (CHARSET_DIMENSION (charset) == 2)
4391 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4392 else if (CHARSET_DIMENSION (charset) == 3)
4393 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4394 else
4395 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4396 (code >> 8) & 0xFF, code & 0xFF);
4397 }
4398 else
4399 {
4400 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4401 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4402 else
4403 c = coding->default_char;
4404 EMIT_ONE_BYTE (c);
4405 }
4406 }
4407 }
4408
4409 coding->result = CODING_RESULT_SUCCESS;
4410 coding->produced_char += produced_chars;
4411 coding->produced = dst - coding->destination;
4412 return 0;
4413 }
4414
4415 \f
4416 /*** 7. C library functions ***/
4417
4418 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
4419 has a property `coding-system'. The value of this property is a
4420 vector of length 5 (called as coding-vector). Among elements of
4421 this vector, the first (element[0]) and the fifth (element[4])
4422 carry important information for decoding/encoding. Before
4423 decoding/encoding, this information should be set in fields of a
4424 structure of type `coding_system'.
4425
4426 A value of property `coding-system' can be a symbol of another
4427 subsidiary coding-system. In that case, Emacs gets coding-vector
4428 from that symbol.
4429
4430 `element[0]' contains information to be set in `coding->type'. The
4431 value and its meaning is as follows:
4432
4433 0 -- coding_type_emacs_mule
4434 1 -- coding_type_sjis
4435 2 -- coding_type_iso_2022
4436 3 -- coding_type_big5
4437 4 -- coding_type_ccl encoder/decoder written in CCL
4438 nil -- coding_type_no_conversion
4439 t -- coding_type_undecided (automatic conversion on decoding,
4440 no-conversion on encoding)
4441
4442 `element[4]' contains information to be set in `coding->flags' and
4443 `coding->spec'. The meaning varies by `coding->type'.
4444
4445 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4446 of length 32 (of which the first 13 sub-elements are used now).
4447 Meanings of these sub-elements are:
4448
4449 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4450 If the value is an integer of valid charset, the charset is
4451 assumed to be designated to graphic register N initially.
4452
4453 If the value is minus, it is a minus value of charset which
4454 reserves graphic register N, which means that the charset is
4455 not designated initially but should be designated to graphic
4456 register N just before encoding a character in that charset.
4457
4458 If the value is nil, graphic register N is never used on
4459 encoding.
4460
4461 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4462 Each value takes t or nil. See the section ISO2022 of
4463 `coding.h' for more information.
4464
4465 If `coding->type' is `coding_type_big5', element[4] is t to denote
4466 BIG5-ETen or nil to denote BIG5-HKU.
4467
4468 If `coding->type' takes the other value, element[4] is ignored.
4469
4470 Emacs Lisp's coding system also carries information about format of
4471 end-of-line in a value of property `eol-type'. If the value is
4472 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4473 it is not integer, it should be a vector of subsidiary coding
4474 systems of which property `eol-type' has one of above values.
4475
4476 */
4477
4478 /* Setup coding context CODING from information about CODING_SYSTEM.
4479 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4480 CODING_SYSTEM is invalid, signal an error. */
4481
4482 void
4483 setup_coding_system (coding_system, coding)
4484 Lisp_Object coding_system;
4485 struct coding_system *coding;
4486 {
4487 Lisp_Object attrs;
4488 Lisp_Object eol_type;
4489 Lisp_Object coding_type;
4490 Lisp_Object val;
4491
4492 if (NILP (coding_system))
4493 coding_system = Qno_conversion;
4494
4495 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4496
4497 attrs = CODING_ID_ATTRS (coding->id);
4498 eol_type = CODING_ID_EOL_TYPE (coding->id);
4499
4500 coding->mode = 0;
4501 coding->head_ascii = -1;
4502 coding->common_flags
4503 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4504
4505 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4506 coding->max_charset_id = XSTRING (val)->size - 1;
4507 coding->safe_charsets = (char *) XSTRING (val)->data;
4508 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4509
4510 coding_type = CODING_ATTR_TYPE (attrs);
4511 if (EQ (coding_type, Qundecided))
4512 {
4513 coding->detector = NULL;
4514 coding->decoder = decode_coding_raw_text;
4515 coding->encoder = encode_coding_raw_text;
4516 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4517 }
4518 else if (EQ (coding_type, Qiso_2022))
4519 {
4520 int i;
4521 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4522
4523 /* Invoke graphic register 0 to plane 0. */
4524 CODING_ISO_INVOCATION (coding, 0) = 0;
4525 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4526 CODING_ISO_INVOCATION (coding, 1)
4527 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4528 /* Setup the initial status of designation. */
4529 for (i = 0; i < 4; i++)
4530 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4531 /* Not single shifting initially. */
4532 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4533 /* Beginning of buffer should also be regarded as bol. */
4534 CODING_ISO_BOL (coding) = 1;
4535 coding->detector = detect_coding_iso_2022;
4536 coding->decoder = decode_coding_iso_2022;
4537 coding->encoder = encode_coding_iso_2022;
4538 if (flags & CODING_ISO_FLAG_SAFE)
4539 coding->mode |= CODING_MODE_SAFE_ENCODING;
4540 coding->common_flags
4541 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4542 | CODING_REQUIRE_FLUSHING_MASK);
4543 if (flags & CODING_ISO_FLAG_COMPOSITION)
4544 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4545 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4546 {
4547 setup_iso_safe_charsets (attrs);
4548 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4549 coding->max_charset_id = XSTRING (val)->size - 1;
4550 coding->safe_charsets = (char *) XSTRING (val)->data;
4551 }
4552 CODING_ISO_FLAGS (coding) = flags;
4553 }
4554 else if (EQ (coding_type, Qcharset))
4555 {
4556 coding->detector = detect_coding_charset;
4557 coding->decoder = decode_coding_charset;
4558 coding->encoder = encode_coding_charset;
4559 coding->common_flags
4560 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4561 }
4562 else if (EQ (coding_type, Qutf_8))
4563 {
4564 coding->detector = detect_coding_utf_8;
4565 coding->decoder = decode_coding_utf_8;
4566 coding->encoder = encode_coding_utf_8;
4567 coding->common_flags
4568 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4569 }
4570 else if (EQ (coding_type, Qutf_16))
4571 {
4572 val = AREF (attrs, coding_attr_utf_16_bom);
4573 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4574 : EQ (val, Qt) ? utf_16_with_bom
4575 : utf_16_without_bom);
4576 val = AREF (attrs, coding_attr_utf_16_endian);
4577 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4578 : utf_16_little_endian);
4579 CODING_UTF_16_SURROGATE (coding) = 0;
4580 coding->detector = detect_coding_utf_16;
4581 coding->decoder = decode_coding_utf_16;
4582 coding->encoder = encode_coding_utf_16;
4583 coding->common_flags
4584 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4585 }
4586 else if (EQ (coding_type, Qccl))
4587 {
4588 coding->detector = detect_coding_ccl;
4589 coding->decoder = decode_coding_ccl;
4590 coding->encoder = encode_coding_ccl;
4591 coding->common_flags
4592 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4593 | CODING_REQUIRE_FLUSHING_MASK);
4594 }
4595 else if (EQ (coding_type, Qemacs_mule))
4596 {
4597 coding->detector = detect_coding_emacs_mule;
4598 coding->decoder = decode_coding_emacs_mule;
4599 coding->encoder = encode_coding_emacs_mule;
4600 coding->common_flags
4601 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4602 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4603 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4604 {
4605 Lisp_Object tail, safe_charsets;
4606 int max_charset_id = 0;
4607
4608 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4609 tail = XCDR (tail))
4610 if (max_charset_id < XFASTINT (XCAR (tail)))
4611 max_charset_id = XFASTINT (XCAR (tail));
4612 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4613 make_number (255));
4614 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4615 tail = XCDR (tail))
4616 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4617 coding->max_charset_id = max_charset_id;
4618 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4619 }
4620 }
4621 else if (EQ (coding_type, Qshift_jis))
4622 {
4623 coding->detector = detect_coding_sjis;
4624 coding->decoder = decode_coding_sjis;
4625 coding->encoder = encode_coding_sjis;
4626 coding->common_flags
4627 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4628 }
4629 else if (EQ (coding_type, Qbig5))
4630 {
4631 coding->detector = detect_coding_big5;
4632 coding->decoder = decode_coding_big5;
4633 coding->encoder = encode_coding_big5;
4634 coding->common_flags
4635 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4636 }
4637 else /* EQ (coding_type, Qraw_text) */
4638 {
4639 coding->detector = NULL;
4640 coding->decoder = decode_coding_raw_text;
4641 coding->encoder = encode_coding_raw_text;
4642 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4643 }
4644
4645 return;
4646 }
4647
4648 /* Return raw-text or one of its subsidiaries that has the same
4649 eol_type as CODING-SYSTEM. */
4650
4651 Lisp_Object
4652 raw_text_coding_system (coding_system)
4653 Lisp_Object coding_system;
4654 {
4655 Lisp_Object spec, attrs;
4656 Lisp_Object eol_type, raw_text_eol_type;
4657
4658 spec = CODING_SYSTEM_SPEC (coding_system);
4659 attrs = AREF (spec, 0);
4660
4661 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4662 return coding_system;
4663
4664 eol_type = AREF (spec, 2);
4665 if (VECTORP (eol_type))
4666 return Qraw_text;
4667 spec = CODING_SYSTEM_SPEC (Qraw_text);
4668 raw_text_eol_type = AREF (spec, 2);
4669 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4670 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4671 : AREF (raw_text_eol_type, 2));
4672 }
4673
4674
4675 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4676 does, return one of the subsidiary that has the same eol-spec as
4677 PARENT. Otherwise, return CODING_SYSTEM. */
4678
4679 Lisp_Object
4680 coding_inherit_eol_type (coding_system, parent)
4681 Lisp_Object coding_system, parent;
4682 {
4683 Lisp_Object spec, attrs, eol_type;
4684
4685 spec = CODING_SYSTEM_SPEC (coding_system);
4686 attrs = AREF (spec, 0);
4687 eol_type = AREF (spec, 2);
4688 if (VECTORP (eol_type))
4689 {
4690 Lisp_Object parent_spec;
4691 Lisp_Object parent_eol_type;
4692
4693 parent_spec
4694 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4695 parent_eol_type = AREF (parent_spec, 2);
4696 if (EQ (parent_eol_type, Qunix))
4697 coding_system = AREF (eol_type, 0);
4698 else if (EQ (parent_eol_type, Qdos))
4699 coding_system = AREF (eol_type, 1);
4700 else if (EQ (parent_eol_type, Qmac))
4701 coding_system = AREF (eol_type, 2);
4702 }
4703 return coding_system;
4704 }
4705
4706 /* Emacs has a mechanism to automatically detect a coding system if it
4707 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4708 it's impossible to distinguish some coding systems accurately
4709 because they use the same range of codes. So, at first, coding
4710 systems are categorized into 7, those are:
4711
4712 o coding-category-emacs-mule
4713
4714 The category for a coding system which has the same code range
4715 as Emacs' internal format. Assigned the coding-system (Lisp
4716 symbol) `emacs-mule' by default.
4717
4718 o coding-category-sjis
4719
4720 The category for a coding system which has the same code range
4721 as SJIS. Assigned the coding-system (Lisp
4722 symbol) `japanese-shift-jis' by default.
4723
4724 o coding-category-iso-7
4725
4726 The category for a coding system which has the same code range
4727 as ISO2022 of 7-bit environment. This doesn't use any locking
4728 shift and single shift functions. This can encode/decode all
4729 charsets. Assigned the coding-system (Lisp symbol)
4730 `iso-2022-7bit' by default.
4731
4732 o coding-category-iso-7-tight
4733
4734 Same as coding-category-iso-7 except that this can
4735 encode/decode only the specified charsets.
4736
4737 o coding-category-iso-8-1
4738
4739 The category for a coding system which has the same code range
4740 as ISO2022 of 8-bit environment and graphic plane 1 used only
4741 for DIMENSION1 charset. This doesn't use any locking shift
4742 and single shift functions. Assigned the coding-system (Lisp
4743 symbol) `iso-latin-1' by default.
4744
4745 o coding-category-iso-8-2
4746
4747 The category for a coding system which has the same code range
4748 as ISO2022 of 8-bit environment and graphic plane 1 used only
4749 for DIMENSION2 charset. This doesn't use any locking shift
4750 and single shift functions. Assigned the coding-system (Lisp
4751 symbol) `japanese-iso-8bit' by default.
4752
4753 o coding-category-iso-7-else
4754
4755 The category for a coding system which has the same code range
4756 as ISO2022 of 7-bit environemnt but uses locking shift or
4757 single shift functions. Assigned the coding-system (Lisp
4758 symbol) `iso-2022-7bit-lock' by default.
4759
4760 o coding-category-iso-8-else
4761
4762 The category for a coding system which has the same code range
4763 as ISO2022 of 8-bit environemnt but uses locking shift or
4764 single shift functions. Assigned the coding-system (Lisp
4765 symbol) `iso-2022-8bit-ss2' by default.
4766
4767 o coding-category-big5
4768
4769 The category for a coding system which has the same code range
4770 as BIG5. Assigned the coding-system (Lisp symbol)
4771 `cn-big5' by default.
4772
4773 o coding-category-utf-8
4774
4775 The category for a coding system which has the same code range
4776 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4777 symbol) `utf-8' by default.
4778
4779 o coding-category-utf-16-be
4780
4781 The category for a coding system in which a text has an
4782 Unicode signature (cf. Unicode Standard) in the order of BIG
4783 endian at the head. Assigned the coding-system (Lisp symbol)
4784 `utf-16-be' by default.
4785
4786 o coding-category-utf-16-le
4787
4788 The category for a coding system in which a text has an
4789 Unicode signature (cf. Unicode Standard) in the order of
4790 LITTLE endian at the head. Assigned the coding-system (Lisp
4791 symbol) `utf-16-le' by default.
4792
4793 o coding-category-ccl
4794
4795 The category for a coding system of which encoder/decoder is
4796 written in CCL programs. The default value is nil, i.e., no
4797 coding system is assigned.
4798
4799 o coding-category-binary
4800
4801 The category for a coding system not categorized in any of the
4802 above. Assigned the coding-system (Lisp symbol)
4803 `no-conversion' by default.
4804
4805 Each of them is a Lisp symbol and the value is an actual
4806 `coding-system's (this is also a Lisp symbol) assigned by a user.
4807 What Emacs does actually is to detect a category of coding system.
4808 Then, it uses a `coding-system' assigned to it. If Emacs can't
4809 decide only one possible category, it selects a category of the
4810 highest priority. Priorities of categories are also specified by a
4811 user in a Lisp variable `coding-category-list'.
4812
4813 */
4814
4815 #define EOL_SEEN_NONE 0
4816 #define EOL_SEEN_LF 1
4817 #define EOL_SEEN_CR 2
4818 #define EOL_SEEN_CRLF 4
4819
4820 /* Detect how end-of-line of a text of length CODING->src_bytes
4821 pointed by CODING->source is encoded. Return one of
4822 EOL_SEEN_XXX. */
4823
4824 #define MAX_EOL_CHECK_COUNT 3
4825
4826 static int
4827 detect_eol (coding, source, src_bytes)
4828 struct coding_system *coding;
4829 unsigned char *source;
4830 EMACS_INT src_bytes;
4831 {
4832 Lisp_Object attrs, coding_type;
4833 unsigned char *src = source, *src_end = src + src_bytes;
4834 unsigned char c;
4835 int total = 0;
4836 int eol_seen = EOL_SEEN_NONE;
4837
4838 attrs = CODING_ID_ATTRS (coding->id);
4839 coding_type = CODING_ATTR_TYPE (attrs);
4840
4841 if (EQ (coding_type, Qccl))
4842 {
4843 int msb, lsb;
4844
4845 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4846 lsb = 1 - msb;
4847
4848 while (src + 1 < src_end)
4849 {
4850 c = src[lsb];
4851 if (src[msb] == 0 && (c == '\n' || c == '\r'))
4852 {
4853 int this_eol;
4854
4855 if (c == '\n')
4856 this_eol = EOL_SEEN_LF;
4857 else if (src + 3 >= src_end
4858 || src[msb + 2] != 0
4859 || src[lsb + 2] != '\n')
4860 this_eol = EOL_SEEN_CR;
4861 else
4862 this_eol = EOL_SEEN_CRLF;
4863
4864 if (eol_seen == EOL_SEEN_NONE)
4865 /* This is the first end-of-line. */
4866 eol_seen = this_eol;
4867 else if (eol_seen != this_eol)
4868 {
4869 /* The found type is different from what found before. */
4870 eol_seen = EOL_SEEN_LF;
4871 break;
4872 }
4873 if (++total == MAX_EOL_CHECK_COUNT)
4874 break;
4875 }
4876 src += 2;
4877 }
4878 }
4879 else
4880 {
4881 while (src < src_end)
4882 {
4883 c = *src++;
4884 if (c == '\n' || c == '\r')
4885 {
4886 int this_eol;
4887
4888 if (c == '\n')
4889 this_eol = EOL_SEEN_LF;
4890 else if (src >= src_end || *src != '\n')
4891 this_eol = EOL_SEEN_CR;
4892 else
4893 this_eol = EOL_SEEN_CRLF, src++;
4894
4895 if (eol_seen == EOL_SEEN_NONE)
4896 /* This is the first end-of-line. */
4897 eol_seen = this_eol;
4898 else if (eol_seen != this_eol)
4899 {
4900 /* The found type is different from what found before. */
4901 eol_seen = EOL_SEEN_LF;
4902 break;
4903 }
4904 if (++total == MAX_EOL_CHECK_COUNT)
4905 break;
4906 }
4907 }
4908 }
4909 return eol_seen;
4910 }
4911
4912
4913 static void
4914 adjust_coding_eol_type (coding, eol_seen)
4915 struct coding_system *coding;
4916 int eol_seen;
4917 {
4918 Lisp_Object eol_type;
4919
4920 eol_type = CODING_ID_EOL_TYPE (coding->id);
4921 if (eol_seen & EOL_SEEN_LF)
4922 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4923 else if (eol_type & EOL_SEEN_CRLF)
4924 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4925 else if (eol_type & EOL_SEEN_CR)
4926 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
4927 }
4928
4929 /* Detect how a text specified in CODING is encoded. If a coding
4930 system is detected, update fields of CODING by the detected coding
4931 system. */
4932
4933 void
4934 detect_coding (coding)
4935 struct coding_system *coding;
4936 {
4937 unsigned char *src, *src_end;
4938 Lisp_Object attrs, coding_type;
4939
4940 coding->consumed = coding->consumed_char = 0;
4941 coding->produced = coding->produced_char = 0;
4942 coding_set_source (coding);
4943
4944 src_end = coding->source + coding->src_bytes;
4945
4946 /* If we have not yet decided the text encoding type, detect it
4947 now. */
4948 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
4949 {
4950 int mask = CATEGORY_MASK_ANY;
4951 int c, i;
4952
4953 for (src = coding->source; src < src_end; src++)
4954 {
4955 c = *src;
4956 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
4957 || c == ISO_CODE_SI
4958 || c == ISO_CODE_SO)))
4959 break;
4960 }
4961 coding->head_ascii = src - (coding->source + coding->consumed);
4962
4963 if (coding->head_ascii < coding->src_bytes)
4964 {
4965 int detected = 0;
4966
4967 for (i = 0; i < coding_category_raw_text; i++)
4968 {
4969 enum coding_category category = coding_priorities[i];
4970 struct coding_system *this = coding_categories + category;
4971
4972 if (category >= coding_category_raw_text
4973 || detected & (1 << category))
4974 continue;
4975
4976 if (this->id < 0)
4977 {
4978 /* No coding system of this category is defined. */
4979 mask &= ~(1 << category);
4980 }
4981 else
4982 {
4983 detected |= detected_mask[category];
4984 if ((*(this->detector)) (coding, &mask))
4985 break;
4986 }
4987 }
4988 if (! mask)
4989 setup_coding_system (Qraw_text, coding);
4990 else if (mask != CATEGORY_MASK_ANY)
4991 for (i = 0; i < coding_category_raw_text; i++)
4992 {
4993 enum coding_category category = coding_priorities[i];
4994 struct coding_system *this = coding_categories + category;
4995
4996 if (mask & (1 << category))
4997 {
4998 setup_coding_system (CODING_ID_NAME (this->id), coding);
4999 break;
5000 }
5001 }
5002 }
5003 }
5004
5005 attrs = CODING_ID_ATTRS (coding->id);
5006 coding_type = CODING_ATTR_TYPE (attrs);
5007
5008 /* If we have not yet decided the EOL type, detect it now. But, the
5009 detection is impossible for a CCL based coding system, in which
5010 case, we detct the EOL type after decoding. */
5011 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5012 && ! EQ (coding_type, Qccl))
5013 {
5014 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5015
5016 if (eol_seen != EOL_SEEN_NONE)
5017 adjust_coding_eol_type (coding, eol_seen);
5018 }
5019 }
5020
5021
5022 static void
5023 decode_eol (coding)
5024 struct coding_system *coding;
5025 {
5026 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5027 {
5028 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5029 unsigned char *pend = p + coding->produced;
5030 int eol_seen = EOL_SEEN_NONE;
5031
5032 for (; p < pend; p++)
5033 {
5034 if (*p == '\n')
5035 eol_seen |= EOL_SEEN_LF;
5036 else if (*p == '\r')
5037 {
5038 if (p + 1 < pend && *(p + 1) == '\n')
5039 {
5040 eol_seen |= EOL_SEEN_CRLF;
5041 p++;
5042 }
5043 else
5044 eol_seen |= EOL_SEEN_CR;
5045 }
5046 }
5047 if (eol_seen != EOL_SEEN_NONE)
5048 adjust_coding_eol_type (coding, eol_seen);
5049 }
5050
5051 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5052 {
5053 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5054 unsigned char *pend = p + coding->produced;
5055
5056 for (; p < pend; p++)
5057 if (*p == '\r')
5058 *p = '\n';
5059 }
5060 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5061 {
5062 unsigned char *p, *pbeg, *pend;
5063 Lisp_Object undo_list;
5064
5065 move_gap_both (coding->dst_pos + coding->produced_char,
5066 coding->dst_pos_byte + coding->produced);
5067 undo_list = current_buffer->undo_list;
5068 current_buffer->undo_list = Qt;
5069 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5070 current_buffer->undo_list = undo_list;
5071 pbeg = GPT_ADDR;
5072 pend = pbeg + coding->produced;
5073
5074 for (p = pend - 1; p >= pbeg; p--)
5075 if (*p == '\r')
5076 {
5077 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5078 pend--;
5079 }
5080 coding->produced_char -= coding->produced - (pend - pbeg);
5081 coding->produced = pend - pbeg;
5082 insert_from_gap (coding->produced_char, coding->produced);
5083 }
5084 }
5085
5086 static void
5087 translate_chars (coding, table)
5088 struct coding_system *coding;
5089 Lisp_Object table;
5090 {
5091 int *charbuf = coding->charbuf;
5092 int *charbuf_end = charbuf + coding->charbuf_used;
5093 int c;
5094
5095 if (coding->chars_at_source)
5096 return;
5097
5098 while (charbuf < charbuf_end)
5099 {
5100 c = *charbuf;
5101 if (c < 0)
5102 charbuf += c;
5103 else
5104 *charbuf++ = translate_char (table, c);
5105 }
5106 }
5107
5108 static int
5109 produce_chars (coding)
5110 struct coding_system *coding;
5111 {
5112 unsigned char *dst = coding->destination + coding->produced;
5113 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5114 int produced;
5115 int produced_chars = 0;
5116
5117 if (! coding->chars_at_source)
5118 {
5119 /* Characters are in coding->charbuf. */
5120 int *buf = coding->charbuf;
5121 int *buf_end = buf + coding->charbuf_used;
5122 unsigned char *adjusted_dst_end;
5123
5124 if (BUFFERP (coding->src_object)
5125 && EQ (coding->src_object, coding->dst_object))
5126 dst_end = coding->source + coding->consumed;
5127 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5128
5129 while (buf < buf_end)
5130 {
5131 int c = *buf++;
5132
5133 if (dst >= adjusted_dst_end)
5134 {
5135 dst = alloc_destination (coding,
5136 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5137 dst);
5138 dst_end = coding->destination + coding->dst_bytes;
5139 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5140 }
5141 if (c >= 0)
5142 {
5143 if (coding->dst_multibyte
5144 || ! CHAR_BYTE8_P (c))
5145 CHAR_STRING_ADVANCE (c, dst);
5146 else
5147 *dst++ = CHAR_TO_BYTE8 (c);
5148 produced_chars++;
5149 }
5150 else
5151 /* This is an annotation data. */
5152 buf -= c + 1;
5153 }
5154 }
5155 else
5156 {
5157 unsigned char *src = coding->source;
5158 unsigned char *src_end = src + coding->src_bytes;
5159 Lisp_Object eol_type;
5160
5161 eol_type = CODING_ID_EOL_TYPE (coding->id);
5162
5163 if (coding->src_multibyte != coding->dst_multibyte)
5164 {
5165 if (coding->src_multibyte)
5166 {
5167 int multibytep = 1;
5168 int consumed_chars;
5169
5170 while (1)
5171 {
5172 unsigned char *src_base = src;
5173 int c;
5174
5175 ONE_MORE_BYTE (c);
5176 if (c == '\r')
5177 {
5178 if (EQ (eol_type, Qdos))
5179 {
5180 if (src < src_end
5181 && *src == '\n')
5182 c = *src++;
5183 }
5184 else if (EQ (eol_type, Qmac))
5185 c = '\n';
5186 }
5187 if (dst == dst_end)
5188 {
5189 coding->consumed = src - coding->source;
5190
5191 if (EQ (coding->src_object, coding->dst_object))
5192 dst_end = src;
5193 if (dst == dst_end)
5194 {
5195 dst = alloc_destination (coding, src_end - src + 1,
5196 dst);
5197 dst_end = coding->destination + coding->dst_bytes;
5198 coding_set_source (coding);
5199 src = coding->source + coding->consumed;
5200 src_end = coding->source + coding->src_bytes;
5201 }
5202 }
5203 *dst++ = c;
5204 produced_chars++;
5205 }
5206 no_more_source:
5207 ;
5208 }
5209 else
5210 while (src < src_end)
5211 {
5212 int multibytep = 1;
5213 int c = *src++;
5214
5215 if (c == '\r')
5216 {
5217 if (EQ (eol_type, Qdos))
5218 {
5219 if (src < src_end
5220 && *src == '\n')
5221 c = *src++;
5222 }
5223 else if (EQ (eol_type, Qmac))
5224 c = '\n';
5225 }
5226 if (dst >= dst_end - 1)
5227 {
5228 coding->consumed = src - coding->source;
5229
5230 if (EQ (coding->src_object, coding->dst_object))
5231 dst_end = src;
5232 if (dst >= dst_end - 1)
5233 {
5234 dst = alloc_destination (coding, src_end - src + 2,
5235 dst);
5236 dst_end = coding->destination + coding->dst_bytes;
5237 coding_set_source (coding);
5238 src = coding->source + coding->consumed;
5239 src_end = coding->source + coding->src_bytes;
5240 }
5241 }
5242 EMIT_ONE_BYTE (c);
5243 }
5244 }
5245 else
5246 {
5247 if (!EQ (coding->src_object, coding->dst_object))
5248 {
5249 int require = coding->src_bytes - coding->dst_bytes;
5250
5251 if (require > 0)
5252 {
5253 EMACS_INT offset = src - coding->source;
5254
5255 dst = alloc_destination (coding, require, dst);
5256 coding_set_source (coding);
5257 src = coding->source + offset;
5258 src_end = coding->source + coding->src_bytes;
5259 }
5260 }
5261 produced_chars = coding->src_chars;
5262 while (src < src_end)
5263 {
5264 int c = *src++;
5265
5266 if (c == '\r')
5267 {
5268 if (EQ (eol_type, Qdos))
5269 {
5270 if (src < src_end
5271 && *src == '\n')
5272 c = *src++;
5273 produced_chars--;
5274 }
5275 else if (EQ (eol_type, Qmac))
5276 c = '\n';
5277 }
5278 *dst++ = c;
5279 }
5280 }
5281 coding->consumed = coding->src_bytes;
5282 coding->consumed_char = coding->src_chars;
5283 }
5284
5285 produced = dst - (coding->destination + coding->produced);
5286 if (BUFFERP (coding->dst_object))
5287 insert_from_gap (produced_chars, produced);
5288 coding->produced += produced;
5289 coding->produced_char += produced_chars;
5290 return produced_chars;
5291 }
5292
5293 /* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5294 or
5295 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5296 */
5297
5298 static INLINE void
5299 produce_composition (coding, charbuf)
5300 struct coding_system *coding;
5301 int *charbuf;
5302 {
5303 Lisp_Object buffer;
5304 int len;
5305 EMACS_INT pos;
5306 enum composition_method method;
5307 int cmp_len;
5308 Lisp_Object components;
5309
5310 buffer = coding->dst_object;
5311 len = -charbuf[0];
5312 pos = coding->dst_pos + charbuf[1];
5313 method = (enum composition_method) (charbuf[3]);
5314 cmp_len = charbuf[4];
5315
5316 if (method == COMPOSITION_RELATIVE)
5317 components = Qnil;
5318 else
5319 {
5320 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5321 int i;
5322
5323 len -= 5;
5324 charbuf += 5;
5325 for (i = 0; i < len; i++)
5326 args[i] = make_number (charbuf[i]);
5327 components = (method == COMPOSITION_WITH_ALTCHARS
5328 ? Fstring (len, args) : Fvector (len, args));
5329 }
5330 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5331 }
5332
5333 static int *
5334 save_composition_data (buf, buf_end, prop)
5335 int *buf, *buf_end;
5336 Lisp_Object prop;
5337 {
5338 enum composition_method method = COMPOSITION_METHOD (prop);
5339 int cmp_len = COMPOSITION_LENGTH (prop);
5340
5341 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5342 return NULL;
5343
5344 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5345 buf[2] = method;
5346 buf[3] = cmp_len;
5347
5348 if (method == COMPOSITION_RELATIVE)
5349 buf[0] = 4;
5350 else
5351 {
5352 Lisp_Object components;
5353 int len, i;
5354
5355 components = COMPOSITION_COMPONENTS (prop);
5356 if (VECTORP (components))
5357 {
5358 len = XVECTOR (components)->size;
5359 for (i = 0; i < len; i++)
5360 buf[4 + i] = XINT (AREF (components, i));
5361 }
5362 else if (STRINGP (components))
5363 {
5364 int i_byte;
5365
5366 len = XSTRING (components)->size;
5367 i = i_byte = 0;
5368 while (i < len)
5369 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5370 }
5371 else if (INTEGERP (components))
5372 {
5373 len = 1;
5374 buf[4] = XINT (components);
5375 }
5376 else if (CONSP (components))
5377 {
5378 for (len = 0; CONSP (components);
5379 len++, components = XCDR (components))
5380 buf[4 + len] = XINT (XCAR (components));
5381 }
5382 else
5383 abort ();
5384 buf[0] = 4 + len;
5385 }
5386 return (buf + buf[0]);
5387 }
5388
5389 #define CHARBUF_SIZE 0x4000
5390
5391 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5392 do { \
5393 int size = CHARBUF_SIZE;; \
5394 \
5395 coding->charbuf = NULL; \
5396 while (size > 1024) \
5397 { \
5398 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5399 if (coding->charbuf) \
5400 break; \
5401 size >>= 1; \
5402 } \
5403 if (! coding->charbuf) \
5404 { \
5405 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5406 return coding->result; \
5407 } \
5408 coding->charbuf_size = size; \
5409 } while (0)
5410
5411
5412 static void
5413 produce_annotation (coding)
5414 struct coding_system *coding;
5415 {
5416 int *charbuf = coding->charbuf;
5417 int *charbuf_end = charbuf + coding->charbuf_used;
5418
5419 while (charbuf < charbuf_end)
5420 {
5421 if (*charbuf >= 0)
5422 charbuf++;
5423 else
5424 {
5425 int len = -*charbuf;
5426 switch (charbuf[2])
5427 {
5428 case CODING_ANNOTATE_COMPOSITION_MASK:
5429 produce_composition (coding, charbuf);
5430 break;
5431 default:
5432 abort ();
5433 }
5434 charbuf += len;
5435 }
5436 }
5437 }
5438
5439 /* Decode the data at CODING->src_object into CODING->dst_object.
5440 CODING->src_object is a buffer, a string, or nil.
5441 CODING->dst_object is a buffer.
5442
5443 If CODING->src_object is a buffer, it must be the current buffer.
5444 In this case, if CODING->src_pos is positive, it is a position of
5445 the source text in the buffer, otherwise, the source text is in the
5446 gap area of the buffer, and CODING->src_pos specifies the offset of
5447 the text from GPT (which must be the same as PT). If this is the
5448 same buffer as CODING->dst_object, CODING->src_pos must be
5449 negative.
5450
5451 If CODING->src_object is a string, CODING->src_pos in an index to
5452 that string.
5453
5454 If CODING->src_object is nil, CODING->source must already point to
5455 the non-relocatable memory area. In this case, CODING->src_pos is
5456 an offset from CODING->source.
5457
5458 The decoded data is inserted at the current point of the buffer
5459 CODING->dst_object.
5460 */
5461
5462 static int
5463 decode_coding (coding)
5464 struct coding_system *coding;
5465 {
5466 Lisp_Object attrs;
5467
5468 if (BUFFERP (coding->src_object)
5469 && coding->src_pos > 0
5470 && coding->src_pos < GPT
5471 && coding->src_pos + coding->src_chars > GPT)
5472 move_gap_both (coding->src_pos, coding->src_pos_byte);
5473
5474 if (BUFFERP (coding->dst_object))
5475 {
5476 if (current_buffer != XBUFFER (coding->dst_object))
5477 set_buffer_internal (XBUFFER (coding->dst_object));
5478 if (GPT != PT)
5479 move_gap_both (PT, PT_BYTE);
5480 }
5481
5482 coding->consumed = coding->consumed_char = 0;
5483 coding->produced = coding->produced_char = 0;
5484 coding->chars_at_source = 0;
5485 coding->result = CODING_RESULT_SUCCESS;
5486 coding->errors = 0;
5487
5488 ALLOC_CONVERSION_WORK_AREA (coding);
5489
5490 attrs = CODING_ID_ATTRS (coding->id);
5491
5492 do
5493 {
5494 coding_set_source (coding);
5495 coding->annotated = 0;
5496 (*(coding->decoder)) (coding);
5497 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5498 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5499 coding_set_destination (coding);
5500 produce_chars (coding);
5501 if (coding->annotated)
5502 produce_annotation (coding);
5503 }
5504 while (coding->consumed < coding->src_bytes
5505 && ! coding->result);
5506
5507 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5508 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5509 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5510 decode_eol (coding);
5511
5512 coding->carryover_bytes = 0;
5513 if (coding->consumed < coding->src_bytes)
5514 {
5515 int nbytes = coding->src_bytes - coding->consumed;
5516 unsigned char *src;
5517
5518 coding_set_source (coding);
5519 coding_set_destination (coding);
5520 src = coding->source + coding->consumed;
5521
5522 if (coding->mode & CODING_MODE_LAST_BLOCK)
5523 {
5524 /* Flush out unprocessed data as binary chars. We are sure
5525 that the number of data is less than the size of
5526 coding->charbuf. */
5527 int *charbuf = coding->charbuf;
5528
5529 while (nbytes-- > 0)
5530 {
5531 int c = *src++;
5532 *charbuf++ = (c & 0x80 ? - c : c);
5533 }
5534 produce_chars (coding);
5535 }
5536 else
5537 {
5538 /* Record unprocessed bytes in coding->carryover. We are
5539 sure that the number of data is less than the size of
5540 coding->carryover. */
5541 unsigned char *p = coding->carryover;
5542
5543 coding->carryover_bytes = nbytes;
5544 while (nbytes-- > 0)
5545 *p++ = *src++;
5546 }
5547 coding->consumed = coding->src_bytes;
5548 }
5549
5550 return coding->result;
5551 }
5552
5553 static void
5554 consume_chars (coding)
5555 struct coding_system *coding;
5556 {
5557 int *buf = coding->charbuf;
5558 /* -1 is to compensate for CRLF. */
5559 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5560 unsigned char *src = coding->source + coding->consumed;
5561 int pos = coding->src_pos + coding->consumed_char;
5562 int end_pos = coding->src_pos + coding->src_chars;
5563 int multibytep = coding->src_multibyte;
5564 Lisp_Object eol_type;
5565 int c;
5566 int start, end, stop;
5567 Lisp_Object object, prop;
5568
5569 eol_type = CODING_ID_EOL_TYPE (coding->id);
5570 if (VECTORP (eol_type))
5571 eol_type = Qunix;
5572
5573 object = coding->src_object;
5574
5575 /* Note: composition handling is not yet implemented. */
5576 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5577
5578 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5579 && find_composition (pos, end_pos, &start, &end, &prop, object)
5580 && end <= end_pos
5581 && (start >= pos
5582 || (find_composition (end, end_pos, &start, &end, &prop, object)
5583 && end <= end_pos)))
5584 stop = start;
5585 else
5586 stop = end_pos;
5587
5588 while (buf < buf_end)
5589 {
5590 if (pos == stop)
5591 {
5592 int *p;
5593
5594 if (pos == end_pos)
5595 break;
5596 p = save_composition_data (buf, buf_end, prop);
5597 if (p == NULL)
5598 break;
5599 buf = p;
5600 if (find_composition (end, end_pos, &start, &end, &prop, object)
5601 && end <= end_pos)
5602 stop = start;
5603 else
5604 stop = end_pos;
5605 }
5606
5607 if (! multibytep)
5608 c = *src++;
5609 else
5610 c = STRING_CHAR_ADVANCE (src);
5611 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5612 c = '\n';
5613 if (! EQ (eol_type, Qunix))
5614 {
5615 if (c == '\n')
5616 {
5617 if (EQ (eol_type, Qdos))
5618 *buf++ = '\r';
5619 else
5620 c = '\r';
5621 }
5622 }
5623 *buf++ = c;
5624 pos++;
5625 }
5626
5627 coding->consumed = src - coding->source;
5628 coding->consumed_char = pos - coding->src_pos;
5629 coding->charbuf_used = buf - coding->charbuf;
5630 coding->chars_at_source = 0;
5631 }
5632
5633
5634 /* Encode the text at CODING->src_object into CODING->dst_object.
5635 CODING->src_object is a buffer or a string.
5636 CODING->dst_object is a buffer or nil.
5637
5638 If CODING->src_object is a buffer, it must be the current buffer.
5639 In this case, if CODING->src_pos is positive, it is a position of
5640 the source text in the buffer, otherwise. the source text is in the
5641 gap area of the buffer, and coding->src_pos specifies the offset of
5642 the text from GPT (which must be the same as PT). If this is the
5643 same buffer as CODING->dst_object, CODING->src_pos must be
5644 negative and CODING should not have `pre-write-conversion'.
5645
5646 If CODING->src_object is a string, CODING should not have
5647 `pre-write-conversion'.
5648
5649 If CODING->dst_object is a buffer, the encoded data is inserted at
5650 the current point of that buffer.
5651
5652 If CODING->dst_object is nil, the encoded data is placed at the
5653 memory area specified by CODING->destination. */
5654
5655 static int
5656 encode_coding (coding)
5657 struct coding_system *coding;
5658 {
5659 Lisp_Object attrs;
5660
5661 attrs = CODING_ID_ATTRS (coding->id);
5662
5663 if (BUFFERP (coding->dst_object))
5664 {
5665 set_buffer_internal (XBUFFER (coding->dst_object));
5666 coding->dst_multibyte
5667 = ! NILP (current_buffer->enable_multibyte_characters);
5668 }
5669
5670 coding->consumed = coding->consumed_char = 0;
5671 coding->produced = coding->produced_char = 0;
5672 coding->result = CODING_RESULT_SUCCESS;
5673 coding->errors = 0;
5674
5675 ALLOC_CONVERSION_WORK_AREA (coding);
5676
5677 do {
5678 coding_set_source (coding);
5679 consume_chars (coding);
5680
5681 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5682 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5683
5684 coding_set_destination (coding);
5685 (*(coding->encoder)) (coding);
5686 } while (coding->consumed_char < coding->src_chars);
5687
5688 if (BUFFERP (coding->dst_object))
5689 insert_from_gap (coding->produced_char, coding->produced);
5690
5691 return (coding->result);
5692 }
5693
5694 /* Work buffer */
5695
5696 /* List of currently used working buffer. */
5697 Lisp_Object Vcode_conversion_work_buf_list;
5698
5699 /* A working buffer used by the top level conversion. */
5700 Lisp_Object Vcode_conversion_reused_work_buf;
5701
5702
5703 /* Return a working buffer that can be freely used by the following
5704 code conversion. MULTIBYTEP specifies the multibyteness of the
5705 buffer. */
5706
5707 Lisp_Object
5708 make_conversion_work_buffer (multibytep)
5709 int multibytep;
5710 {
5711 struct buffer *current = current_buffer;
5712 Lisp_Object buf;
5713
5714 if (NILP (Vcode_conversion_work_buf_list))
5715 {
5716 if (NILP (Vcode_conversion_reused_work_buf))
5717 Vcode_conversion_reused_work_buf
5718 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5719 Vcode_conversion_work_buf_list
5720 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
5721 }
5722 else
5723 {
5724 int depth = Flength (Vcode_conversion_work_buf_list);
5725 char str[128];
5726
5727 sprintf (str, " *code-conversion-work*<%d>", depth);
5728 Vcode_conversion_work_buf_list
5729 = Fcons (Fget_buffer_create (build_string (str)),
5730 Vcode_conversion_work_buf_list);
5731 }
5732
5733 buf = XCAR (Vcode_conversion_work_buf_list);
5734 set_buffer_internal (XBUFFER (buf));
5735 current_buffer->undo_list = Qt;
5736 Ferase_buffer ();
5737 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5738 set_buffer_internal (current);
5739 return buf;
5740 }
5741
5742 static struct coding_system *saved_coding;
5743
5744 Lisp_Object
5745 code_conversion_restore (info)
5746 Lisp_Object info;
5747 {
5748 int depth = Flength (Vcode_conversion_work_buf_list);
5749 Lisp_Object buf;
5750
5751 if (depth > 0)
5752 {
5753 buf = XCAR (Vcode_conversion_work_buf_list);
5754 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5755 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5756 Fkill_buffer (buf);
5757 }
5758
5759 if (saved_coding->dst_object == Qt
5760 && saved_coding->destination)
5761 xfree (saved_coding->destination);
5762
5763 return save_excursion_restore (info);
5764 }
5765
5766
5767 int
5768 decode_coding_gap (coding, chars, bytes)
5769 struct coding_system *coding;
5770 EMACS_INT chars, bytes;
5771 {
5772 int count = specpdl_ptr - specpdl;
5773
5774 saved_coding = coding;
5775 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5776
5777 coding->src_object = Fcurrent_buffer ();
5778 coding->src_chars = chars;
5779 coding->src_bytes = bytes;
5780 coding->src_pos = -chars;
5781 coding->src_pos_byte = -bytes;
5782 coding->src_multibyte = chars < bytes;
5783 coding->dst_object = coding->src_object;
5784 coding->dst_pos = PT;
5785 coding->dst_pos_byte = PT_BYTE;
5786 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
5787
5788 if (CODING_REQUIRE_DETECTION (coding))
5789 detect_coding (coding);
5790
5791 decode_coding (coding);
5792
5793 unbind_to (count, Qnil);
5794 return coding->result;
5795 }
5796
5797 int
5798 encode_coding_gap (coding, chars, bytes)
5799 struct coding_system *coding;
5800 EMACS_INT chars, bytes;
5801 {
5802 int count = specpdl_ptr - specpdl;
5803 Lisp_Object buffer;
5804
5805 saved_coding = coding;
5806 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5807
5808 buffer = Fcurrent_buffer ();
5809 coding->src_object = buffer;
5810 coding->src_chars = chars;
5811 coding->src_bytes = bytes;
5812 coding->src_pos = -chars;
5813 coding->src_pos_byte = -bytes;
5814 coding->src_multibyte = chars < bytes;
5815 coding->dst_object = coding->src_object;
5816 coding->dst_pos = PT;
5817 coding->dst_pos_byte = PT_BYTE;
5818
5819 encode_coding (coding);
5820
5821 unbind_to (count, Qnil);
5822 return coding->result;
5823 }
5824
5825
5826 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5827 SRC_OBJECT into DST_OBJECT by coding context CODING.
5828
5829 SRC_OBJECT is a buffer, a string, or Qnil.
5830
5831 If it is a buffer, the text is at point of the buffer. FROM and TO
5832 are positions in the buffer.
5833
5834 If it is a string, the text is at the beginning of the string.
5835 FROM and TO are indices to the string.
5836
5837 If it is nil, the text is at coding->source. FROM and TO are
5838 indices to coding->source.
5839
5840 DST_OBJECT is a buffer, Qt, or Qnil.
5841
5842 If it is a buffer, the decoded text is inserted at point of the
5843 buffer. If the buffer is the same as SRC_OBJECT, the source text
5844 is deleted.
5845
5846 If it is Qt, a string is made from the decoded text, and
5847 set in CODING->dst_object.
5848
5849 If it is Qnil, the decoded text is stored at CODING->destination.
5850 The called must allocate CODING->dst_bytes bytes at
5851 CODING->destination by xmalloc. If the decoded text is longer than
5852 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5853 */
5854
5855 void
5856 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5857 dst_object)
5858 struct coding_system *coding;
5859 Lisp_Object src_object;
5860 EMACS_INT from, from_byte, to, to_byte;
5861 Lisp_Object dst_object;
5862 {
5863 int count = specpdl_ptr - specpdl;
5864 unsigned char *destination;
5865 EMACS_INT dst_bytes;
5866 EMACS_INT chars = to - from;
5867 EMACS_INT bytes = to_byte - from_byte;
5868 Lisp_Object attrs;
5869
5870 saved_coding = coding;
5871 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5872
5873 if (NILP (dst_object))
5874 {
5875 destination = coding->destination;
5876 dst_bytes = coding->dst_bytes;
5877 }
5878
5879 coding->src_object = src_object;
5880 coding->src_chars = chars;
5881 coding->src_bytes = bytes;
5882 coding->src_multibyte = chars < bytes;
5883
5884 if (STRINGP (src_object))
5885 {
5886 coding->src_pos = from;
5887 coding->src_pos_byte = from_byte;
5888 }
5889 else if (BUFFERP (src_object))
5890 {
5891 set_buffer_internal (XBUFFER (src_object));
5892 if (from != GPT)
5893 move_gap_both (from, from_byte);
5894 if (EQ (src_object, dst_object))
5895 {
5896 TEMP_SET_PT_BOTH (from, from_byte);
5897 del_range_both (from, from_byte, to, to_byte, 1);
5898 coding->src_pos = -chars;
5899 coding->src_pos_byte = -bytes;
5900 }
5901 else
5902 {
5903 coding->src_pos = from;
5904 coding->src_pos_byte = from_byte;
5905 }
5906 }
5907
5908 if (CODING_REQUIRE_DETECTION (coding))
5909 detect_coding (coding);
5910 attrs = CODING_ID_ATTRS (coding->id);
5911
5912 if (! NILP (CODING_ATTR_POST_READ (attrs))
5913 || EQ (dst_object, Qt))
5914 {
5915 coding->dst_object = make_conversion_work_buffer (1);
5916 coding->dst_pos = BEG;
5917 coding->dst_pos_byte = BEG_BYTE;
5918 coding->dst_multibyte = 1;
5919 }
5920 else if (BUFFERP (dst_object))
5921 {
5922 coding->dst_object = dst_object;
5923 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5924 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5925 coding->dst_multibyte
5926 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
5927 }
5928 else
5929 {
5930 coding->dst_object = Qnil;
5931 coding->dst_multibyte = 1;
5932 }
5933
5934 decode_coding (coding);
5935
5936 if (BUFFERP (coding->dst_object))
5937 set_buffer_internal (XBUFFER (coding->dst_object));
5938
5939 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5940 {
5941 struct gcpro gcpro1, gcpro2;
5942 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
5943 Lisp_Object val;
5944
5945 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5946 GCPRO2 (coding->src_object, coding->dst_object);
5947 val = call1 (CODING_ATTR_POST_READ (attrs),
5948 make_number (coding->produced_char));
5949 UNGCPRO;
5950 CHECK_NATNUM (val);
5951 coding->produced_char += Z - prev_Z;
5952 coding->produced += Z_BYTE - prev_Z_BYTE;
5953 }
5954
5955 if (EQ (dst_object, Qt))
5956 {
5957 coding->dst_object = Fbuffer_string ();
5958 }
5959 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
5960 {
5961 set_buffer_internal (XBUFFER (coding->dst_object));
5962 if (dst_bytes < coding->produced)
5963 {
5964 destination
5965 = (unsigned char *) xrealloc (destination, coding->produced);
5966 if (! destination)
5967 {
5968 coding->result = CODING_RESULT_INSUFFICIENT_DST;
5969 unbind_to (count, Qnil);
5970 return;
5971 }
5972 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
5973 move_gap_both (BEGV, BEGV_BYTE);
5974 bcopy (BEGV_ADDR, destination, coding->produced);
5975 coding->destination = destination;
5976 }
5977 }
5978
5979 unbind_to (count, Qnil);
5980 }
5981
5982
5983 void
5984 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5985 dst_object)
5986 struct coding_system *coding;
5987 Lisp_Object src_object;
5988 EMACS_INT from, from_byte, to, to_byte;
5989 Lisp_Object dst_object;
5990 {
5991 int count = specpdl_ptr - specpdl;
5992 EMACS_INT chars = to - from;
5993 EMACS_INT bytes = to_byte - from_byte;
5994 Lisp_Object attrs;
5995
5996 saved_coding = coding;
5997 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5998
5999 coding->src_object = src_object;
6000 coding->src_chars = chars;
6001 coding->src_bytes = bytes;
6002 coding->src_multibyte = chars < bytes;
6003
6004 attrs = CODING_ID_ATTRS (coding->id);
6005
6006 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6007 {
6008 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6009 set_buffer_internal (XBUFFER (coding->src_object));
6010 if (STRINGP (src_object))
6011 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6012 else if (BUFFERP (src_object))
6013 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6014 else
6015 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6016
6017 if (EQ (src_object, dst_object))
6018 {
6019 set_buffer_internal (XBUFFER (src_object));
6020 del_range_both (from, from_byte, to, to_byte, 1);
6021 set_buffer_internal (XBUFFER (coding->src_object));
6022 }
6023
6024 call2 (CODING_ATTR_PRE_WRITE (attrs),
6025 make_number (BEG), make_number (Z));
6026 coding->src_object = Fcurrent_buffer ();
6027 if (BEG != GPT)
6028 move_gap_both (BEG, BEG_BYTE);
6029 coding->src_chars = Z - BEG;
6030 coding->src_bytes = Z_BYTE - BEG_BYTE;
6031 coding->src_pos = BEG;
6032 coding->src_pos_byte = BEG_BYTE;
6033 coding->src_multibyte = Z < Z_BYTE;
6034 }
6035 else if (STRINGP (src_object))
6036 {
6037 coding->src_pos = from;
6038 coding->src_pos_byte = from_byte;
6039 }
6040 else if (BUFFERP (src_object))
6041 {
6042 set_buffer_internal (XBUFFER (src_object));
6043 if (from != GPT)
6044 move_gap_both (from, from_byte);
6045 if (EQ (src_object, dst_object))
6046 {
6047 del_range_both (from, from_byte, to, to_byte, 1);
6048 coding->src_pos = -chars;
6049 coding->src_pos_byte = -bytes;
6050 }
6051 else
6052 {
6053 coding->src_pos = from;
6054 coding->src_pos_byte = from_byte;
6055 }
6056 }
6057
6058 if (BUFFERP (dst_object))
6059 {
6060 coding->dst_object = dst_object;
6061 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6062 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6063 coding->dst_multibyte
6064 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6065 }
6066 else if (EQ (dst_object, Qt))
6067 {
6068 coding->dst_object = Qnil;
6069 coding->dst_bytes = coding->src_chars;
6070 if (coding->dst_bytes == 0)
6071 coding->dst_bytes = 1;
6072 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6073 coding->dst_multibyte = 0;
6074 }
6075 else
6076 {
6077 coding->dst_object = Qnil;
6078 coding->dst_multibyte = 0;
6079 }
6080
6081 encode_coding (coding);
6082
6083 if (EQ (dst_object, Qt))
6084 {
6085 if (BUFFERP (coding->dst_object))
6086 coding->dst_object = Fbuffer_string ();
6087 else
6088 {
6089 coding->dst_object
6090 = make_unibyte_string ((char *) coding->destination,
6091 coding->produced);
6092 xfree (coding->destination);
6093 }
6094 }
6095
6096 unbind_to (count, Qnil);
6097 }
6098
6099
6100 Lisp_Object
6101 preferred_coding_system ()
6102 {
6103 int id = coding_categories[coding_priorities[0]].id;
6104
6105 return CODING_ID_NAME (id);
6106 }
6107
6108 \f
6109 #ifdef emacs
6110 /*** 8. Emacs Lisp library functions ***/
6111
6112 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6113 doc: /* Return t if OBJECT is nil or a coding-system.
6114 See the documentation of `define-coding-system' for information
6115 about coding-system objects. */)
6116 (obj)
6117 Lisp_Object obj;
6118 {
6119 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6120 }
6121
6122 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6123 Sread_non_nil_coding_system, 1, 1, 0,
6124 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6125 (prompt)
6126 Lisp_Object prompt;
6127 {
6128 Lisp_Object val;
6129 do
6130 {
6131 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6132 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6133 }
6134 while (XSTRING (val)->size == 0);
6135 return (Fintern (val, Qnil));
6136 }
6137
6138 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6139 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6140 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6141 (prompt, default_coding_system)
6142 Lisp_Object prompt, default_coding_system;
6143 {
6144 Lisp_Object val;
6145 if (SYMBOLP (default_coding_system))
6146 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6147 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6148 Qt, Qnil, Qcoding_system_history,
6149 default_coding_system, Qnil);
6150 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6151 }
6152
6153 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6154 1, 1, 0,
6155 doc: /* Check validity of CODING-SYSTEM.
6156 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6157 It is valid if it is a symbol with a non-nil `coding-system' property.
6158 The value of property should be a vector of length 5. */)
6159 (coding_system)
6160 Lisp_Object coding_system;
6161 {
6162 CHECK_SYMBOL (coding_system);
6163 if (!NILP (Fcoding_system_p (coding_system)))
6164 return coding_system;
6165 while (1)
6166 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6167 }
6168
6169 \f
6170 Lisp_Object
6171 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6172 unsigned char *src;
6173 int src_bytes, highest;
6174 int multibytep;
6175 Lisp_Object coding_system;
6176 {
6177 unsigned char *src_end = src + src_bytes;
6178 int mask = CATEGORY_MASK_ANY;
6179 int detected = 0;
6180 int c, i;
6181 Lisp_Object attrs, eol_type;
6182 Lisp_Object val;
6183 struct coding_system coding;
6184
6185 if (NILP (coding_system))
6186 coding_system = Qundecided;
6187 setup_coding_system (coding_system, &coding);
6188 attrs = CODING_ID_ATTRS (coding.id);
6189 eol_type = CODING_ID_EOL_TYPE (coding.id);
6190
6191 coding.source = src;
6192 coding.src_bytes = src_bytes;
6193 coding.src_multibyte = multibytep;
6194 coding.consumed = 0;
6195
6196 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
6197 {
6198 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6199 }
6200 else
6201 {
6202 coding_system = Qnil;
6203 for (; src < src_end; src++)
6204 {
6205 c = *src;
6206 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6207 || c == ISO_CODE_SI
6208 || c == ISO_CODE_SO)))
6209 break;
6210 }
6211 coding.head_ascii = src - coding.source;
6212
6213 if (src < src_end)
6214 for (i = 0; i < coding_category_raw_text; i++)
6215 {
6216 enum coding_category category = coding_priorities[i];
6217 struct coding_system *this = coding_categories + category;
6218
6219 if (category >= coding_category_raw_text
6220 || detected & (1 << category))
6221 continue;
6222
6223 if (this->id < 0)
6224 {
6225 /* No coding system of this category is defined. */
6226 mask &= ~(1 << category);
6227 }
6228 else
6229 {
6230 detected |= detected_mask[category];
6231 if ((*(coding_categories[category].detector)) (&coding, &mask)
6232 && highest)
6233 {
6234 mask &= detected_mask[category];
6235 break;
6236 }
6237 }
6238 }
6239 }
6240
6241 if (!mask)
6242 val = Fcons (make_number (coding_category_raw_text), Qnil);
6243 else if (mask == CATEGORY_MASK_ANY)
6244 val = Fcons (make_number (coding_category_undecided), Qnil);
6245 else if (highest)
6246 {
6247 for (i = 0; i < coding_category_raw_text; i++)
6248 if (mask & (1 << coding_priorities[i]))
6249 {
6250 val = Fcons (make_number (coding_priorities[i]), Qnil);
6251 break;
6252 }
6253 }
6254 else
6255 {
6256 val = Qnil;
6257 for (i = coding_category_raw_text - 1; i >= 0; i--)
6258 if (mask & (1 << coding_priorities[i]))
6259 val = Fcons (make_number (coding_priorities[i]), val);
6260 }
6261
6262 {
6263 int one_byte_eol = -1, two_byte_eol = -1;
6264 Lisp_Object tail;
6265
6266 for (tail = val; CONSP (tail); tail = XCDR (tail))
6267 {
6268 struct coding_system *this
6269 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6270 : &coding);
6271 int this_eol;
6272
6273 attrs = CODING_ID_ATTRS (this->id);
6274 eol_type = CODING_ID_EOL_TYPE (this->id);
6275 XSETCAR (tail, CODING_ID_NAME (this->id));
6276 if (VECTORP (eol_type))
6277 {
6278 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6279 {
6280 if (two_byte_eol < 0)
6281 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6282 this_eol = two_byte_eol;
6283 }
6284 else
6285 {
6286 if (one_byte_eol < 0)
6287 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6288 this_eol = one_byte_eol;
6289 }
6290 if (this_eol == EOL_SEEN_LF)
6291 XSETCAR (tail, AREF (eol_type, 0));
6292 else if (this_eol == EOL_SEEN_CRLF)
6293 XSETCAR (tail, AREF (eol_type, 1));
6294 else if (this_eol == EOL_SEEN_CR)
6295 XSETCAR (tail, AREF (eol_type, 2));
6296 }
6297 }
6298 }
6299
6300 return (highest ? XCAR (val) : val);
6301 }
6302
6303
6304 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6305 2, 3, 0,
6306 doc: /* Detect coding system of the text in the region between START and END.
6307 Return a list of possible coding systems ordered by priority.
6308
6309 If only ASCII characters are found, it returns a list of single element
6310 `undecided' or its subsidiary coding system according to a detected
6311 end-of-line format.
6312
6313 If optional argument HIGHEST is non-nil, return the coding system of
6314 highest priority. */)
6315 (start, end, highest)
6316 Lisp_Object start, end, highest;
6317 {
6318 int from, to;
6319 int from_byte, to_byte;
6320
6321 CHECK_NUMBER_COERCE_MARKER (start);
6322 CHECK_NUMBER_COERCE_MARKER (end);
6323
6324 validate_region (&start, &end);
6325 from = XINT (start), to = XINT (end);
6326 from_byte = CHAR_TO_BYTE (from);
6327 to_byte = CHAR_TO_BYTE (to);
6328
6329 if (from < GPT && to >= GPT)
6330 move_gap_both (to, to_byte);
6331
6332 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6333 to_byte - from_byte,
6334 !NILP (highest),
6335 !NILP (current_buffer
6336 ->enable_multibyte_characters),
6337 Qnil);
6338 }
6339
6340 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6341 1, 2, 0,
6342 doc: /* Detect coding system of the text in STRING.
6343 Return a list of possible coding systems ordered by priority.
6344
6345 If only ASCII characters are found, it returns a list of single element
6346 `undecided' or its subsidiary coding system according to a detected
6347 end-of-line format.
6348
6349 If optional argument HIGHEST is non-nil, return the coding system of
6350 highest priority. */)
6351 (string, highest)
6352 Lisp_Object string, highest;
6353 {
6354 CHECK_STRING (string);
6355
6356 return detect_coding_system (XSTRING (string)->data,
6357 STRING_BYTES (XSTRING (string)),
6358 !NILP (highest),
6359 STRING_MULTIBYTE (string),
6360 Qnil);
6361 }
6362
6363
6364 static INLINE int
6365 char_encodable_p (c, attrs)
6366 int c;
6367 Lisp_Object attrs;
6368 {
6369 Lisp_Object tail;
6370 struct charset *charset;
6371
6372 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6373 CONSP (tail); tail = XCDR (tail))
6374 {
6375 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6376 if (CHAR_CHARSET_P (c, charset))
6377 break;
6378 }
6379 return (! NILP (tail));
6380 }
6381
6382
6383 /* Return a list of coding systems that safely encode the text between
6384 START and END. If EXCLUDE is non-nil, it is a list of coding
6385 systems not to check. The returned list doesn't contain any such
6386 coding systems. In any case, If the text contains only ASCII or is
6387 unibyte, return t. */
6388
6389 DEFUN ("find-coding-systems-region-internal",
6390 Ffind_coding_systems_region_internal,
6391 Sfind_coding_systems_region_internal, 2, 3, 0,
6392 doc: /* Internal use only. */)
6393 (start, end, exclude)
6394 Lisp_Object start, end, exclude;
6395 {
6396 Lisp_Object coding_attrs_list, safe_codings;
6397 EMACS_INT start_byte, end_byte;
6398 unsigned char *p, *pbeg, *pend;
6399 int c;
6400 Lisp_Object tail, elt;
6401
6402 if (STRINGP (start))
6403 {
6404 if (!STRING_MULTIBYTE (start)
6405 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6406 return Qt;
6407 start_byte = 0;
6408 end_byte = STRING_BYTES (XSTRING (start));
6409 }
6410 else
6411 {
6412 CHECK_NUMBER_COERCE_MARKER (start);
6413 CHECK_NUMBER_COERCE_MARKER (end);
6414 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6415 args_out_of_range (start, end);
6416 if (NILP (current_buffer->enable_multibyte_characters))
6417 return Qt;
6418 start_byte = CHAR_TO_BYTE (XINT (start));
6419 end_byte = CHAR_TO_BYTE (XINT (end));
6420 if (XINT (end) - XINT (start) == end_byte - start_byte)
6421 return Qt;
6422
6423 if (start < GPT && end > GPT)
6424 {
6425 if ((GPT - start) < (end - GPT))
6426 move_gap_both (start, start_byte);
6427 else
6428 move_gap_both (end, end_byte);
6429 }
6430 }
6431
6432 coding_attrs_list = Qnil;
6433 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6434 if (NILP (exclude)
6435 || NILP (Fmemq (XCAR (tail), exclude)))
6436 {
6437 Lisp_Object attrs;
6438
6439 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6440 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6441 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6442 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6443 }
6444
6445 if (STRINGP (start))
6446 p = pbeg = XSTRING (start)->data;
6447 else
6448 p = pbeg = BYTE_POS_ADDR (start_byte);
6449 pend = p + (end_byte - start_byte);
6450
6451 while (p < pend && ASCII_BYTE_P (*p)) p++;
6452 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6453
6454 while (p < pend)
6455 {
6456 if (ASCII_BYTE_P (*p))
6457 p++;
6458 else
6459 {
6460 c = STRING_CHAR_ADVANCE (p);
6461
6462 charset_map_loaded = 0;
6463 for (tail = coding_attrs_list; CONSP (tail);)
6464 {
6465 elt = XCAR (tail);
6466 if (NILP (elt))
6467 tail = XCDR (tail);
6468 else if (char_encodable_p (c, elt))
6469 tail = XCDR (tail);
6470 else if (CONSP (XCDR (tail)))
6471 {
6472 XSETCAR (tail, XCAR (XCDR (tail)));
6473 XSETCDR (tail, XCDR (XCDR (tail)));
6474 }
6475 else
6476 {
6477 XSETCAR (tail, Qnil);
6478 tail = XCDR (tail);
6479 }
6480 }
6481 if (charset_map_loaded)
6482 {
6483 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6484
6485 if (STRINGP (start))
6486 pbeg = XSTRING (start)->data;
6487 else
6488 pbeg = BYTE_POS_ADDR (start_byte);
6489 p = pbeg + p_offset;
6490 pend = pbeg + pend_offset;
6491 }
6492 }
6493 }
6494
6495 safe_codings = Qnil;
6496 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6497 if (! NILP (XCAR (tail)))
6498 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6499
6500 return safe_codings;
6501 }
6502
6503
6504 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6505 Scheck_coding_systems_region, 3, 3, 0,
6506 doc: /* Check if the region is encodable by coding systems.
6507
6508 START and END are buffer positions specifying the region.
6509 CODING-SYSTEM-LIST is a list of coding systems to check.
6510
6511 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6512 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6513 whole region, POS0, POS1, ... are buffer positions where non-encodable
6514 characters are found.
6515
6516 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6517 value is nil.
6518
6519 START may be a string. In that case, check if the string is
6520 encodable, and the value contains indices to the string instead of
6521 buffer positions. END is ignored. */)
6522 (start, end, coding_system_list)
6523 Lisp_Object start, end, coding_system_list;
6524 {
6525 Lisp_Object list;
6526 EMACS_INT start_byte, end_byte;
6527 int pos;
6528 unsigned char *p, *pbeg, *pend;
6529 int c;
6530 Lisp_Object tail, elt;
6531
6532 if (STRINGP (start))
6533 {
6534 if (!STRING_MULTIBYTE (start)
6535 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6536 return Qnil;
6537 start_byte = 0;
6538 end_byte = STRING_BYTES (XSTRING (start));
6539 pos = 0;
6540 }
6541 else
6542 {
6543 CHECK_NUMBER_COERCE_MARKER (start);
6544 CHECK_NUMBER_COERCE_MARKER (end);
6545 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6546 args_out_of_range (start, end);
6547 if (NILP (current_buffer->enable_multibyte_characters))
6548 return Qnil;
6549 start_byte = CHAR_TO_BYTE (XINT (start));
6550 end_byte = CHAR_TO_BYTE (XINT (end));
6551 if (XINT (end) - XINT (start) == end_byte - start_byte)
6552 return Qt;
6553
6554 if (start < GPT && end > GPT)
6555 {
6556 if ((GPT - start) < (end - GPT))
6557 move_gap_both (start, start_byte);
6558 else
6559 move_gap_both (end, end_byte);
6560 }
6561 pos = start;
6562 }
6563
6564 list = Qnil;
6565 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6566 {
6567 elt = XCAR (tail);
6568 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6569 Qnil)),
6570 list);
6571 }
6572
6573 if (STRINGP (start))
6574 p = pbeg = XSTRING (start)->data;
6575 else
6576 p = pbeg = BYTE_POS_ADDR (start_byte);
6577 pend = p + (end_byte - start_byte);
6578
6579 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6580 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6581
6582 while (p < pend)
6583 {
6584 if (ASCII_BYTE_P (*p))
6585 p++;
6586 else
6587 {
6588 c = STRING_CHAR_ADVANCE (p);
6589
6590 charset_map_loaded = 0;
6591 for (tail = list; CONSP (tail); tail = XCDR (tail))
6592 {
6593 elt = XCDR (XCAR (tail));
6594 if (! char_encodable_p (c, XCAR (elt)))
6595 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6596 }
6597 if (charset_map_loaded)
6598 {
6599 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6600
6601 if (STRINGP (start))
6602 pbeg = XSTRING (start)->data;
6603 else
6604 pbeg = BYTE_POS_ADDR (start_byte);
6605 p = pbeg + p_offset;
6606 pend = pbeg + pend_offset;
6607 }
6608 }
6609 pos++;
6610 }
6611
6612 tail = list;
6613 list = Qnil;
6614 for (; CONSP (tail); tail = XCDR (tail))
6615 {
6616 elt = XCAR (tail);
6617 if (CONSP (XCDR (XCDR (elt))))
6618 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6619 list);
6620 }
6621
6622 return list;
6623 }
6624
6625
6626
6627 Lisp_Object
6628 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6629 Lisp_Object start, end, coding_system, dst_object;
6630 int encodep, norecord;
6631 {
6632 struct coding_system coding;
6633 EMACS_INT from, from_byte, to, to_byte;
6634 Lisp_Object src_object;
6635
6636 CHECK_NUMBER_COERCE_MARKER (start);
6637 CHECK_NUMBER_COERCE_MARKER (end);
6638 if (NILP (coding_system))
6639 coding_system = Qno_conversion;
6640 else
6641 CHECK_CODING_SYSTEM (coding_system);
6642 src_object = Fcurrent_buffer ();
6643 if (NILP (dst_object))
6644 dst_object = src_object;
6645 else if (! EQ (dst_object, Qt))
6646 CHECK_BUFFER (dst_object);
6647
6648 validate_region (&start, &end);
6649 from = XFASTINT (start);
6650 from_byte = CHAR_TO_BYTE (from);
6651 to = XFASTINT (end);
6652 to_byte = CHAR_TO_BYTE (to);
6653
6654 setup_coding_system (coding_system, &coding);
6655 coding.mode |= CODING_MODE_LAST_BLOCK;
6656
6657 if (encodep)
6658 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6659 dst_object);
6660 else
6661 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6662 dst_object);
6663 if (! norecord)
6664 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6665
6666 if (coding.result != CODING_RESULT_SUCCESS)
6667 error ("Code conversion error: %d", coding.result);
6668
6669 return (BUFFERP (dst_object)
6670 ? make_number (coding.produced_char)
6671 : coding.dst_object);
6672 }
6673
6674
6675 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6676 3, 4, "r\nzCoding system: ",
6677 doc: /* Decode the current region from the specified coding system.
6678 When called from a program, takes four arguments:
6679 START, END, CODING-SYSTEM, and DESTINATION.
6680 START and END are buffer positions.
6681
6682 Optional 4th arguments DESTINATION specifies where the decoded text goes.
6683 If nil, the region between START and END is replace by the decoded text.
6684 If buffer, the decoded text is inserted in the buffer.
6685 If t, the decoded text is returned.
6686
6687 This function sets `last-coding-system-used' to the precise coding system
6688 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6689 not fully specified.)
6690 It returns the length of the decoded text. */)
6691 (start, end, coding_system, destination)
6692 Lisp_Object start, end, coding_system, destination;
6693 {
6694 return code_convert_region (start, end, coding_system, destination, 0, 0);
6695 }
6696
6697 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6698 3, 4, "r\nzCoding system: ",
6699 doc: /* Encode the current region by specified coding system.
6700 When called from a program, takes three arguments:
6701 START, END, and CODING-SYSTEM. START and END are buffer positions.
6702
6703 Optional 4th arguments DESTINATION specifies where the encoded text goes.
6704 If nil, the region between START and END is replace by the encoded text.
6705 If buffer, the encoded text is inserted in the buffer.
6706 If t, the encoded text is returned.
6707
6708 This function sets `last-coding-system-used' to the precise coding system
6709 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6710 not fully specified.)
6711 It returns the length of the encoded text. */)
6712 (start, end, coding_system, destination)
6713 Lisp_Object start, end, coding_system, destination;
6714 {
6715 return code_convert_region (start, end, coding_system, destination, 1, 0);
6716 }
6717
6718 Lisp_Object
6719 code_convert_string (string, coding_system, dst_object,
6720 encodep, nocopy, norecord)
6721 Lisp_Object string, coding_system, dst_object;
6722 int encodep, nocopy, norecord;
6723 {
6724 struct coding_system coding;
6725 EMACS_INT chars, bytes;
6726
6727 CHECK_STRING (string);
6728 if (NILP (coding_system))
6729 {
6730 if (! norecord)
6731 Vlast_coding_system_used = Qno_conversion;
6732 if (NILP (dst_object))
6733 return (nocopy ? Fcopy_sequence (string) : string);
6734 }
6735
6736 if (NILP (coding_system))
6737 coding_system = Qno_conversion;
6738 else
6739 CHECK_CODING_SYSTEM (coding_system);
6740 if (NILP (dst_object))
6741 dst_object = Qt;
6742 else if (! EQ (dst_object, Qt))
6743 CHECK_BUFFER (dst_object);
6744
6745 setup_coding_system (coding_system, &coding);
6746 coding.mode |= CODING_MODE_LAST_BLOCK;
6747 chars = XSTRING (string)->size;
6748 bytes = STRING_BYTES (XSTRING (string));
6749 if (encodep)
6750 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6751 else
6752 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6753 if (! norecord)
6754 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6755
6756 if (coding.result != CODING_RESULT_SUCCESS)
6757 error ("Code conversion error: %d", coding.result);
6758
6759 return (BUFFERP (dst_object)
6760 ? make_number (coding.produced_char)
6761 : coding.dst_object);
6762 }
6763
6764
6765 /* Encode or decode STRING according to CODING_SYSTEM.
6766 Do not set Vlast_coding_system_used.
6767
6768 This function is called only from macros DECODE_FILE and
6769 ENCODE_FILE, thus we ignore character composition. */
6770
6771 Lisp_Object
6772 code_convert_string_norecord (string, coding_system, encodep)
6773 Lisp_Object string, coding_system;
6774 int encodep;
6775 {
6776 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
6777 }
6778
6779
6780 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6781 2, 4, 0,
6782 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6783
6784 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6785 if the decoding operation is trivial.
6786
6787 Optional fourth arg BUFFER non-nil meant that the decoded text is
6788 inserted in BUFFER instead of returned as a astring. In this case,
6789 the return value is BUFFER.
6790
6791 This function sets `last-coding-system-used' to the precise coding system
6792 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6793 not fully specified. */)
6794 (string, coding_system, nocopy, buffer)
6795 Lisp_Object string, coding_system, nocopy, buffer;
6796 {
6797 return code_convert_string (string, coding_system, buffer,
6798 0, ! NILP (nocopy), 0);
6799 }
6800
6801 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6802 2, 4, 0,
6803 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6804
6805 Optional third arg NOCOPY non-nil means it is OK to return STRING
6806 itself if the encoding operation is trivial.
6807
6808 Optional fourth arg BUFFER non-nil meant that the encoded text is
6809 inserted in BUFFER instead of returned as a astring. In this case,
6810 the return value is BUFFER.
6811
6812 This function sets `last-coding-system-used' to the precise coding system
6813 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6814 not fully specified.) */)
6815 (string, coding_system, nocopy, buffer)
6816 Lisp_Object string, coding_system, nocopy, buffer;
6817 {
6818 return code_convert_string (string, coding_system, buffer,
6819 nocopy, ! NILP (nocopy), 1);
6820 }
6821
6822 \f
6823 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6824 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6825 Return the corresponding character. */)
6826 (code)
6827 Lisp_Object code;
6828 {
6829 Lisp_Object spec, attrs, val;
6830 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6831 int c;
6832
6833 CHECK_NATNUM (code);
6834 c = XFASTINT (code);
6835 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6836 attrs = AREF (spec, 0);
6837
6838 if (ASCII_BYTE_P (c)
6839 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6840 return code;
6841
6842 val = CODING_ATTR_CHARSET_LIST (attrs);
6843 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6844 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6845 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
6846
6847 if (c <= 0x7F)
6848 charset = charset_roman;
6849 else if (c >= 0xA0 && c < 0xDF)
6850 {
6851 charset = charset_kana;
6852 c -= 0x80;
6853 }
6854 else
6855 {
6856 int s1 = c >> 8, s2 = c & 0xFF;
6857
6858 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6859 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6860 error ("Invalid code: %d", code);
6861 SJIS_TO_JIS (c);
6862 charset = charset_kanji;
6863 }
6864 c = DECODE_CHAR (charset, c);
6865 if (c < 0)
6866 error ("Invalid code: %d", code);
6867 return make_number (c);
6868 }
6869
6870
6871 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6872 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6873 Return the corresponding code in SJIS. */)
6874 (ch)
6875 Lisp_Object ch;
6876 {
6877 Lisp_Object spec, attrs, charset_list;
6878 int c;
6879 struct charset *charset;
6880 unsigned code;
6881
6882 CHECK_CHARACTER (ch);
6883 c = XFASTINT (ch);
6884 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6885 attrs = AREF (spec, 0);
6886
6887 if (ASCII_CHAR_P (c)
6888 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6889 return ch;
6890
6891 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6892 charset = char_charset (c, charset_list, &code);
6893 if (code == CHARSET_INVALID_CODE (charset))
6894 error ("Can't encode by shift_jis encoding: %d", c);
6895 JIS_TO_SJIS (code);
6896
6897 return make_number (code);
6898 }
6899
6900 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6901 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6902 Return the corresponding character. */)
6903 (code)
6904 Lisp_Object code;
6905 {
6906 Lisp_Object spec, attrs, val;
6907 struct charset *charset_roman, *charset_big5, *charset;
6908 int c;
6909
6910 CHECK_NATNUM (code);
6911 c = XFASTINT (code);
6912 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6913 attrs = AREF (spec, 0);
6914
6915 if (ASCII_BYTE_P (c)
6916 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6917 return code;
6918
6919 val = CODING_ATTR_CHARSET_LIST (attrs);
6920 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6921 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6922
6923 if (c <= 0x7F)
6924 charset = charset_roman;
6925 else
6926 {
6927 int b1 = c >> 8, b2 = c & 0x7F;
6928 if (b1 < 0xA1 || b1 > 0xFE
6929 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6930 error ("Invalid code: %d", code);
6931 charset = charset_big5;
6932 }
6933 c = DECODE_CHAR (charset, (unsigned )c);
6934 if (c < 0)
6935 error ("Invalid code: %d", code);
6936 return make_number (c);
6937 }
6938
6939 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6940 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
6941 Return the corresponding character code in Big5. */)
6942 (ch)
6943 Lisp_Object ch;
6944 {
6945 Lisp_Object spec, attrs, charset_list;
6946 struct charset *charset;
6947 int c;
6948 unsigned code;
6949
6950 CHECK_CHARACTER (ch);
6951 c = XFASTINT (ch);
6952 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6953 attrs = AREF (spec, 0);
6954 if (ASCII_CHAR_P (c)
6955 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6956 return ch;
6957
6958 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6959 charset = char_charset (c, charset_list, &code);
6960 if (code == CHARSET_INVALID_CODE (charset))
6961 error ("Can't encode by Big5 encoding: %d", c);
6962
6963 return make_number (code);
6964 }
6965
6966 \f
6967 DEFUN ("set-terminal-coding-system-internal",
6968 Fset_terminal_coding_system_internal,
6969 Sset_terminal_coding_system_internal, 1, 1, 0,
6970 doc: /* Internal use only. */)
6971 (coding_system)
6972 Lisp_Object coding_system;
6973 {
6974 CHECK_SYMBOL (coding_system);
6975 setup_coding_system (Fcheck_coding_system (coding_system),
6976 &terminal_coding);
6977
6978 /* We had better not send unsafe characters to terminal. */
6979 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
6980 /* Characer composition should be disabled. */
6981 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6982 terminal_coding.src_multibyte = 1;
6983 terminal_coding.dst_multibyte = 0;
6984 return Qnil;
6985 }
6986
6987 DEFUN ("set-safe-terminal-coding-system-internal",
6988 Fset_safe_terminal_coding_system_internal,
6989 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
6990 doc: /* Internal use only. */)
6991 (coding_system)
6992 Lisp_Object coding_system;
6993 {
6994 CHECK_SYMBOL (coding_system);
6995 setup_coding_system (Fcheck_coding_system (coding_system),
6996 &safe_terminal_coding);
6997 /* Characer composition should be disabled. */
6998 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6999 safe_terminal_coding.src_multibyte = 1;
7000 safe_terminal_coding.dst_multibyte = 0;
7001 return Qnil;
7002 }
7003
7004 DEFUN ("terminal-coding-system",
7005 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7006 doc: /* Return coding system specified for terminal output. */)
7007 ()
7008 {
7009 return CODING_ID_NAME (terminal_coding.id);
7010 }
7011
7012 DEFUN ("set-keyboard-coding-system-internal",
7013 Fset_keyboard_coding_system_internal,
7014 Sset_keyboard_coding_system_internal, 1, 1, 0,
7015 doc: /* Internal use only. */)
7016 (coding_system)
7017 Lisp_Object coding_system;
7018 {
7019 CHECK_SYMBOL (coding_system);
7020 setup_coding_system (Fcheck_coding_system (coding_system),
7021 &keyboard_coding);
7022 /* Characer composition should be disabled. */
7023 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7024 return Qnil;
7025 }
7026
7027 DEFUN ("keyboard-coding-system",
7028 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7029 doc: /* Return coding system specified for decoding keyboard input. */)
7030 ()
7031 {
7032 return CODING_ID_NAME (keyboard_coding.id);
7033 }
7034
7035 \f
7036 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7037 Sfind_operation_coding_system, 1, MANY, 0,
7038 doc: /* Choose a coding system for an operation based on the target name.
7039 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7040 DECODING-SYSTEM is the coding system to use for decoding
7041 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7042 for encoding (in case OPERATION does encoding).
7043
7044 The first argument OPERATION specifies an I/O primitive:
7045 For file I/O, `insert-file-contents' or `write-region'.
7046 For process I/O, `call-process', `call-process-region', or `start-process'.
7047 For network I/O, `open-network-stream'.
7048
7049 The remaining arguments should be the same arguments that were passed
7050 to the primitive. Depending on which primitive, one of those arguments
7051 is selected as the TARGET. For example, if OPERATION does file I/O,
7052 whichever argument specifies the file name is TARGET.
7053
7054 TARGET has a meaning which depends on OPERATION:
7055 For file I/O, TARGET is a file name.
7056 For process I/O, TARGET is a process name.
7057 For network I/O, TARGET is a service name or a port number
7058
7059 This function looks up what specified for TARGET in,
7060 `file-coding-system-alist', `process-coding-system-alist',
7061 or `network-coding-system-alist' depending on OPERATION.
7062 They may specify a coding system, a cons of coding systems,
7063 or a function symbol to call.
7064 In the last case, we call the function with one argument,
7065 which is a list of all the arguments given to this function.
7066
7067 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7068 (nargs, args)
7069 int nargs;
7070 Lisp_Object *args;
7071 {
7072 Lisp_Object operation, target_idx, target, val;
7073 register Lisp_Object chain;
7074
7075 if (nargs < 2)
7076 error ("Too few arguments");
7077 operation = args[0];
7078 if (!SYMBOLP (operation)
7079 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7080 error ("Invalid first arguement");
7081 if (nargs < 1 + XINT (target_idx))
7082 error ("Too few arguments for operation: %s",
7083 XSYMBOL (operation)->name->data);
7084 target = args[XINT (target_idx) + 1];
7085 if (!(STRINGP (target)
7086 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7087 error ("Invalid %dth argument", XINT (target_idx) + 1);
7088
7089 chain = ((EQ (operation, Qinsert_file_contents)
7090 || EQ (operation, Qwrite_region))
7091 ? Vfile_coding_system_alist
7092 : (EQ (operation, Qopen_network_stream)
7093 ? Vnetwork_coding_system_alist
7094 : Vprocess_coding_system_alist));
7095 if (NILP (chain))
7096 return Qnil;
7097
7098 for (; CONSP (chain); chain = XCDR (chain))
7099 {
7100 Lisp_Object elt;
7101
7102 elt = XCAR (chain);
7103 if (CONSP (elt)
7104 && ((STRINGP (target)
7105 && STRINGP (XCAR (elt))
7106 && fast_string_match (XCAR (elt), target) >= 0)
7107 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7108 {
7109 val = XCDR (elt);
7110 /* Here, if VAL is both a valid coding system and a valid
7111 function symbol, we return VAL as a coding system. */
7112 if (CONSP (val))
7113 return val;
7114 if (! SYMBOLP (val))
7115 return Qnil;
7116 if (! NILP (Fcoding_system_p (val)))
7117 return Fcons (val, val);
7118 if (! NILP (Ffboundp (val)))
7119 {
7120 val = call1 (val, Flist (nargs, args));
7121 if (CONSP (val))
7122 return val;
7123 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7124 return Fcons (val, val);
7125 }
7126 return Qnil;
7127 }
7128 }
7129 return Qnil;
7130 }
7131
7132 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7133 Sset_coding_system_priority, 1, MANY, 0,
7134 doc: /* Assign higher priority to coding systems given as arguments.
7135 usage: (set-coding-system-priority CODING-SYSTEM ...) */)
7136 (nargs, args)
7137 int nargs;
7138 Lisp_Object *args;
7139 {
7140 int i, j;
7141 int changed[coding_category_max];
7142 enum coding_category priorities[coding_category_max];
7143
7144 bzero (changed, sizeof changed);
7145
7146 for (i = j = 0; i < nargs; i++)
7147 {
7148 enum coding_category category;
7149 Lisp_Object spec, attrs;
7150
7151 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7152 attrs = AREF (spec, 0);
7153 category = XINT (CODING_ATTR_CATEGORY (attrs));
7154 if (changed[category])
7155 /* Ignore this coding system because a coding system of the
7156 same category already had a higher priority. */
7157 continue;
7158 changed[category] = 1;
7159 priorities[j++] = category;
7160 if (coding_categories[category].id >= 0
7161 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7162 setup_coding_system (args[i], &coding_categories[category]);
7163 }
7164
7165 /* Now we have decided top J priorities. Reflect the order of the
7166 original priorities to the remaining priorities. */
7167
7168 for (i = j, j = 0; i < coding_category_max; i++, j++)
7169 {
7170 while (j < coding_category_max
7171 && changed[coding_priorities[j]])
7172 j++;
7173 if (j == coding_category_max)
7174 abort ();
7175 priorities[i] = coding_priorities[j];
7176 }
7177
7178 bcopy (priorities, coding_priorities, sizeof priorities);
7179 return Qnil;
7180 }
7181
7182 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7183 Scoding_system_priority_list, 0, 1, 0,
7184 doc: /* Return a list of coding systems ordered by their priorities. */)
7185 (highestp)
7186 Lisp_Object highestp;
7187 {
7188 int i;
7189 Lisp_Object val;
7190
7191 for (i = 0, val = Qnil; i < coding_category_max; i++)
7192 {
7193 enum coding_category category = coding_priorities[i];
7194 int id = coding_categories[category].id;
7195 Lisp_Object attrs;
7196
7197 if (id < 0)
7198 continue;
7199 attrs = CODING_ID_ATTRS (id);
7200 if (! NILP (highestp))
7201 return CODING_ATTR_BASE_NAME (attrs);
7202 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7203 }
7204 return Fnreverse (val);
7205 }
7206
7207 static Lisp_Object
7208 make_subsidiaries (base)
7209 Lisp_Object base;
7210 {
7211 Lisp_Object subsidiaries;
7212 char *suffixes[] = { "-unix", "-dos", "-mac" };
7213 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7214 char *buf = (char *) alloca (base_name_len + 6);
7215 int i;
7216
7217 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7218 subsidiaries = Fmake_vector (make_number (3), Qnil);
7219 for (i = 0; i < 3; i++)
7220 {
7221 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7222 ASET (subsidiaries, i, intern (buf));
7223 }
7224 return subsidiaries;
7225 }
7226
7227
7228 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7229 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7230 doc: /* For internal use only.
7231 usage: (define-coding-system-internal ...) */)
7232 (nargs, args)
7233 int nargs;
7234 Lisp_Object *args;
7235 {
7236 Lisp_Object name;
7237 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7238 Lisp_Object attrs; /* Vector of attributes. */
7239 Lisp_Object eol_type;
7240 Lisp_Object aliases;
7241 Lisp_Object coding_type, charset_list, safe_charsets;
7242 enum coding_category category;
7243 Lisp_Object tail, val;
7244 int max_charset_id = 0;
7245 int i;
7246
7247 if (nargs < coding_arg_max)
7248 goto short_args;
7249
7250 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7251
7252 name = args[coding_arg_name];
7253 CHECK_SYMBOL (name);
7254 CODING_ATTR_BASE_NAME (attrs) = name;
7255
7256 val = args[coding_arg_mnemonic];
7257 if (! STRINGP (val))
7258 CHECK_CHARACTER (val);
7259 CODING_ATTR_MNEMONIC (attrs) = val;
7260
7261 coding_type = args[coding_arg_coding_type];
7262 CHECK_SYMBOL (coding_type);
7263 CODING_ATTR_TYPE (attrs) = coding_type;
7264
7265 charset_list = args[coding_arg_charset_list];
7266 if (SYMBOLP (charset_list))
7267 {
7268 if (EQ (charset_list, Qiso_2022))
7269 {
7270 if (! EQ (coding_type, Qiso_2022))
7271 error ("Invalid charset-list");
7272 charset_list = Viso_2022_charset_list;
7273 }
7274 else if (EQ (charset_list, Qemacs_mule))
7275 {
7276 if (! EQ (coding_type, Qemacs_mule))
7277 error ("Invalid charset-list");
7278 charset_list = Vemacs_mule_charset_list;
7279 }
7280 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7281 if (max_charset_id < XFASTINT (XCAR (tail)))
7282 max_charset_id = XFASTINT (XCAR (tail));
7283 }
7284 else
7285 {
7286 charset_list = Fcopy_sequence (charset_list);
7287 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7288 {
7289 struct charset *charset;
7290
7291 val = Fcar (tail);
7292 CHECK_CHARSET_GET_CHARSET (val, charset);
7293 if (EQ (coding_type, Qiso_2022)
7294 ? CHARSET_ISO_FINAL (charset) < 0
7295 : EQ (coding_type, Qemacs_mule)
7296 ? CHARSET_EMACS_MULE_ID (charset) < 0
7297 : 0)
7298 error ("Can't handle charset `%s'",
7299 XSYMBOL (CHARSET_NAME (charset))->name->data);
7300
7301 XCAR (tail) = make_number (charset->id);
7302 if (max_charset_id < charset->id)
7303 max_charset_id = charset->id;
7304 }
7305 }
7306 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7307
7308 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7309 make_number (255));
7310 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7311 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7312 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7313
7314 val = args[coding_arg_decode_translation_table];
7315 if (! NILP (val))
7316 CHECK_CHAR_TABLE (val);
7317 CODING_ATTR_DECODE_TBL (attrs) = val;
7318
7319 val = args[coding_arg_encode_translation_table];
7320 if (! NILP (val))
7321 CHECK_CHAR_TABLE (val);
7322 CODING_ATTR_ENCODE_TBL (attrs) = val;
7323
7324 val = args[coding_arg_post_read_conversion];
7325 CHECK_SYMBOL (val);
7326 CODING_ATTR_POST_READ (attrs) = val;
7327
7328 val = args[coding_arg_pre_write_conversion];
7329 CHECK_SYMBOL (val);
7330 CODING_ATTR_PRE_WRITE (attrs) = val;
7331
7332 val = args[coding_arg_default_char];
7333 if (NILP (val))
7334 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7335 else
7336 {
7337 CHECK_CHARACTER (val);
7338 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7339 }
7340
7341 val = args[coding_arg_plist];
7342 CHECK_LIST (val);
7343 CODING_ATTR_PLIST (attrs) = val;
7344
7345 if (EQ (coding_type, Qcharset))
7346 {
7347 val = Fmake_vector (make_number (256), Qnil);
7348
7349 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7350 {
7351 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7352 int idx = (CHARSET_DIMENSION (charset) - 1) * 4;
7353
7354 for (i = charset->code_space[idx];
7355 i <= charset->code_space[idx + 1]; i++)
7356 {
7357 if (NILP (AREF (val, i)))
7358 ASET (val, i, XCAR (tail));
7359 else
7360 error ("Charsets conflicts in the first byte");
7361 }
7362 }
7363 ASET (attrs, coding_attr_charset_valids, val);
7364 category = coding_category_charset;
7365 }
7366 else if (EQ (coding_type, Qccl))
7367 {
7368 Lisp_Object valids;
7369
7370 if (nargs < coding_arg_ccl_max)
7371 goto short_args;
7372
7373 val = args[coding_arg_ccl_decoder];
7374 CHECK_CCL_PROGRAM (val);
7375 if (VECTORP (val))
7376 val = Fcopy_sequence (val);
7377 ASET (attrs, coding_attr_ccl_decoder, val);
7378
7379 val = args[coding_arg_ccl_encoder];
7380 CHECK_CCL_PROGRAM (val);
7381 if (VECTORP (val))
7382 val = Fcopy_sequence (val);
7383 ASET (attrs, coding_attr_ccl_encoder, val);
7384
7385 val = args[coding_arg_ccl_valids];
7386 valids = Fmake_string (make_number (256), make_number (0));
7387 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7388 {
7389 val = Fcar (tail);
7390 if (INTEGERP (val))
7391 ASET (valids, XINT (val), 1);
7392 else
7393 {
7394 int from, to;
7395
7396 CHECK_CONS (val);
7397 CHECK_NUMBER (XCAR (val));
7398 CHECK_NUMBER (XCDR (val));
7399 from = XINT (XCAR (val));
7400 to = XINT (XCDR (val));
7401 for (i = from; i <= to; i++)
7402 ASET (valids, i, 1);
7403 }
7404 }
7405 ASET (attrs, coding_attr_ccl_valids, valids);
7406
7407 category = coding_category_ccl;
7408 }
7409 else if (EQ (coding_type, Qutf_16))
7410 {
7411 Lisp_Object bom, endian;
7412
7413 if (nargs < coding_arg_utf16_max)
7414 goto short_args;
7415
7416 bom = args[coding_arg_utf16_bom];
7417 if (! NILP (bom) && ! EQ (bom, Qt))
7418 {
7419 CHECK_CONS (bom);
7420 CHECK_CODING_SYSTEM (XCAR (bom));
7421 CHECK_CODING_SYSTEM (XCDR (bom));
7422 }
7423 ASET (attrs, coding_attr_utf_16_bom, bom);
7424
7425 endian = args[coding_arg_utf16_endian];
7426 ASET (attrs, coding_attr_utf_16_endian, endian);
7427
7428 category = (CONSP (bom)
7429 ? coding_category_utf_16_auto
7430 : NILP (bom)
7431 ? (NILP (endian)
7432 ? coding_category_utf_16_be_nosig
7433 : coding_category_utf_16_le_nosig)
7434 : (NILP (endian)
7435 ? coding_category_utf_16_be
7436 : coding_category_utf_16_le));
7437 }
7438 else if (EQ (coding_type, Qiso_2022))
7439 {
7440 Lisp_Object initial, reg_usage, request, flags;
7441 int i, id;
7442
7443 if (nargs < coding_arg_iso2022_max)
7444 goto short_args;
7445
7446 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7447 CHECK_VECTOR (initial);
7448 for (i = 0; i < 4; i++)
7449 {
7450 val = Faref (initial, make_number (i));
7451 if (! NILP (val))
7452 {
7453 CHECK_CHARSET_GET_ID (val, id);
7454 ASET (initial, i, make_number (id));
7455 }
7456 else
7457 ASET (initial, i, make_number (-1));
7458 }
7459
7460 reg_usage = args[coding_arg_iso2022_reg_usage];
7461 CHECK_CONS (reg_usage);
7462 CHECK_NATNUM (XCAR (reg_usage));
7463 CHECK_NATNUM (XCDR (reg_usage));
7464
7465 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7466 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
7467 {
7468 int id;
7469
7470 val = Fcar (tail);
7471 CHECK_CONS (val);
7472 CHECK_CHARSET_GET_ID (XCAR (val), id);
7473 CHECK_NATNUM (XCDR (val));
7474 if (XINT (XCDR (val)) >= 4)
7475 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7476 XCAR (val) = make_number (id);
7477 }
7478
7479 flags = args[coding_arg_iso2022_flags];
7480 CHECK_NATNUM (flags);
7481 i = XINT (flags);
7482 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7483 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7484
7485 ASET (attrs, coding_attr_iso_initial, initial);
7486 ASET (attrs, coding_attr_iso_usage, reg_usage);
7487 ASET (attrs, coding_attr_iso_request, request);
7488 ASET (attrs, coding_attr_iso_flags, flags);
7489 setup_iso_safe_charsets (attrs);
7490
7491 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7492 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7493 | CODING_ISO_FLAG_SINGLE_SHIFT))
7494 ? coding_category_iso_7_else
7495 : EQ (args[coding_arg_charset_list], Qiso_2022)
7496 ? coding_category_iso_7
7497 : coding_category_iso_7_tight);
7498 else
7499 {
7500 int id = XINT (AREF (initial, 1));
7501
7502 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7503 | CODING_ISO_FLAG_SINGLE_SHIFT))
7504 || EQ (args[coding_arg_charset_list], Qiso_2022)
7505 || id < 0)
7506 ? coding_category_iso_8_else
7507 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7508 ? coding_category_iso_8_1
7509 : coding_category_iso_8_2);
7510 }
7511 }
7512 else if (EQ (coding_type, Qemacs_mule))
7513 {
7514 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7515 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7516
7517 category = coding_category_emacs_mule;
7518 }
7519 else if (EQ (coding_type, Qshift_jis))
7520 {
7521
7522 struct charset *charset;
7523
7524 if (XINT (Flength (charset_list)) != 3)
7525 error ("There should be just three charsets");
7526
7527 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7528 if (CHARSET_DIMENSION (charset) != 1)
7529 error ("Dimension of charset %s is not one",
7530 XSYMBOL (CHARSET_NAME (charset))->name->data);
7531
7532 charset_list = XCDR (charset_list);
7533 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7534 if (CHARSET_DIMENSION (charset) != 1)
7535 error ("Dimension of charset %s is not one",
7536 XSYMBOL (CHARSET_NAME (charset))->name->data);
7537
7538 charset_list = XCDR (charset_list);
7539 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7540 if (CHARSET_DIMENSION (charset) != 2)
7541 error ("Dimension of charset %s is not two",
7542 XSYMBOL (CHARSET_NAME (charset))->name->data);
7543
7544 category = coding_category_sjis;
7545 Vsjis_coding_system = name;
7546 }
7547 else if (EQ (coding_type, Qbig5))
7548 {
7549 struct charset *charset;
7550
7551 if (XINT (Flength (charset_list)) != 2)
7552 error ("There should be just two charsets");
7553
7554 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7555 if (CHARSET_DIMENSION (charset) != 1)
7556 error ("Dimension of charset %s is not one",
7557 XSYMBOL (CHARSET_NAME (charset))->name->data);
7558
7559 charset_list = XCDR (charset_list);
7560 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7561 if (CHARSET_DIMENSION (charset) != 2)
7562 error ("Dimension of charset %s is not two",
7563 XSYMBOL (CHARSET_NAME (charset))->name->data);
7564
7565 category = coding_category_big5;
7566 Vbig5_coding_system = name;
7567 }
7568 else if (EQ (coding_type, Qraw_text))
7569 category = coding_category_raw_text;
7570 else if (EQ (coding_type, Qutf_8))
7571 category = coding_category_utf_8;
7572 else if (EQ (coding_type, Qundecided))
7573 category = coding_category_undecided;
7574 else
7575 error ("Invalid coding system type: %s",
7576 XSYMBOL (coding_type)->name->data);
7577
7578 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7579
7580 eol_type = args[coding_arg_eol_type];
7581 if (! NILP (eol_type)
7582 && ! EQ (eol_type, Qunix)
7583 && ! EQ (eol_type, Qdos)
7584 && ! EQ (eol_type, Qmac))
7585 error ("Invalid eol-type");
7586
7587 aliases = Fcons (name, Qnil);
7588
7589 if (NILP (eol_type))
7590 {
7591 eol_type = make_subsidiaries (name);
7592 for (i = 0; i < 3; i++)
7593 {
7594 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7595
7596 this_name = AREF (eol_type, i);
7597 this_aliases = Fcons (this_name, Qnil);
7598 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7599 this_spec = Fmake_vector (make_number (3), attrs);
7600 ASET (this_spec, 1, this_aliases);
7601 ASET (this_spec, 2, this_eol_type);
7602 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7603 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7604 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7605 Vcoding_system_alist);
7606 }
7607 }
7608
7609 spec_vec = Fmake_vector (make_number (3), attrs);
7610 ASET (spec_vec, 1, aliases);
7611 ASET (spec_vec, 2, eol_type);
7612
7613 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7614 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7615 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7616 Vcoding_system_alist);
7617
7618 {
7619 int id = coding_categories[category].id;
7620
7621 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7622 setup_coding_system (name, &coding_categories[category]);
7623 }
7624
7625 return Qnil;
7626
7627 short_args:
7628 return Fsignal (Qwrong_number_of_arguments,
7629 Fcons (intern ("define-coding-system-internal"),
7630 make_number (nargs)));
7631 }
7632
7633 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7634 Sdefine_coding_system_alias, 2, 2, 0,
7635 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7636 (alias, coding_system)
7637 Lisp_Object alias, coding_system;
7638 {
7639 Lisp_Object spec, aliases, eol_type;
7640
7641 CHECK_SYMBOL (alias);
7642 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7643 aliases = AREF (spec, 1);
7644 while (!NILP (XCDR (aliases)))
7645 aliases = XCDR (aliases);
7646 XCDR (aliases) = Fcons (alias, Qnil);
7647
7648 eol_type = AREF (spec, 2);
7649 if (VECTORP (eol_type))
7650 {
7651 Lisp_Object subsidiaries;
7652 int i;
7653
7654 subsidiaries = make_subsidiaries (alias);
7655 for (i = 0; i < 3; i++)
7656 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7657 AREF (eol_type, i));
7658
7659 ASET (spec, 2, subsidiaries);
7660 }
7661
7662 Fputhash (alias, spec, Vcoding_system_hash_table);
7663 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7664 Vcoding_system_alist);
7665
7666 return Qnil;
7667 }
7668
7669 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7670 1, 1, 0,
7671 doc: /* Return the base of CODING-SYSTEM.
7672 Any alias or subsidiary coding systems are not base coding system. */)
7673 (coding_system)
7674 Lisp_Object coding_system;
7675 {
7676 Lisp_Object spec, attrs;
7677
7678 if (NILP (coding_system))
7679 return (Qno_conversion);
7680 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7681 attrs = AREF (spec, 0);
7682 return CODING_ATTR_BASE_NAME (attrs);
7683 }
7684
7685 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7686 1, 1, 0,
7687 doc: "Return the property list of CODING-SYSTEM.")
7688 (coding_system)
7689 Lisp_Object coding_system;
7690 {
7691 Lisp_Object spec, attrs;
7692
7693 if (NILP (coding_system))
7694 coding_system = Qno_conversion;
7695 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7696 attrs = AREF (spec, 0);
7697 return CODING_ATTR_PLIST (attrs);
7698 }
7699
7700
7701 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7702 1, 1, 0,
7703 doc: /* Return the list of aliases of CODING-SYSTEM.
7704 A base coding system is what made by `define-coding-system'.
7705 Any alias nor subsidiary coding systems are not base coding system. */)
7706 (coding_system)
7707 Lisp_Object coding_system;
7708 {
7709 Lisp_Object spec;
7710
7711 if (NILP (coding_system))
7712 coding_system = Qno_conversion;
7713 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7714 return AREF (spec, 2);
7715 }
7716
7717 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7718 Scoding_system_eol_type, 1, 1, 0,
7719 doc: /* Return eol-type of CODING-SYSTEM.
7720 An eol-type is integer 0, 1, 2, or a vector of coding systems.
7721
7722 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7723 and CR respectively.
7724
7725 A vector value indicates that a format of end-of-line should be
7726 detected automatically. Nth element of the vector is the subsidiary
7727 coding system whose eol-type is N. */)
7728 (coding_system)
7729 Lisp_Object coding_system;
7730 {
7731 Lisp_Object spec, eol_type;
7732 int n;
7733
7734 if (NILP (coding_system))
7735 coding_system = Qno_conversion;
7736 if (! CODING_SYSTEM_P (coding_system))
7737 return Qnil;
7738 spec = CODING_SYSTEM_SPEC (coding_system);
7739 eol_type = AREF (spec, 2);
7740 if (VECTORP (eol_type))
7741 return Fcopy_sequence (eol_type);
7742 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7743 return make_number (n);
7744 }
7745
7746 #endif /* emacs */
7747
7748 \f
7749 /*** 9. Post-amble ***/
7750
7751 void
7752 init_coding_once ()
7753 {
7754 int i;
7755
7756 for (i = 0; i < coding_category_max; i++)
7757 {
7758 coding_categories[i].id = -1;
7759 coding_priorities[i] = i;
7760 }
7761
7762 /* ISO2022 specific initialize routine. */
7763 for (i = 0; i < 0x20; i++)
7764 iso_code_class[i] = ISO_control_0;
7765 for (i = 0x21; i < 0x7F; i++)
7766 iso_code_class[i] = ISO_graphic_plane_0;
7767 for (i = 0x80; i < 0xA0; i++)
7768 iso_code_class[i] = ISO_control_1;
7769 for (i = 0xA1; i < 0xFF; i++)
7770 iso_code_class[i] = ISO_graphic_plane_1;
7771 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7772 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7773 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7774 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7775 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7776 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7777 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7778 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7779 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7780 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7781
7782 inhibit_pre_post_conversion = 0;
7783
7784 for (i = 0; i < 256; i++)
7785 {
7786 emacs_mule_bytes[i] = 1;
7787 }
7788 }
7789
7790 #ifdef emacs
7791
7792 void
7793 syms_of_coding ()
7794 {
7795 staticpro (&Vcoding_system_hash_table);
7796 Vcoding_system_hash_table = Fmakehash (Qeq);
7797
7798 staticpro (&Vsjis_coding_system);
7799 Vsjis_coding_system = Qnil;
7800
7801 staticpro (&Vbig5_coding_system);
7802 Vbig5_coding_system = Qnil;
7803
7804 staticpro (&Vcode_conversion_work_buf_list);
7805 Vcode_conversion_work_buf_list = Qnil;
7806
7807 staticpro (&Vcode_conversion_reused_work_buf);
7808 Vcode_conversion_reused_work_buf = Qnil;
7809
7810 DEFSYM (Qcharset, "charset");
7811 DEFSYM (Qtarget_idx, "target-idx");
7812 DEFSYM (Qcoding_system_history, "coding-system-history");
7813 Fset (Qcoding_system_history, Qnil);
7814
7815 /* Target FILENAME is the first argument. */
7816 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7817 /* Target FILENAME is the third argument. */
7818 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7819
7820 DEFSYM (Qcall_process, "call-process");
7821 /* Target PROGRAM is the first argument. */
7822 Fput (Qcall_process, Qtarget_idx, make_number (0));
7823
7824 DEFSYM (Qcall_process_region, "call-process-region");
7825 /* Target PROGRAM is the third argument. */
7826 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7827
7828 DEFSYM (Qstart_process, "start-process");
7829 /* Target PROGRAM is the third argument. */
7830 Fput (Qstart_process, Qtarget_idx, make_number (2));
7831
7832 DEFSYM (Qopen_network_stream, "open-network-stream");
7833 /* Target SERVICE is the fourth argument. */
7834 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7835
7836 DEFSYM (Qcoding_system, "coding-system");
7837 DEFSYM (Qcoding_aliases, "coding-aliases");
7838
7839 DEFSYM (Qeol_type, "eol-type");
7840 DEFSYM (Qunix, "unix");
7841 DEFSYM (Qdos, "dos");
7842 DEFSYM (Qmac, "mac");
7843
7844 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7845 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7846 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7847 DEFSYM (Qdefault_char, "default-char");
7848 DEFSYM (Qundecided, "undecided");
7849 DEFSYM (Qno_conversion, "no-conversion");
7850 DEFSYM (Qraw_text, "raw-text");
7851
7852 DEFSYM (Qiso_2022, "iso-2022");
7853
7854 DEFSYM (Qutf_8, "utf-8");
7855
7856 DEFSYM (Qutf_16, "utf-16");
7857 DEFSYM (Qutf_16_be, "utf-16-be");
7858 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7859 DEFSYM (Qutf_16_le, "utf-16-l3");
7860 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7861 DEFSYM (Qsignature, "signature");
7862 DEFSYM (Qendian, "endian");
7863 DEFSYM (Qbig, "big");
7864 DEFSYM (Qlittle, "little");
7865
7866 DEFSYM (Qshift_jis, "shift-jis");
7867 DEFSYM (Qbig5, "big5");
7868
7869 DEFSYM (Qcoding_system_p, "coding-system-p");
7870
7871 DEFSYM (Qcoding_system_error, "coding-system-error");
7872 Fput (Qcoding_system_error, Qerror_conditions,
7873 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7874 Fput (Qcoding_system_error, Qerror_message,
7875 build_string ("Invalid coding system"));
7876
7877 /* Intern this now in case it isn't already done.
7878 Setting this variable twice is harmless.
7879 But don't staticpro it here--that is done in alloc.c. */
7880 Qchar_table_extra_slots = intern ("char-table-extra-slots");
7881
7882 DEFSYM (Qtranslation_table, "translation-table");
7883 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7884 DEFSYM (Qtranslation_table_id, "translation-table-id");
7885 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7886 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
7887
7888 DEFSYM (Qvalid_codes, "valid-codes");
7889
7890 DEFSYM (Qemacs_mule, "emacs-mule");
7891
7892 Vcoding_category_table
7893 = Fmake_vector (make_number (coding_category_max), Qnil);
7894 staticpro (&Vcoding_category_table);
7895 /* Followings are target of code detection. */
7896 ASET (Vcoding_category_table, coding_category_iso_7,
7897 intern ("coding-category-iso-7"));
7898 ASET (Vcoding_category_table, coding_category_iso_7_tight,
7899 intern ("coding-category-iso-7-tight"));
7900 ASET (Vcoding_category_table, coding_category_iso_8_1,
7901 intern ("coding-category-iso-8-1"));
7902 ASET (Vcoding_category_table, coding_category_iso_8_2,
7903 intern ("coding-category-iso-8-2"));
7904 ASET (Vcoding_category_table, coding_category_iso_7_else,
7905 intern ("coding-category-iso-7-else"));
7906 ASET (Vcoding_category_table, coding_category_iso_8_else,
7907 intern ("coding-category-iso-8-else"));
7908 ASET (Vcoding_category_table, coding_category_utf_8,
7909 intern ("coding-category-utf-8"));
7910 ASET (Vcoding_category_table, coding_category_utf_16_be,
7911 intern ("coding-category-utf-16-be"));
7912 ASET (Vcoding_category_table, coding_category_utf_16_le,
7913 intern ("coding-category-utf-16-le"));
7914 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
7915 intern ("coding-category-utf-16-be-nosig"));
7916 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
7917 intern ("coding-category-utf-16-le-nosig"));
7918 ASET (Vcoding_category_table, coding_category_charset,
7919 intern ("coding-category-charset"));
7920 ASET (Vcoding_category_table, coding_category_sjis,
7921 intern ("coding-category-sjis"));
7922 ASET (Vcoding_category_table, coding_category_big5,
7923 intern ("coding-category-big5"));
7924 ASET (Vcoding_category_table, coding_category_ccl,
7925 intern ("coding-category-ccl"));
7926 ASET (Vcoding_category_table, coding_category_emacs_mule,
7927 intern ("coding-category-emacs-mule"));
7928 /* Followings are NOT target of code detection. */
7929 ASET (Vcoding_category_table, coding_category_raw_text,
7930 intern ("coding-category-raw-text"));
7931 ASET (Vcoding_category_table, coding_category_undecided,
7932 intern ("coding-category-undecided"));
7933
7934 defsubr (&Scoding_system_p);
7935 defsubr (&Sread_coding_system);
7936 defsubr (&Sread_non_nil_coding_system);
7937 defsubr (&Scheck_coding_system);
7938 defsubr (&Sdetect_coding_region);
7939 defsubr (&Sdetect_coding_string);
7940 defsubr (&Sfind_coding_systems_region_internal);
7941 defsubr (&Scheck_coding_systems_region);
7942 defsubr (&Sdecode_coding_region);
7943 defsubr (&Sencode_coding_region);
7944 defsubr (&Sdecode_coding_string);
7945 defsubr (&Sencode_coding_string);
7946 defsubr (&Sdecode_sjis_char);
7947 defsubr (&Sencode_sjis_char);
7948 defsubr (&Sdecode_big5_char);
7949 defsubr (&Sencode_big5_char);
7950 defsubr (&Sset_terminal_coding_system_internal);
7951 defsubr (&Sset_safe_terminal_coding_system_internal);
7952 defsubr (&Sterminal_coding_system);
7953 defsubr (&Sset_keyboard_coding_system_internal);
7954 defsubr (&Skeyboard_coding_system);
7955 defsubr (&Sfind_operation_coding_system);
7956 defsubr (&Sset_coding_system_priority);
7957 defsubr (&Sdefine_coding_system_internal);
7958 defsubr (&Sdefine_coding_system_alias);
7959 defsubr (&Scoding_system_base);
7960 defsubr (&Scoding_system_plist);
7961 defsubr (&Scoding_system_aliases);
7962 defsubr (&Scoding_system_eol_type);
7963 defsubr (&Scoding_system_priority_list);
7964
7965 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7966 doc: /* List of coding systems.
7967
7968 Do not alter the value of this variable manually. This variable should be
7969 updated by the functions `define-coding-system' and
7970 `define-coding-system-alias'. */);
7971 Vcoding_system_list = Qnil;
7972
7973 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7974 doc: /* Alist of coding system names.
7975 Each element is one element list of coding system name.
7976 This variable is given to `completing-read' as TABLE argument.
7977
7978 Do not alter the value of this variable manually. This variable should be
7979 updated by the functions `make-coding-system' and
7980 `define-coding-system-alias'. */);
7981 Vcoding_system_alist = Qnil;
7982
7983 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7984 doc: /* List of coding-categories (symbols) ordered by priority.
7985
7986 On detecting a coding system, Emacs tries code detection algorithms
7987 associated with each coding-category one by one in this order. When
7988 one algorithm agrees with a byte sequence of source text, the coding
7989 system bound to the corresponding coding-category is selected. */);
7990 {
7991 int i;
7992
7993 Vcoding_category_list = Qnil;
7994 for (i = coding_category_max - 1; i >= 0; i--)
7995 Vcoding_category_list
7996 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7997 Vcoding_category_list);
7998 }
7999
8000 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8001 doc: /* Specify the coding system for read operations.
8002 It is useful to bind this variable with `let', but do not set it globally.
8003 If the value is a coding system, it is used for decoding on read operation.
8004 If not, an appropriate element is used from one of the coding system alists:
8005 There are three such tables, `file-coding-system-alist',
8006 `process-coding-system-alist', and `network-coding-system-alist'. */);
8007 Vcoding_system_for_read = Qnil;
8008
8009 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8010 doc: /* Specify the coding system for write operations.
8011 Programs bind this variable with `let', but you should not set it globally.
8012 If the value is a coding system, it is used for encoding of output,
8013 when writing it to a file and when sending it to a file or subprocess.
8014
8015 If this does not specify a coding system, an appropriate element
8016 is used from one of the coding system alists:
8017 There are three such tables, `file-coding-system-alist',
8018 `process-coding-system-alist', and `network-coding-system-alist'.
8019 For output to files, if the above procedure does not specify a coding system,
8020 the value of `buffer-file-coding-system' is used. */);
8021 Vcoding_system_for_write = Qnil;
8022
8023 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8024 doc: /*
8025 Coding system used in the latest file or process I/O. */);
8026 Vlast_coding_system_used = Qnil;
8027
8028 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8029 doc: /*
8030 *Non-nil means always inhibit code conversion of end-of-line format.
8031 See info node `Coding Systems' and info node `Text and Binary' concerning
8032 such conversion. */);
8033 inhibit_eol_conversion = 0;
8034
8035 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8036 doc: /*
8037 Non-nil means process buffer inherits coding system of process output.
8038 Bind it to t if the process output is to be treated as if it were a file
8039 read from some filesystem. */);
8040 inherit_process_coding_system = 0;
8041
8042 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8043 doc: /*
8044 Alist to decide a coding system to use for a file I/O operation.
8045 The format is ((PATTERN . VAL) ...),
8046 where PATTERN is a regular expression matching a file name,
8047 VAL is a coding system, a cons of coding systems, or a function symbol.
8048 If VAL is a coding system, it is used for both decoding and encoding
8049 the file contents.
8050 If VAL is a cons of coding systems, the car part is used for decoding,
8051 and the cdr part is used for encoding.
8052 If VAL is a function symbol, the function must return a coding system
8053 or a cons of coding systems which are used as above. The function gets
8054 the arguments with which `find-operation-coding-systems' was called.
8055
8056 See also the function `find-operation-coding-system'
8057 and the variable `auto-coding-alist'. */);
8058 Vfile_coding_system_alist = Qnil;
8059
8060 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8061 doc: /*
8062 Alist to decide a coding system to use for a process I/O operation.
8063 The format is ((PATTERN . VAL) ...),
8064 where PATTERN is a regular expression matching a program name,
8065 VAL is a coding system, a cons of coding systems, or a function symbol.
8066 If VAL is a coding system, it is used for both decoding what received
8067 from the program and encoding what sent to the program.
8068 If VAL is a cons of coding systems, the car part is used for decoding,
8069 and the cdr part is used for encoding.
8070 If VAL is a function symbol, the function must return a coding system
8071 or a cons of coding systems which are used as above.
8072
8073 See also the function `find-operation-coding-system'. */);
8074 Vprocess_coding_system_alist = Qnil;
8075
8076 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8077 doc: /*
8078 Alist to decide a coding system to use for a network I/O operation.
8079 The format is ((PATTERN . VAL) ...),
8080 where PATTERN is a regular expression matching a network service name
8081 or is a port number to connect to,
8082 VAL is a coding system, a cons of coding systems, or a function symbol.
8083 If VAL is a coding system, it is used for both decoding what received
8084 from the network stream and encoding what sent to the network stream.
8085 If VAL is a cons of coding systems, the car part is used for decoding,
8086 and the cdr part is used for encoding.
8087 If VAL is a function symbol, the function must return a coding system
8088 or a cons of coding systems which are used as above.
8089
8090 See also the function `find-operation-coding-system'. */);
8091 Vnetwork_coding_system_alist = Qnil;
8092
8093 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8094 doc: /* Coding system to use with system messages.
8095 Also used for decoding keyboard input on X Window system. */);
8096 Vlocale_coding_system = Qnil;
8097
8098 /* The eol mnemonics are reset in startup.el system-dependently. */
8099 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8100 doc: /*
8101 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8102 eol_mnemonic_unix = build_string (":");
8103
8104 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8105 doc: /*
8106 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8107 eol_mnemonic_dos = build_string ("\\");
8108
8109 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8110 doc: /*
8111 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8112 eol_mnemonic_mac = build_string ("/");
8113
8114 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8115 doc: /*
8116 *String displayed in mode line when end-of-line format is not yet determined. */);
8117 eol_mnemonic_undecided = build_string (":");
8118
8119 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8120 doc: /*
8121 *Non-nil enables character translation while encoding and decoding. */);
8122 Venable_character_translation = Qt;
8123
8124 DEFVAR_LISP ("standard-translation-table-for-decode",
8125 &Vstandard_translation_table_for_decode,
8126 doc: /* Table for translating characters while decoding. */);
8127 Vstandard_translation_table_for_decode = Qnil;
8128
8129 DEFVAR_LISP ("standard-translation-table-for-encode",
8130 &Vstandard_translation_table_for_encode,
8131 doc: /* Table for translating characters while encoding. */);
8132 Vstandard_translation_table_for_encode = Qnil;
8133
8134 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8135 doc: /* Alist of charsets vs revision numbers.
8136 While encoding, if a charset (car part of an element) is found,
8137 designate it with the escape sequence identifying revision (cdr part
8138 of the element). */);
8139 Vcharset_revision_table = Qnil;
8140
8141 DEFVAR_LISP ("default-process-coding-system",
8142 &Vdefault_process_coding_system,
8143 doc: /* Cons of coding systems used for process I/O by default.
8144 The car part is used for decoding a process output,
8145 the cdr part is used for encoding a text to be sent to a process. */);
8146 Vdefault_process_coding_system = Qnil;
8147
8148 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8149 doc: /*
8150 Table of extra Latin codes in the range 128..159 (inclusive).
8151 This is a vector of length 256.
8152 If Nth element is non-nil, the existence of code N in a file
8153 \(or output of subprocess) doesn't prevent it to be detected as
8154 a coding system of ISO 2022 variant which has a flag
8155 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8156 or reading output of a subprocess.
8157 Only 128th through 159th elements has a meaning. */);
8158 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8159
8160 DEFVAR_LISP ("select-safe-coding-system-function",
8161 &Vselect_safe_coding_system_function,
8162 doc: /*
8163 Function to call to select safe coding system for encoding a text.
8164
8165 If set, this function is called to force a user to select a proper
8166 coding system which can encode the text in the case that a default
8167 coding system used in each operation can't encode the text.
8168
8169 The default value is `select-safe-coding-system' (which see). */);
8170 Vselect_safe_coding_system_function = Qnil;
8171
8172 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8173 &inhibit_iso_escape_detection,
8174 doc: /*
8175 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8176
8177 By default, on reading a file, Emacs tries to detect how the text is
8178 encoded. This code detection is sensitive to escape sequences. If
8179 the sequence is valid as ISO2022, the code is determined as one of
8180 the ISO2022 encodings, and the file is decoded by the corresponding
8181 coding system (e.g. `iso-2022-7bit').
8182
8183 However, there may be a case that you want to read escape sequences in
8184 a file as is. In such a case, you can set this variable to non-nil.
8185 Then, as the code detection ignores any escape sequences, no file is
8186 detected as encoded in some ISO2022 encoding. The result is that all
8187 escape sequences become visible in a buffer.
8188
8189 The default value is nil, and it is strongly recommended not to change
8190 it. That is because many Emacs Lisp source files that contain
8191 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8192 in Emacs's distribution, and they won't be decoded correctly on
8193 reading if you suppress escape sequence detection.
8194
8195 The other way to read escape sequences in a file without decoding is
8196 to explicitly specify some coding system that doesn't use ISO2022's
8197 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8198 inhibit_iso_escape_detection = 0;
8199
8200 {
8201 Lisp_Object args[coding_arg_max];
8202 Lisp_Object plist[14];
8203 int i;
8204
8205 for (i = 0; i < coding_arg_max; i++)
8206 args[i] = Qnil;
8207
8208 plist[0] = intern (":name");
8209 plist[1] = args[coding_arg_name] = Qno_conversion;
8210 plist[2] = intern (":mnemonic");
8211 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8212 plist[4] = intern (":coding-type");
8213 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8214 plist[6] = intern (":ascii-compatible-p");
8215 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8216 plist[8] = intern (":default-char");
8217 plist[9] = args[coding_arg_default_char] = make_number (0);
8218 plist[10] = intern (":docstring");
8219 plist[11] = build_string ("Do no conversion.\n\
8220 \n\
8221 When you visit a file with this coding, the file is read into a\n\
8222 unibyte buffer as is, thus each byte of a file is treated as a\n\
8223 character.");
8224 plist[12] = intern (":eol-type");
8225 plist[13] = args[coding_arg_eol_type] = Qunix;
8226 args[coding_arg_plist] = Flist (14, plist);
8227 Fdefine_coding_system_internal (coding_arg_max, args);
8228 }
8229
8230 setup_coding_system (Qno_conversion, &keyboard_coding);
8231 setup_coding_system (Qno_conversion, &terminal_coding);
8232 setup_coding_system (Qno_conversion, &safe_terminal_coding);
8233 }
8234
8235 char *
8236 emacs_strerror (error_number)
8237 int error_number;
8238 {
8239 char *str;
8240
8241 synchronize_system_messages_locale ();
8242 str = strerror (error_number);
8243
8244 if (! NILP (Vlocale_coding_system))
8245 {
8246 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8247 Vlocale_coding_system,
8248 0);
8249 str = (char *) XSTRING (dec)->data;
8250 }
8251
8252 return str;
8253 }
8254
8255 #endif /* emacs */