(utf-16-le): Renamed from utf-16-le-nosig.
[bpt/emacs.git] / src / coding.c
1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9 This file is part of GNU Emacs.
10
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
14 any later version.
15
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
25
26 /*** TABLE OF CONTENTS ***
27
28 0. General comments
29 1. Preamble
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
41
42 */
43
44 /*** 0. General comments ***
45
46
47 CODING SYSTEM
48
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcharset_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
75 character set.
76
77 o Old Emacs internal format (emacs-mule)
78
79 The coding system adopted by old versions of Emacs (20 and 21).
80
81 o ISO2022-base coding system
82
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
92 section 8.
93
94 o BIG5
95
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
101
102 o CCL
103
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
108
109 o Raw-text
110
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
114
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
118
119
120 END-OF-LINE FORMAT
121
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
127
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
131
132 STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
138
139 */
140
141 /* COMMON MACROS */
142
143
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
149
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
151
152 Below is the template of these functions. */
153
154 #if 0
155 static int
156 detect_coding_XXX (coding, detect_info)
157 struct coding_system *coding;
158 struct coding_detection_info *detect_info;
159 {
160 unsigned char *src = coding->source;
161 unsigned char *src_end = coding->source + coding->src_bytes;
162 int multibytep = coding->src_multibyte;
163 int consumed_chars = 0;
164 int found = 0;
165 ...;
166
167 while (1)
168 {
169 /* Get one byte from the source. If the souce is exausted, jump
170 to no_more_source:. */
171 ONE_MORE_BYTE (c);
172
173 if (! __C_conforms_to_XXX___ (c))
174 break;
175 if (! __C_strongly_suggests_XXX__ (c))
176 found = CATEGORY_MASK_XXX;
177 }
178 /* The byte sequence is invalid for XXX. */
179 detect_info->rejected |= CATEGORY_MASK_XXX;
180 return 0;
181
182 no_more_source:
183 /* The source exausted successfully. */
184 detect_info->found |= found;
185 return 1;
186 }
187 #endif
188
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
190
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
195
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
200
201 Below is the template of these functions. */
202
203 #if 0
204 static void
205 decode_coding_XXXX (coding)
206 struct coding_system *coding;
207 {
208 unsigned char *src = coding->source + coding->consumed;
209 unsigned char *src_end = coding->source + coding->src_bytes;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base;
214 /* A buffer to produce decoded characters. */
215 int *charbuf = coding->charbuf;
216 int *charbuf_end = charbuf + coding->charbuf_size;
217 int multibytep = coding->src_multibyte;
218
219 while (1)
220 {
221 src_base = src;
222 if (charbuf < charbuf_end)
223 /* No more room to produce a decoded character. */
224 break;
225 ONE_MORE_BYTE (c);
226 /* Decode it. */
227 }
228
229 no_more_source:
230 if (src_base < src_end
231 && coding->mode & CODING_MODE_LAST_BLOCK)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base < src_end && charbuf < charbuf_end)
235 *charbuf++ = *src_base++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding->consumed = coding->consumed_char = src_base - coding->source;
239 /* Remember how many characters we produced. */
240 coding->charbuf_used = charbuf - coding->charbuf;
241 }
242 #endif
243
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
245
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
250
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
255
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce a encoded text until it
258 reaches at the head of not-yet-encoded source text.
259
260 Below is a template of these functions. */
261 #if 0
262 static void
263 encode_coding_XXX (coding)
264 struct coding_system *coding;
265 {
266 int multibytep = coding->dst_multibyte;
267 int *charbuf = coding->charbuf;
268 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
269 unsigned char *dst = coding->destination + coding->produced;
270 unsigned char *dst_end = coding->destination + coding->dst_bytes;
271 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
272 int produced_chars = 0;
273
274 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
275 {
276 int c = *charbuf;
277 /* Encode C into DST, and increment DST. */
278 }
279 label_no_more_destination:
280 /* How many chars and bytes we produced. */
281 coding->produced_char += produced_chars;
282 coding->produced = dst - coding->destination;
283 }
284 #endif
285
286 \f
287 /*** 1. Preamble ***/
288
289 #include <config.h>
290 #include <stdio.h>
291
292 #include "lisp.h"
293 #include "buffer.h"
294 #include "character.h"
295 #include "charset.h"
296 #include "ccl.h"
297 #include "composite.h"
298 #include "coding.h"
299 #include "window.h"
300
301 Lisp_Object Vcoding_system_hash_table;
302
303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
304 Lisp_Object Qunix, Qdos;
305 extern Lisp_Object Qmac; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system;
307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
308 Lisp_Object Qdefault_char;
309 Lisp_Object Qno_conversion, Qundecided;
310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
311 Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
312 Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
313 Lisp_Object Qcoding_system_history;
314 Lisp_Object Qvalid_codes;
315
316 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
318 Lisp_Object Qstart_process, Qopen_network_stream;
319 Lisp_Object Qtarget_idx;
320
321 Lisp_Object Vselect_safe_coding_system_function;
322
323 /* Mnemonic string for each format of end-of-line. */
324 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
325 /* Mnemonic string to indicate format of end-of-line is not yet
326 decided. */
327 Lisp_Object eol_mnemonic_undecided;
328
329 #ifdef emacs
330
331 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
332
333 Lisp_Object Qcoding_system_p, Qcoding_system_error;
334
335 /* Coding system emacs-mule and raw-text are for converting only
336 end-of-line format. */
337 Lisp_Object Qemacs_mule, Qraw_text;
338
339 /* Coding-systems are handed between Emacs Lisp programs and C internal
340 routines by the following three variables. */
341 /* Coding-system for reading files and receiving data from process. */
342 Lisp_Object Vcoding_system_for_read;
343 /* Coding-system for writing files and sending data to process. */
344 Lisp_Object Vcoding_system_for_write;
345 /* Coding-system actually used in the latest I/O. */
346 Lisp_Object Vlast_coding_system_used;
347
348 /* A vector of length 256 which contains information about special
349 Latin codes (especially for dealing with Microsoft codes). */
350 Lisp_Object Vlatin_extra_code_table;
351
352 /* Flag to inhibit code conversion of end-of-line format. */
353 int inhibit_eol_conversion;
354
355 /* Flag to inhibit ISO2022 escape sequence detection. */
356 int inhibit_iso_escape_detection;
357
358 /* Flag to make buffer-file-coding-system inherit from process-coding. */
359 int inherit_process_coding_system;
360
361 /* Coding system to be used to encode text for terminal display. */
362 struct coding_system terminal_coding;
363
364 /* Coding system to be used to encode text for terminal display when
365 terminal coding system is nil. */
366 struct coding_system safe_terminal_coding;
367
368 /* Coding system of what is sent from terminal keyboard. */
369 struct coding_system keyboard_coding;
370
371 Lisp_Object Vfile_coding_system_alist;
372 Lisp_Object Vprocess_coding_system_alist;
373 Lisp_Object Vnetwork_coding_system_alist;
374
375 Lisp_Object Vlocale_coding_system;
376
377 #endif /* emacs */
378
379 /* Flag to tell if we look up translation table on character code
380 conversion. */
381 Lisp_Object Venable_character_translation;
382 /* Standard translation table to look up on decoding (reading). */
383 Lisp_Object Vstandard_translation_table_for_decode;
384 /* Standard translation table to look up on encoding (writing). */
385 Lisp_Object Vstandard_translation_table_for_encode;
386
387 Lisp_Object Qtranslation_table;
388 Lisp_Object Qtranslation_table_id;
389 Lisp_Object Qtranslation_table_for_decode;
390 Lisp_Object Qtranslation_table_for_encode;
391
392 /* Alist of charsets vs revision number. */
393 static Lisp_Object Vcharset_revision_table;
394
395 /* Default coding systems used for process I/O. */
396 Lisp_Object Vdefault_process_coding_system;
397
398 /* Global flag to tell that we can't call post-read-conversion and
399 pre-write-conversion functions. Usually the value is zero, but it
400 is set to 1 temporarily while such functions are running. This is
401 to avoid infinite recursive call. */
402 static int inhibit_pre_post_conversion;
403
404 /* Two special coding systems. */
405 Lisp_Object Vsjis_coding_system;
406 Lisp_Object Vbig5_coding_system;
407
408
409 static int detect_coding_utf_8 P_ ((struct coding_system *,
410 struct coding_detection_info *info));
411 static void decode_coding_utf_8 P_ ((struct coding_system *));
412 static int encode_coding_utf_8 P_ ((struct coding_system *));
413
414 static int detect_coding_utf_16 P_ ((struct coding_system *,
415 struct coding_detection_info *info));
416 static void decode_coding_utf_16 P_ ((struct coding_system *));
417 static int encode_coding_utf_16 P_ ((struct coding_system *));
418
419 static int detect_coding_iso_2022 P_ ((struct coding_system *,
420 struct coding_detection_info *info));
421 static void decode_coding_iso_2022 P_ ((struct coding_system *));
422 static int encode_coding_iso_2022 P_ ((struct coding_system *));
423
424 static int detect_coding_emacs_mule P_ ((struct coding_system *,
425 struct coding_detection_info *info));
426 static void decode_coding_emacs_mule P_ ((struct coding_system *));
427 static int encode_coding_emacs_mule P_ ((struct coding_system *));
428
429 static int detect_coding_sjis P_ ((struct coding_system *,
430 struct coding_detection_info *info));
431 static void decode_coding_sjis P_ ((struct coding_system *));
432 static int encode_coding_sjis P_ ((struct coding_system *));
433
434 static int detect_coding_big5 P_ ((struct coding_system *,
435 struct coding_detection_info *info));
436 static void decode_coding_big5 P_ ((struct coding_system *));
437 static int encode_coding_big5 P_ ((struct coding_system *));
438
439 static int detect_coding_ccl P_ ((struct coding_system *,
440 struct coding_detection_info *info));
441 static void decode_coding_ccl P_ ((struct coding_system *));
442 static int encode_coding_ccl P_ ((struct coding_system *));
443
444 static void decode_coding_raw_text P_ ((struct coding_system *));
445 static int encode_coding_raw_text P_ ((struct coding_system *));
446
447
448 /* ISO2022 section */
449
450 #define CODING_ISO_INITIAL(coding, reg) \
451 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
452 coding_attr_iso_initial), \
453 reg)))
454
455
456 #define CODING_ISO_REQUEST(coding, charset_id) \
457 ((charset_id <= (coding)->max_charset_id \
458 ? (coding)->safe_charsets[charset_id] \
459 : -1))
460
461
462 #define CODING_ISO_FLAGS(coding) \
463 ((coding)->spec.iso_2022.flags)
464 #define CODING_ISO_DESIGNATION(coding, reg) \
465 ((coding)->spec.iso_2022.current_designation[reg])
466 #define CODING_ISO_INVOCATION(coding, plane) \
467 ((coding)->spec.iso_2022.current_invocation[plane])
468 #define CODING_ISO_SINGLE_SHIFTING(coding) \
469 ((coding)->spec.iso_2022.single_shifting)
470 #define CODING_ISO_BOL(coding) \
471 ((coding)->spec.iso_2022.bol)
472 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
473 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
474
475 /* Control characters of ISO2022. */
476 /* code */ /* function */
477 #define ISO_CODE_LF 0x0A /* line-feed */
478 #define ISO_CODE_CR 0x0D /* carriage-return */
479 #define ISO_CODE_SO 0x0E /* shift-out */
480 #define ISO_CODE_SI 0x0F /* shift-in */
481 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
482 #define ISO_CODE_ESC 0x1B /* escape */
483 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
484 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
485 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
486
487 /* All code (1-byte) of ISO2022 is classified into one of the
488 followings. */
489 enum iso_code_class_type
490 {
491 ISO_control_0, /* Control codes in the range
492 0x00..0x1F and 0x7F, except for the
493 following 5 codes. */
494 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
495 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
496 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
497 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
498 ISO_escape, /* ISO_CODE_SO (0x1B) */
499 ISO_control_1, /* Control codes in the range
500 0x80..0x9F, except for the
501 following 3 codes. */
502 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
503 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
504 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
505 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
506 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
507 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
508 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
509 };
510
511 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
512 `iso-flags' attribute of an iso2022 coding system. */
513
514 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
515 instead of the correct short-form sequence (e.g. ESC $ A). */
516 #define CODING_ISO_FLAG_LONG_FORM 0x0001
517
518 /* If set, reset graphic planes and registers at end-of-line to the
519 initial state. */
520 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
521
522 /* If set, reset graphic planes and registers before any control
523 characters to the initial state. */
524 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
525
526 /* If set, encode by 7-bit environment. */
527 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
528
529 /* If set, use locking-shift function. */
530 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
531
532 /* If set, use single-shift function. Overwrite
533 CODING_ISO_FLAG_LOCKING_SHIFT. */
534 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
535
536 /* If set, use designation escape sequence. */
537 #define CODING_ISO_FLAG_DESIGNATION 0x0040
538
539 /* If set, produce revision number sequence. */
540 #define CODING_ISO_FLAG_REVISION 0x0080
541
542 /* If set, produce ISO6429's direction specifying sequence. */
543 #define CODING_ISO_FLAG_DIRECTION 0x0100
544
545 /* If set, assume designation states are reset at beginning of line on
546 output. */
547 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
548
549 /* If set, designation sequence should be placed at beginning of line
550 on output. */
551 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
552
553 /* If set, do not encode unsafe charactes on output. */
554 #define CODING_ISO_FLAG_SAFE 0x0800
555
556 /* If set, extra latin codes (128..159) are accepted as a valid code
557 on input. */
558 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
559
560 #define CODING_ISO_FLAG_COMPOSITION 0x2000
561
562 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
563
564 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
565
566 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
567
568 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
569
570 /* A character to be produced on output if encoding of the original
571 character is prohibited by CODING_ISO_FLAG_SAFE. */
572 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
573
574
575 /* UTF-16 section */
576 #define CODING_UTF_16_BOM(coding) \
577 ((coding)->spec.utf_16.bom)
578
579 #define CODING_UTF_16_ENDIAN(coding) \
580 ((coding)->spec.utf_16.endian)
581
582 #define CODING_UTF_16_SURROGATE(coding) \
583 ((coding)->spec.utf_16.surrogate)
584
585
586 /* CCL section */
587 #define CODING_CCL_DECODER(coding) \
588 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
589 #define CODING_CCL_ENCODER(coding) \
590 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
591 #define CODING_CCL_VALIDS(coding) \
592 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
593 ->data)
594
595 /* Index for each coding category in `coding_categories' */
596
597 enum coding_category
598 {
599 coding_category_iso_7,
600 coding_category_iso_7_tight,
601 coding_category_iso_8_1,
602 coding_category_iso_8_2,
603 coding_category_iso_7_else,
604 coding_category_iso_8_else,
605 coding_category_utf_8,
606 coding_category_utf_16_auto,
607 coding_category_utf_16_be,
608 coding_category_utf_16_le,
609 coding_category_utf_16_be_nosig,
610 coding_category_utf_16_le_nosig,
611 coding_category_charset,
612 coding_category_sjis,
613 coding_category_big5,
614 coding_category_ccl,
615 coding_category_emacs_mule,
616 /* All above are targets of code detection. */
617 coding_category_raw_text,
618 coding_category_undecided,
619 coding_category_max
620 };
621
622 /* Definitions of flag bits used in detect_coding_XXXX. */
623 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
624 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
625 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
626 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
627 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
628 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
629 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
630 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
631 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
632 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
633 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
634 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
635 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
636 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
637 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
638 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
639 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
640
641 /* This value is returned if detect_coding_mask () find nothing other
642 than ASCII characters. */
643 #define CATEGORY_MASK_ANY \
644 (CATEGORY_MASK_ISO_7 \
645 | CATEGORY_MASK_ISO_7_TIGHT \
646 | CATEGORY_MASK_ISO_8_1 \
647 | CATEGORY_MASK_ISO_8_2 \
648 | CATEGORY_MASK_ISO_7_ELSE \
649 | CATEGORY_MASK_ISO_8_ELSE \
650 | CATEGORY_MASK_UTF_8 \
651 | CATEGORY_MASK_UTF_16_BE \
652 | CATEGORY_MASK_UTF_16_LE \
653 | CATEGORY_MASK_UTF_16_BE_NOSIG \
654 | CATEGORY_MASK_UTF_16_LE_NOSIG \
655 | CATEGORY_MASK_CHARSET \
656 | CATEGORY_MASK_SJIS \
657 | CATEGORY_MASK_BIG5 \
658 | CATEGORY_MASK_CCL \
659 | CATEGORY_MASK_EMACS_MULE)
660
661
662 #define CATEGORY_MASK_ISO_7BIT \
663 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
664
665 #define CATEGORY_MASK_ISO_8BIT \
666 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
667
668 #define CATEGORY_MASK_ISO_ELSE \
669 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
670
671 #define CATEGORY_MASK_ISO_ESCAPE \
672 (CATEGORY_MASK_ISO_7 \
673 | CATEGORY_MASK_ISO_7_TIGHT \
674 | CATEGORY_MASK_ISO_7_ELSE \
675 | CATEGORY_MASK_ISO_8_ELSE)
676
677 #define CATEGORY_MASK_ISO \
678 ( CATEGORY_MASK_ISO_7BIT \
679 | CATEGORY_MASK_ISO_8BIT \
680 | CATEGORY_MASK_ISO_ELSE)
681
682 #define CATEGORY_MASK_UTF_16 \
683 (CATEGORY_MASK_UTF_16_BE \
684 | CATEGORY_MASK_UTF_16_LE \
685 | CATEGORY_MASK_UTF_16_BE_NOSIG \
686 | CATEGORY_MASK_UTF_16_LE_NOSIG)
687
688
689 /* List of symbols `coding-category-xxx' ordered by priority. This
690 variable is exposed to Emacs Lisp. */
691 static Lisp_Object Vcoding_category_list;
692
693 /* Table of coding categories (Lisp symbols). This variable is for
694 internal use oly. */
695 static Lisp_Object Vcoding_category_table;
696
697 /* Table of coding-categories ordered by priority. */
698 static enum coding_category coding_priorities[coding_category_max];
699
700 /* Nth element is a coding context for the coding system bound to the
701 Nth coding category. */
702 static struct coding_system coding_categories[coding_category_max];
703
704 static int detected_mask[coding_category_raw_text] =
705 { CATEGORY_MASK_ISO,
706 CATEGORY_MASK_ISO,
707 CATEGORY_MASK_ISO,
708 CATEGORY_MASK_ISO,
709 CATEGORY_MASK_ISO,
710 CATEGORY_MASK_ISO,
711 CATEGORY_MASK_UTF_8,
712 CATEGORY_MASK_UTF_16,
713 CATEGORY_MASK_UTF_16,
714 CATEGORY_MASK_UTF_16,
715 CATEGORY_MASK_UTF_16,
716 CATEGORY_MASK_UTF_16,
717 CATEGORY_MASK_CHARSET,
718 CATEGORY_MASK_SJIS,
719 CATEGORY_MASK_BIG5,
720 CATEGORY_MASK_CCL,
721 CATEGORY_MASK_EMACS_MULE
722 };
723
724 /*** Commonly used macros and functions ***/
725
726 #ifndef min
727 #define min(a, b) ((a) < (b) ? (a) : (b))
728 #endif
729 #ifndef max
730 #define max(a, b) ((a) > (b) ? (a) : (b))
731 #endif
732
733 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
734 do { \
735 attrs = CODING_ID_ATTRS (coding->id); \
736 eol_type = CODING_ID_EOL_TYPE (coding->id); \
737 if (VECTORP (eol_type)) \
738 eol_type = Qunix; \
739 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
740 } while (0)
741
742
743 /* Safely get one byte from the source text pointed by SRC which ends
744 at SRC_END, and set C to that byte. If there are not enough bytes
745 in the source, it jumps to `no_more_source'. The caller
746 should declare and set these variables appropriately in advance:
747 src, src_end, multibytep
748 */
749
750 #define ONE_MORE_BYTE(c) \
751 do { \
752 if (src == src_end) \
753 { \
754 if (src_base < src) \
755 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
756 goto no_more_source; \
757 } \
758 c = *src++; \
759 if (multibytep && (c & 0x80)) \
760 { \
761 if ((c & 0xFE) != 0xC0) \
762 error ("Undecodable char found"); \
763 c = ((c & 1) << 6) | *src++; \
764 } \
765 consumed_chars++; \
766 } while (0)
767
768
769 #define ONE_MORE_BYTE_NO_CHECK(c) \
770 do { \
771 c = *src++; \
772 if (multibytep && (c & 0x80)) \
773 { \
774 if ((c & 0xFE) != 0xC0) \
775 error ("Undecodable char found"); \
776 c = ((c & 1) << 6) | *src++; \
777 } \
778 consumed_chars++; \
779 } while (0)
780
781
782 /* Store a byte C in the place pointed by DST and increment DST to the
783 next free point, and increment PRODUCED_CHARS. The caller should
784 assure that C is 0..127, and declare and set the variable `dst'
785 appropriately in advance.
786 */
787
788
789 #define EMIT_ONE_ASCII_BYTE(c) \
790 do { \
791 produced_chars++; \
792 *dst++ = (c); \
793 } while (0)
794
795
796 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
797
798 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
799 do { \
800 produced_chars += 2; \
801 *dst++ = (c1), *dst++ = (c2); \
802 } while (0)
803
804
805 /* Store a byte C in the place pointed by DST and increment DST to the
806 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
807 nonzero, store in an appropriate multibyte from. The caller should
808 declare and set the variables `dst' and `multibytep' appropriately
809 in advance. */
810
811 #define EMIT_ONE_BYTE(c) \
812 do { \
813 produced_chars++; \
814 if (multibytep) \
815 { \
816 int ch = (c); \
817 if (ch >= 0x80) \
818 ch = BYTE8_TO_CHAR (ch); \
819 CHAR_STRING_ADVANCE (ch, dst); \
820 } \
821 else \
822 *dst++ = (c); \
823 } while (0)
824
825
826 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
827
828 #define EMIT_TWO_BYTES(c1, c2) \
829 do { \
830 produced_chars += 2; \
831 if (multibytep) \
832 { \
833 int ch; \
834 \
835 ch = (c1); \
836 if (ch >= 0x80) \
837 ch = BYTE8_TO_CHAR (ch); \
838 CHAR_STRING_ADVANCE (ch, dst); \
839 ch = (c2); \
840 if (ch >= 0x80) \
841 ch = BYTE8_TO_CHAR (ch); \
842 CHAR_STRING_ADVANCE (ch, dst); \
843 } \
844 else \
845 { \
846 *dst++ = (c1); \
847 *dst++ = (c2); \
848 } \
849 } while (0)
850
851
852 #define EMIT_THREE_BYTES(c1, c2, c3) \
853 do { \
854 EMIT_ONE_BYTE (c1); \
855 EMIT_TWO_BYTES (c2, c3); \
856 } while (0)
857
858
859 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
860 do { \
861 EMIT_TWO_BYTES (c1, c2); \
862 EMIT_TWO_BYTES (c3, c4); \
863 } while (0)
864
865
866 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
867 do { \
868 charset_map_loaded = 0; \
869 c = DECODE_CHAR (charset, code); \
870 if (charset_map_loaded) \
871 { \
872 unsigned char *orig = coding->source; \
873 EMACS_INT offset; \
874 \
875 coding_set_source (coding); \
876 offset = coding->source - orig; \
877 src += offset; \
878 src_base += offset; \
879 src_end += offset; \
880 } \
881 } while (0)
882
883
884 #define ASSURE_DESTINATION(bytes) \
885 do { \
886 if (dst + (bytes) >= dst_end) \
887 { \
888 int more_bytes = charbuf_end - charbuf + (bytes); \
889 \
890 dst = alloc_destination (coding, more_bytes, dst); \
891 dst_end = coding->destination + coding->dst_bytes; \
892 } \
893 } while (0)
894
895
896
897 static void
898 coding_set_source (coding)
899 struct coding_system *coding;
900 {
901 if (BUFFERP (coding->src_object))
902 {
903 if (coding->src_pos < 0)
904 coding->source = GAP_END_ADDR + coding->src_pos_byte;
905 else
906 {
907 struct buffer *buf = XBUFFER (coding->src_object);
908 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
909 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
910
911 coding->source = beg_addr + coding->src_pos_byte - 1;
912 if (coding->src_pos_byte >= gpt_byte)
913 coding->source += BUF_GAP_SIZE (buf);
914 }
915 }
916 else if (STRINGP (coding->src_object))
917 {
918 coding->source = (XSTRING (coding->src_object)->data
919 + coding->src_pos_byte);
920 }
921 else
922 /* Otherwise, the source is C string and is never relocated
923 automatically. Thus we don't have to update anything. */
924 ;
925 }
926
927 static void
928 coding_set_destination (coding)
929 struct coding_system *coding;
930 {
931 if (BUFFERP (coding->dst_object))
932 {
933 if (coding->src_pos < 0)
934 {
935 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
936 coding->dst_bytes = (GAP_END_ADDR
937 - (coding->src_bytes - coding->consumed)
938 - coding->destination);
939 }
940 else
941 {
942 /* We are sure that coding->dst_pos_byte is before the gap
943 of the buffer. */
944 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
945 + coding->dst_pos_byte - 1);
946 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
947 - coding->destination);
948 }
949 }
950 else
951 /* Otherwise, the destination is C string and is never relocated
952 automatically. Thus we don't have to update anything. */
953 ;
954 }
955
956
957 static void
958 coding_alloc_by_realloc (coding, bytes)
959 struct coding_system *coding;
960 EMACS_INT bytes;
961 {
962 coding->destination = (unsigned char *) xrealloc (coding->destination,
963 coding->dst_bytes + bytes);
964 coding->dst_bytes += bytes;
965 }
966
967 static void
968 coding_alloc_by_making_gap (coding, bytes)
969 struct coding_system *coding;
970 EMACS_INT bytes;
971 {
972 if (BUFFERP (coding->dst_object)
973 && EQ (coding->src_object, coding->dst_object))
974 {
975 EMACS_INT add = coding->src_bytes - coding->consumed;
976
977 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
978 make_gap (bytes);
979 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
980 }
981 else
982 {
983 Lisp_Object this_buffer;
984
985 this_buffer = Fcurrent_buffer ();
986 set_buffer_internal (XBUFFER (coding->dst_object));
987 make_gap (bytes);
988 set_buffer_internal (XBUFFER (this_buffer));
989 }
990 }
991
992
993 static unsigned char *
994 alloc_destination (coding, nbytes, dst)
995 struct coding_system *coding;
996 int nbytes;
997 unsigned char *dst;
998 {
999 EMACS_INT offset = dst - coding->destination;
1000
1001 if (BUFFERP (coding->dst_object))
1002 coding_alloc_by_making_gap (coding, nbytes);
1003 else
1004 coding_alloc_by_realloc (coding, nbytes);
1005 coding->result = CODING_RESULT_SUCCESS;
1006 coding_set_destination (coding);
1007 dst = coding->destination + offset;
1008 return dst;
1009 }
1010
1011 /** Macros for annotations. */
1012
1013 /* Maximum length of annotation data (sum of annotations for
1014 composition and charset). */
1015 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1016
1017 /* An annotation data is stored in the array coding->charbuf in this
1018 format:
1019 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1020 LENGTH is the number of elements in the annotation.
1021 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1022 FROM and TO specify the range of text annotated. They are relative
1023 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1024
1025 The format of the following elements depend on ANNOTATION_MASK.
1026
1027 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1028 follows:
1029 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1030 METHOD is one of enum composition_method.
1031 Optionnal COMPOSITION-COMPONENTS are characters and composition
1032 rules.
1033
1034 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1035 follows. */
1036
1037 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
1038 do { \
1039 *(buf)++ = -(len); \
1040 *(buf)++ = (mask); \
1041 *(buf)++ = (from); \
1042 *(buf)++ = (to); \
1043 coding->annotated = 1; \
1044 } while (0);
1045
1046 #define ADD_COMPOSITION_DATA(buf, from, to, method) \
1047 do { \
1048 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1049 *buf++ = method; \
1050 } while (0)
1051
1052
1053 #define ADD_CHARSET_DATA(buf, from, to, id) \
1054 do { \
1055 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1056 *buf++ = id; \
1057 } while (0)
1058
1059 \f
1060 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1061
1062
1063
1064 \f
1065 /*** 3. UTF-8 ***/
1066
1067 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1068 Check if a text is encoded in UTF-8. If it is, return 1, else
1069 return 0. */
1070
1071 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1072 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1073 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1074 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1075 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1076 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1077
1078 static int
1079 detect_coding_utf_8 (coding, detect_info)
1080 struct coding_system *coding;
1081 struct coding_detection_info *detect_info;
1082 {
1083 unsigned char *src = coding->source, *src_base = src;
1084 unsigned char *src_end = coding->source + coding->src_bytes;
1085 int multibytep = coding->src_multibyte;
1086 int consumed_chars = 0;
1087 int found = 0;
1088 int incomplete;
1089
1090 detect_info->checked |= CATEGORY_MASK_UTF_8;
1091 /* A coding system of this category is always ASCII compatible. */
1092 src += coding->head_ascii;
1093
1094 while (1)
1095 {
1096 int c, c1, c2, c3, c4;
1097
1098 incomplete = 0;
1099 ONE_MORE_BYTE (c);
1100 if (UTF_8_1_OCTET_P (c))
1101 continue;
1102 incomplete = 1;
1103 ONE_MORE_BYTE (c1);
1104 if (! UTF_8_EXTRA_OCTET_P (c1))
1105 break;
1106 if (UTF_8_2_OCTET_LEADING_P (c))
1107 {
1108 found = CATEGORY_MASK_UTF_8;
1109 continue;
1110 }
1111 ONE_MORE_BYTE (c2);
1112 if (! UTF_8_EXTRA_OCTET_P (c2))
1113 break;
1114 if (UTF_8_3_OCTET_LEADING_P (c))
1115 {
1116 found = CATEGORY_MASK_UTF_8;
1117 continue;
1118 }
1119 ONE_MORE_BYTE (c3);
1120 if (! UTF_8_EXTRA_OCTET_P (c3))
1121 break;
1122 if (UTF_8_4_OCTET_LEADING_P (c))
1123 {
1124 found = CATEGORY_MASK_UTF_8;
1125 continue;
1126 }
1127 ONE_MORE_BYTE (c4);
1128 if (! UTF_8_EXTRA_OCTET_P (c4))
1129 break;
1130 if (UTF_8_5_OCTET_LEADING_P (c))
1131 {
1132 found = CATEGORY_MASK_UTF_8;
1133 continue;
1134 }
1135 break;
1136 }
1137 detect_info->rejected |= CATEGORY_MASK_UTF_8;
1138 return 0;
1139
1140 no_more_source:
1141 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1142 {
1143 detect_info->rejected |= CATEGORY_MASK_UTF_8;
1144 return 0;
1145 }
1146 detect_info->found |= found;
1147 return 1;
1148 }
1149
1150
1151 static void
1152 decode_coding_utf_8 (coding)
1153 struct coding_system *coding;
1154 {
1155 unsigned char *src = coding->source + coding->consumed;
1156 unsigned char *src_end = coding->source + coding->src_bytes;
1157 unsigned char *src_base;
1158 int *charbuf = coding->charbuf;
1159 int *charbuf_end = charbuf + coding->charbuf_size;
1160 int consumed_chars = 0, consumed_chars_base;
1161 int multibytep = coding->src_multibyte;
1162 Lisp_Object attr, eol_type, charset_list;
1163
1164 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1165
1166 while (1)
1167 {
1168 int c, c1, c2, c3, c4, c5;
1169
1170 src_base = src;
1171 consumed_chars_base = consumed_chars;
1172
1173 if (charbuf >= charbuf_end)
1174 break;
1175
1176 ONE_MORE_BYTE (c1);
1177 if (UTF_8_1_OCTET_P(c1))
1178 {
1179 c = c1;
1180 if (c == '\r')
1181 {
1182 if (EQ (eol_type, Qdos))
1183 {
1184 if (src == src_end)
1185 {
1186 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1187 goto no_more_source;
1188 }
1189 if (*src == '\n')
1190 ONE_MORE_BYTE (c);
1191 }
1192 else if (EQ (eol_type, Qmac))
1193 c = '\n';
1194 }
1195 }
1196 else
1197 {
1198 ONE_MORE_BYTE (c2);
1199 if (! UTF_8_EXTRA_OCTET_P (c2))
1200 goto invalid_code;
1201 if (UTF_8_2_OCTET_LEADING_P (c1))
1202 {
1203 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1204 /* Reject overlong sequences here and below. Encoders
1205 producing them are incorrect, they can be misleading,
1206 and they mess up read/write invariance. */
1207 if (c < 128)
1208 goto invalid_code;
1209 }
1210 else
1211 {
1212 ONE_MORE_BYTE (c3);
1213 if (! UTF_8_EXTRA_OCTET_P (c3))
1214 goto invalid_code;
1215 if (UTF_8_3_OCTET_LEADING_P (c1))
1216 {
1217 c = (((c1 & 0xF) << 12)
1218 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1219 if (c < 0x800
1220 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1221 goto invalid_code;
1222 }
1223 else
1224 {
1225 ONE_MORE_BYTE (c4);
1226 if (! UTF_8_EXTRA_OCTET_P (c4))
1227 goto invalid_code;
1228 if (UTF_8_4_OCTET_LEADING_P (c1))
1229 {
1230 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1231 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1232 if (c < 0x10000)
1233 goto invalid_code;
1234 }
1235 else
1236 {
1237 ONE_MORE_BYTE (c5);
1238 if (! UTF_8_EXTRA_OCTET_P (c5))
1239 goto invalid_code;
1240 if (UTF_8_5_OCTET_LEADING_P (c1))
1241 {
1242 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1243 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1244 | (c5 & 0x3F));
1245 if ((c > MAX_CHAR) || (c < 0x200000))
1246 goto invalid_code;
1247 }
1248 else
1249 goto invalid_code;
1250 }
1251 }
1252 }
1253 }
1254
1255 *charbuf++ = c;
1256 continue;
1257
1258 invalid_code:
1259 src = src_base;
1260 consumed_chars = consumed_chars_base;
1261 ONE_MORE_BYTE (c);
1262 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1263 coding->errors++;
1264 }
1265
1266 no_more_source:
1267 coding->consumed_char += consumed_chars_base;
1268 coding->consumed = src_base - coding->source;
1269 coding->charbuf_used = charbuf - coding->charbuf;
1270 }
1271
1272
1273 static int
1274 encode_coding_utf_8 (coding)
1275 struct coding_system *coding;
1276 {
1277 int multibytep = coding->dst_multibyte;
1278 int *charbuf = coding->charbuf;
1279 int *charbuf_end = charbuf + coding->charbuf_used;
1280 unsigned char *dst = coding->destination + coding->produced;
1281 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1282 int produced_chars = 0;
1283 int c;
1284
1285 if (multibytep)
1286 {
1287 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1288
1289 while (charbuf < charbuf_end)
1290 {
1291 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1292
1293 ASSURE_DESTINATION (safe_room);
1294 c = *charbuf++;
1295 if (CHAR_BYTE8_P (c))
1296 {
1297 c = CHAR_TO_BYTE8 (c);
1298 EMIT_ONE_BYTE (c);
1299 }
1300 else
1301 {
1302 CHAR_STRING_ADVANCE (c, pend);
1303 for (p = str; p < pend; p++)
1304 EMIT_ONE_BYTE (*p);
1305 }
1306 }
1307 }
1308 else
1309 {
1310 int safe_room = MAX_MULTIBYTE_LENGTH;
1311
1312 while (charbuf < charbuf_end)
1313 {
1314 ASSURE_DESTINATION (safe_room);
1315 c = *charbuf++;
1316 dst += CHAR_STRING (c, dst);
1317 produced_chars++;
1318 }
1319 }
1320 coding->result = CODING_RESULT_SUCCESS;
1321 coding->produced_char += produced_chars;
1322 coding->produced = dst - coding->destination;
1323 return 0;
1324 }
1325
1326
1327 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1328 Check if a text is encoded in one of UTF-16 based coding systems.
1329 If it is, return 1, else return 0. */
1330
1331 #define UTF_16_HIGH_SURROGATE_P(val) \
1332 (((val) & 0xFC00) == 0xD800)
1333
1334 #define UTF_16_LOW_SURROGATE_P(val) \
1335 (((val) & 0xFC00) == 0xDC00)
1336
1337 #define UTF_16_INVALID_P(val) \
1338 (((val) == 0xFFFE) \
1339 || ((val) == 0xFFFF) \
1340 || UTF_16_LOW_SURROGATE_P (val))
1341
1342
1343 static int
1344 detect_coding_utf_16 (coding, detect_info)
1345 struct coding_system *coding;
1346 struct coding_detection_info *detect_info;
1347 {
1348 unsigned char *src = coding->source, *src_base = src;
1349 unsigned char *src_end = coding->source + coding->src_bytes;
1350 int multibytep = coding->src_multibyte;
1351 int consumed_chars = 0;
1352 int c1, c2;
1353
1354 detect_info->checked |= CATEGORY_MASK_UTF_16;
1355
1356 if (coding->mode & CODING_MODE_LAST_BLOCK
1357 && (coding->src_bytes & 1))
1358 {
1359 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1360 return 0;
1361 }
1362 ONE_MORE_BYTE (c1);
1363 ONE_MORE_BYTE (c2);
1364
1365 if ((c1 == 0xFF) && (c2 == 0xFE))
1366 {
1367 detect_info->found |= CATEGORY_MASK_UTF_16_LE;
1368 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1369 }
1370 else if ((c1 == 0xFE) && (c2 == 0xFF))
1371 {
1372 detect_info->found |= CATEGORY_MASK_UTF_16_BE;
1373 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1374 }
1375 no_more_source:
1376 return 1;
1377 }
1378
1379 static void
1380 decode_coding_utf_16 (coding)
1381 struct coding_system *coding;
1382 {
1383 unsigned char *src = coding->source + coding->consumed;
1384 unsigned char *src_end = coding->source + coding->src_bytes;
1385 unsigned char *src_base;
1386 int *charbuf = coding->charbuf;
1387 int *charbuf_end = charbuf + coding->charbuf_size;
1388 int consumed_chars = 0, consumed_chars_base;
1389 int multibytep = coding->src_multibyte;
1390 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1391 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1392 int surrogate = CODING_UTF_16_SURROGATE (coding);
1393 Lisp_Object attr, eol_type, charset_list;
1394
1395 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1396
1397 if (bom != utf_16_without_bom)
1398 {
1399 int c, c1, c2;
1400
1401 src_base = src;
1402 ONE_MORE_BYTE (c1);
1403 ONE_MORE_BYTE (c2);
1404 c = (c1 << 8) | c2;
1405 if (bom == utf_16_with_bom)
1406 {
1407 if (endian == utf_16_big_endian
1408 ? c != 0xFFFE : c != 0xFEFF)
1409 {
1410 /* We are sure that there's enouph room at CHARBUF. */
1411 *charbuf++ = c1;
1412 *charbuf++ = c2;
1413 coding->errors++;
1414 }
1415 }
1416 else
1417 {
1418 if (c == 0xFFFE)
1419 CODING_UTF_16_ENDIAN (coding)
1420 = endian = utf_16_big_endian;
1421 else if (c == 0xFEFF)
1422 CODING_UTF_16_ENDIAN (coding)
1423 = endian = utf_16_little_endian;
1424 else
1425 {
1426 CODING_UTF_16_ENDIAN (coding)
1427 = endian = utf_16_big_endian;
1428 src = src_base;
1429 }
1430 }
1431 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1432 }
1433
1434 while (1)
1435 {
1436 int c, c1, c2;
1437
1438 src_base = src;
1439 consumed_chars_base = consumed_chars;
1440
1441 if (charbuf + 2 >= charbuf_end)
1442 break;
1443
1444 ONE_MORE_BYTE (c1);
1445 ONE_MORE_BYTE (c2);
1446 c = (endian == utf_16_big_endian
1447 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1448 if (surrogate)
1449 {
1450 if (! UTF_16_LOW_SURROGATE_P (c))
1451 {
1452 if (endian == utf_16_big_endian)
1453 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1454 else
1455 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1456 *charbuf++ = c1;
1457 *charbuf++ = c2;
1458 coding->errors++;
1459 if (UTF_16_HIGH_SURROGATE_P (c))
1460 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1461 else
1462 *charbuf++ = c;
1463 }
1464 else
1465 {
1466 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1467 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1468 *charbuf++ = c;
1469 }
1470 }
1471 else
1472 {
1473 if (UTF_16_HIGH_SURROGATE_P (c))
1474 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1475 else
1476 *charbuf++ = c;
1477 }
1478 }
1479
1480 no_more_source:
1481 coding->consumed_char += consumed_chars_base;
1482 coding->consumed = src_base - coding->source;
1483 coding->charbuf_used = charbuf - coding->charbuf;
1484 }
1485
1486 static int
1487 encode_coding_utf_16 (coding)
1488 struct coding_system *coding;
1489 {
1490 int multibytep = coding->dst_multibyte;
1491 int *charbuf = coding->charbuf;
1492 int *charbuf_end = charbuf + coding->charbuf_used;
1493 unsigned char *dst = coding->destination + coding->produced;
1494 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1495 int safe_room = 8;
1496 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1497 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1498 int produced_chars = 0;
1499 Lisp_Object attrs, eol_type, charset_list;
1500 int c;
1501
1502 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1503
1504 if (bom == utf_16_with_bom)
1505 {
1506 ASSURE_DESTINATION (safe_room);
1507 if (big_endian)
1508 EMIT_TWO_BYTES (0xFF, 0xFE);
1509 else
1510 EMIT_TWO_BYTES (0xFE, 0xFF);
1511 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1512 }
1513
1514 while (charbuf < charbuf_end)
1515 {
1516 ASSURE_DESTINATION (safe_room);
1517 c = *charbuf++;
1518 if (c >= MAX_UNICODE_CHAR)
1519 c = coding->default_char;
1520
1521 if (c < 0x10000)
1522 {
1523 if (big_endian)
1524 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1525 else
1526 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1527 }
1528 else
1529 {
1530 int c1, c2;
1531
1532 c -= 0x10000;
1533 c1 = (c >> 10) + 0xD800;
1534 c2 = (c & 0x3FF) + 0xDC00;
1535 if (big_endian)
1536 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1537 else
1538 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1539 }
1540 }
1541 coding->result = CODING_RESULT_SUCCESS;
1542 coding->produced = dst - coding->destination;
1543 coding->produced_char += produced_chars;
1544 return 0;
1545 }
1546
1547 \f
1548 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1549
1550 /* Emacs' internal format for representation of multiple character
1551 sets is a kind of multi-byte encoding, i.e. characters are
1552 represented by variable-length sequences of one-byte codes.
1553
1554 ASCII characters and control characters (e.g. `tab', `newline') are
1555 represented by one-byte sequences which are their ASCII codes, in
1556 the range 0x00 through 0x7F.
1557
1558 8-bit characters of the range 0x80..0x9F are represented by
1559 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1560 code + 0x20).
1561
1562 8-bit characters of the range 0xA0..0xFF are represented by
1563 one-byte sequences which are their 8-bit code.
1564
1565 The other characters are represented by a sequence of `base
1566 leading-code', optional `extended leading-code', and one or two
1567 `position-code's. The length of the sequence is determined by the
1568 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1569 whereas extended leading-code and position-code take the range 0xA0
1570 through 0xFF. See `charset.h' for more details about leading-code
1571 and position-code.
1572
1573 --- CODE RANGE of Emacs' internal format ---
1574 character set range
1575 ------------- -----
1576 ascii 0x00..0x7F
1577 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1578 eight-bit-graphic 0xA0..0xBF
1579 ELSE 0x81..0x9D + [0xA0..0xFF]+
1580 ---------------------------------------------
1581
1582 As this is the internal character representation, the format is
1583 usually not used externally (i.e. in a file or in a data sent to a
1584 process). But, it is possible to have a text externally in this
1585 format (i.e. by encoding by the coding system `emacs-mule').
1586
1587 In that case, a sequence of one-byte codes has a slightly different
1588 form.
1589
1590 At first, all characters in eight-bit-control are represented by
1591 one-byte sequences which are their 8-bit code.
1592
1593 Next, character composition data are represented by the byte
1594 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1595 where,
1596 METHOD is 0xF0 plus one of composition method (enum
1597 composition_method),
1598
1599 BYTES is 0xA0 plus a byte length of this composition data,
1600
1601 CHARS is 0x20 plus a number of characters composed by this
1602 data,
1603
1604 COMPONENTs are characters of multibye form or composition
1605 rules encoded by two-byte of ASCII codes.
1606
1607 In addition, for backward compatibility, the following formats are
1608 also recognized as composition data on decoding.
1609
1610 0x80 MSEQ ...
1611 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1612
1613 Here,
1614 MSEQ is a multibyte form but in these special format:
1615 ASCII: 0xA0 ASCII_CODE+0x80,
1616 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1617 RULE is a one byte code of the range 0xA0..0xF0 that
1618 represents a composition rule.
1619 */
1620
1621 char emacs_mule_bytes[256];
1622
1623 int
1624 emacs_mule_char (coding, src, nbytes, nchars, id)
1625 struct coding_system *coding;
1626 unsigned char *src;
1627 int *nbytes, *nchars, *id;
1628 {
1629 unsigned char *src_end = coding->source + coding->src_bytes;
1630 int multibytep = coding->src_multibyte;
1631 unsigned char *src_base = src;
1632 struct charset *charset;
1633 unsigned code;
1634 int c;
1635 int consumed_chars = 0;
1636
1637 ONE_MORE_BYTE (c);
1638 switch (emacs_mule_bytes[c])
1639 {
1640 case 2:
1641 if (! (charset = emacs_mule_charset[c]))
1642 goto invalid_code;
1643 ONE_MORE_BYTE (c);
1644 code = c & 0x7F;
1645 break;
1646
1647 case 3:
1648 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1649 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1650 {
1651 ONE_MORE_BYTE (c);
1652 if (! (charset = emacs_mule_charset[c]))
1653 goto invalid_code;
1654 ONE_MORE_BYTE (c);
1655 code = c & 0x7F;
1656 }
1657 else
1658 {
1659 if (! (charset = emacs_mule_charset[c]))
1660 goto invalid_code;
1661 ONE_MORE_BYTE (c);
1662 code = (c & 0x7F) << 8;
1663 ONE_MORE_BYTE (c);
1664 code |= c & 0x7F;
1665 }
1666 break;
1667
1668 case 4:
1669 ONE_MORE_BYTE (c);
1670 if (! (charset = emacs_mule_charset[c]))
1671 goto invalid_code;
1672 ONE_MORE_BYTE (c);
1673 code = (c & 0x7F) << 8;
1674 ONE_MORE_BYTE (c);
1675 code |= c & 0x7F;
1676 break;
1677
1678 case 1:
1679 code = c;
1680 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1681 ? charset_ascii : charset_eight_bit);
1682 break;
1683
1684 default:
1685 abort ();
1686 }
1687 c = DECODE_CHAR (charset, code);
1688 if (c < 0)
1689 goto invalid_code;
1690 *nbytes = src - src_base;
1691 *nchars = consumed_chars;
1692 if (id)
1693 *id = charset->id;
1694 return c;
1695
1696 no_more_source:
1697 return -2;
1698
1699 invalid_code:
1700 return -1;
1701 }
1702
1703
1704 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1705 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1706 else return 0. */
1707
1708 static int
1709 detect_coding_emacs_mule (coding, detect_info)
1710 struct coding_system *coding;
1711 struct coding_detection_info *detect_info;
1712 {
1713 unsigned char *src = coding->source, *src_base = src;
1714 unsigned char *src_end = coding->source + coding->src_bytes;
1715 int multibytep = coding->src_multibyte;
1716 int consumed_chars = 0;
1717 int c;
1718 int found = 0;
1719 int incomplete;
1720
1721 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1722 /* A coding system of this category is always ASCII compatible. */
1723 src += coding->head_ascii;
1724
1725 while (1)
1726 {
1727 incomplete = 0;
1728 ONE_MORE_BYTE (c);
1729 incomplete = 1;
1730
1731 if (c == 0x80)
1732 {
1733 /* Perhaps the start of composite character. We simple skip
1734 it because analyzing it is too heavy for detecting. But,
1735 at least, we check that the composite character
1736 constitues of more than 4 bytes. */
1737 unsigned char *src_base;
1738
1739 repeat:
1740 src_base = src;
1741 do
1742 {
1743 ONE_MORE_BYTE (c);
1744 }
1745 while (c >= 0xA0);
1746
1747 if (src - src_base <= 4)
1748 break;
1749 found = CATEGORY_MASK_EMACS_MULE;
1750 if (c == 0x80)
1751 goto repeat;
1752 }
1753
1754 if (c < 0x80)
1755 {
1756 if (c < 0x20
1757 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1758 break;
1759 }
1760 else
1761 {
1762 unsigned char *src_base = src - 1;
1763
1764 do
1765 {
1766 ONE_MORE_BYTE (c);
1767 }
1768 while (c >= 0xA0);
1769 if (src - src_base != emacs_mule_bytes[*src_base])
1770 break;
1771 found = CATEGORY_MASK_EMACS_MULE;
1772 }
1773 }
1774 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1775 return 0;
1776
1777 no_more_source:
1778 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1779 {
1780 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1781 return 0;
1782 }
1783 detect_info->found |= found;
1784 return 1;
1785 }
1786
1787
1788 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1789
1790 /* Decode a character represented as a component of composition
1791 sequence of Emacs 20/21 style at SRC. Set C to that character and
1792 update SRC to the head of next character (or an encoded composition
1793 rule). If SRC doesn't points a composition component, set C to -1.
1794 If SRC points an invalid byte sequence, global exit by a return
1795 value 0. */
1796
1797 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1798 if (1) \
1799 { \
1800 int c; \
1801 int nbytes, nchars; \
1802 \
1803 if (src == src_end) \
1804 break; \
1805 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1806 if (c < 0) \
1807 { \
1808 if (c == -2) \
1809 break; \
1810 goto invalid_code; \
1811 } \
1812 *buf++ = c; \
1813 src += nbytes; \
1814 consumed_chars += nchars; \
1815 } \
1816 else
1817
1818
1819 /* Decode a composition rule represented as a component of composition
1820 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1821 and increment BUF. If SRC points an invalid byte sequence, set C
1822 to -1. */
1823
1824 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1825 do { \
1826 int c, gref, nref; \
1827 \
1828 if (src >= src_end) \
1829 goto invalid_code; \
1830 ONE_MORE_BYTE_NO_CHECK (c); \
1831 c -= 0x20; \
1832 if (c < 0 || c >= 81) \
1833 goto invalid_code; \
1834 \
1835 gref = c / 9, nref = c % 9; \
1836 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1837 } while (0)
1838
1839
1840 /* Decode a composition rule represented as a component of composition
1841 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1842 and increment BUF. If SRC points an invalid byte sequence, set C
1843 to -1. */
1844
1845 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1846 do { \
1847 int gref, nref; \
1848 \
1849 if (src + 1>= src_end) \
1850 goto invalid_code; \
1851 ONE_MORE_BYTE_NO_CHECK (gref); \
1852 gref -= 0x20; \
1853 ONE_MORE_BYTE_NO_CHECK (nref); \
1854 nref -= 0x20; \
1855 if (gref < 0 || gref >= 81 \
1856 || nref < 0 || nref >= 81) \
1857 goto invalid_code; \
1858 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1859 } while (0)
1860
1861
1862 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1863 do { \
1864 /* Emacs 21 style format. The first three bytes at SRC are \
1865 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1866 the byte length of this composition information, CHARS is the \
1867 number of characters composed by this composition. */ \
1868 enum composition_method method = c - 0xF2; \
1869 int *charbuf_base = charbuf; \
1870 int from, to; \
1871 int consumed_chars_limit; \
1872 int nbytes, nchars; \
1873 \
1874 ONE_MORE_BYTE (c); \
1875 nbytes = c - 0xA0; \
1876 if (nbytes < 3) \
1877 goto invalid_code; \
1878 ONE_MORE_BYTE (c); \
1879 nchars = c - 0xA0; \
1880 from = coding->produced + char_offset; \
1881 to = from + nchars; \
1882 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1883 consumed_chars_limit = consumed_chars_base + nbytes; \
1884 if (method != COMPOSITION_RELATIVE) \
1885 { \
1886 int i = 0; \
1887 while (consumed_chars < consumed_chars_limit) \
1888 { \
1889 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1890 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1891 else \
1892 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1893 i++; \
1894 } \
1895 if (consumed_chars < consumed_chars_limit) \
1896 goto invalid_code; \
1897 charbuf_base[0] -= i; \
1898 } \
1899 } while (0)
1900
1901
1902 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1903 do { \
1904 /* Emacs 20 style format for relative composition. */ \
1905 /* Store multibyte form of characters to be composed. */ \
1906 enum composition_method method = COMPOSITION_RELATIVE; \
1907 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1908 int *buf = components; \
1909 int i, j; \
1910 int from, to; \
1911 \
1912 src = src_base; \
1913 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1914 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1915 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1916 if (i < 2) \
1917 goto invalid_code; \
1918 from = coding->produced_char + char_offset; \
1919 to = from + i; \
1920 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1921 for (j = 0; j < i; j++) \
1922 *charbuf++ = components[j]; \
1923 } while (0)
1924
1925
1926 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1927 do { \
1928 /* Emacs 20 style format for rule-base composition. */ \
1929 /* Store multibyte form of characters to be composed. */ \
1930 enum composition_method method = COMPOSITION_WITH_RULE; \
1931 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1932 int *buf = components; \
1933 int i, j; \
1934 int from, to; \
1935 \
1936 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1937 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1938 { \
1939 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1940 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1941 } \
1942 if (i < 1 || (buf - components) % 2 == 0) \
1943 goto invalid_code; \
1944 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1945 goto no_more_source; \
1946 from = coding->produced_char + char_offset; \
1947 to = from + i; \
1948 ADD_COMPOSITION_DATA (buf, from, to, method); \
1949 for (j = 0; j < i; j++) \
1950 *charbuf++ = components[j]; \
1951 for (j = 0; j < i; j += 2) \
1952 *charbuf++ = components[j]; \
1953 } while (0)
1954
1955
1956 static void
1957 decode_coding_emacs_mule (coding)
1958 struct coding_system *coding;
1959 {
1960 unsigned char *src = coding->source + coding->consumed;
1961 unsigned char *src_end = coding->source + coding->src_bytes;
1962 unsigned char *src_base;
1963 int *charbuf = coding->charbuf;
1964 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1965 int consumed_chars = 0, consumed_chars_base;
1966 int multibytep = coding->src_multibyte;
1967 Lisp_Object attrs, eol_type, charset_list;
1968 int char_offset = coding->produced_char;
1969 int last_offset = char_offset;
1970 int last_id = charset_ascii;
1971
1972 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1973
1974 while (1)
1975 {
1976 int c;
1977
1978 src_base = src;
1979 consumed_chars_base = consumed_chars;
1980
1981 if (charbuf >= charbuf_end)
1982 break;
1983
1984 ONE_MORE_BYTE (c);
1985
1986 if (c < 0x80)
1987 {
1988 if (c == '\r')
1989 {
1990 if (EQ (eol_type, Qdos))
1991 {
1992 if (src == src_end)
1993 {
1994 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1995 goto no_more_source;
1996 }
1997 if (*src == '\n')
1998 ONE_MORE_BYTE (c);
1999 }
2000 else if (EQ (eol_type, Qmac))
2001 c = '\n';
2002 }
2003 *charbuf++ = c;
2004 char_offset++;
2005 }
2006 else if (c == 0x80)
2007 {
2008 ONE_MORE_BYTE (c);
2009 if (c - 0xF2 >= COMPOSITION_RELATIVE
2010 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2011 DECODE_EMACS_MULE_21_COMPOSITION (c);
2012 else if (c < 0xC0)
2013 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2014 else if (c == 0xFF)
2015 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2016 else
2017 goto invalid_code;
2018 }
2019 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2020 {
2021 int nbytes, nchars;
2022 int id;
2023
2024 src = src_base;
2025 consumed_chars = consumed_chars_base;
2026 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2027 if (c < 0)
2028 {
2029 if (c == -2)
2030 break;
2031 goto invalid_code;
2032 }
2033 if (last_id != id)
2034 {
2035 if (last_id != charset_ascii)
2036 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2037 last_id = id;
2038 last_offset = char_offset;
2039 }
2040 *charbuf++ = c;
2041 src += nbytes;
2042 consumed_chars += nchars;
2043 char_offset++;
2044 }
2045 continue;
2046
2047 invalid_code:
2048 src = src_base;
2049 consumed_chars = consumed_chars_base;
2050 ONE_MORE_BYTE (c);
2051 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2052 char_offset++;
2053 coding->errors++;
2054 }
2055
2056 no_more_source:
2057 if (last_id != charset_ascii)
2058 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2059 coding->consumed_char += consumed_chars_base;
2060 coding->consumed = src_base - coding->source;
2061 coding->charbuf_used = charbuf - coding->charbuf;
2062 }
2063
2064
2065 #define EMACS_MULE_LEADING_CODES(id, codes) \
2066 do { \
2067 if (id < 0xA0) \
2068 codes[0] = id, codes[1] = 0; \
2069 else if (id < 0xE0) \
2070 codes[0] = 0x9A, codes[1] = id; \
2071 else if (id < 0xF0) \
2072 codes[0] = 0x9B, codes[1] = id; \
2073 else if (id < 0xF5) \
2074 codes[0] = 0x9C, codes[1] = id; \
2075 else \
2076 codes[0] = 0x9D, codes[1] = id; \
2077 } while (0);
2078
2079
2080 static int
2081 encode_coding_emacs_mule (coding)
2082 struct coding_system *coding;
2083 {
2084 int multibytep = coding->dst_multibyte;
2085 int *charbuf = coding->charbuf;
2086 int *charbuf_end = charbuf + coding->charbuf_used;
2087 unsigned char *dst = coding->destination + coding->produced;
2088 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2089 int safe_room = 8;
2090 int produced_chars = 0;
2091 Lisp_Object attrs, eol_type, charset_list;
2092 int c;
2093 int preferred_charset_id = -1;
2094
2095 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2096
2097 while (charbuf < charbuf_end)
2098 {
2099 ASSURE_DESTINATION (safe_room);
2100 c = *charbuf++;
2101
2102 if (c < 0)
2103 {
2104 /* Handle an annotation. */
2105 switch (*charbuf)
2106 {
2107 case CODING_ANNOTATE_COMPOSITION_MASK:
2108 /* Not yet implemented. */
2109 break;
2110 case CODING_ANNOTATE_CHARSET_MASK:
2111 preferred_charset_id = charbuf[3];
2112 if (preferred_charset_id >= 0
2113 && NILP (Fmemq (make_number (preferred_charset_id),
2114 charset_list)))
2115 preferred_charset_id = -1;
2116 break;
2117 default:
2118 abort ();
2119 }
2120 charbuf += -c - 1;
2121 continue;
2122 }
2123
2124 if (ASCII_CHAR_P (c))
2125 EMIT_ONE_ASCII_BYTE (c);
2126 else if (CHAR_BYTE8_P (c))
2127 {
2128 c = CHAR_TO_BYTE8 (c);
2129 EMIT_ONE_BYTE (c);
2130 }
2131 else
2132 {
2133 struct charset *charset;
2134 unsigned code;
2135 int dimension;
2136 int emacs_mule_id;
2137 unsigned char leading_codes[2];
2138
2139 if (preferred_charset_id >= 0)
2140 {
2141 charset = CHARSET_FROM_ID (preferred_charset_id);
2142 if (! CHAR_CHARSET_P (c, charset))
2143 charset = char_charset (c, charset_list, NULL);
2144 }
2145 else
2146 charset = char_charset (c, charset_list, &code);
2147 if (! charset)
2148 {
2149 c = coding->default_char;
2150 if (ASCII_CHAR_P (c))
2151 {
2152 EMIT_ONE_ASCII_BYTE (c);
2153 continue;
2154 }
2155 charset = char_charset (c, charset_list, &code);
2156 }
2157 dimension = CHARSET_DIMENSION (charset);
2158 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2159 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2160 EMIT_ONE_BYTE (leading_codes[0]);
2161 if (leading_codes[1])
2162 EMIT_ONE_BYTE (leading_codes[1]);
2163 if (dimension == 1)
2164 EMIT_ONE_BYTE (code);
2165 else
2166 {
2167 EMIT_ONE_BYTE (code >> 8);
2168 EMIT_ONE_BYTE (code & 0xFF);
2169 }
2170 }
2171 }
2172 coding->result = CODING_RESULT_SUCCESS;
2173 coding->produced_char += produced_chars;
2174 coding->produced = dst - coding->destination;
2175 return 0;
2176 }
2177
2178 \f
2179 /*** 7. ISO2022 handlers ***/
2180
2181 /* The following note describes the coding system ISO2022 briefly.
2182 Since the intention of this note is to help understand the
2183 functions in this file, some parts are NOT ACCURATE or are OVERLY
2184 SIMPLIFIED. For thorough understanding, please refer to the
2185 original document of ISO2022. This is equivalent to the standard
2186 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2187
2188 ISO2022 provides many mechanisms to encode several character sets
2189 in 7-bit and 8-bit environments. For 7-bit environments, all text
2190 is encoded using bytes less than 128. This may make the encoded
2191 text a little bit longer, but the text passes more easily through
2192 several types of gateway, some of which strip off the MSB (Most
2193 Significant Bit).
2194
2195 There are two kinds of character sets: control character sets and
2196 graphic character sets. The former contain control characters such
2197 as `newline' and `escape' to provide control functions (control
2198 functions are also provided by escape sequences). The latter
2199 contain graphic characters such as 'A' and '-'. Emacs recognizes
2200 two control character sets and many graphic character sets.
2201
2202 Graphic character sets are classified into one of the following
2203 four classes, according to the number of bytes (DIMENSION) and
2204 number of characters in one dimension (CHARS) of the set:
2205 - DIMENSION1_CHARS94
2206 - DIMENSION1_CHARS96
2207 - DIMENSION2_CHARS94
2208 - DIMENSION2_CHARS96
2209
2210 In addition, each character set is assigned an identification tag,
2211 unique for each set, called the "final character" (denoted as <F>
2212 hereafter). The <F> of each character set is decided by ECMA(*)
2213 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2214 (0x30..0x3F are for private use only).
2215
2216 Note (*): ECMA = European Computer Manufacturers Association
2217
2218 Here are examples of graphic character sets [NAME(<F>)]:
2219 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2220 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2221 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2222 o DIMENSION2_CHARS96 -- none for the moment
2223
2224 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2225 C0 [0x00..0x1F] -- control character plane 0
2226 GL [0x20..0x7F] -- graphic character plane 0
2227 C1 [0x80..0x9F] -- control character plane 1
2228 GR [0xA0..0xFF] -- graphic character plane 1
2229
2230 A control character set is directly designated and invoked to C0 or
2231 C1 by an escape sequence. The most common case is that:
2232 - ISO646's control character set is designated/invoked to C0, and
2233 - ISO6429's control character set is designated/invoked to C1,
2234 and usually these designations/invocations are omitted in encoded
2235 text. In a 7-bit environment, only C0 can be used, and a control
2236 character for C1 is encoded by an appropriate escape sequence to
2237 fit into the environment. All control characters for C1 are
2238 defined to have corresponding escape sequences.
2239
2240 A graphic character set is at first designated to one of four
2241 graphic registers (G0 through G3), then these graphic registers are
2242 invoked to GL or GR. These designations and invocations can be
2243 done independently. The most common case is that G0 is invoked to
2244 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2245 these invocations and designations are omitted in encoded text.
2246 In a 7-bit environment, only GL can be used.
2247
2248 When a graphic character set of CHARS94 is invoked to GL, codes
2249 0x20 and 0x7F of the GL area work as control characters SPACE and
2250 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2251 be used.
2252
2253 There are two ways of invocation: locking-shift and single-shift.
2254 With locking-shift, the invocation lasts until the next different
2255 invocation, whereas with single-shift, the invocation affects the
2256 following character only and doesn't affect the locking-shift
2257 state. Invocations are done by the following control characters or
2258 escape sequences:
2259
2260 ----------------------------------------------------------------------
2261 abbrev function cntrl escape seq description
2262 ----------------------------------------------------------------------
2263 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2264 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2265 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2266 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2267 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2268 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2269 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2270 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2271 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2272 ----------------------------------------------------------------------
2273 (*) These are not used by any known coding system.
2274
2275 Control characters for these functions are defined by macros
2276 ISO_CODE_XXX in `coding.h'.
2277
2278 Designations are done by the following escape sequences:
2279 ----------------------------------------------------------------------
2280 escape sequence description
2281 ----------------------------------------------------------------------
2282 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2283 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2284 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2285 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2286 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2287 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2288 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2289 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2290 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2291 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2292 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2293 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2294 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2295 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2296 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2297 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2298 ----------------------------------------------------------------------
2299
2300 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2301 of dimension 1, chars 94, and final character <F>, etc...
2302
2303 Note (*): Although these designations are not allowed in ISO2022,
2304 Emacs accepts them on decoding, and produces them on encoding
2305 CHARS96 character sets in a coding system which is characterized as
2306 7-bit environment, non-locking-shift, and non-single-shift.
2307
2308 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2309 '(' must be omitted. We refer to this as "short-form" hereafter.
2310
2311 Now you may notice that there are a lot of ways of encoding the
2312 same multilingual text in ISO2022. Actually, there exist many
2313 coding systems such as Compound Text (used in X11's inter client
2314 communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2315 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2316 localized platforms), and all of these are variants of ISO2022.
2317
2318 In addition to the above, Emacs handles two more kinds of escape
2319 sequences: ISO6429's direction specification and Emacs' private
2320 sequence for specifying character composition.
2321
2322 ISO6429's direction specification takes the following form:
2323 o CSI ']' -- end of the current direction
2324 o CSI '0' ']' -- end of the current direction
2325 o CSI '1' ']' -- start of left-to-right text
2326 o CSI '2' ']' -- start of right-to-left text
2327 The control character CSI (0x9B: control sequence introducer) is
2328 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2329
2330 Character composition specification takes the following form:
2331 o ESC '0' -- start relative composition
2332 o ESC '1' -- end composition
2333 o ESC '2' -- start rule-base composition (*)
2334 o ESC '3' -- start relative composition with alternate chars (**)
2335 o ESC '4' -- start rule-base composition with alternate chars (**)
2336 Since these are not standard escape sequences of any ISO standard,
2337 the use of them with these meanings is restricted to Emacs only.
2338
2339 (*) This form is used only in Emacs 20.7 and older versions,
2340 but newer versions can safely decode it.
2341 (**) This form is used only in Emacs 21.1 and newer versions,
2342 and older versions can't decode it.
2343
2344 Here's a list of example usages of these composition escape
2345 sequences (categorized by `enum composition_method').
2346
2347 COMPOSITION_RELATIVE:
2348 ESC 0 CHAR [ CHAR ] ESC 1
2349 COMPOSITION_WITH_RULE:
2350 ESC 2 CHAR [ RULE CHAR ] ESC 1
2351 COMPOSITION_WITH_ALTCHARS:
2352 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2353 COMPOSITION_WITH_RULE_ALTCHARS:
2354 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2355
2356 enum iso_code_class_type iso_code_class[256];
2357
2358 #define SAFE_CHARSET_P(coding, id) \
2359 ((id) <= (coding)->max_charset_id \
2360 && (coding)->safe_charsets[id] >= 0)
2361
2362
2363 #define SHIFT_OUT_OK(category) \
2364 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2365
2366 static void
2367 setup_iso_safe_charsets (attrs)
2368 Lisp_Object attrs;
2369 {
2370 Lisp_Object charset_list, safe_charsets;
2371 Lisp_Object request;
2372 Lisp_Object reg_usage;
2373 Lisp_Object tail;
2374 int reg94, reg96;
2375 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2376 int max_charset_id;
2377
2378 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2379 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2380 && ! EQ (charset_list, Viso_2022_charset_list))
2381 {
2382 CODING_ATTR_CHARSET_LIST (attrs)
2383 = charset_list = Viso_2022_charset_list;
2384 ASET (attrs, coding_attr_safe_charsets, Qnil);
2385 }
2386
2387 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2388 return;
2389
2390 max_charset_id = 0;
2391 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2392 {
2393 int id = XINT (XCAR (tail));
2394 if (max_charset_id < id)
2395 max_charset_id = id;
2396 }
2397
2398 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2399 make_number (255));
2400 request = AREF (attrs, coding_attr_iso_request);
2401 reg_usage = AREF (attrs, coding_attr_iso_usage);
2402 reg94 = XINT (XCAR (reg_usage));
2403 reg96 = XINT (XCDR (reg_usage));
2404
2405 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2406 {
2407 Lisp_Object id;
2408 Lisp_Object reg;
2409 struct charset *charset;
2410
2411 id = XCAR (tail);
2412 charset = CHARSET_FROM_ID (XINT (id));
2413 reg = Fcdr (Fassq (id, request));
2414 if (! NILP (reg))
2415 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2416 else if (charset->iso_chars_96)
2417 {
2418 if (reg96 < 4)
2419 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2420 }
2421 else
2422 {
2423 if (reg94 < 4)
2424 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2425 }
2426 }
2427 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2428 }
2429
2430
2431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2432 Check if a text is encoded in one of ISO-2022 based codig systems.
2433 If it is, return 1, else return 0. */
2434
2435 static int
2436 detect_coding_iso_2022 (coding, detect_info)
2437 struct coding_system *coding;
2438 struct coding_detection_info *detect_info;
2439 {
2440 unsigned char *src = coding->source, *src_base = src;
2441 unsigned char *src_end = coding->source + coding->src_bytes;
2442 int multibytep = coding->src_multibyte;
2443 int single_shifting = 0;
2444 int id;
2445 int c, c1;
2446 int consumed_chars = 0;
2447 int i;
2448 int rejected = 0;
2449 int found = 0;
2450
2451 detect_info->checked |= CATEGORY_MASK_ISO;
2452
2453 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2454 {
2455 struct coding_system *this = &(coding_categories[i]);
2456 Lisp_Object attrs, val;
2457
2458 attrs = CODING_ID_ATTRS (this->id);
2459 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2460 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2461 setup_iso_safe_charsets (attrs);
2462 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2463 this->max_charset_id = XSTRING (val)->size - 1;
2464 this->safe_charsets = (char *) XSTRING (val)->data;
2465 }
2466
2467 /* A coding system of this category is always ASCII compatible. */
2468 src += coding->head_ascii;
2469
2470 while (rejected != CATEGORY_MASK_ISO)
2471 {
2472 ONE_MORE_BYTE (c);
2473 switch (c)
2474 {
2475 case ISO_CODE_ESC:
2476 if (inhibit_iso_escape_detection)
2477 break;
2478 single_shifting = 0;
2479 ONE_MORE_BYTE (c);
2480 if (c >= '(' && c <= '/')
2481 {
2482 /* Designation sequence for a charset of dimension 1. */
2483 ONE_MORE_BYTE (c1);
2484 if (c1 < ' ' || c1 >= 0x80
2485 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2486 /* Invalid designation sequence. Just ignore. */
2487 break;
2488 }
2489 else if (c == '$')
2490 {
2491 /* Designation sequence for a charset of dimension 2. */
2492 ONE_MORE_BYTE (c);
2493 if (c >= '@' && c <= 'B')
2494 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2495 id = iso_charset_table[1][0][c];
2496 else if (c >= '(' && c <= '/')
2497 {
2498 ONE_MORE_BYTE (c1);
2499 if (c1 < ' ' || c1 >= 0x80
2500 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2501 /* Invalid designation sequence. Just ignore. */
2502 break;
2503 }
2504 else
2505 /* Invalid designation sequence. Just ignore it. */
2506 break;
2507 }
2508 else if (c == 'N' || c == 'O')
2509 {
2510 /* ESC <Fe> for SS2 or SS3. */
2511 single_shifting = 1;
2512 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2513 break;
2514 }
2515 else if (c >= '0' && c <= '4')
2516 {
2517 /* ESC <Fp> for start/end composition. */
2518 found |= CATEGORY_MASK_ISO;
2519 break;
2520 }
2521 else
2522 {
2523 /* Invalid escape sequence. Just ignore it. */
2524 break;
2525 }
2526
2527 /* We found a valid designation sequence for CHARSET. */
2528 rejected |= CATEGORY_MASK_ISO_8BIT;
2529 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2530 id))
2531 found |= CATEGORY_MASK_ISO_7;
2532 else
2533 rejected |= CATEGORY_MASK_ISO_7;
2534 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2535 id))
2536 found |= CATEGORY_MASK_ISO_7_TIGHT;
2537 else
2538 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2539 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2540 id))
2541 found |= CATEGORY_MASK_ISO_7_ELSE;
2542 else
2543 rejected |= CATEGORY_MASK_ISO_7_ELSE;
2544 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2545 id))
2546 found |= CATEGORY_MASK_ISO_8_ELSE;
2547 else
2548 rejected |= CATEGORY_MASK_ISO_8_ELSE;
2549 break;
2550
2551 case ISO_CODE_SO:
2552 case ISO_CODE_SI:
2553 /* Locking shift out/in. */
2554 if (inhibit_iso_escape_detection)
2555 break;
2556 single_shifting = 0;
2557 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2558 found |= CATEGORY_MASK_ISO_ELSE;
2559 break;
2560
2561 case ISO_CODE_CSI:
2562 /* Control sequence introducer. */
2563 single_shifting = 0;
2564 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2565 found |= CATEGORY_MASK_ISO_8_ELSE;
2566 goto check_extra_latin;
2567
2568
2569 case ISO_CODE_SS2:
2570 case ISO_CODE_SS3:
2571 /* Single shift. */
2572 if (inhibit_iso_escape_detection)
2573 break;
2574 single_shifting = 1;
2575 rejected |= CATEGORY_MASK_ISO_7BIT;
2576 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2577 & CODING_ISO_FLAG_SINGLE_SHIFT)
2578 found |= CATEGORY_MASK_ISO_8_1;
2579 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2580 & CODING_ISO_FLAG_SINGLE_SHIFT)
2581 found |= CATEGORY_MASK_ISO_8_2;
2582 goto check_extra_latin;
2583
2584 default:
2585 if (c < 0x80)
2586 {
2587 single_shifting = 0;
2588 break;
2589 }
2590 if (c >= 0xA0)
2591 {
2592 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2593 found |= CATEGORY_MASK_ISO_8_1;
2594 /* Check the length of succeeding codes of the range
2595 0xA0..0FF. If the byte length is even, we include
2596 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2597 only when we are not single shifting. */
2598 if (! single_shifting
2599 && ! (rejected & CATEGORY_MASK_ISO_8_2))
2600 {
2601 int i = 1;
2602 while (src < src_end)
2603 {
2604 ONE_MORE_BYTE (c);
2605 if (c < 0xA0)
2606 break;
2607 i++;
2608 }
2609
2610 if (i & 1 && src < src_end)
2611 rejected |= CATEGORY_MASK_ISO_8_2;
2612 else
2613 found |= CATEGORY_MASK_ISO_8_2;
2614 }
2615 break;
2616 }
2617 check_extra_latin:
2618 single_shifting = 0;
2619 if (! VECTORP (Vlatin_extra_code_table)
2620 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2621 {
2622 rejected = CATEGORY_MASK_ISO;
2623 break;
2624 }
2625 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2626 & CODING_ISO_FLAG_LATIN_EXTRA)
2627 found |= CATEGORY_MASK_ISO_8_1;
2628 else
2629 rejected |= CATEGORY_MASK_ISO_8_1;
2630 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2631 & CODING_ISO_FLAG_LATIN_EXTRA)
2632 found |= CATEGORY_MASK_ISO_8_2;
2633 else
2634 rejected |= CATEGORY_MASK_ISO_8_2;
2635 }
2636 }
2637 detect_info->rejected |= CATEGORY_MASK_ISO;
2638 return 0;
2639
2640 no_more_source:
2641 detect_info->rejected |= rejected;
2642 detect_info->found |= (found & ~rejected);
2643 return 1;
2644 }
2645
2646
2647 /* Set designation state into CODING. */
2648 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2649 do { \
2650 int id, prev; \
2651 \
2652 if (final < '0' || final >= 128 \
2653 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2654 || !SAFE_CHARSET_P (coding, id)) \
2655 { \
2656 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2657 goto invalid_code; \
2658 } \
2659 prev = CODING_ISO_DESIGNATION (coding, reg); \
2660 if (id == charset_jisx0201_roman) \
2661 { \
2662 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2663 id = charset_ascii; \
2664 } \
2665 else if (id == charset_jisx0208_1978) \
2666 { \
2667 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2668 id = charset_jisx0208; \
2669 } \
2670 CODING_ISO_DESIGNATION (coding, reg) = id; \
2671 /* If there was an invalid designation to REG previously, and this \
2672 designation is ASCII to REG, we should keep this designation \
2673 sequence. */ \
2674 if (prev == -2 && id == charset_ascii) \
2675 goto invalid_code; \
2676 } while (0)
2677
2678
2679 #define MAYBE_FINISH_COMPOSITION() \
2680 do { \
2681 int i; \
2682 if (composition_state == COMPOSING_NO) \
2683 break; \
2684 /* It is assured that we have enough room for producing \
2685 characters stored in the table `components'. */ \
2686 if (charbuf + component_idx > charbuf_end) \
2687 goto no_more_source; \
2688 composition_state = COMPOSING_NO; \
2689 if (method == COMPOSITION_RELATIVE \
2690 || method == COMPOSITION_WITH_ALTCHARS) \
2691 { \
2692 for (i = 0; i < component_idx; i++) \
2693 *charbuf++ = components[i]; \
2694 char_offset += component_idx; \
2695 } \
2696 else \
2697 { \
2698 for (i = 0; i < component_idx; i += 2) \
2699 *charbuf++ = components[i]; \
2700 char_offset += (component_idx / 2) + 1; \
2701 } \
2702 } while (0)
2703
2704
2705 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2706 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2707 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2708 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2709 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2710 */
2711
2712 #define DECODE_COMPOSITION_START(c1) \
2713 do { \
2714 if (c1 == '0' \
2715 && composition_state == COMPOSING_COMPONENT_RULE) \
2716 { \
2717 component_len = component_idx; \
2718 composition_state = COMPOSING_CHAR; \
2719 } \
2720 else \
2721 { \
2722 unsigned char *p; \
2723 \
2724 MAYBE_FINISH_COMPOSITION (); \
2725 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2726 goto no_more_source; \
2727 for (p = src; p < src_end - 1; p++) \
2728 if (*p == ISO_CODE_ESC && p[1] == '1') \
2729 break; \
2730 if (p == src_end - 1) \
2731 { \
2732 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2733 goto invalid_code; \
2734 goto no_more_source; \
2735 } \
2736 \
2737 /* This is surely the start of a composition. */ \
2738 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2739 : c1 == '2' ? COMPOSITION_WITH_RULE \
2740 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2741 : COMPOSITION_WITH_RULE_ALTCHARS); \
2742 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2743 : COMPOSING_COMPONENT_CHAR); \
2744 component_idx = component_len = 0; \
2745 } \
2746 } while (0)
2747
2748
2749 /* Handle compositoin end sequence ESC 1. */
2750
2751 #define DECODE_COMPOSITION_END() \
2752 do { \
2753 int nchars = (component_len > 0 ? component_idx - component_len \
2754 : method == COMPOSITION_RELATIVE ? component_idx \
2755 : (component_idx + 1) / 2); \
2756 int i; \
2757 int *saved_charbuf = charbuf; \
2758 int from = coding->produced_char + char_offset; \
2759 int to = from + nchars; \
2760 \
2761 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
2762 if (method != COMPOSITION_RELATIVE) \
2763 { \
2764 if (component_len == 0) \
2765 for (i = 0; i < component_idx; i++) \
2766 *charbuf++ = components[i]; \
2767 else \
2768 for (i = 0; i < component_len; i++) \
2769 *charbuf++ = components[i]; \
2770 *saved_charbuf = saved_charbuf - charbuf; \
2771 } \
2772 if (method == COMPOSITION_WITH_RULE) \
2773 for (i = 0; i < component_idx; i += 2, char_offset++) \
2774 *charbuf++ = components[i]; \
2775 else \
2776 for (i = component_len; i < component_idx; i++, char_offset++) \
2777 *charbuf++ = components[i]; \
2778 coding->annotated = 1; \
2779 composition_state = COMPOSING_NO; \
2780 } while (0)
2781
2782
2783 /* Decode a composition rule from the byte C1 (and maybe one more byte
2784 from SRC) and store one encoded composition rule in
2785 coding->cmp_data. */
2786
2787 #define DECODE_COMPOSITION_RULE(c1) \
2788 do { \
2789 (c1) -= 32; \
2790 if (c1 < 81) /* old format (before ver.21) */ \
2791 { \
2792 int gref = (c1) / 9; \
2793 int nref = (c1) % 9; \
2794 if (gref == 4) gref = 10; \
2795 if (nref == 4) nref = 10; \
2796 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2797 } \
2798 else if (c1 < 93) /* new format (after ver.21) */ \
2799 { \
2800 ONE_MORE_BYTE (c2); \
2801 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2802 } \
2803 else \
2804 c1 = 0; \
2805 } while (0)
2806
2807
2808 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2809
2810 static void
2811 decode_coding_iso_2022 (coding)
2812 struct coding_system *coding;
2813 {
2814 unsigned char *src = coding->source + coding->consumed;
2815 unsigned char *src_end = coding->source + coding->src_bytes;
2816 unsigned char *src_base;
2817 int *charbuf = coding->charbuf;
2818 int *charbuf_end
2819 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2820 int consumed_chars = 0, consumed_chars_base;
2821 int multibytep = coding->src_multibyte;
2822 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2823 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2824 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2825 struct charset *charset;
2826 int c;
2827 /* For handling composition sequence. */
2828 #define COMPOSING_NO 0
2829 #define COMPOSING_CHAR 1
2830 #define COMPOSING_RULE 2
2831 #define COMPOSING_COMPONENT_CHAR 3
2832 #define COMPOSING_COMPONENT_RULE 4
2833
2834 int composition_state = COMPOSING_NO;
2835 enum composition_method method;
2836 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2837 int component_idx;
2838 int component_len;
2839 Lisp_Object attrs, eol_type, charset_list;
2840 int char_offset = coding->produced_char;
2841 int last_offset = char_offset;
2842 int last_id = charset_ascii;
2843
2844 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2845 setup_iso_safe_charsets (attrs);
2846
2847 while (1)
2848 {
2849 int c1, c2;
2850
2851 src_base = src;
2852 consumed_chars_base = consumed_chars;
2853
2854 if (charbuf >= charbuf_end)
2855 break;
2856
2857 ONE_MORE_BYTE (c1);
2858
2859 /* We produce at most one character. */
2860 switch (iso_code_class [c1])
2861 {
2862 case ISO_0x20_or_0x7F:
2863 if (composition_state != COMPOSING_NO)
2864 {
2865 if (composition_state == COMPOSING_RULE
2866 || composition_state == COMPOSING_COMPONENT_RULE)
2867 {
2868 DECODE_COMPOSITION_RULE (c1);
2869 components[component_idx++] = c1;
2870 composition_state--;
2871 continue;
2872 }
2873 }
2874 if (charset_id_0 < 0
2875 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2876 /* This is SPACE or DEL. */
2877 charset = CHARSET_FROM_ID (charset_ascii);
2878 else
2879 charset = CHARSET_FROM_ID (charset_id_0);
2880 break;
2881
2882 case ISO_graphic_plane_0:
2883 if (composition_state != COMPOSING_NO)
2884 {
2885 if (composition_state == COMPOSING_RULE
2886 || composition_state == COMPOSING_COMPONENT_RULE)
2887 {
2888 DECODE_COMPOSITION_RULE (c1);
2889 components[component_idx++] = c1;
2890 composition_state--;
2891 continue;
2892 }
2893 }
2894 charset = CHARSET_FROM_ID (charset_id_0);
2895 break;
2896
2897 case ISO_0xA0_or_0xFF:
2898 if (charset_id_1 < 0
2899 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2900 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2901 goto invalid_code;
2902 /* This is a graphic character, we fall down ... */
2903
2904 case ISO_graphic_plane_1:
2905 if (charset_id_1 < 0)
2906 goto invalid_code;
2907 charset = CHARSET_FROM_ID (charset_id_1);
2908 break;
2909
2910 case ISO_carriage_return:
2911 if (c1 == '\r')
2912 {
2913 if (EQ (eol_type, Qdos))
2914 {
2915 if (src == src_end)
2916 {
2917 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2918 goto no_more_source;
2919 }
2920 if (*src == '\n')
2921 ONE_MORE_BYTE (c1);
2922 }
2923 else if (EQ (eol_type, Qmac))
2924 c1 = '\n';
2925 }
2926 /* fall through */
2927
2928 case ISO_control_0:
2929 MAYBE_FINISH_COMPOSITION ();
2930 charset = CHARSET_FROM_ID (charset_ascii);
2931 break;
2932
2933 case ISO_control_1:
2934 MAYBE_FINISH_COMPOSITION ();
2935 goto invalid_code;
2936
2937 case ISO_shift_out:
2938 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2939 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2940 goto invalid_code;
2941 CODING_ISO_INVOCATION (coding, 0) = 1;
2942 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2943 continue;
2944
2945 case ISO_shift_in:
2946 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2947 goto invalid_code;
2948 CODING_ISO_INVOCATION (coding, 0) = 0;
2949 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2950 continue;
2951
2952 case ISO_single_shift_2_7:
2953 case ISO_single_shift_2:
2954 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2955 goto invalid_code;
2956 /* SS2 is handled as an escape sequence of ESC 'N' */
2957 c1 = 'N';
2958 goto label_escape_sequence;
2959
2960 case ISO_single_shift_3:
2961 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2962 goto invalid_code;
2963 /* SS2 is handled as an escape sequence of ESC 'O' */
2964 c1 = 'O';
2965 goto label_escape_sequence;
2966
2967 case ISO_control_sequence_introducer:
2968 /* CSI is handled as an escape sequence of ESC '[' ... */
2969 c1 = '[';
2970 goto label_escape_sequence;
2971
2972 case ISO_escape:
2973 ONE_MORE_BYTE (c1);
2974 label_escape_sequence:
2975 /* Escape sequences handled here are invocation,
2976 designation, direction specification, and character
2977 composition specification. */
2978 switch (c1)
2979 {
2980 case '&': /* revision of following character set */
2981 ONE_MORE_BYTE (c1);
2982 if (!(c1 >= '@' && c1 <= '~'))
2983 goto invalid_code;
2984 ONE_MORE_BYTE (c1);
2985 if (c1 != ISO_CODE_ESC)
2986 goto invalid_code;
2987 ONE_MORE_BYTE (c1);
2988 goto label_escape_sequence;
2989
2990 case '$': /* designation of 2-byte character set */
2991 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2992 goto invalid_code;
2993 ONE_MORE_BYTE (c1);
2994 if (c1 >= '@' && c1 <= 'B')
2995 { /* designation of JISX0208.1978, GB2312.1980,
2996 or JISX0208.1980 */
2997 DECODE_DESIGNATION (0, 2, 0, c1);
2998 }
2999 else if (c1 >= 0x28 && c1 <= 0x2B)
3000 { /* designation of DIMENSION2_CHARS94 character set */
3001 ONE_MORE_BYTE (c2);
3002 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3003 }
3004 else if (c1 >= 0x2C && c1 <= 0x2F)
3005 { /* designation of DIMENSION2_CHARS96 character set */
3006 ONE_MORE_BYTE (c2);
3007 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3008 }
3009 else
3010 goto invalid_code;
3011 /* We must update these variables now. */
3012 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3013 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3014 continue;
3015
3016 case 'n': /* invocation of locking-shift-2 */
3017 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3018 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3019 goto invalid_code;
3020 CODING_ISO_INVOCATION (coding, 0) = 2;
3021 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3022 continue;
3023
3024 case 'o': /* invocation of locking-shift-3 */
3025 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3026 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3027 goto invalid_code;
3028 CODING_ISO_INVOCATION (coding, 0) = 3;
3029 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3030 continue;
3031
3032 case 'N': /* invocation of single-shift-2 */
3033 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3034 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3035 goto invalid_code;
3036 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3037 ONE_MORE_BYTE (c1);
3038 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3039 goto invalid_code;
3040 break;
3041
3042 case 'O': /* invocation of single-shift-3 */
3043 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3044 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3045 goto invalid_code;
3046 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3047 ONE_MORE_BYTE (c1);
3048 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3049 goto invalid_code;
3050 break;
3051
3052 case '0': case '2': case '3': case '4': /* start composition */
3053 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3054 goto invalid_code;
3055 DECODE_COMPOSITION_START (c1);
3056 continue;
3057
3058 case '1': /* end composition */
3059 if (composition_state == COMPOSING_NO)
3060 goto invalid_code;
3061 DECODE_COMPOSITION_END ();
3062 continue;
3063
3064 case '[': /* specification of direction */
3065 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3066 goto invalid_code;
3067 /* For the moment, nested direction is not supported.
3068 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3069 left-to-right, and nozero means right-to-left. */
3070 ONE_MORE_BYTE (c1);
3071 switch (c1)
3072 {
3073 case ']': /* end of the current direction */
3074 coding->mode &= ~CODING_MODE_DIRECTION;
3075
3076 case '0': /* end of the current direction */
3077 case '1': /* start of left-to-right direction */
3078 ONE_MORE_BYTE (c1);
3079 if (c1 == ']')
3080 coding->mode &= ~CODING_MODE_DIRECTION;
3081 else
3082 goto invalid_code;
3083 break;
3084
3085 case '2': /* start of right-to-left direction */
3086 ONE_MORE_BYTE (c1);
3087 if (c1 == ']')
3088 coding->mode |= CODING_MODE_DIRECTION;
3089 else
3090 goto invalid_code;
3091 break;
3092
3093 default:
3094 goto invalid_code;
3095 }
3096 continue;
3097
3098 default:
3099 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3100 goto invalid_code;
3101 if (c1 >= 0x28 && c1 <= 0x2B)
3102 { /* designation of DIMENSION1_CHARS94 character set */
3103 ONE_MORE_BYTE (c2);
3104 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3105 }
3106 else if (c1 >= 0x2C && c1 <= 0x2F)
3107 { /* designation of DIMENSION1_CHARS96 character set */
3108 ONE_MORE_BYTE (c2);
3109 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3110 }
3111 else
3112 goto invalid_code;
3113 /* We must update these variables now. */
3114 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3115 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3116 continue;
3117 }
3118 }
3119
3120 if (charset->id != charset_ascii
3121 && last_id != charset->id)
3122 {
3123 if (last_id != charset_ascii)
3124 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3125 last_id = charset->id;
3126 last_offset = char_offset;
3127 }
3128
3129 /* Now we know CHARSET and 1st position code C1 of a character.
3130 Produce a decoded character while getting 2nd position code
3131 C2 if necessary. */
3132 c1 &= 0x7F;
3133 if (CHARSET_DIMENSION (charset) > 1)
3134 {
3135 ONE_MORE_BYTE (c2);
3136 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3137 /* C2 is not in a valid range. */
3138 goto invalid_code;
3139 c1 = (c1 << 8) | (c2 & 0x7F);
3140 if (CHARSET_DIMENSION (charset) > 2)
3141 {
3142 ONE_MORE_BYTE (c2);
3143 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3144 /* C2 is not in a valid range. */
3145 goto invalid_code;
3146 c1 = (c1 << 8) | (c2 & 0x7F);
3147 }
3148 }
3149
3150 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3151 if (c < 0)
3152 {
3153 MAYBE_FINISH_COMPOSITION ();
3154 for (; src_base < src; src_base++, char_offset++)
3155 {
3156 if (ASCII_BYTE_P (*src_base))
3157 *charbuf++ = *src_base;
3158 else
3159 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3160 char_offset++;
3161 }
3162 }
3163 else if (composition_state == COMPOSING_NO)
3164 {
3165 *charbuf++ = c;
3166 char_offset++;
3167 }
3168 else
3169 {
3170 components[component_idx++] = c;
3171 if (method == COMPOSITION_WITH_RULE
3172 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3173 && composition_state == COMPOSING_COMPONENT_CHAR))
3174 composition_state++;
3175 }
3176 continue;
3177
3178 invalid_code:
3179 MAYBE_FINISH_COMPOSITION ();
3180 src = src_base;
3181 consumed_chars = consumed_chars_base;
3182 ONE_MORE_BYTE (c);
3183 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3184 char_offset++;
3185 coding->errors++;
3186 }
3187
3188 no_more_source:
3189 if (last_id != charset_ascii)
3190 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3191 coding->consumed_char += consumed_chars_base;
3192 coding->consumed = src_base - coding->source;
3193 coding->charbuf_used = charbuf - coding->charbuf;
3194 }
3195
3196
3197 /* ISO2022 encoding stuff. */
3198
3199 /*
3200 It is not enough to say just "ISO2022" on encoding, we have to
3201 specify more details. In Emacs, each coding system of ISO2022
3202 variant has the following specifications:
3203 1. Initial designation to G0 thru G3.
3204 2. Allows short-form designation?
3205 3. ASCII should be designated to G0 before control characters?
3206 4. ASCII should be designated to G0 at end of line?
3207 5. 7-bit environment or 8-bit environment?
3208 6. Use locking-shift?
3209 7. Use Single-shift?
3210 And the following two are only for Japanese:
3211 8. Use ASCII in place of JIS0201-1976-Roman?
3212 9. Use JISX0208-1983 in place of JISX0208-1978?
3213 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3214 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3215 details.
3216 */
3217
3218 /* Produce codes (escape sequence) for designating CHARSET to graphic
3219 register REG at DST, and increment DST. If <final-char> of CHARSET is
3220 '@', 'A', or 'B' and the coding system CODING allows, produce
3221 designation sequence of short-form. */
3222
3223 #define ENCODE_DESIGNATION(charset, reg, coding) \
3224 do { \
3225 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3226 char *intermediate_char_94 = "()*+"; \
3227 char *intermediate_char_96 = ",-./"; \
3228 int revision = -1; \
3229 int c; \
3230 \
3231 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3232 revision = CHARSET_ISO_REVISION (charset); \
3233 \
3234 if (revision >= 0) \
3235 { \
3236 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3237 EMIT_ONE_BYTE ('@' + revision); \
3238 } \
3239 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3240 if (CHARSET_DIMENSION (charset) == 1) \
3241 { \
3242 if (! CHARSET_ISO_CHARS_96 (charset)) \
3243 c = intermediate_char_94[reg]; \
3244 else \
3245 c = intermediate_char_96[reg]; \
3246 EMIT_ONE_ASCII_BYTE (c); \
3247 } \
3248 else \
3249 { \
3250 EMIT_ONE_ASCII_BYTE ('$'); \
3251 if (! CHARSET_ISO_CHARS_96 (charset)) \
3252 { \
3253 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3254 || reg != 0 \
3255 || final_char < '@' || final_char > 'B') \
3256 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3257 } \
3258 else \
3259 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3260 } \
3261 EMIT_ONE_ASCII_BYTE (final_char); \
3262 \
3263 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3264 } while (0)
3265
3266
3267 /* The following two macros produce codes (control character or escape
3268 sequence) for ISO2022 single-shift functions (single-shift-2 and
3269 single-shift-3). */
3270
3271 #define ENCODE_SINGLE_SHIFT_2 \
3272 do { \
3273 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3274 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3275 else \
3276 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3277 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3278 } while (0)
3279
3280
3281 #define ENCODE_SINGLE_SHIFT_3 \
3282 do { \
3283 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3284 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3285 else \
3286 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3287 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3288 } while (0)
3289
3290
3291 /* The following four macros produce codes (control character or
3292 escape sequence) for ISO2022 locking-shift functions (shift-in,
3293 shift-out, locking-shift-2, and locking-shift-3). */
3294
3295 #define ENCODE_SHIFT_IN \
3296 do { \
3297 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3298 CODING_ISO_INVOCATION (coding, 0) = 0; \
3299 } while (0)
3300
3301
3302 #define ENCODE_SHIFT_OUT \
3303 do { \
3304 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3305 CODING_ISO_INVOCATION (coding, 0) = 1; \
3306 } while (0)
3307
3308
3309 #define ENCODE_LOCKING_SHIFT_2 \
3310 do { \
3311 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3312 CODING_ISO_INVOCATION (coding, 0) = 2; \
3313 } while (0)
3314
3315
3316 #define ENCODE_LOCKING_SHIFT_3 \
3317 do { \
3318 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3319 CODING_ISO_INVOCATION (coding, 0) = 3; \
3320 } while (0)
3321
3322
3323 /* Produce codes for a DIMENSION1 character whose character set is
3324 CHARSET and whose position-code is C1. Designation and invocation
3325 sequences are also produced in advance if necessary. */
3326
3327 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3328 do { \
3329 int id = CHARSET_ID (charset); \
3330 \
3331 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3332 && id == charset_ascii) \
3333 { \
3334 id = charset_jisx0201_roman; \
3335 charset = CHARSET_FROM_ID (id); \
3336 } \
3337 \
3338 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3339 { \
3340 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3341 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3342 else \
3343 EMIT_ONE_BYTE (c1 | 0x80); \
3344 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3345 break; \
3346 } \
3347 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3348 { \
3349 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3350 break; \
3351 } \
3352 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3353 { \
3354 EMIT_ONE_BYTE (c1 | 0x80); \
3355 break; \
3356 } \
3357 else \
3358 /* Since CHARSET is not yet invoked to any graphic planes, we \
3359 must invoke it, or, at first, designate it to some graphic \
3360 register. Then repeat the loop to actually produce the \
3361 character. */ \
3362 dst = encode_invocation_designation (charset, coding, dst, \
3363 &produced_chars); \
3364 } while (1)
3365
3366
3367 /* Produce codes for a DIMENSION2 character whose character set is
3368 CHARSET and whose position-codes are C1 and C2. Designation and
3369 invocation codes are also produced in advance if necessary. */
3370
3371 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3372 do { \
3373 int id = CHARSET_ID (charset); \
3374 \
3375 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3376 && id == charset_jisx0208) \
3377 { \
3378 id = charset_jisx0208_1978; \
3379 charset = CHARSET_FROM_ID (id); \
3380 } \
3381 \
3382 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3383 { \
3384 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3385 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3386 else \
3387 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3388 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3389 break; \
3390 } \
3391 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3392 { \
3393 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3394 break; \
3395 } \
3396 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3397 { \
3398 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3399 break; \
3400 } \
3401 else \
3402 /* Since CHARSET is not yet invoked to any graphic planes, we \
3403 must invoke it, or, at first, designate it to some graphic \
3404 register. Then repeat the loop to actually produce the \
3405 character. */ \
3406 dst = encode_invocation_designation (charset, coding, dst, \
3407 &produced_chars); \
3408 } while (1)
3409
3410
3411 #define ENCODE_ISO_CHARACTER(charset, c) \
3412 do { \
3413 int code = ENCODE_CHAR ((charset),(c)); \
3414 \
3415 if (CHARSET_DIMENSION (charset) == 1) \
3416 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3417 else \
3418 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3419 } while (0)
3420
3421
3422 /* Produce designation and invocation codes at a place pointed by DST
3423 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3424 Return new DST. */
3425
3426 unsigned char *
3427 encode_invocation_designation (charset, coding, dst, p_nchars)
3428 struct charset *charset;
3429 struct coding_system *coding;
3430 unsigned char *dst;
3431 int *p_nchars;
3432 {
3433 int multibytep = coding->dst_multibyte;
3434 int produced_chars = *p_nchars;
3435 int reg; /* graphic register number */
3436 int id = CHARSET_ID (charset);
3437
3438 /* At first, check designations. */
3439 for (reg = 0; reg < 4; reg++)
3440 if (id == CODING_ISO_DESIGNATION (coding, reg))
3441 break;
3442
3443 if (reg >= 4)
3444 {
3445 /* CHARSET is not yet designated to any graphic registers. */
3446 /* At first check the requested designation. */
3447 reg = CODING_ISO_REQUEST (coding, id);
3448 if (reg < 0)
3449 /* Since CHARSET requests no special designation, designate it
3450 to graphic register 0. */
3451 reg = 0;
3452
3453 ENCODE_DESIGNATION (charset, reg, coding);
3454 }
3455
3456 if (CODING_ISO_INVOCATION (coding, 0) != reg
3457 && CODING_ISO_INVOCATION (coding, 1) != reg)
3458 {
3459 /* Since the graphic register REG is not invoked to any graphic
3460 planes, invoke it to graphic plane 0. */
3461 switch (reg)
3462 {
3463 case 0: /* graphic register 0 */
3464 ENCODE_SHIFT_IN;
3465 break;
3466
3467 case 1: /* graphic register 1 */
3468 ENCODE_SHIFT_OUT;
3469 break;
3470
3471 case 2: /* graphic register 2 */
3472 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3473 ENCODE_SINGLE_SHIFT_2;
3474 else
3475 ENCODE_LOCKING_SHIFT_2;
3476 break;
3477
3478 case 3: /* graphic register 3 */
3479 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3480 ENCODE_SINGLE_SHIFT_3;
3481 else
3482 ENCODE_LOCKING_SHIFT_3;
3483 break;
3484 }
3485 }
3486
3487 *p_nchars = produced_chars;
3488 return dst;
3489 }
3490
3491 /* The following three macros produce codes for indicating direction
3492 of text. */
3493 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
3494 do { \
3495 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3496 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
3497 else \
3498 EMIT_ONE_BYTE (ISO_CODE_CSI); \
3499 } while (0)
3500
3501
3502 #define ENCODE_DIRECTION_R2L() \
3503 do { \
3504 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3505 EMIT_TWO_ASCII_BYTES ('2', ']'); \
3506 } while (0)
3507
3508
3509 #define ENCODE_DIRECTION_L2R() \
3510 do { \
3511 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3512 EMIT_TWO_ASCII_BYTES ('0', ']'); \
3513 } while (0)
3514
3515
3516 /* Produce codes for designation and invocation to reset the graphic
3517 planes and registers to initial state. */
3518 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3519 do { \
3520 int reg; \
3521 struct charset *charset; \
3522 \
3523 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3524 ENCODE_SHIFT_IN; \
3525 for (reg = 0; reg < 4; reg++) \
3526 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3527 && (CODING_ISO_DESIGNATION (coding, reg) \
3528 != CODING_ISO_INITIAL (coding, reg))) \
3529 { \
3530 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3531 ENCODE_DESIGNATION (charset, reg, coding); \
3532 } \
3533 } while (0)
3534
3535
3536 /* Produce designation sequences of charsets in the line started from
3537 SRC to a place pointed by DST, and return updated DST.
3538
3539 If the current block ends before any end-of-line, we may fail to
3540 find all the necessary designations. */
3541
3542 static unsigned char *
3543 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3544 struct coding_system *coding;
3545 int *charbuf, *charbuf_end;
3546 unsigned char *dst;
3547 {
3548 struct charset *charset;
3549 /* Table of charsets to be designated to each graphic register. */
3550 int r[4];
3551 int c, found = 0, reg;
3552 int produced_chars = 0;
3553 int multibytep = coding->dst_multibyte;
3554 Lisp_Object attrs;
3555 Lisp_Object charset_list;
3556
3557 attrs = CODING_ID_ATTRS (coding->id);
3558 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3559 if (EQ (charset_list, Qiso_2022))
3560 charset_list = Viso_2022_charset_list;
3561
3562 for (reg = 0; reg < 4; reg++)
3563 r[reg] = -1;
3564
3565 while (found < 4)
3566 {
3567 int id;
3568
3569 c = *charbuf++;
3570 if (c == '\n')
3571 break;
3572 charset = char_charset (c, charset_list, NULL);
3573 id = CHARSET_ID (charset);
3574 reg = CODING_ISO_REQUEST (coding, id);
3575 if (reg >= 0 && r[reg] < 0)
3576 {
3577 found++;
3578 r[reg] = id;
3579 }
3580 }
3581
3582 if (found)
3583 {
3584 for (reg = 0; reg < 4; reg++)
3585 if (r[reg] >= 0
3586 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3587 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3588 }
3589
3590 return dst;
3591 }
3592
3593 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3594
3595 static int
3596 encode_coding_iso_2022 (coding)
3597 struct coding_system *coding;
3598 {
3599 int multibytep = coding->dst_multibyte;
3600 int *charbuf = coding->charbuf;
3601 int *charbuf_end = charbuf + coding->charbuf_used;
3602 unsigned char *dst = coding->destination + coding->produced;
3603 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3604 int safe_room = 16;
3605 int bol_designation
3606 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3607 && CODING_ISO_BOL (coding));
3608 int produced_chars = 0;
3609 Lisp_Object attrs, eol_type, charset_list;
3610 int ascii_compatible;
3611 int c;
3612 int preferred_charset_id = -1;
3613
3614 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3615 setup_iso_safe_charsets (attrs);
3616 /* Charset list may have been changed. */
3617 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
3618 coding->safe_charsets
3619 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3620
3621 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3622
3623 while (charbuf < charbuf_end)
3624 {
3625 ASSURE_DESTINATION (safe_room);
3626
3627 if (bol_designation)
3628 {
3629 unsigned char *dst_prev = dst;
3630
3631 /* We have to produce designation sequences if any now. */
3632 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3633 bol_designation = 0;
3634 /* We are sure that designation sequences are all ASCII bytes. */
3635 produced_chars += dst - dst_prev;
3636 }
3637
3638 c = *charbuf++;
3639
3640 if (c < 0)
3641 {
3642 /* Handle an annotation. */
3643 switch (*charbuf)
3644 {
3645 case CODING_ANNOTATE_COMPOSITION_MASK:
3646 /* Not yet implemented. */
3647 break;
3648 case CODING_ANNOTATE_CHARSET_MASK:
3649 preferred_charset_id = charbuf[3];
3650 if (preferred_charset_id >= 0
3651 && NILP (Fmemq (make_number (preferred_charset_id),
3652 charset_list)))
3653 preferred_charset_id = -1;
3654 break;
3655 default:
3656 abort ();
3657 }
3658 charbuf += -c - 1;
3659 continue;
3660 }
3661
3662 /* Now encode the character C. */
3663 if (c < 0x20 || c == 0x7F)
3664 {
3665 if (c == '\n'
3666 || (c == '\r' && EQ (eol_type, Qmac)))
3667 {
3668 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3669 ENCODE_RESET_PLANE_AND_REGISTER ();
3670 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3671 {
3672 int i;
3673
3674 for (i = 0; i < 4; i++)
3675 CODING_ISO_DESIGNATION (coding, i)
3676 = CODING_ISO_INITIAL (coding, i);
3677 }
3678 bol_designation
3679 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3680 }
3681 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3682 ENCODE_RESET_PLANE_AND_REGISTER ();
3683 EMIT_ONE_ASCII_BYTE (c);
3684 }
3685 else if (ASCII_CHAR_P (c))
3686 {
3687 if (ascii_compatible)
3688 EMIT_ONE_ASCII_BYTE (c);
3689 else
3690 {
3691 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3692 ENCODE_ISO_CHARACTER (charset, c);
3693 }
3694 }
3695 else if (CHAR_BYTE8_P (c))
3696 {
3697 c = CHAR_TO_BYTE8 (c);
3698 EMIT_ONE_BYTE (c);
3699 }
3700 else
3701 {
3702 struct charset *charset;
3703
3704 if (preferred_charset_id >= 0)
3705 {
3706 charset = CHARSET_FROM_ID (preferred_charset_id);
3707 if (! CHAR_CHARSET_P (c, charset))
3708 charset = char_charset (c, charset_list, NULL);
3709 }
3710 else
3711 charset = char_charset (c, charset_list, NULL);
3712 if (!charset)
3713 {
3714 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3715 {
3716 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3717 charset = CHARSET_FROM_ID (charset_ascii);
3718 }
3719 else
3720 {
3721 c = coding->default_char;
3722 charset = char_charset (c, charset_list, NULL);
3723 }
3724 }
3725 ENCODE_ISO_CHARACTER (charset, c);
3726 }
3727 }
3728
3729 if (coding->mode & CODING_MODE_LAST_BLOCK
3730 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3731 {
3732 ASSURE_DESTINATION (safe_room);
3733 ENCODE_RESET_PLANE_AND_REGISTER ();
3734 }
3735 coding->result = CODING_RESULT_SUCCESS;
3736 CODING_ISO_BOL (coding) = bol_designation;
3737 coding->produced_char += produced_chars;
3738 coding->produced = dst - coding->destination;
3739 return 0;
3740 }
3741
3742 \f
3743 /*** 8,9. SJIS and BIG5 handlers ***/
3744
3745 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3746 quite widely. So, for the moment, Emacs supports them in the bare
3747 C code. But, in the future, they may be supported only by CCL. */
3748
3749 /* SJIS is a coding system encoding three character sets: ASCII, right
3750 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3751 as is. A character of charset katakana-jisx0201 is encoded by
3752 "position-code + 0x80". A character of charset japanese-jisx0208
3753 is encoded in 2-byte but two position-codes are divided and shifted
3754 so that it fit in the range below.
3755
3756 --- CODE RANGE of SJIS ---
3757 (character set) (range)
3758 ASCII 0x00 .. 0x7F
3759 KATAKANA-JISX0201 0xA0 .. 0xDF
3760 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3761 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3762 -------------------------------
3763
3764 */
3765
3766 /* BIG5 is a coding system encoding two character sets: ASCII and
3767 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3768 character set and is encoded in two-byte.
3769
3770 --- CODE RANGE of BIG5 ---
3771 (character set) (range)
3772 ASCII 0x00 .. 0x7F
3773 Big5 (1st byte) 0xA1 .. 0xFE
3774 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3775 --------------------------
3776
3777 */
3778
3779 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3780 Check if a text is encoded in SJIS. If it is, return
3781 CATEGORY_MASK_SJIS, else return 0. */
3782
3783 static int
3784 detect_coding_sjis (coding, detect_info)
3785 struct coding_system *coding;
3786 struct coding_detection_info *detect_info;
3787 {
3788 unsigned char *src = coding->source, *src_base = src;
3789 unsigned char *src_end = coding->source + coding->src_bytes;
3790 int multibytep = coding->src_multibyte;
3791 int consumed_chars = 0;
3792 int found = 0;
3793 int c;
3794 int incomplete;
3795
3796 detect_info->checked |= CATEGORY_MASK_SJIS;
3797 /* A coding system of this category is always ASCII compatible. */
3798 src += coding->head_ascii;
3799
3800 while (1)
3801 {
3802 incomplete = 0;
3803 ONE_MORE_BYTE (c);
3804 incomplete = 1;
3805 if (c < 0x80)
3806 continue;
3807 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3808 {
3809 ONE_MORE_BYTE (c);
3810 if (c < 0x40 || c == 0x7F || c > 0xFC)
3811 break;
3812 found = CATEGORY_MASK_SJIS;
3813 }
3814 else if (c >= 0xA0 && c < 0xE0)
3815 found = CATEGORY_MASK_SJIS;
3816 else
3817 break;
3818 }
3819 detect_info->rejected |= CATEGORY_MASK_SJIS;
3820 return 0;
3821
3822 no_more_source:
3823 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3824 {
3825 detect_info->rejected |= CATEGORY_MASK_SJIS;
3826 return 0;
3827 }
3828 detect_info->found |= found;
3829 return 1;
3830 }
3831
3832 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3833 Check if a text is encoded in BIG5. If it is, return
3834 CATEGORY_MASK_BIG5, else return 0. */
3835
3836 static int
3837 detect_coding_big5 (coding, detect_info)
3838 struct coding_system *coding;
3839 struct coding_detection_info *detect_info;
3840 {
3841 unsigned char *src = coding->source, *src_base = src;
3842 unsigned char *src_end = coding->source + coding->src_bytes;
3843 int multibytep = coding->src_multibyte;
3844 int consumed_chars = 0;
3845 int found = 0;
3846 int c;
3847 int incomplete;
3848
3849 detect_info->checked |= CATEGORY_MASK_BIG5;
3850 /* A coding system of this category is always ASCII compatible. */
3851 src += coding->head_ascii;
3852
3853 while (1)
3854 {
3855 incomplete = 0;
3856 ONE_MORE_BYTE (c);
3857 incomplete = 1;
3858 if (c < 0x80)
3859 continue;
3860 if (c >= 0xA1)
3861 {
3862 ONE_MORE_BYTE (c);
3863 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3864 return 0;
3865 found = CATEGORY_MASK_BIG5;
3866 }
3867 else
3868 break;
3869 }
3870 detect_info->rejected |= CATEGORY_MASK_BIG5;
3871 return 0;
3872
3873 no_more_source:
3874 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3875 {
3876 detect_info->rejected |= CATEGORY_MASK_BIG5;
3877 return 0;
3878 }
3879 detect_info->found |= found;
3880 return 1;
3881 }
3882
3883 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3884 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3885
3886 static void
3887 decode_coding_sjis (coding)
3888 struct coding_system *coding;
3889 {
3890 unsigned char *src = coding->source + coding->consumed;
3891 unsigned char *src_end = coding->source + coding->src_bytes;
3892 unsigned char *src_base;
3893 int *charbuf = coding->charbuf;
3894 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3895 int consumed_chars = 0, consumed_chars_base;
3896 int multibytep = coding->src_multibyte;
3897 struct charset *charset_roman, *charset_kanji, *charset_kana;
3898 Lisp_Object attrs, eol_type, charset_list, val;
3899 int char_offset = coding->produced_char;
3900 int last_offset = char_offset;
3901 int last_id = charset_ascii;
3902
3903 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3904
3905 val = charset_list;
3906 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3907 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3908 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3909
3910 while (1)
3911 {
3912 int c, c1;
3913
3914 src_base = src;
3915 consumed_chars_base = consumed_chars;
3916
3917 if (charbuf >= charbuf_end)
3918 break;
3919
3920 ONE_MORE_BYTE (c);
3921
3922 if (c == '\r')
3923 {
3924 if (EQ (eol_type, Qdos))
3925 {
3926 if (src == src_end)
3927 {
3928 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3929 goto no_more_source;
3930 }
3931 if (*src == '\n')
3932 ONE_MORE_BYTE (c);
3933 }
3934 else if (EQ (eol_type, Qmac))
3935 c = '\n';
3936 }
3937 else
3938 {
3939 struct charset *charset;
3940
3941 if (c < 0x80)
3942 charset = charset_roman;
3943 else
3944 {
3945 if (c >= 0xF0)
3946 goto invalid_code;
3947 if (c < 0xA0 || c >= 0xE0)
3948 {
3949 /* SJIS -> JISX0208 */
3950 ONE_MORE_BYTE (c1);
3951 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3952 goto invalid_code;
3953 c = (c << 8) | c1;
3954 SJIS_TO_JIS (c);
3955 charset = charset_kanji;
3956 }
3957 else
3958 {
3959 /* SJIS -> JISX0201-Kana */
3960 c &= 0x7F;
3961 charset = charset_kana;
3962 }
3963 }
3964 if (charset->id != charset_ascii
3965 && last_id != charset->id)
3966 {
3967 if (last_id != charset_ascii)
3968 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3969 last_id = charset->id;
3970 last_offset = char_offset;
3971 }
3972 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3973 }
3974 *charbuf++ = c;
3975 char_offset++;
3976 continue;
3977
3978 invalid_code:
3979 src = src_base;
3980 consumed_chars = consumed_chars_base;
3981 ONE_MORE_BYTE (c);
3982 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3983 char_offset++;
3984 coding->errors++;
3985 }
3986
3987 no_more_source:
3988 if (last_id != charset_ascii)
3989 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3990 coding->consumed_char += consumed_chars_base;
3991 coding->consumed = src_base - coding->source;
3992 coding->charbuf_used = charbuf - coding->charbuf;
3993 }
3994
3995 static void
3996 decode_coding_big5 (coding)
3997 struct coding_system *coding;
3998 {
3999 unsigned char *src = coding->source + coding->consumed;
4000 unsigned char *src_end = coding->source + coding->src_bytes;
4001 unsigned char *src_base;
4002 int *charbuf = coding->charbuf;
4003 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4004 int consumed_chars = 0, consumed_chars_base;
4005 int multibytep = coding->src_multibyte;
4006 struct charset *charset_roman, *charset_big5;
4007 Lisp_Object attrs, eol_type, charset_list, val;
4008 int char_offset = coding->produced_char;
4009 int last_offset = char_offset;
4010 int last_id = charset_ascii;
4011
4012 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4013 val = charset_list;
4014 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4015 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4016
4017 while (1)
4018 {
4019 int c, c1;
4020
4021 src_base = src;
4022 consumed_chars_base = consumed_chars;
4023
4024 if (charbuf >= charbuf_end)
4025 break;
4026
4027 ONE_MORE_BYTE (c);
4028
4029 if (c == '\r')
4030 {
4031 if (EQ (eol_type, Qdos))
4032 {
4033 if (src == src_end)
4034 {
4035 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4036 goto no_more_source;
4037 }
4038 if (*src == '\n')
4039 ONE_MORE_BYTE (c);
4040 }
4041 else if (EQ (eol_type, Qmac))
4042 c = '\n';
4043 }
4044 else
4045 {
4046 struct charset *charset;
4047 if (c < 0x80)
4048 charset = charset_roman;
4049 else
4050 {
4051 /* BIG5 -> Big5 */
4052 if (c < 0xA1 || c > 0xFE)
4053 goto invalid_code;
4054 ONE_MORE_BYTE (c1);
4055 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4056 goto invalid_code;
4057 c = c << 8 | c1;
4058 charset = charset_big5;
4059 }
4060 if (charset->id != charset_ascii
4061 && last_id != charset->id)
4062 {
4063 if (last_id != charset_ascii)
4064 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4065 last_id = charset->id;
4066 last_offset = char_offset;
4067 }
4068 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4069 }
4070
4071 *charbuf++ = c;
4072 char_offset++;
4073 continue;
4074
4075 invalid_code:
4076 src = src_base;
4077 consumed_chars = consumed_chars_base;
4078 ONE_MORE_BYTE (c);
4079 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4080 char_offset++;
4081 coding->errors++;
4082 }
4083
4084 no_more_source:
4085 if (last_id != charset_ascii)
4086 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4087 coding->consumed_char += consumed_chars_base;
4088 coding->consumed = src_base - coding->source;
4089 coding->charbuf_used = charbuf - coding->charbuf;
4090 }
4091
4092 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4093 This function can encode charsets `ascii', `katakana-jisx0201',
4094 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4095 are sure that all these charsets are registered as official charset
4096 (i.e. do not have extended leading-codes). Characters of other
4097 charsets are produced without any encoding. If SJIS_P is 1, encode
4098 SJIS text, else encode BIG5 text. */
4099
4100 static int
4101 encode_coding_sjis (coding)
4102 struct coding_system *coding;
4103 {
4104 int multibytep = coding->dst_multibyte;
4105 int *charbuf = coding->charbuf;
4106 int *charbuf_end = charbuf + coding->charbuf_used;
4107 unsigned char *dst = coding->destination + coding->produced;
4108 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4109 int safe_room = 4;
4110 int produced_chars = 0;
4111 Lisp_Object attrs, eol_type, charset_list, val;
4112 int ascii_compatible;
4113 struct charset *charset_roman, *charset_kanji, *charset_kana;
4114 int c;
4115
4116 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4117 val = charset_list;
4118 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4119 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4120 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4121
4122 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4123
4124 while (charbuf < charbuf_end)
4125 {
4126 ASSURE_DESTINATION (safe_room);
4127 c = *charbuf++;
4128 /* Now encode the character C. */
4129 if (ASCII_CHAR_P (c) && ascii_compatible)
4130 EMIT_ONE_ASCII_BYTE (c);
4131 else if (CHAR_BYTE8_P (c))
4132 {
4133 c = CHAR_TO_BYTE8 (c);
4134 EMIT_ONE_BYTE (c);
4135 }
4136 else
4137 {
4138 unsigned code;
4139 struct charset *charset = char_charset (c, charset_list, &code);
4140
4141 if (!charset)
4142 {
4143 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4144 {
4145 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4146 charset = CHARSET_FROM_ID (charset_ascii);
4147 }
4148 else
4149 {
4150 c = coding->default_char;
4151 charset = char_charset (c, charset_list, &code);
4152 }
4153 }
4154 if (code == CHARSET_INVALID_CODE (charset))
4155 abort ();
4156 if (charset == charset_kanji)
4157 {
4158 int c1, c2;
4159 JIS_TO_SJIS (code);
4160 c1 = code >> 8, c2 = code & 0xFF;
4161 EMIT_TWO_BYTES (c1, c2);
4162 }
4163 else if (charset == charset_kana)
4164 EMIT_ONE_BYTE (code | 0x80);
4165 else
4166 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4167 }
4168 }
4169 coding->result = CODING_RESULT_SUCCESS;
4170 coding->produced_char += produced_chars;
4171 coding->produced = dst - coding->destination;
4172 return 0;
4173 }
4174
4175 static int
4176 encode_coding_big5 (coding)
4177 struct coding_system *coding;
4178 {
4179 int multibytep = coding->dst_multibyte;
4180 int *charbuf = coding->charbuf;
4181 int *charbuf_end = charbuf + coding->charbuf_used;
4182 unsigned char *dst = coding->destination + coding->produced;
4183 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4184 int safe_room = 4;
4185 int produced_chars = 0;
4186 Lisp_Object attrs, eol_type, charset_list, val;
4187 int ascii_compatible;
4188 struct charset *charset_roman, *charset_big5;
4189 int c;
4190
4191 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4192 val = charset_list;
4193 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4194 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4195 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4196
4197 while (charbuf < charbuf_end)
4198 {
4199 ASSURE_DESTINATION (safe_room);
4200 c = *charbuf++;
4201 /* Now encode the character C. */
4202 if (ASCII_CHAR_P (c) && ascii_compatible)
4203 EMIT_ONE_ASCII_BYTE (c);
4204 else if (CHAR_BYTE8_P (c))
4205 {
4206 c = CHAR_TO_BYTE8 (c);
4207 EMIT_ONE_BYTE (c);
4208 }
4209 else
4210 {
4211 unsigned code;
4212 struct charset *charset = char_charset (c, charset_list, &code);
4213
4214 if (! charset)
4215 {
4216 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4217 {
4218 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4219 charset = CHARSET_FROM_ID (charset_ascii);
4220 }
4221 else
4222 {
4223 c = coding->default_char;
4224 charset = char_charset (c, charset_list, &code);
4225 }
4226 }
4227 if (code == CHARSET_INVALID_CODE (charset))
4228 abort ();
4229 if (charset == charset_big5)
4230 {
4231 int c1, c2;
4232
4233 c1 = code >> 8, c2 = code & 0xFF;
4234 EMIT_TWO_BYTES (c1, c2);
4235 }
4236 else
4237 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4238 }
4239 }
4240 coding->result = CODING_RESULT_SUCCESS;
4241 coding->produced_char += produced_chars;
4242 coding->produced = dst - coding->destination;
4243 return 0;
4244 }
4245
4246 \f
4247 /*** 10. CCL handlers ***/
4248
4249 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4250 Check if a text is encoded in a coding system of which
4251 encoder/decoder are written in CCL program. If it is, return
4252 CATEGORY_MASK_CCL, else return 0. */
4253
4254 static int
4255 detect_coding_ccl (coding, detect_info)
4256 struct coding_system *coding;
4257 struct coding_detection_info *detect_info;
4258 {
4259 unsigned char *src = coding->source, *src_base = src;
4260 unsigned char *src_end = coding->source + coding->src_bytes;
4261 int multibytep = coding->src_multibyte;
4262 int consumed_chars = 0;
4263 int found = 0;
4264 unsigned char *valids = CODING_CCL_VALIDS (coding);
4265 int head_ascii = coding->head_ascii;
4266 Lisp_Object attrs;
4267
4268 detect_info->checked |= CATEGORY_MASK_CCL;
4269
4270 coding = &coding_categories[coding_category_ccl];
4271 attrs = CODING_ID_ATTRS (coding->id);
4272 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4273 src += head_ascii;
4274
4275 while (1)
4276 {
4277 int c;
4278 ONE_MORE_BYTE (c);
4279 if (! valids[c])
4280 break;
4281 if ((valids[c] > 1))
4282 found = CATEGORY_MASK_CCL;
4283 }
4284 detect_info->rejected |= CATEGORY_MASK_CCL;
4285 return 0;
4286
4287 no_more_source:
4288 detect_info->found |= found;
4289 return 1;
4290 }
4291
4292 static void
4293 decode_coding_ccl (coding)
4294 struct coding_system *coding;
4295 {
4296 const unsigned char *src = coding->source + coding->consumed;
4297 unsigned char *src_end = coding->source + coding->src_bytes;
4298 int *charbuf = coding->charbuf;
4299 int *charbuf_end = charbuf + coding->charbuf_size;
4300 int consumed_chars = 0;
4301 int multibytep = coding->src_multibyte;
4302 struct ccl_program ccl;
4303 int source_charbuf[1024];
4304 int source_byteidx[1024];
4305 Lisp_Object attrs, eol_type, charset_list, valids;
4306
4307 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4308 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4309
4310 while (src < src_end)
4311 {
4312 const unsigned char *p = src;
4313 int *source, *source_end;
4314 int i = 0;
4315
4316 if (multibytep)
4317 while (i < 1024 && p < src_end)
4318 {
4319 source_byteidx[i] = p - src;
4320 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4321 }
4322 else
4323 while (i < 1024 && p < src_end)
4324 source_charbuf[i++] = *p++;
4325
4326 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4327 ccl.last_block = 1;
4328
4329 source = source_charbuf;
4330 source_end = source + i;
4331 while (source < source_end)
4332 {
4333 ccl_driver (&ccl, source, charbuf,
4334 source_end - source, charbuf_end - charbuf,
4335 charset_list);
4336 source += ccl.consumed;
4337 charbuf += ccl.produced;
4338 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4339 break;
4340 }
4341 if (source < source_end)
4342 src += source_byteidx[source - source_charbuf];
4343 else
4344 src = p;
4345 consumed_chars += source - source_charbuf;
4346
4347 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4348 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4349 break;
4350 }
4351
4352 switch (ccl.status)
4353 {
4354 case CCL_STAT_SUSPEND_BY_SRC:
4355 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4356 break;
4357 case CCL_STAT_SUSPEND_BY_DST:
4358 break;
4359 case CCL_STAT_QUIT:
4360 case CCL_STAT_INVALID_CMD:
4361 coding->result = CODING_RESULT_INTERRUPT;
4362 break;
4363 default:
4364 coding->result = CODING_RESULT_SUCCESS;
4365 break;
4366 }
4367 coding->consumed_char += consumed_chars;
4368 coding->consumed = src - coding->source;
4369 coding->charbuf_used = charbuf - coding->charbuf;
4370 }
4371
4372 static int
4373 encode_coding_ccl (coding)
4374 struct coding_system *coding;
4375 {
4376 struct ccl_program ccl;
4377 int multibytep = coding->dst_multibyte;
4378 int *charbuf = coding->charbuf;
4379 int *charbuf_end = charbuf + coding->charbuf_used;
4380 unsigned char *dst = coding->destination + coding->produced;
4381 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4382 unsigned char *adjusted_dst_end = dst_end - 1;
4383 int destination_charbuf[1024];
4384 int i, produced_chars = 0;
4385 Lisp_Object attrs, eol_type, charset_list;
4386
4387 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4388 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4389
4390 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4391 ccl.dst_multibyte = coding->dst_multibyte;
4392
4393 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4394 {
4395 int dst_bytes = dst_end - dst;
4396 if (dst_bytes > 1024)
4397 dst_bytes = 1024;
4398
4399 ccl_driver (&ccl, charbuf, destination_charbuf,
4400 charbuf_end - charbuf, dst_bytes, charset_list);
4401 charbuf += ccl.consumed;
4402 if (multibytep)
4403 for (i = 0; i < ccl.produced; i++)
4404 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4405 else
4406 {
4407 for (i = 0; i < ccl.produced; i++)
4408 *dst++ = destination_charbuf[i] & 0xFF;
4409 produced_chars += ccl.produced;
4410 }
4411 }
4412
4413 switch (ccl.status)
4414 {
4415 case CCL_STAT_SUSPEND_BY_SRC:
4416 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4417 break;
4418 case CCL_STAT_SUSPEND_BY_DST:
4419 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4420 break;
4421 case CCL_STAT_QUIT:
4422 case CCL_STAT_INVALID_CMD:
4423 coding->result = CODING_RESULT_INTERRUPT;
4424 break;
4425 default:
4426 coding->result = CODING_RESULT_SUCCESS;
4427 break;
4428 }
4429
4430 coding->produced_char += produced_chars;
4431 coding->produced = dst - coding->destination;
4432 return 0;
4433 }
4434
4435
4436 \f
4437 /*** 10, 11. no-conversion handlers ***/
4438
4439 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4440
4441 static void
4442 decode_coding_raw_text (coding)
4443 struct coding_system *coding;
4444 {
4445 coding->chars_at_source = 1;
4446 coding->consumed_char = 0;
4447 coding->consumed = 0;
4448 coding->result = CODING_RESULT_SUCCESS;
4449 }
4450
4451 static int
4452 encode_coding_raw_text (coding)
4453 struct coding_system *coding;
4454 {
4455 int multibytep = coding->dst_multibyte;
4456 int *charbuf = coding->charbuf;
4457 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4458 unsigned char *dst = coding->destination + coding->produced;
4459 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4460 int produced_chars = 0;
4461 int c;
4462
4463 if (multibytep)
4464 {
4465 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4466
4467 if (coding->src_multibyte)
4468 while (charbuf < charbuf_end)
4469 {
4470 ASSURE_DESTINATION (safe_room);
4471 c = *charbuf++;
4472 if (ASCII_CHAR_P (c))
4473 EMIT_ONE_ASCII_BYTE (c);
4474 else if (CHAR_BYTE8_P (c))
4475 {
4476 c = CHAR_TO_BYTE8 (c);
4477 EMIT_ONE_BYTE (c);
4478 }
4479 else
4480 {
4481 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4482
4483 CHAR_STRING_ADVANCE (c, p1);
4484 while (p0 < p1)
4485 {
4486 EMIT_ONE_BYTE (*p0);
4487 p0++;
4488 }
4489 }
4490 }
4491 else
4492 while (charbuf < charbuf_end)
4493 {
4494 ASSURE_DESTINATION (safe_room);
4495 c = *charbuf++;
4496 EMIT_ONE_BYTE (c);
4497 }
4498 }
4499 else
4500 {
4501 if (coding->src_multibyte)
4502 {
4503 int safe_room = MAX_MULTIBYTE_LENGTH;
4504
4505 while (charbuf < charbuf_end)
4506 {
4507 ASSURE_DESTINATION (safe_room);
4508 c = *charbuf++;
4509 if (ASCII_CHAR_P (c))
4510 *dst++ = c;
4511 else if (CHAR_BYTE8_P (c))
4512 *dst++ = CHAR_TO_BYTE8 (c);
4513 else
4514 CHAR_STRING_ADVANCE (c, dst);
4515 produced_chars++;
4516 }
4517 }
4518 else
4519 {
4520 ASSURE_DESTINATION (charbuf_end - charbuf);
4521 while (charbuf < charbuf_end && dst < dst_end)
4522 *dst++ = *charbuf++;
4523 produced_chars = dst - (coding->destination + coding->dst_bytes);
4524 }
4525 }
4526 coding->result = CODING_RESULT_SUCCESS;
4527 coding->produced_char += produced_chars;
4528 coding->produced = dst - coding->destination;
4529 return 0;
4530 }
4531
4532 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4533 Check if a text is encoded in a charset-based coding system. If it
4534 is, return 1, else return 0. */
4535
4536 static int
4537 detect_coding_charset (coding, detect_info)
4538 struct coding_system *coding;
4539 struct coding_detection_info *detect_info;
4540 {
4541 unsigned char *src = coding->source, *src_base = src;
4542 unsigned char *src_end = coding->source + coding->src_bytes;
4543 int multibytep = coding->src_multibyte;
4544 int consumed_chars = 0;
4545 Lisp_Object attrs, valids;
4546 int found = 0;
4547
4548 detect_info->checked |= CATEGORY_MASK_CHARSET;
4549
4550 coding = &coding_categories[coding_category_charset];
4551 attrs = CODING_ID_ATTRS (coding->id);
4552 valids = AREF (attrs, coding_attr_charset_valids);
4553
4554 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4555 src += coding->head_ascii;
4556
4557 while (1)
4558 {
4559 int c;
4560
4561 ONE_MORE_BYTE (c);
4562 if (NILP (AREF (valids, c)))
4563 break;
4564 if (c >= 0x80)
4565 found = CATEGORY_MASK_CHARSET;
4566 }
4567 detect_info->rejected |= CATEGORY_MASK_CHARSET;
4568 return 0;
4569
4570 no_more_source:
4571 detect_info->found |= found;
4572 return 1;
4573 }
4574
4575 static void
4576 decode_coding_charset (coding)
4577 struct coding_system *coding;
4578 {
4579 unsigned char *src = coding->source + coding->consumed;
4580 unsigned char *src_end = coding->source + coding->src_bytes;
4581 unsigned char *src_base;
4582 int *charbuf = coding->charbuf;
4583 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4584 int consumed_chars = 0, consumed_chars_base;
4585 int multibytep = coding->src_multibyte;
4586 Lisp_Object attrs, eol_type, charset_list, valids;
4587 int char_offset = coding->produced_char;
4588 int last_offset = char_offset;
4589 int last_id = charset_ascii;
4590
4591 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4592 valids = AREF (attrs, coding_attr_charset_valids);
4593
4594 while (1)
4595 {
4596 int c;
4597
4598 src_base = src;
4599 consumed_chars_base = consumed_chars;
4600
4601 if (charbuf >= charbuf_end)
4602 break;
4603
4604 ONE_MORE_BYTE (c);
4605 if (c == '\r')
4606 {
4607 /* Here we assume that no charset maps '\r' to something
4608 else. */
4609 if (EQ (eol_type, Qdos))
4610 {
4611 if (src == src_end)
4612 {
4613 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4614 goto no_more_source;
4615 }
4616 if (*src == '\n')
4617 ONE_MORE_BYTE (c);
4618 }
4619 else if (EQ (eol_type, Qmac))
4620 c = '\n';
4621 }
4622 else
4623 {
4624 Lisp_Object val;
4625 struct charset *charset;
4626 int dim;
4627 int len = 1;
4628 unsigned code = c;
4629
4630 val = AREF (valids, c);
4631 if (NILP (val))
4632 goto invalid_code;
4633 if (INTEGERP (val))
4634 {
4635 charset = CHARSET_FROM_ID (XFASTINT (val));
4636 dim = CHARSET_DIMENSION (charset);
4637 while (len < dim)
4638 {
4639 ONE_MORE_BYTE (c);
4640 code = (code << 8) | c;
4641 len++;
4642 }
4643 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4644 charset, code, c);
4645 }
4646 else
4647 {
4648 /* VAL is a list of charset IDs. It is assured that the
4649 list is sorted by charset dimensions (smaller one
4650 comes first). */
4651 while (CONSP (val))
4652 {
4653 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4654 dim = CHARSET_DIMENSION (charset);
4655 while (len < dim)
4656 {
4657 ONE_MORE_BYTE (c);
4658 code = (code << 8) | c;
4659 len++;
4660 }
4661 CODING_DECODE_CHAR (coding, src, src_base,
4662 src_end, charset, code, c);
4663 if (c >= 0)
4664 break;
4665 val = XCDR (val);
4666 }
4667 }
4668 if (c < 0)
4669 goto invalid_code;
4670 if (charset->id != charset_ascii
4671 && last_id != charset->id)
4672 {
4673 if (last_id != charset_ascii)
4674 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4675 last_id = charset->id;
4676 last_offset = char_offset;
4677 }
4678 }
4679 *charbuf++ = c;
4680 char_offset++;
4681 continue;
4682
4683 invalid_code:
4684 src = src_base;
4685 consumed_chars = consumed_chars_base;
4686 ONE_MORE_BYTE (c);
4687 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4688 char_offset++;
4689 coding->errors++;
4690 }
4691
4692 no_more_source:
4693 if (last_id != charset_ascii)
4694 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4695 coding->consumed_char += consumed_chars_base;
4696 coding->consumed = src_base - coding->source;
4697 coding->charbuf_used = charbuf - coding->charbuf;
4698 }
4699
4700 static int
4701 encode_coding_charset (coding)
4702 struct coding_system *coding;
4703 {
4704 int multibytep = coding->dst_multibyte;
4705 int *charbuf = coding->charbuf;
4706 int *charbuf_end = charbuf + coding->charbuf_used;
4707 unsigned char *dst = coding->destination + coding->produced;
4708 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4709 int safe_room = MAX_MULTIBYTE_LENGTH;
4710 int produced_chars = 0;
4711 Lisp_Object attrs, eol_type, charset_list;
4712 int ascii_compatible;
4713 int c;
4714
4715 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4716 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4717
4718 while (charbuf < charbuf_end)
4719 {
4720 struct charset *charset;
4721 unsigned code;
4722
4723 ASSURE_DESTINATION (safe_room);
4724 c = *charbuf++;
4725 if (ascii_compatible && ASCII_CHAR_P (c))
4726 EMIT_ONE_ASCII_BYTE (c);
4727 else if (CHAR_BYTE8_P (c))
4728 {
4729 c = CHAR_TO_BYTE8 (c);
4730 EMIT_ONE_BYTE (c);
4731 }
4732 else
4733 {
4734 charset = char_charset (c, charset_list, &code);
4735 if (charset)
4736 {
4737 if (CHARSET_DIMENSION (charset) == 1)
4738 EMIT_ONE_BYTE (code);
4739 else if (CHARSET_DIMENSION (charset) == 2)
4740 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4741 else if (CHARSET_DIMENSION (charset) == 3)
4742 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4743 else
4744 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4745 (code >> 8) & 0xFF, code & 0xFF);
4746 }
4747 else
4748 {
4749 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4750 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4751 else
4752 c = coding->default_char;
4753 EMIT_ONE_BYTE (c);
4754 }
4755 }
4756 }
4757
4758 coding->result = CODING_RESULT_SUCCESS;
4759 coding->produced_char += produced_chars;
4760 coding->produced = dst - coding->destination;
4761 return 0;
4762 }
4763
4764 \f
4765 /*** 7. C library functions ***/
4766
4767 /* Setup coding context CODING from information about CODING_SYSTEM.
4768 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4769 CODING_SYSTEM is invalid, signal an error. */
4770
4771 void
4772 setup_coding_system (coding_system, coding)
4773 Lisp_Object coding_system;
4774 struct coding_system *coding;
4775 {
4776 Lisp_Object attrs;
4777 Lisp_Object eol_type;
4778 Lisp_Object coding_type;
4779 Lisp_Object val;
4780
4781 if (NILP (coding_system))
4782 coding_system = Qno_conversion;
4783
4784 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4785
4786 attrs = CODING_ID_ATTRS (coding->id);
4787 eol_type = CODING_ID_EOL_TYPE (coding->id);
4788
4789 coding->mode = 0;
4790 coding->head_ascii = -1;
4791 coding->common_flags
4792 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4793
4794 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4795 coding->max_charset_id = XSTRING (val)->size - 1;
4796 coding->safe_charsets = (char *) XSTRING (val)->data;
4797 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4798
4799 coding_type = CODING_ATTR_TYPE (attrs);
4800 if (EQ (coding_type, Qundecided))
4801 {
4802 coding->detector = NULL;
4803 coding->decoder = decode_coding_raw_text;
4804 coding->encoder = encode_coding_raw_text;
4805 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4806 }
4807 else if (EQ (coding_type, Qiso_2022))
4808 {
4809 int i;
4810 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4811 enum coding_category category = XINT (CODING_ATTR_CATEGORY (attrs));
4812
4813 /* Invoke graphic register 0 to plane 0. */
4814 CODING_ISO_INVOCATION (coding, 0) = 0;
4815 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4816 CODING_ISO_INVOCATION (coding, 1)
4817 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4818 /* Setup the initial status of designation. */
4819 for (i = 0; i < 4; i++)
4820 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4821 /* Not single shifting initially. */
4822 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4823 /* Beginning of buffer should also be regarded as bol. */
4824 CODING_ISO_BOL (coding) = 1;
4825 coding->detector = detect_coding_iso_2022;
4826 coding->decoder = decode_coding_iso_2022;
4827 coding->encoder = encode_coding_iso_2022;
4828 if (flags & CODING_ISO_FLAG_SAFE)
4829 coding->mode |= CODING_MODE_SAFE_ENCODING;
4830 coding->common_flags
4831 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4832 | CODING_REQUIRE_FLUSHING_MASK);
4833 if (flags & CODING_ISO_FLAG_COMPOSITION)
4834 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4835 if (flags & CODING_ISO_FLAG_DESIGNATION)
4836 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4837 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4838 {
4839 setup_iso_safe_charsets (attrs);
4840 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4841 coding->max_charset_id = XSTRING (val)->size - 1;
4842 coding->safe_charsets = (char *) XSTRING (val)->data;
4843 }
4844 CODING_ISO_FLAGS (coding) = flags;
4845 }
4846 else if (EQ (coding_type, Qcharset))
4847 {
4848 coding->detector = detect_coding_charset;
4849 coding->decoder = decode_coding_charset;
4850 coding->encoder = encode_coding_charset;
4851 coding->common_flags
4852 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4853 }
4854 else if (EQ (coding_type, Qutf_8))
4855 {
4856 coding->detector = detect_coding_utf_8;
4857 coding->decoder = decode_coding_utf_8;
4858 coding->encoder = encode_coding_utf_8;
4859 coding->common_flags
4860 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4861 }
4862 else if (EQ (coding_type, Qutf_16))
4863 {
4864 val = AREF (attrs, coding_attr_utf_16_bom);
4865 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4866 : EQ (val, Qt) ? utf_16_with_bom
4867 : utf_16_without_bom);
4868 val = AREF (attrs, coding_attr_utf_16_endian);
4869 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4870 : utf_16_little_endian);
4871 CODING_UTF_16_SURROGATE (coding) = 0;
4872 coding->detector = detect_coding_utf_16;
4873 coding->decoder = decode_coding_utf_16;
4874 coding->encoder = encode_coding_utf_16;
4875 coding->common_flags
4876 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4877 }
4878 else if (EQ (coding_type, Qccl))
4879 {
4880 coding->detector = detect_coding_ccl;
4881 coding->decoder = decode_coding_ccl;
4882 coding->encoder = encode_coding_ccl;
4883 coding->common_flags
4884 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4885 | CODING_REQUIRE_FLUSHING_MASK);
4886 }
4887 else if (EQ (coding_type, Qemacs_mule))
4888 {
4889 coding->detector = detect_coding_emacs_mule;
4890 coding->decoder = decode_coding_emacs_mule;
4891 coding->encoder = encode_coding_emacs_mule;
4892 coding->common_flags
4893 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4894 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4895 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4896 {
4897 Lisp_Object tail, safe_charsets;
4898 int max_charset_id = 0;
4899
4900 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4901 tail = XCDR (tail))
4902 if (max_charset_id < XFASTINT (XCAR (tail)))
4903 max_charset_id = XFASTINT (XCAR (tail));
4904 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4905 make_number (255));
4906 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4907 tail = XCDR (tail))
4908 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4909 coding->max_charset_id = max_charset_id;
4910 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4911 }
4912 }
4913 else if (EQ (coding_type, Qshift_jis))
4914 {
4915 coding->detector = detect_coding_sjis;
4916 coding->decoder = decode_coding_sjis;
4917 coding->encoder = encode_coding_sjis;
4918 coding->common_flags
4919 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4920 }
4921 else if (EQ (coding_type, Qbig5))
4922 {
4923 coding->detector = detect_coding_big5;
4924 coding->decoder = decode_coding_big5;
4925 coding->encoder = encode_coding_big5;
4926 coding->common_flags
4927 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4928 }
4929 else /* EQ (coding_type, Qraw_text) */
4930 {
4931 coding->detector = NULL;
4932 coding->decoder = decode_coding_raw_text;
4933 coding->encoder = encode_coding_raw_text;
4934 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4935 }
4936
4937 return;
4938 }
4939
4940 /* Return raw-text or one of its subsidiaries that has the same
4941 eol_type as CODING-SYSTEM. */
4942
4943 Lisp_Object
4944 raw_text_coding_system (coding_system)
4945 Lisp_Object coding_system;
4946 {
4947 Lisp_Object spec, attrs;
4948 Lisp_Object eol_type, raw_text_eol_type;
4949
4950 spec = CODING_SYSTEM_SPEC (coding_system);
4951 attrs = AREF (spec, 0);
4952
4953 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4954 return coding_system;
4955
4956 eol_type = AREF (spec, 2);
4957 if (VECTORP (eol_type))
4958 return Qraw_text;
4959 spec = CODING_SYSTEM_SPEC (Qraw_text);
4960 raw_text_eol_type = AREF (spec, 2);
4961 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4962 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4963 : AREF (raw_text_eol_type, 2));
4964 }
4965
4966
4967 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4968 does, return one of the subsidiary that has the same eol-spec as
4969 PARENT. Otherwise, return CODING_SYSTEM. */
4970
4971 Lisp_Object
4972 coding_inherit_eol_type (coding_system, parent)
4973 Lisp_Object coding_system, parent;
4974 {
4975 Lisp_Object spec, attrs, eol_type;
4976
4977 spec = CODING_SYSTEM_SPEC (coding_system);
4978 attrs = AREF (spec, 0);
4979 eol_type = AREF (spec, 2);
4980 if (VECTORP (eol_type))
4981 {
4982 Lisp_Object parent_spec;
4983 Lisp_Object parent_eol_type;
4984
4985 parent_spec
4986 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4987 parent_eol_type = AREF (parent_spec, 2);
4988 if (EQ (parent_eol_type, Qunix))
4989 coding_system = AREF (eol_type, 0);
4990 else if (EQ (parent_eol_type, Qdos))
4991 coding_system = AREF (eol_type, 1);
4992 else if (EQ (parent_eol_type, Qmac))
4993 coding_system = AREF (eol_type, 2);
4994 }
4995 return coding_system;
4996 }
4997
4998 /* Emacs has a mechanism to automatically detect a coding system if it
4999 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5000 it's impossible to distinguish some coding systems accurately
5001 because they use the same range of codes. So, at first, coding
5002 systems are categorized into 7, those are:
5003
5004 o coding-category-emacs-mule
5005
5006 The category for a coding system which has the same code range
5007 as Emacs' internal format. Assigned the coding-system (Lisp
5008 symbol) `emacs-mule' by default.
5009
5010 o coding-category-sjis
5011
5012 The category for a coding system which has the same code range
5013 as SJIS. Assigned the coding-system (Lisp
5014 symbol) `japanese-shift-jis' by default.
5015
5016 o coding-category-iso-7
5017
5018 The category for a coding system which has the same code range
5019 as ISO2022 of 7-bit environment. This doesn't use any locking
5020 shift and single shift functions. This can encode/decode all
5021 charsets. Assigned the coding-system (Lisp symbol)
5022 `iso-2022-7bit' by default.
5023
5024 o coding-category-iso-7-tight
5025
5026 Same as coding-category-iso-7 except that this can
5027 encode/decode only the specified charsets.
5028
5029 o coding-category-iso-8-1
5030
5031 The category for a coding system which has the same code range
5032 as ISO2022 of 8-bit environment and graphic plane 1 used only
5033 for DIMENSION1 charset. This doesn't use any locking shift
5034 and single shift functions. Assigned the coding-system (Lisp
5035 symbol) `iso-latin-1' by default.
5036
5037 o coding-category-iso-8-2
5038
5039 The category for a coding system which has the same code range
5040 as ISO2022 of 8-bit environment and graphic plane 1 used only
5041 for DIMENSION2 charset. This doesn't use any locking shift
5042 and single shift functions. Assigned the coding-system (Lisp
5043 symbol) `japanese-iso-8bit' by default.
5044
5045 o coding-category-iso-7-else
5046
5047 The category for a coding system which has the same code range
5048 as ISO2022 of 7-bit environemnt but uses locking shift or
5049 single shift functions. Assigned the coding-system (Lisp
5050 symbol) `iso-2022-7bit-lock' by default.
5051
5052 o coding-category-iso-8-else
5053
5054 The category for a coding system which has the same code range
5055 as ISO2022 of 8-bit environemnt but uses locking shift or
5056 single shift functions. Assigned the coding-system (Lisp
5057 symbol) `iso-2022-8bit-ss2' by default.
5058
5059 o coding-category-big5
5060
5061 The category for a coding system which has the same code range
5062 as BIG5. Assigned the coding-system (Lisp symbol)
5063 `cn-big5' by default.
5064
5065 o coding-category-utf-8
5066
5067 The category for a coding system which has the same code range
5068 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5069 symbol) `utf-8' by default.
5070
5071 o coding-category-utf-16-be
5072
5073 The category for a coding system in which a text has an
5074 Unicode signature (cf. Unicode Standard) in the order of BIG
5075 endian at the head. Assigned the coding-system (Lisp symbol)
5076 `utf-16-be' by default.
5077
5078 o coding-category-utf-16-le
5079
5080 The category for a coding system in which a text has an
5081 Unicode signature (cf. Unicode Standard) in the order of
5082 LITTLE endian at the head. Assigned the coding-system (Lisp
5083 symbol) `utf-16-le' by default.
5084
5085 o coding-category-ccl
5086
5087 The category for a coding system of which encoder/decoder is
5088 written in CCL programs. The default value is nil, i.e., no
5089 coding system is assigned.
5090
5091 o coding-category-binary
5092
5093 The category for a coding system not categorized in any of the
5094 above. Assigned the coding-system (Lisp symbol)
5095 `no-conversion' by default.
5096
5097 Each of them is a Lisp symbol and the value is an actual
5098 `coding-system's (this is also a Lisp symbol) assigned by a user.
5099 What Emacs does actually is to detect a category of coding system.
5100 Then, it uses a `coding-system' assigned to it. If Emacs can't
5101 decide only one possible category, it selects a category of the
5102 highest priority. Priorities of categories are also specified by a
5103 user in a Lisp variable `coding-category-list'.
5104
5105 */
5106
5107 #define EOL_SEEN_NONE 0
5108 #define EOL_SEEN_LF 1
5109 #define EOL_SEEN_CR 2
5110 #define EOL_SEEN_CRLF 4
5111
5112 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5113 SOURCE is encoded. If CATEGORY is one of
5114 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5115 two-byte, else they are encoded by one-byte.
5116
5117 Return one of EOL_SEEN_XXX. */
5118
5119 #define MAX_EOL_CHECK_COUNT 3
5120
5121 static int
5122 detect_eol (source, src_bytes, category)
5123 unsigned char *source;
5124 EMACS_INT src_bytes;
5125 enum coding_category category;
5126 {
5127 unsigned char *src = source, *src_end = src + src_bytes;
5128 unsigned char c;
5129 int total = 0;
5130 int eol_seen = EOL_SEEN_NONE;
5131
5132 if ((1 << category) & CATEGORY_MASK_UTF_16)
5133 {
5134 int msb, lsb;
5135
5136 msb = category == (coding_category_utf_16_le
5137 | coding_category_utf_16_le_nosig);
5138 lsb = 1 - msb;
5139
5140 while (src + 1 < src_end)
5141 {
5142 c = src[lsb];
5143 if (src[msb] == 0 && (c == '\n' || c == '\r'))
5144 {
5145 int this_eol;
5146
5147 if (c == '\n')
5148 this_eol = EOL_SEEN_LF;
5149 else if (src + 3 >= src_end
5150 || src[msb + 2] != 0
5151 || src[lsb + 2] != '\n')
5152 this_eol = EOL_SEEN_CR;
5153 else
5154 this_eol = EOL_SEEN_CRLF;
5155
5156 if (eol_seen == EOL_SEEN_NONE)
5157 /* This is the first end-of-line. */
5158 eol_seen = this_eol;
5159 else if (eol_seen != this_eol)
5160 {
5161 /* The found type is different from what found before. */
5162 eol_seen = EOL_SEEN_LF;
5163 break;
5164 }
5165 if (++total == MAX_EOL_CHECK_COUNT)
5166 break;
5167 }
5168 src += 2;
5169 }
5170 }
5171 else
5172 {
5173 while (src < src_end)
5174 {
5175 c = *src++;
5176 if (c == '\n' || c == '\r')
5177 {
5178 int this_eol;
5179
5180 if (c == '\n')
5181 this_eol = EOL_SEEN_LF;
5182 else if (src >= src_end || *src != '\n')
5183 this_eol = EOL_SEEN_CR;
5184 else
5185 this_eol = EOL_SEEN_CRLF, src++;
5186
5187 if (eol_seen == EOL_SEEN_NONE)
5188 /* This is the first end-of-line. */
5189 eol_seen = this_eol;
5190 else if (eol_seen != this_eol)
5191 {
5192 /* The found type is different from what found before. */
5193 eol_seen = EOL_SEEN_LF;
5194 break;
5195 }
5196 if (++total == MAX_EOL_CHECK_COUNT)
5197 break;
5198 }
5199 }
5200 }
5201 return eol_seen;
5202 }
5203
5204
5205 static void
5206 adjust_coding_eol_type (coding, eol_seen)
5207 struct coding_system *coding;
5208 int eol_seen;
5209 {
5210 Lisp_Object eol_type;
5211
5212 eol_type = CODING_ID_EOL_TYPE (coding->id);
5213 if (eol_seen & EOL_SEEN_LF)
5214 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5215 else if (eol_seen & EOL_SEEN_CRLF)
5216 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5217 else if (eol_seen & EOL_SEEN_CR)
5218 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5219 }
5220
5221 /* Detect how a text specified in CODING is encoded. If a coding
5222 system is detected, update fields of CODING by the detected coding
5223 system. */
5224
5225 void
5226 detect_coding (coding)
5227 struct coding_system *coding;
5228 {
5229 unsigned char *src, *src_end;
5230 Lisp_Object attrs, coding_type;
5231
5232 coding->consumed = coding->consumed_char = 0;
5233 coding->produced = coding->produced_char = 0;
5234 coding_set_source (coding);
5235
5236 src_end = coding->source + coding->src_bytes;
5237
5238 /* If we have not yet decided the text encoding type, detect it
5239 now. */
5240 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5241 {
5242 int c, i;
5243
5244 for (src = coding->source; src < src_end; src++)
5245 {
5246 c = *src;
5247 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5248 || c == ISO_CODE_SI
5249 || c == ISO_CODE_SO)))
5250 break;
5251 }
5252 coding->head_ascii = src - (coding->source + coding->consumed);
5253
5254 if (coding->head_ascii < coding->src_bytes)
5255 {
5256 struct coding_detection_info detect_info;
5257 enum coding_category category;
5258 struct coding_system *this;
5259
5260 detect_info.checked = detect_info.found = detect_info.rejected = 0;
5261 for (i = 0; i < coding_category_raw_text; i++)
5262 {
5263 category = coding_priorities[i];
5264 this = coding_categories + category;
5265 if (this->id < 0)
5266 {
5267 /* No coding system of this category is defined. */
5268 detect_info.rejected |= (1 << category);
5269 }
5270 else if (category >= coding_category_raw_text)
5271 continue;
5272 else if (detect_info.checked & (1 << category))
5273 {
5274 if (detect_info.found & (1 << category))
5275 break;
5276 }
5277 else if ((*(this->detector)) (coding, &detect_info)
5278 && detect_info.found & (1 << category))
5279 break;
5280 }
5281 if (i < coding_category_raw_text)
5282 setup_coding_system (CODING_ID_NAME (this->id), coding);
5283 else if (detect_info.rejected == CATEGORY_MASK_ANY)
5284 setup_coding_system (Qraw_text, coding);
5285 else if (detect_info.rejected)
5286 for (i = 0; i < coding_category_raw_text; i++)
5287 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5288 {
5289 this = coding_categories + coding_priorities[i];
5290 setup_coding_system (CODING_ID_NAME (this->id), coding);
5291 break;
5292 }
5293 }
5294 }
5295
5296 attrs = CODING_ID_ATTRS (coding->id);
5297 coding_type = CODING_ATTR_TYPE (attrs);
5298
5299 /* If we have not yet decided the EOL type, detect it now. But, the
5300 detection is impossible for a CCL based coding system, in which
5301 case, we detct the EOL type after decoding. */
5302 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5303 && ! EQ (coding_type, Qccl))
5304 {
5305 int eol_seen = detect_eol (coding->source, coding->src_bytes,
5306 XINT (CODING_ATTR_CATEGORY (attrs)));
5307
5308 if (eol_seen != EOL_SEEN_NONE)
5309 adjust_coding_eol_type (coding, eol_seen);
5310 }
5311 }
5312
5313
5314 static void
5315 decode_eol (coding)
5316 struct coding_system *coding;
5317 {
5318 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5319 {
5320 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5321 unsigned char *pend = p + coding->produced;
5322 int eol_seen = EOL_SEEN_NONE;
5323
5324 for (; p < pend; p++)
5325 {
5326 if (*p == '\n')
5327 eol_seen |= EOL_SEEN_LF;
5328 else if (*p == '\r')
5329 {
5330 if (p + 1 < pend && *(p + 1) == '\n')
5331 {
5332 eol_seen |= EOL_SEEN_CRLF;
5333 p++;
5334 }
5335 else
5336 eol_seen |= EOL_SEEN_CR;
5337 }
5338 }
5339 if (eol_seen != EOL_SEEN_NONE)
5340 adjust_coding_eol_type (coding, eol_seen);
5341 }
5342
5343 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5344 {
5345 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5346 unsigned char *pend = p + coding->produced;
5347
5348 for (; p < pend; p++)
5349 if (*p == '\r')
5350 *p = '\n';
5351 }
5352 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5353 {
5354 unsigned char *p, *pbeg, *pend;
5355 Lisp_Object undo_list;
5356
5357 move_gap_both (coding->dst_pos + coding->produced_char,
5358 coding->dst_pos_byte + coding->produced);
5359 undo_list = current_buffer->undo_list;
5360 current_buffer->undo_list = Qt;
5361 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5362 current_buffer->undo_list = undo_list;
5363 pbeg = GPT_ADDR;
5364 pend = pbeg + coding->produced;
5365
5366 for (p = pend - 1; p >= pbeg; p--)
5367 if (*p == '\r')
5368 {
5369 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5370 pend--;
5371 }
5372 coding->produced_char -= coding->produced - (pend - pbeg);
5373 coding->produced = pend - pbeg;
5374 insert_from_gap (coding->produced_char, coding->produced);
5375 }
5376 }
5377
5378 static void
5379 translate_chars (coding, table)
5380 struct coding_system *coding;
5381 Lisp_Object table;
5382 {
5383 int *charbuf = coding->charbuf;
5384 int *charbuf_end = charbuf + coding->charbuf_used;
5385 int c;
5386
5387 if (coding->chars_at_source)
5388 return;
5389
5390 while (charbuf < charbuf_end)
5391 {
5392 c = *charbuf;
5393 if (c < 0)
5394 charbuf += c;
5395 else
5396 *charbuf++ = translate_char (table, c);
5397 }
5398 }
5399
5400 static int
5401 produce_chars (coding)
5402 struct coding_system *coding;
5403 {
5404 unsigned char *dst = coding->destination + coding->produced;
5405 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5406 int produced;
5407 int produced_chars = 0;
5408
5409 if (! coding->chars_at_source)
5410 {
5411 /* Characters are in coding->charbuf. */
5412 int *buf = coding->charbuf;
5413 int *buf_end = buf + coding->charbuf_used;
5414 unsigned char *adjusted_dst_end;
5415
5416 if (BUFFERP (coding->src_object)
5417 && EQ (coding->src_object, coding->dst_object))
5418 dst_end = coding->source + coding->consumed;
5419 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5420
5421 while (buf < buf_end)
5422 {
5423 int c = *buf++;
5424
5425 if (dst >= adjusted_dst_end)
5426 {
5427 dst = alloc_destination (coding,
5428 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5429 dst);
5430 dst_end = coding->destination + coding->dst_bytes;
5431 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5432 }
5433 if (c >= 0)
5434 {
5435 if (coding->dst_multibyte
5436 || ! CHAR_BYTE8_P (c))
5437 CHAR_STRING_ADVANCE (c, dst);
5438 else
5439 *dst++ = CHAR_TO_BYTE8 (c);
5440 produced_chars++;
5441 }
5442 else
5443 /* This is an annotation datum. */
5444 buf -= c + 1;
5445 }
5446 }
5447 else
5448 {
5449 unsigned char *src = coding->source;
5450 unsigned char *src_end = src + coding->src_bytes;
5451 Lisp_Object eol_type;
5452
5453 eol_type = CODING_ID_EOL_TYPE (coding->id);
5454
5455 if (coding->src_multibyte != coding->dst_multibyte)
5456 {
5457 if (coding->src_multibyte)
5458 {
5459 int multibytep = 1;
5460 int consumed_chars;
5461
5462 while (1)
5463 {
5464 unsigned char *src_base = src;
5465 int c;
5466
5467 ONE_MORE_BYTE (c);
5468 if (c == '\r')
5469 {
5470 if (EQ (eol_type, Qdos))
5471 {
5472 if (src == src_end)
5473 {
5474 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5475 goto no_more_source;
5476 }
5477 if (*src == '\n')
5478 c = *src++;
5479 }
5480 else if (EQ (eol_type, Qmac))
5481 c = '\n';
5482 }
5483 if (dst == dst_end)
5484 {
5485 coding->consumed = src - coding->source;
5486
5487 if (EQ (coding->src_object, coding->dst_object))
5488 dst_end = src;
5489 if (dst == dst_end)
5490 {
5491 dst = alloc_destination (coding, src_end - src + 1,
5492 dst);
5493 dst_end = coding->destination + coding->dst_bytes;
5494 coding_set_source (coding);
5495 src = coding->source + coding->consumed;
5496 src_end = coding->source + coding->src_bytes;
5497 }
5498 }
5499 *dst++ = c;
5500 produced_chars++;
5501 }
5502 no_more_source:
5503 ;
5504 }
5505 else
5506 while (src < src_end)
5507 {
5508 int multibytep = 1;
5509 int c = *src++;
5510
5511 if (c == '\r')
5512 {
5513 if (EQ (eol_type, Qdos))
5514 {
5515 if (src < src_end
5516 && *src == '\n')
5517 c = *src++;
5518 }
5519 else if (EQ (eol_type, Qmac))
5520 c = '\n';
5521 }
5522 if (dst >= dst_end - 1)
5523 {
5524 coding->consumed = src - coding->source;
5525
5526 if (EQ (coding->src_object, coding->dst_object))
5527 dst_end = src;
5528 if (dst >= dst_end - 1)
5529 {
5530 dst = alloc_destination (coding, src_end - src + 2,
5531 dst);
5532 dst_end = coding->destination + coding->dst_bytes;
5533 coding_set_source (coding);
5534 src = coding->source + coding->consumed;
5535 src_end = coding->source + coding->src_bytes;
5536 }
5537 }
5538 EMIT_ONE_BYTE (c);
5539 }
5540 }
5541 else
5542 {
5543 if (!EQ (coding->src_object, coding->dst_object))
5544 {
5545 int require = coding->src_bytes - coding->dst_bytes;
5546
5547 if (require > 0)
5548 {
5549 EMACS_INT offset = src - coding->source;
5550
5551 dst = alloc_destination (coding, require, dst);
5552 coding_set_source (coding);
5553 src = coding->source + offset;
5554 src_end = coding->source + coding->src_bytes;
5555 }
5556 }
5557 produced_chars = coding->src_chars;
5558 while (src < src_end)
5559 {
5560 int c = *src++;
5561
5562 if (c == '\r')
5563 {
5564 if (EQ (eol_type, Qdos))
5565 {
5566 if (src < src_end
5567 && *src == '\n')
5568 c = *src++;
5569 produced_chars--;
5570 }
5571 else if (EQ (eol_type, Qmac))
5572 c = '\n';
5573 }
5574 *dst++ = c;
5575 }
5576 }
5577 coding->consumed = coding->src_bytes;
5578 coding->consumed_char = coding->src_chars;
5579 }
5580
5581 produced = dst - (coding->destination + coding->produced);
5582 if (BUFFERP (coding->dst_object))
5583 insert_from_gap (produced_chars, produced);
5584 coding->produced += produced;
5585 coding->produced_char += produced_chars;
5586 return produced_chars;
5587 }
5588
5589 /* Compose text in CODING->object according to the annotation data at
5590 CHARBUF. CHARBUF is an array:
5591 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5592 */
5593
5594 static INLINE void
5595 produce_composition (coding, charbuf)
5596 struct coding_system *coding;
5597 int *charbuf;
5598 {
5599 int len;
5600 EMACS_INT from, to;
5601 enum composition_method method;
5602 Lisp_Object components;
5603
5604 len = -charbuf[0];
5605 from = coding->dst_pos + charbuf[2];
5606 to = coding->dst_pos + charbuf[3];
5607 method = (enum composition_method) (charbuf[4]);
5608
5609 if (method == COMPOSITION_RELATIVE)
5610 components = Qnil;
5611 else
5612 {
5613 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5614 int i;
5615
5616 len -= 5;
5617 charbuf += 5;
5618 for (i = 0; i < len; i++)
5619 args[i] = make_number (charbuf[i]);
5620 components = (method == COMPOSITION_WITH_ALTCHARS
5621 ? Fstring (len, args) : Fvector (len, args));
5622 }
5623 compose_text (from, to, components, Qnil, coding->dst_object);
5624 }
5625
5626
5627 /* Put `charset' property on text in CODING->object according to
5628 the annotation data at CHARBUF. CHARBUF is an array:
5629 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5630 */
5631
5632 static INLINE void
5633 produce_charset (coding, charbuf)
5634 struct coding_system *coding;
5635 int *charbuf;
5636 {
5637 EMACS_INT from = coding->dst_pos + charbuf[2];
5638 EMACS_INT to = coding->dst_pos + charbuf[3];
5639 struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5640
5641 Fput_text_property (make_number (from), make_number (to),
5642 Qcharset, CHARSET_NAME (charset),
5643 coding->dst_object);
5644 }
5645
5646
5647 #define CHARBUF_SIZE 0x4000
5648
5649 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5650 do { \
5651 int size = CHARBUF_SIZE;; \
5652 \
5653 coding->charbuf = NULL; \
5654 while (size > 1024) \
5655 { \
5656 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5657 if (coding->charbuf) \
5658 break; \
5659 size >>= 1; \
5660 } \
5661 if (! coding->charbuf) \
5662 { \
5663 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5664 return coding->result; \
5665 } \
5666 coding->charbuf_size = size; \
5667 } while (0)
5668
5669
5670 static void
5671 produce_annotation (coding)
5672 struct coding_system *coding;
5673 {
5674 int *charbuf = coding->charbuf;
5675 int *charbuf_end = charbuf + coding->charbuf_used;
5676
5677 if (NILP (coding->dst_object))
5678 return;
5679
5680 while (charbuf < charbuf_end)
5681 {
5682 if (*charbuf >= 0)
5683 charbuf++;
5684 else
5685 {
5686 int len = -*charbuf;
5687 switch (charbuf[1])
5688 {
5689 case CODING_ANNOTATE_COMPOSITION_MASK:
5690 produce_composition (coding, charbuf);
5691 break;
5692 case CODING_ANNOTATE_CHARSET_MASK:
5693 produce_charset (coding, charbuf);
5694 break;
5695 default:
5696 abort ();
5697 }
5698 charbuf += len;
5699 }
5700 }
5701 }
5702
5703 /* Decode the data at CODING->src_object into CODING->dst_object.
5704 CODING->src_object is a buffer, a string, or nil.
5705 CODING->dst_object is a buffer.
5706
5707 If CODING->src_object is a buffer, it must be the current buffer.
5708 In this case, if CODING->src_pos is positive, it is a position of
5709 the source text in the buffer, otherwise, the source text is in the
5710 gap area of the buffer, and CODING->src_pos specifies the offset of
5711 the text from GPT (which must be the same as PT). If this is the
5712 same buffer as CODING->dst_object, CODING->src_pos must be
5713 negative.
5714
5715 If CODING->src_object is a string, CODING->src_pos in an index to
5716 that string.
5717
5718 If CODING->src_object is nil, CODING->source must already point to
5719 the non-relocatable memory area. In this case, CODING->src_pos is
5720 an offset from CODING->source.
5721
5722 The decoded data is inserted at the current point of the buffer
5723 CODING->dst_object.
5724 */
5725
5726 static int
5727 decode_coding (coding)
5728 struct coding_system *coding;
5729 {
5730 Lisp_Object attrs;
5731
5732 if (BUFFERP (coding->src_object)
5733 && coding->src_pos > 0
5734 && coding->src_pos < GPT
5735 && coding->src_pos + coding->src_chars > GPT)
5736 move_gap_both (coding->src_pos, coding->src_pos_byte);
5737
5738 if (BUFFERP (coding->dst_object))
5739 {
5740 if (current_buffer != XBUFFER (coding->dst_object))
5741 set_buffer_internal (XBUFFER (coding->dst_object));
5742 if (GPT != PT)
5743 move_gap_both (PT, PT_BYTE);
5744 }
5745
5746 coding->consumed = coding->consumed_char = 0;
5747 coding->produced = coding->produced_char = 0;
5748 coding->chars_at_source = 0;
5749 coding->result = CODING_RESULT_SUCCESS;
5750 coding->errors = 0;
5751
5752 ALLOC_CONVERSION_WORK_AREA (coding);
5753
5754 attrs = CODING_ID_ATTRS (coding->id);
5755
5756 do
5757 {
5758 coding_set_source (coding);
5759 coding->annotated = 0;
5760 (*(coding->decoder)) (coding);
5761 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5762 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5763 else if (!NILP (Vstandard_translation_table_for_decode))
5764 translate_chars (coding, Vstandard_translation_table_for_decode);
5765 coding_set_destination (coding);
5766 produce_chars (coding);
5767 if (coding->annotated)
5768 produce_annotation (coding);
5769 }
5770 while (coding->consumed < coding->src_bytes
5771 && ! coding->result);
5772
5773 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5774 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5775 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5776 decode_eol (coding);
5777
5778 coding->carryover_bytes = 0;
5779 if (coding->consumed < coding->src_bytes)
5780 {
5781 int nbytes = coding->src_bytes - coding->consumed;
5782 unsigned char *src;
5783
5784 coding_set_source (coding);
5785 coding_set_destination (coding);
5786 src = coding->source + coding->consumed;
5787
5788 if (coding->mode & CODING_MODE_LAST_BLOCK)
5789 {
5790 /* Flush out unprocessed data as binary chars. We are sure
5791 that the number of data is less than the size of
5792 coding->charbuf. */
5793 while (nbytes-- > 0)
5794 {
5795 int c = *src++;
5796
5797 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5798 }
5799 produce_chars (coding);
5800 }
5801 else
5802 {
5803 /* Record unprocessed bytes in coding->carryover. We are
5804 sure that the number of data is less than the size of
5805 coding->carryover. */
5806 unsigned char *p = coding->carryover;
5807
5808 coding->carryover_bytes = nbytes;
5809 while (nbytes-- > 0)
5810 *p++ = *src++;
5811 }
5812 coding->consumed = coding->src_bytes;
5813 }
5814
5815 return coding->result;
5816 }
5817
5818
5819 /* Extract an annotation datum from a composition starting at POS and
5820 ending before LIMIT of CODING->src_object (buffer or string), store
5821 the data in BUF, set *STOP to a starting position of the next
5822 composition (if any) or to LIMIT, and return the address of the
5823 next element of BUF.
5824
5825 If such an annotation is not found, set *STOP to a starting
5826 position of a composition after POS (if any) or to LIMIT, and
5827 return BUF. */
5828
5829 static INLINE int *
5830 handle_composition_annotation (pos, limit, coding, buf, stop)
5831 EMACS_INT pos, limit;
5832 struct coding_system *coding;
5833 int *buf;
5834 EMACS_INT *stop;
5835 {
5836 EMACS_INT start, end;
5837 Lisp_Object prop;
5838
5839 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5840 || end > limit)
5841 *stop = limit;
5842 else if (start > pos)
5843 *stop = start;
5844 else
5845 {
5846 if (start == pos)
5847 {
5848 /* We found a composition. Store the corresponding
5849 annotation data in BUF. */
5850 int *head = buf;
5851 enum composition_method method = COMPOSITION_METHOD (prop);
5852 int nchars = COMPOSITION_LENGTH (prop);
5853
5854 ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5855 if (method != COMPOSITION_RELATIVE)
5856 {
5857 Lisp_Object components;
5858 int len, i, i_byte;
5859
5860 components = COMPOSITION_COMPONENTS (prop);
5861 if (VECTORP (components))
5862 {
5863 len = XVECTOR (components)->size;
5864 for (i = 0; i < len; i++)
5865 *buf++ = XINT (AREF (components, i));
5866 }
5867 else if (STRINGP (components))
5868 {
5869 len = XSTRING (components)->size;
5870 i = i_byte = 0;
5871 while (i < len)
5872 {
5873 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5874 buf++;
5875 }
5876 }
5877 else if (INTEGERP (components))
5878 {
5879 len = 1;
5880 *buf++ = XINT (components);
5881 }
5882 else if (CONSP (components))
5883 {
5884 for (len = 0; CONSP (components);
5885 len++, components = XCDR (components))
5886 *buf++ = XINT (XCAR (components));
5887 }
5888 else
5889 abort ();
5890 *head -= len;
5891 }
5892 }
5893
5894 if (find_composition (end, limit, &start, &end, &prop,
5895 coding->src_object)
5896 && end <= limit)
5897 *stop = start;
5898 else
5899 *stop = limit;
5900 }
5901 return buf;
5902 }
5903
5904
5905 /* Extract an annotation datum from a text property `charset' at POS of
5906 CODING->src_object (buffer of string), store the data in BUF, set
5907 *STOP to the position where the value of `charset' property changes
5908 (limiting by LIMIT), and return the address of the next element of
5909 BUF.
5910
5911 If the property value is nil, set *STOP to the position where the
5912 property value is non-nil (limiting by LIMIT), and return BUF. */
5913
5914 static INLINE int *
5915 handle_charset_annotation (pos, limit, coding, buf, stop)
5916 EMACS_INT pos, limit;
5917 struct coding_system *coding;
5918 int *buf;
5919 EMACS_INT *stop;
5920 {
5921 Lisp_Object val, next;
5922 int id;
5923
5924 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5925 if (! NILP (val) && CHARSETP (val))
5926 id = XINT (CHARSET_SYMBOL_ID (val));
5927 else
5928 id = -1;
5929 ADD_CHARSET_DATA (buf, 0, 0, id);
5930 next = Fnext_single_property_change (make_number (pos), Qcharset,
5931 coding->src_object,
5932 make_number (limit));
5933 *stop = XINT (next);
5934 return buf;
5935 }
5936
5937
5938 static void
5939 consume_chars (coding)
5940 struct coding_system *coding;
5941 {
5942 int *buf = coding->charbuf;
5943 int *buf_end = coding->charbuf + coding->charbuf_size;
5944 const unsigned char *src = coding->source + coding->consumed;
5945 EMACS_INT pos = coding->src_pos + coding->consumed_char;
5946 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
5947 int multibytep = coding->src_multibyte;
5948 Lisp_Object eol_type;
5949 int c;
5950 EMACS_INT stop, stop_composition, stop_charset;
5951 int id;
5952
5953 eol_type = CODING_ID_EOL_TYPE (coding->id);
5954 if (VECTORP (eol_type))
5955 eol_type = Qunix;
5956
5957 /* Note: composition handling is not yet implemented. */
5958 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5959
5960 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
5961 stop = stop_composition = pos;
5962 else
5963 stop = stop_composition = end_pos;
5964 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
5965 stop = stop_charset = pos;
5966 else
5967 stop_charset = end_pos;
5968
5969 /* Compensate for CRLF and annotation. */
5970 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
5971 while (buf < buf_end)
5972 {
5973 if (pos == stop)
5974 {
5975 int *p;
5976
5977 if (pos == end_pos)
5978 break;
5979 if (pos == stop_composition)
5980 buf = handle_composition_annotation (pos, end_pos, coding,
5981 buf, &stop_composition);
5982 if (pos == stop_charset)
5983 buf = handle_charset_annotation (pos, end_pos, coding,
5984 buf, &stop_charset);
5985 stop = (stop_composition < stop_charset
5986 ? stop_composition : stop_charset);
5987 }
5988
5989 if (! multibytep)
5990 c = *src++;
5991 else
5992 c = STRING_CHAR_ADVANCE (src);
5993 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5994 c = '\n';
5995 if (! EQ (eol_type, Qunix))
5996 {
5997 if (c == '\n')
5998 {
5999 if (EQ (eol_type, Qdos))
6000 *buf++ = '\r';
6001 else
6002 c = '\r';
6003 }
6004 }
6005 *buf++ = c;
6006 pos++;
6007 }
6008
6009 coding->consumed = src - coding->source;
6010 coding->consumed_char = pos - coding->src_pos;
6011 coding->charbuf_used = buf - coding->charbuf;
6012 coding->chars_at_source = 0;
6013 }
6014
6015
6016 /* Encode the text at CODING->src_object into CODING->dst_object.
6017 CODING->src_object is a buffer or a string.
6018 CODING->dst_object is a buffer or nil.
6019
6020 If CODING->src_object is a buffer, it must be the current buffer.
6021 In this case, if CODING->src_pos is positive, it is a position of
6022 the source text in the buffer, otherwise. the source text is in the
6023 gap area of the buffer, and coding->src_pos specifies the offset of
6024 the text from GPT (which must be the same as PT). If this is the
6025 same buffer as CODING->dst_object, CODING->src_pos must be
6026 negative and CODING should not have `pre-write-conversion'.
6027
6028 If CODING->src_object is a string, CODING should not have
6029 `pre-write-conversion'.
6030
6031 If CODING->dst_object is a buffer, the encoded data is inserted at
6032 the current point of that buffer.
6033
6034 If CODING->dst_object is nil, the encoded data is placed at the
6035 memory area specified by CODING->destination. */
6036
6037 static int
6038 encode_coding (coding)
6039 struct coding_system *coding;
6040 {
6041 Lisp_Object attrs;
6042
6043 attrs = CODING_ID_ATTRS (coding->id);
6044
6045 if (BUFFERP (coding->dst_object))
6046 {
6047 set_buffer_internal (XBUFFER (coding->dst_object));
6048 coding->dst_multibyte
6049 = ! NILP (current_buffer->enable_multibyte_characters);
6050 }
6051
6052 coding->consumed = coding->consumed_char = 0;
6053 coding->produced = coding->produced_char = 0;
6054 coding->result = CODING_RESULT_SUCCESS;
6055 coding->errors = 0;
6056
6057 ALLOC_CONVERSION_WORK_AREA (coding);
6058
6059 do {
6060 coding_set_source (coding);
6061 consume_chars (coding);
6062
6063 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
6064 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6065 else if (!NILP (Vstandard_translation_table_for_encode))
6066 translate_chars (coding, Vstandard_translation_table_for_encode);
6067
6068 coding_set_destination (coding);
6069 (*(coding->encoder)) (coding);
6070 } while (coding->consumed_char < coding->src_chars);
6071
6072 if (BUFFERP (coding->dst_object))
6073 insert_from_gap (coding->produced_char, coding->produced);
6074
6075 return (coding->result);
6076 }
6077
6078 /* Work buffer */
6079
6080 /* List of currently used working buffer. */
6081 Lisp_Object Vcode_conversion_work_buf_list;
6082
6083 /* A working buffer used by the top level conversion. */
6084 Lisp_Object Vcode_conversion_reused_work_buf;
6085
6086
6087 /* Return a working buffer that can be freely used by the following
6088 code conversion. MULTIBYTEP specifies the multibyteness of the
6089 buffer. */
6090
6091 Lisp_Object
6092 make_conversion_work_buffer (multibytep)
6093 int multibytep;
6094 {
6095 struct buffer *current = current_buffer;
6096 Lisp_Object buf;
6097
6098 if (NILP (Vcode_conversion_work_buf_list))
6099 {
6100 if (NILP (Vcode_conversion_reused_work_buf))
6101 Vcode_conversion_reused_work_buf
6102 = Fget_buffer_create (build_string (" *code-conversion-work*"));
6103 Vcode_conversion_work_buf_list
6104 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
6105 }
6106 else
6107 {
6108 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6109 char str[128];
6110
6111 sprintf (str, " *code-conversion-work*<%d>", depth);
6112 Vcode_conversion_work_buf_list
6113 = Fcons (Fget_buffer_create (build_string (str)),
6114 Vcode_conversion_work_buf_list);
6115 }
6116
6117 buf = XCAR (Vcode_conversion_work_buf_list);
6118 set_buffer_internal (XBUFFER (buf));
6119 current_buffer->undo_list = Qt;
6120 Ferase_buffer ();
6121 Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
6122 set_buffer_internal (current);
6123 return buf;
6124 }
6125
6126 static struct coding_system *saved_coding;
6127
6128 Lisp_Object
6129 code_conversion_restore (info)
6130 Lisp_Object info;
6131 {
6132 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6133 Lisp_Object buf;
6134
6135 if (depth > 0)
6136 {
6137 buf = XCAR (Vcode_conversion_work_buf_list);
6138 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6139 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
6140 Fkill_buffer (buf);
6141 }
6142
6143 if (EQ (saved_coding->dst_object, Qt)
6144 && saved_coding->destination)
6145 xfree (saved_coding->destination);
6146
6147 return save_excursion_restore (info);
6148 }
6149
6150
6151 int
6152 decode_coding_gap (coding, chars, bytes)
6153 struct coding_system *coding;
6154 EMACS_INT chars, bytes;
6155 {
6156 int count = specpdl_ptr - specpdl;
6157
6158 saved_coding = coding;
6159 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6160
6161 coding->src_object = Fcurrent_buffer ();
6162 coding->src_chars = chars;
6163 coding->src_bytes = bytes;
6164 coding->src_pos = -chars;
6165 coding->src_pos_byte = -bytes;
6166 coding->src_multibyte = chars < bytes;
6167 coding->dst_object = coding->src_object;
6168 coding->dst_pos = PT;
6169 coding->dst_pos_byte = PT_BYTE;
6170 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6171 coding->mode |= CODING_MODE_LAST_BLOCK;
6172
6173 if (CODING_REQUIRE_DETECTION (coding))
6174 detect_coding (coding);
6175
6176 decode_coding (coding);
6177
6178 unbind_to (count, Qnil);
6179 return coding->result;
6180 }
6181
6182 int
6183 encode_coding_gap (coding, chars, bytes)
6184 struct coding_system *coding;
6185 EMACS_INT chars, bytes;
6186 {
6187 int count = specpdl_ptr - specpdl;
6188 Lisp_Object buffer;
6189
6190 saved_coding = coding;
6191 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6192
6193 buffer = Fcurrent_buffer ();
6194 coding->src_object = buffer;
6195 coding->src_chars = chars;
6196 coding->src_bytes = bytes;
6197 coding->src_pos = -chars;
6198 coding->src_pos_byte = -bytes;
6199 coding->src_multibyte = chars < bytes;
6200 coding->dst_object = coding->src_object;
6201 coding->dst_pos = PT;
6202 coding->dst_pos_byte = PT_BYTE;
6203
6204 encode_coding (coding);
6205
6206 unbind_to (count, Qnil);
6207 return coding->result;
6208 }
6209
6210
6211 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6212 SRC_OBJECT into DST_OBJECT by coding context CODING.
6213
6214 SRC_OBJECT is a buffer, a string, or Qnil.
6215
6216 If it is a buffer, the text is at point of the buffer. FROM and TO
6217 are positions in the buffer.
6218
6219 If it is a string, the text is at the beginning of the string.
6220 FROM and TO are indices to the string.
6221
6222 If it is nil, the text is at coding->source. FROM and TO are
6223 indices to coding->source.
6224
6225 DST_OBJECT is a buffer, Qt, or Qnil.
6226
6227 If it is a buffer, the decoded text is inserted at point of the
6228 buffer. If the buffer is the same as SRC_OBJECT, the source text
6229 is deleted.
6230
6231 If it is Qt, a string is made from the decoded text, and
6232 set in CODING->dst_object.
6233
6234 If it is Qnil, the decoded text is stored at CODING->destination.
6235 The called must allocate CODING->dst_bytes bytes at
6236 CODING->destination by xmalloc. If the decoded text is longer than
6237 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6238 */
6239
6240 void
6241 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6242 dst_object)
6243 struct coding_system *coding;
6244 Lisp_Object src_object;
6245 EMACS_INT from, from_byte, to, to_byte;
6246 Lisp_Object dst_object;
6247 {
6248 int count = specpdl_ptr - specpdl;
6249 unsigned char *destination;
6250 EMACS_INT dst_bytes;
6251 EMACS_INT chars = to - from;
6252 EMACS_INT bytes = to_byte - from_byte;
6253 Lisp_Object attrs;
6254
6255 saved_coding = coding;
6256 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6257
6258 if (NILP (dst_object))
6259 {
6260 destination = coding->destination;
6261 dst_bytes = coding->dst_bytes;
6262 }
6263
6264 coding->src_object = src_object;
6265 coding->src_chars = chars;
6266 coding->src_bytes = bytes;
6267 coding->src_multibyte = chars < bytes;
6268
6269 if (STRINGP (src_object))
6270 {
6271 coding->src_pos = from;
6272 coding->src_pos_byte = from_byte;
6273 }
6274 else if (BUFFERP (src_object))
6275 {
6276 set_buffer_internal (XBUFFER (src_object));
6277 if (from != GPT)
6278 move_gap_both (from, from_byte);
6279 if (EQ (src_object, dst_object))
6280 {
6281 TEMP_SET_PT_BOTH (from, from_byte);
6282 del_range_both (from, from_byte, to, to_byte, 1);
6283 coding->src_pos = -chars;
6284 coding->src_pos_byte = -bytes;
6285 }
6286 else
6287 {
6288 coding->src_pos = from;
6289 coding->src_pos_byte = from_byte;
6290 }
6291 }
6292
6293 if (CODING_REQUIRE_DETECTION (coding))
6294 detect_coding (coding);
6295 attrs = CODING_ID_ATTRS (coding->id);
6296
6297 if (! NILP (CODING_ATTR_POST_READ (attrs))
6298 || EQ (dst_object, Qt))
6299 {
6300 coding->dst_object = make_conversion_work_buffer (1);
6301 coding->dst_pos = BEG;
6302 coding->dst_pos_byte = BEG_BYTE;
6303 coding->dst_multibyte = 1;
6304 }
6305 else if (BUFFERP (dst_object))
6306 {
6307 coding->dst_object = dst_object;
6308 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6309 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6310 coding->dst_multibyte
6311 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6312 }
6313 else
6314 {
6315 coding->dst_object = Qnil;
6316 coding->dst_multibyte = 1;
6317 }
6318
6319 decode_coding (coding);
6320
6321 if (BUFFERP (coding->dst_object))
6322 set_buffer_internal (XBUFFER (coding->dst_object));
6323
6324 if (! NILP (CODING_ATTR_POST_READ (attrs)))
6325 {
6326 struct gcpro gcpro1, gcpro2;
6327 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6328 Lisp_Object val;
6329
6330 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6331 GCPRO2 (coding->src_object, coding->dst_object);
6332 val = call1 (CODING_ATTR_POST_READ (attrs),
6333 make_number (coding->produced_char));
6334 UNGCPRO;
6335 CHECK_NATNUM (val);
6336 coding->produced_char += Z - prev_Z;
6337 coding->produced += Z_BYTE - prev_Z_BYTE;
6338 }
6339
6340 if (EQ (dst_object, Qt))
6341 {
6342 coding->dst_object = Fbuffer_string ();
6343 }
6344 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6345 {
6346 set_buffer_internal (XBUFFER (coding->dst_object));
6347 if (dst_bytes < coding->produced)
6348 {
6349 destination
6350 = (unsigned char *) xrealloc (destination, coding->produced);
6351 if (! destination)
6352 {
6353 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6354 unbind_to (count, Qnil);
6355 return;
6356 }
6357 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6358 move_gap_both (BEGV, BEGV_BYTE);
6359 bcopy (BEGV_ADDR, destination, coding->produced);
6360 coding->destination = destination;
6361 }
6362 }
6363
6364 unbind_to (count, Qnil);
6365 }
6366
6367
6368 void
6369 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6370 dst_object)
6371 struct coding_system *coding;
6372 Lisp_Object src_object;
6373 EMACS_INT from, from_byte, to, to_byte;
6374 Lisp_Object dst_object;
6375 {
6376 int count = specpdl_ptr - specpdl;
6377 EMACS_INT chars = to - from;
6378 EMACS_INT bytes = to_byte - from_byte;
6379 Lisp_Object attrs;
6380
6381 saved_coding = coding;
6382 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6383
6384 coding->src_object = src_object;
6385 coding->src_chars = chars;
6386 coding->src_bytes = bytes;
6387 coding->src_multibyte = chars < bytes;
6388
6389 attrs = CODING_ID_ATTRS (coding->id);
6390
6391 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6392 {
6393 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6394 set_buffer_internal (XBUFFER (coding->src_object));
6395 if (STRINGP (src_object))
6396 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6397 else if (BUFFERP (src_object))
6398 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6399 else
6400 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6401
6402 if (EQ (src_object, dst_object))
6403 {
6404 set_buffer_internal (XBUFFER (src_object));
6405 del_range_both (from, from_byte, to, to_byte, 1);
6406 set_buffer_internal (XBUFFER (coding->src_object));
6407 }
6408
6409 call2 (CODING_ATTR_PRE_WRITE (attrs),
6410 make_number (BEG), make_number (Z));
6411 coding->src_object = Fcurrent_buffer ();
6412 if (BEG != GPT)
6413 move_gap_both (BEG, BEG_BYTE);
6414 coding->src_chars = Z - BEG;
6415 coding->src_bytes = Z_BYTE - BEG_BYTE;
6416 coding->src_pos = BEG;
6417 coding->src_pos_byte = BEG_BYTE;
6418 coding->src_multibyte = Z < Z_BYTE;
6419 }
6420 else if (STRINGP (src_object))
6421 {
6422 coding->src_pos = from;
6423 coding->src_pos_byte = from_byte;
6424 }
6425 else if (BUFFERP (src_object))
6426 {
6427 set_buffer_internal (XBUFFER (src_object));
6428 if (EQ (src_object, dst_object))
6429 {
6430 coding->src_object = del_range_1 (from, to, 1, 1);
6431 coding->src_pos = 0;
6432 coding->src_pos_byte = 0;
6433 }
6434 else
6435 {
6436 if (from < GPT && to >= GPT)
6437 move_gap_both (from, from_byte);
6438 coding->src_pos = from;
6439 coding->src_pos_byte = from_byte;
6440 }
6441 }
6442
6443 if (BUFFERP (dst_object))
6444 {
6445 coding->dst_object = dst_object;
6446 if (EQ (src_object, dst_object))
6447 {
6448 coding->dst_pos = from;
6449 coding->dst_pos_byte = from_byte;
6450 }
6451 else
6452 {
6453 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6454 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6455 }
6456 coding->dst_multibyte
6457 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6458 }
6459 else if (EQ (dst_object, Qt))
6460 {
6461 coding->dst_object = Qnil;
6462 coding->dst_bytes = coding->src_chars;
6463 if (coding->dst_bytes == 0)
6464 coding->dst_bytes = 1;
6465 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6466 coding->dst_multibyte = 0;
6467 }
6468 else
6469 {
6470 coding->dst_object = Qnil;
6471 coding->dst_multibyte = 0;
6472 }
6473
6474 encode_coding (coding);
6475
6476 if (EQ (dst_object, Qt))
6477 {
6478 if (BUFFERP (coding->dst_object))
6479 coding->dst_object = Fbuffer_string ();
6480 else
6481 {
6482 coding->dst_object
6483 = make_unibyte_string ((char *) coding->destination,
6484 coding->produced);
6485 xfree (coding->destination);
6486 }
6487 }
6488
6489 unbind_to (count, Qnil);
6490 }
6491
6492
6493 Lisp_Object
6494 preferred_coding_system ()
6495 {
6496 int id = coding_categories[coding_priorities[0]].id;
6497
6498 return CODING_ID_NAME (id);
6499 }
6500
6501 \f
6502 #ifdef emacs
6503 /*** 8. Emacs Lisp library functions ***/
6504
6505 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6506 doc: /* Return t if OBJECT is nil or a coding-system.
6507 See the documentation of `define-coding-system' for information
6508 about coding-system objects. */)
6509 (obj)
6510 Lisp_Object obj;
6511 {
6512 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6513 }
6514
6515 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6516 Sread_non_nil_coding_system, 1, 1, 0,
6517 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6518 (prompt)
6519 Lisp_Object prompt;
6520 {
6521 Lisp_Object val;
6522 do
6523 {
6524 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6525 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6526 }
6527 while (XSTRING (val)->size == 0);
6528 return (Fintern (val, Qnil));
6529 }
6530
6531 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6532 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6533 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6534 (prompt, default_coding_system)
6535 Lisp_Object prompt, default_coding_system;
6536 {
6537 Lisp_Object val;
6538 if (SYMBOLP (default_coding_system))
6539 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6540 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6541 Qt, Qnil, Qcoding_system_history,
6542 default_coding_system, Qnil);
6543 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6544 }
6545
6546 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6547 1, 1, 0,
6548 doc: /* Check validity of CODING-SYSTEM.
6549 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6550 (coding_system)
6551 Lisp_Object coding_system;
6552 {
6553 CHECK_SYMBOL (coding_system);
6554 if (!NILP (Fcoding_system_p (coding_system)))
6555 return coding_system;
6556 while (1)
6557 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6558 }
6559
6560 \f
6561 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6562 HIGHEST is nonzero, return the coding system of the highest
6563 priority among the detected coding systems. Otherwize return a
6564 list of detected coding systems sorted by their priorities. If
6565 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6566 multibyte form but contains only ASCII and eight-bit chars.
6567 Otherwise, the bytes are raw bytes.
6568
6569 CODING-SYSTEM controls the detection as below:
6570
6571 If it is nil, detect both text-format and eol-format. If the
6572 text-format part of CODING-SYSTEM is already specified
6573 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6574 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6575 detect only text-format. */
6576
6577 Lisp_Object
6578 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6579 unsigned char *src;
6580 int src_bytes, highest;
6581 int multibytep;
6582 Lisp_Object coding_system;
6583 {
6584 unsigned char *src_end = src + src_bytes;
6585 int mask = CATEGORY_MASK_ANY;
6586 Lisp_Object attrs, eol_type;
6587 Lisp_Object val;
6588 struct coding_system coding;
6589 int id;
6590 struct coding_detection_info detect_info;
6591
6592 if (NILP (coding_system))
6593 coding_system = Qundecided;
6594 setup_coding_system (coding_system, &coding);
6595 attrs = CODING_ID_ATTRS (coding.id);
6596 eol_type = CODING_ID_EOL_TYPE (coding.id);
6597 coding_system = CODING_ATTR_BASE_NAME (attrs);
6598
6599 coding.source = src;
6600 coding.src_bytes = src_bytes;
6601 coding.src_multibyte = multibytep;
6602 coding.consumed = 0;
6603 coding.mode |= CODING_MODE_LAST_BLOCK;
6604
6605 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6606
6607 /* At first, detect text-format if necessary. */
6608 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
6609 {
6610 enum coding_category category;
6611 struct coding_system *this;
6612 int c, i;
6613
6614 for (; src < src_end; src++)
6615 {
6616 c = *src;
6617 if (c & 0x80
6618 || (c < 0x20 && (c == ISO_CODE_ESC
6619 || c == ISO_CODE_SI
6620 || c == ISO_CODE_SO)))
6621 break;
6622 }
6623 coding.head_ascii = src - coding.source;
6624
6625 if (src < src_end)
6626 for (i = 0; i < coding_category_raw_text; i++)
6627 {
6628 category = coding_priorities[i];
6629 this = coding_categories + category;
6630
6631 if (this->id < 0)
6632 {
6633 /* No coding system of this category is defined. */
6634 detect_info.rejected |= (1 << category);
6635 }
6636 else if (category >= coding_category_raw_text)
6637 continue;
6638 else if (detect_info.checked & (1 << category))
6639 {
6640 if (highest
6641 && (detect_info.found & (1 << category)))
6642 break;
6643 }
6644 else
6645 {
6646 if ((*(this->detector)) (&coding, &detect_info)
6647 && highest
6648 && (detect_info.found & (1 << category)))
6649 break;
6650 }
6651 }
6652
6653
6654 if (detect_info.rejected == CATEGORY_MASK_ANY)
6655 {
6656 detect_info.found = CATEGORY_MASK_RAW_TEXT;
6657 id = coding_categories[coding_category_raw_text].id;
6658 val = Fcons (make_number (id), Qnil);
6659 }
6660 else if (! detect_info.rejected && ! detect_info.found)
6661 {
6662 detect_info.found = CATEGORY_MASK_ANY;
6663 id = coding_categories[coding_category_undecided].id;
6664 val = Fcons (make_number (id), Qnil);
6665 }
6666 else if (highest)
6667 {
6668 if (detect_info.found)
6669 {
6670 detect_info.found = 1 << category;
6671 val = Fcons (make_number (this->id), Qnil);
6672 }
6673 else
6674 for (i = 0; i < coding_category_raw_text; i++)
6675 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6676 {
6677 detect_info.found = 1 << coding_priorities[i];
6678 id = coding_categories[coding_priorities[i]].id;
6679 val = Fcons (make_number (id), Qnil);
6680 break;
6681 }
6682 }
6683 else
6684 {
6685 int mask = detect_info.rejected | detect_info.found;
6686 int found = 0;
6687 val = Qnil;
6688
6689 for (i = coding_category_raw_text - 1; i >= 0; i--)
6690 {
6691 category = coding_priorities[i];
6692 if (! (mask & (1 << category)))
6693 {
6694 found |= 1 << category;
6695 id = coding_categories[category].id;
6696 val = Fcons (make_number (id), val);
6697 }
6698 }
6699 for (i = coding_category_raw_text - 1; i >= 0; i--)
6700 {
6701 category = coding_priorities[i];
6702 if (detect_info.found & (1 << category))
6703 {
6704 id = coding_categories[category].id;
6705 val = Fcons (make_number (id), val);
6706 }
6707 }
6708 detect_info.found |= found;
6709 }
6710 }
6711 else
6712 {
6713 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6714 val = Fcons (make_number (coding.id), Qnil);
6715 }
6716
6717 /* Then, detect eol-format if necessary. */
6718 {
6719 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6720 Lisp_Object tail;
6721
6722 if (VECTORP (eol_type))
6723 {
6724 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6725 normal_eol = detect_eol (coding.source, src_bytes,
6726 coding_category_raw_text);
6727 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6728 | CATEGORY_MASK_UTF_16_BE_NOSIG))
6729 utf_16_be_eol = detect_eol (coding.source, src_bytes,
6730 coding_category_utf_16_be);
6731 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6732 | CATEGORY_MASK_UTF_16_LE_NOSIG))
6733 utf_16_le_eol = detect_eol (coding.source, src_bytes,
6734 coding_category_utf_16_le);
6735 }
6736 else
6737 {
6738 if (EQ (eol_type, Qunix))
6739 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6740 else if (EQ (eol_type, Qdos))
6741 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6742 else
6743 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6744 }
6745
6746 for (tail = val; CONSP (tail); tail = XCDR (tail))
6747 {
6748 enum coding_category category;
6749 int this_eol;
6750
6751 id = XINT (XCAR (tail));
6752 attrs = CODING_ID_ATTRS (id);
6753 category = XINT (CODING_ATTR_CATEGORY (attrs));
6754 eol_type = CODING_ID_EOL_TYPE (id);
6755 if (VECTORP (eol_type))
6756 {
6757 if (category == coding_category_utf_16_be
6758 || category == coding_category_utf_16_be_nosig)
6759 this_eol = utf_16_be_eol;
6760 else if (category == coding_category_utf_16_le
6761 || category == coding_category_utf_16_le_nosig)
6762 this_eol = utf_16_le_eol;
6763 else
6764 this_eol = normal_eol;
6765
6766 if (this_eol == EOL_SEEN_LF)
6767 XSETCAR (tail, AREF (eol_type, 0));
6768 else if (this_eol == EOL_SEEN_CRLF)
6769 XSETCAR (tail, AREF (eol_type, 1));
6770 else if (this_eol == EOL_SEEN_CR)
6771 XSETCAR (tail, AREF (eol_type, 2));
6772 else
6773 XSETCAR (tail, CODING_ID_NAME (id));
6774 }
6775 else
6776 XSETCAR (tail, CODING_ID_NAME (id));
6777 }
6778 }
6779
6780 return (highest ? XCAR (val) : val);
6781 }
6782
6783
6784 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6785 2, 3, 0,
6786 doc: /* Detect coding system of the text in the region between START and END.
6787 Return a list of possible coding systems ordered by priority.
6788
6789 If only ASCII characters are found, it returns a list of single element
6790 `undecided' or its subsidiary coding system according to a detected
6791 end-of-line format.
6792
6793 If optional argument HIGHEST is non-nil, return the coding system of
6794 highest priority. */)
6795 (start, end, highest)
6796 Lisp_Object start, end, highest;
6797 {
6798 int from, to;
6799 int from_byte, to_byte;
6800
6801 CHECK_NUMBER_COERCE_MARKER (start);
6802 CHECK_NUMBER_COERCE_MARKER (end);
6803
6804 validate_region (&start, &end);
6805 from = XINT (start), to = XINT (end);
6806 from_byte = CHAR_TO_BYTE (from);
6807 to_byte = CHAR_TO_BYTE (to);
6808
6809 if (from < GPT && to >= GPT)
6810 move_gap_both (to, to_byte);
6811
6812 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6813 to_byte - from_byte,
6814 !NILP (highest),
6815 !NILP (current_buffer
6816 ->enable_multibyte_characters),
6817 Qnil);
6818 }
6819
6820 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6821 1, 2, 0,
6822 doc: /* Detect coding system of the text in STRING.
6823 Return a list of possible coding systems ordered by priority.
6824
6825 If only ASCII characters are found, it returns a list of single element
6826 `undecided' or its subsidiary coding system according to a detected
6827 end-of-line format.
6828
6829 If optional argument HIGHEST is non-nil, return the coding system of
6830 highest priority. */)
6831 (string, highest)
6832 Lisp_Object string, highest;
6833 {
6834 CHECK_STRING (string);
6835
6836 return detect_coding_system (XSTRING (string)->data,
6837 STRING_BYTES (XSTRING (string)),
6838 !NILP (highest),
6839 STRING_MULTIBYTE (string),
6840 Qnil);
6841 }
6842
6843
6844 static INLINE int
6845 char_encodable_p (c, attrs)
6846 int c;
6847 Lisp_Object attrs;
6848 {
6849 Lisp_Object tail;
6850 struct charset *charset;
6851
6852 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6853 CONSP (tail); tail = XCDR (tail))
6854 {
6855 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6856 if (CHAR_CHARSET_P (c, charset))
6857 break;
6858 }
6859 return (! NILP (tail));
6860 }
6861
6862
6863 /* Return a list of coding systems that safely encode the text between
6864 START and END. If EXCLUDE is non-nil, it is a list of coding
6865 systems not to check. The returned list doesn't contain any such
6866 coding systems. In any case, if the text contains only ASCII or is
6867 unibyte, return t. */
6868
6869 DEFUN ("find-coding-systems-region-internal",
6870 Ffind_coding_systems_region_internal,
6871 Sfind_coding_systems_region_internal, 2, 3, 0,
6872 doc: /* Internal use only. */)
6873 (start, end, exclude)
6874 Lisp_Object start, end, exclude;
6875 {
6876 Lisp_Object coding_attrs_list, safe_codings;
6877 EMACS_INT start_byte, end_byte;
6878 const unsigned char *p, *pbeg, *pend;
6879 int c;
6880 Lisp_Object tail, elt;
6881
6882 if (STRINGP (start))
6883 {
6884 if (!STRING_MULTIBYTE (start)
6885 || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
6886 return Qt;
6887 start_byte = 0;
6888 end_byte = STRING_BYTES (XSTRING (start));
6889 }
6890 else
6891 {
6892 CHECK_NUMBER_COERCE_MARKER (start);
6893 CHECK_NUMBER_COERCE_MARKER (end);
6894 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6895 args_out_of_range (start, end);
6896 if (NILP (current_buffer->enable_multibyte_characters))
6897 return Qt;
6898 start_byte = CHAR_TO_BYTE (XINT (start));
6899 end_byte = CHAR_TO_BYTE (XINT (end));
6900 if (XINT (end) - XINT (start) == end_byte - start_byte)
6901 return Qt;
6902
6903 if (XINT (start) < GPT && XINT (end) > GPT)
6904 {
6905 if ((GPT - XINT (start)) < (XINT (end) - GPT))
6906 move_gap_both (XINT (start), start_byte);
6907 else
6908 move_gap_both (XINT (end), end_byte);
6909 }
6910 }
6911
6912 coding_attrs_list = Qnil;
6913 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6914 if (NILP (exclude)
6915 || NILP (Fmemq (XCAR (tail), exclude)))
6916 {
6917 Lisp_Object attrs;
6918
6919 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6920 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6921 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6922 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6923 }
6924
6925 if (STRINGP (start))
6926 p = pbeg = XSTRING (start)->data;
6927 else
6928 p = pbeg = BYTE_POS_ADDR (start_byte);
6929 pend = p + (end_byte - start_byte);
6930
6931 while (p < pend && ASCII_BYTE_P (*p)) p++;
6932 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6933
6934 while (p < pend)
6935 {
6936 if (ASCII_BYTE_P (*p))
6937 p++;
6938 else
6939 {
6940 c = STRING_CHAR_ADVANCE (p);
6941
6942 charset_map_loaded = 0;
6943 for (tail = coding_attrs_list; CONSP (tail);)
6944 {
6945 elt = XCAR (tail);
6946 if (NILP (elt))
6947 tail = XCDR (tail);
6948 else if (char_encodable_p (c, elt))
6949 tail = XCDR (tail);
6950 else if (CONSP (XCDR (tail)))
6951 {
6952 XSETCAR (tail, XCAR (XCDR (tail)));
6953 XSETCDR (tail, XCDR (XCDR (tail)));
6954 }
6955 else
6956 {
6957 XSETCAR (tail, Qnil);
6958 tail = XCDR (tail);
6959 }
6960 }
6961 if (charset_map_loaded)
6962 {
6963 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6964
6965 if (STRINGP (start))
6966 pbeg = XSTRING (start)->data;
6967 else
6968 pbeg = BYTE_POS_ADDR (start_byte);
6969 p = pbeg + p_offset;
6970 pend = pbeg + pend_offset;
6971 }
6972 }
6973 }
6974
6975 safe_codings = Qnil;
6976 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6977 if (! NILP (XCAR (tail)))
6978 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6979
6980 return safe_codings;
6981 }
6982
6983
6984 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6985 Scheck_coding_systems_region, 3, 3, 0,
6986 doc: /* Check if the region is encodable by coding systems.
6987
6988 START and END are buffer positions specifying the region.
6989 CODING-SYSTEM-LIST is a list of coding systems to check.
6990
6991 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6992 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6993 whole region, POS0, POS1, ... are buffer positions where non-encodable
6994 characters are found.
6995
6996 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6997 value is nil.
6998
6999 START may be a string. In that case, check if the string is
7000 encodable, and the value contains indices to the string instead of
7001 buffer positions. END is ignored. */)
7002 (start, end, coding_system_list)
7003 Lisp_Object start, end, coding_system_list;
7004 {
7005 Lisp_Object list;
7006 EMACS_INT start_byte, end_byte;
7007 int pos;
7008 const unsigned char *p, *pbeg, *pend;
7009 int c;
7010 Lisp_Object tail, elt;
7011
7012 if (STRINGP (start))
7013 {
7014 if (!STRING_MULTIBYTE (start)
7015 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7016 return Qnil;
7017 start_byte = 0;
7018 end_byte = STRING_BYTES (XSTRING (start));
7019 pos = 0;
7020 }
7021 else
7022 {
7023 CHECK_NUMBER_COERCE_MARKER (start);
7024 CHECK_NUMBER_COERCE_MARKER (end);
7025 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7026 args_out_of_range (start, end);
7027 if (NILP (current_buffer->enable_multibyte_characters))
7028 return Qnil;
7029 start_byte = CHAR_TO_BYTE (XINT (start));
7030 end_byte = CHAR_TO_BYTE (XINT (end));
7031 if (XINT (end) - XINT (start) == end_byte - start_byte)
7032 return Qt;
7033
7034 if (XINT (start) < GPT && XINT (end) > GPT)
7035 {
7036 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7037 move_gap_both (XINT (start), start_byte);
7038 else
7039 move_gap_both (XINT (end), end_byte);
7040 }
7041 pos = XINT (start);
7042 }
7043
7044 list = Qnil;
7045 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7046 {
7047 elt = XCAR (tail);
7048 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7049 Qnil)),
7050 list);
7051 }
7052
7053 if (STRINGP (start))
7054 p = pbeg = XSTRING (start)->data;
7055 else
7056 p = pbeg = BYTE_POS_ADDR (start_byte);
7057 pend = p + (end_byte - start_byte);
7058
7059 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7060 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7061
7062 while (p < pend)
7063 {
7064 if (ASCII_BYTE_P (*p))
7065 p++;
7066 else
7067 {
7068 c = STRING_CHAR_ADVANCE (p);
7069
7070 charset_map_loaded = 0;
7071 for (tail = list; CONSP (tail); tail = XCDR (tail))
7072 {
7073 elt = XCDR (XCAR (tail));
7074 if (! char_encodable_p (c, XCAR (elt)))
7075 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7076 }
7077 if (charset_map_loaded)
7078 {
7079 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7080
7081 if (STRINGP (start))
7082 pbeg = XSTRING (start)->data;
7083 else
7084 pbeg = BYTE_POS_ADDR (start_byte);
7085 p = pbeg + p_offset;
7086 pend = pbeg + pend_offset;
7087 }
7088 }
7089 pos++;
7090 }
7091
7092 tail = list;
7093 list = Qnil;
7094 for (; CONSP (tail); tail = XCDR (tail))
7095 {
7096 elt = XCAR (tail);
7097 if (CONSP (XCDR (XCDR (elt))))
7098 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7099 list);
7100 }
7101
7102 return list;
7103 }
7104
7105
7106
7107 Lisp_Object
7108 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7109 Lisp_Object start, end, coding_system, dst_object;
7110 int encodep, norecord;
7111 {
7112 struct coding_system coding;
7113 EMACS_INT from, from_byte, to, to_byte;
7114 Lisp_Object src_object;
7115
7116 CHECK_NUMBER_COERCE_MARKER (start);
7117 CHECK_NUMBER_COERCE_MARKER (end);
7118 if (NILP (coding_system))
7119 coding_system = Qno_conversion;
7120 else
7121 CHECK_CODING_SYSTEM (coding_system);
7122 src_object = Fcurrent_buffer ();
7123 if (NILP (dst_object))
7124 dst_object = src_object;
7125 else if (! EQ (dst_object, Qt))
7126 CHECK_BUFFER (dst_object);
7127
7128 validate_region (&start, &end);
7129 from = XFASTINT (start);
7130 from_byte = CHAR_TO_BYTE (from);
7131 to = XFASTINT (end);
7132 to_byte = CHAR_TO_BYTE (to);
7133
7134 setup_coding_system (coding_system, &coding);
7135 coding.mode |= CODING_MODE_LAST_BLOCK;
7136
7137 if (encodep)
7138 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7139 dst_object);
7140 else
7141 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7142 dst_object);
7143 if (! norecord)
7144 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7145
7146 if (coding.result != CODING_RESULT_SUCCESS)
7147 error ("Code conversion error: %d", coding.result);
7148
7149 return (BUFFERP (dst_object)
7150 ? make_number (coding.produced_char)
7151 : coding.dst_object);
7152 }
7153
7154
7155 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7156 3, 4, "r\nzCoding system: ",
7157 doc: /* Decode the current region from the specified coding system.
7158 When called from a program, takes four arguments:
7159 START, END, CODING-SYSTEM, and DESTINATION.
7160 START and END are buffer positions.
7161
7162 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7163 If nil, the region between START and END is replace by the decoded text.
7164 If buffer, the decoded text is inserted in the buffer.
7165 If t, the decoded text is returned.
7166
7167 This function sets `last-coding-system-used' to the precise coding system
7168 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7169 not fully specified.)
7170 It returns the length of the decoded text. */)
7171 (start, end, coding_system, destination)
7172 Lisp_Object start, end, coding_system, destination;
7173 {
7174 return code_convert_region (start, end, coding_system, destination, 0, 0);
7175 }
7176
7177 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7178 3, 4, "r\nzCoding system: ",
7179 doc: /* Encode the current region by specified coding system.
7180 When called from a program, takes three arguments:
7181 START, END, and CODING-SYSTEM. START and END are buffer positions.
7182
7183 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7184 If nil, the region between START and END is replace by the encoded text.
7185 If buffer, the encoded text is inserted in the buffer.
7186 If t, the encoded text is returned.
7187
7188 This function sets `last-coding-system-used' to the precise coding system
7189 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7190 not fully specified.)
7191 It returns the length of the encoded text. */)
7192 (start, end, coding_system, destination)
7193 Lisp_Object start, end, coding_system, destination;
7194 {
7195 return code_convert_region (start, end, coding_system, destination, 1, 0);
7196 }
7197
7198 Lisp_Object
7199 code_convert_string (string, coding_system, dst_object,
7200 encodep, nocopy, norecord)
7201 Lisp_Object string, coding_system, dst_object;
7202 int encodep, nocopy, norecord;
7203 {
7204 struct coding_system coding;
7205 EMACS_INT chars, bytes;
7206
7207 CHECK_STRING (string);
7208 if (NILP (coding_system))
7209 {
7210 if (! norecord)
7211 Vlast_coding_system_used = Qno_conversion;
7212 if (NILP (dst_object))
7213 return (nocopy ? Fcopy_sequence (string) : string);
7214 }
7215
7216 if (NILP (coding_system))
7217 coding_system = Qno_conversion;
7218 else
7219 CHECK_CODING_SYSTEM (coding_system);
7220 if (NILP (dst_object))
7221 dst_object = Qt;
7222 else if (! EQ (dst_object, Qt))
7223 CHECK_BUFFER (dst_object);
7224
7225 setup_coding_system (coding_system, &coding);
7226 coding.mode |= CODING_MODE_LAST_BLOCK;
7227 chars = XSTRING (string)->size;
7228 bytes = STRING_BYTES (XSTRING (string));
7229 if (encodep)
7230 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7231 else
7232 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7233 if (! norecord)
7234 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7235
7236 if (coding.result != CODING_RESULT_SUCCESS)
7237 error ("Code conversion error: %d", coding.result);
7238
7239 return (BUFFERP (dst_object)
7240 ? make_number (coding.produced_char)
7241 : coding.dst_object);
7242 }
7243
7244
7245 /* Encode or decode STRING according to CODING_SYSTEM.
7246 Do not set Vlast_coding_system_used.
7247
7248 This function is called only from macros DECODE_FILE and
7249 ENCODE_FILE, thus we ignore character composition. */
7250
7251 Lisp_Object
7252 code_convert_string_norecord (string, coding_system, encodep)
7253 Lisp_Object string, coding_system;
7254 int encodep;
7255 {
7256 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7257 }
7258
7259
7260 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7261 2, 4, 0,
7262 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7263
7264 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7265 if the decoding operation is trivial.
7266
7267 Optional fourth arg BUFFER non-nil meant that the decoded text is
7268 inserted in BUFFER instead of returned as a string. In this case,
7269 the return value is BUFFER.
7270
7271 This function sets `last-coding-system-used' to the precise coding system
7272 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7273 not fully specified. */)
7274 (string, coding_system, nocopy, buffer)
7275 Lisp_Object string, coding_system, nocopy, buffer;
7276 {
7277 return code_convert_string (string, coding_system, buffer,
7278 0, ! NILP (nocopy), 0);
7279 }
7280
7281 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7282 2, 4, 0,
7283 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7284
7285 Optional third arg NOCOPY non-nil means it is OK to return STRING
7286 itself if the encoding operation is trivial.
7287
7288 Optional fourth arg BUFFER non-nil meant that the encoded text is
7289 inserted in BUFFER instead of returned as a string. In this case,
7290 the return value is BUFFER.
7291
7292 This function sets `last-coding-system-used' to the precise coding system
7293 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7294 not fully specified.) */)
7295 (string, coding_system, nocopy, buffer)
7296 Lisp_Object string, coding_system, nocopy, buffer;
7297 {
7298 return code_convert_string (string, coding_system, buffer,
7299 1, ! NILP (nocopy), 1);
7300 }
7301
7302 \f
7303 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7304 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7305 Return the corresponding character. */)
7306 (code)
7307 Lisp_Object code;
7308 {
7309 Lisp_Object spec, attrs, val;
7310 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7311 int c;
7312
7313 CHECK_NATNUM (code);
7314 c = XFASTINT (code);
7315 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7316 attrs = AREF (spec, 0);
7317
7318 if (ASCII_BYTE_P (c)
7319 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7320 return code;
7321
7322 val = CODING_ATTR_CHARSET_LIST (attrs);
7323 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7324 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7325 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7326
7327 if (c <= 0x7F)
7328 charset = charset_roman;
7329 else if (c >= 0xA0 && c < 0xDF)
7330 {
7331 charset = charset_kana;
7332 c -= 0x80;
7333 }
7334 else
7335 {
7336 int s1 = c >> 8, s2 = c & 0xFF;
7337
7338 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7339 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7340 error ("Invalid code: %d", code);
7341 SJIS_TO_JIS (c);
7342 charset = charset_kanji;
7343 }
7344 c = DECODE_CHAR (charset, c);
7345 if (c < 0)
7346 error ("Invalid code: %d", code);
7347 return make_number (c);
7348 }
7349
7350
7351 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7352 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7353 Return the corresponding code in SJIS. */)
7354 (ch)
7355 Lisp_Object ch;
7356 {
7357 Lisp_Object spec, attrs, charset_list;
7358 int c;
7359 struct charset *charset;
7360 unsigned code;
7361
7362 CHECK_CHARACTER (ch);
7363 c = XFASTINT (ch);
7364 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7365 attrs = AREF (spec, 0);
7366
7367 if (ASCII_CHAR_P (c)
7368 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7369 return ch;
7370
7371 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7372 charset = char_charset (c, charset_list, &code);
7373 if (code == CHARSET_INVALID_CODE (charset))
7374 error ("Can't encode by shift_jis encoding: %d", c);
7375 JIS_TO_SJIS (code);
7376
7377 return make_number (code);
7378 }
7379
7380 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7381 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7382 Return the corresponding character. */)
7383 (code)
7384 Lisp_Object code;
7385 {
7386 Lisp_Object spec, attrs, val;
7387 struct charset *charset_roman, *charset_big5, *charset;
7388 int c;
7389
7390 CHECK_NATNUM (code);
7391 c = XFASTINT (code);
7392 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7393 attrs = AREF (spec, 0);
7394
7395 if (ASCII_BYTE_P (c)
7396 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7397 return code;
7398
7399 val = CODING_ATTR_CHARSET_LIST (attrs);
7400 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7401 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7402
7403 if (c <= 0x7F)
7404 charset = charset_roman;
7405 else
7406 {
7407 int b1 = c >> 8, b2 = c & 0x7F;
7408 if (b1 < 0xA1 || b1 > 0xFE
7409 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7410 error ("Invalid code: %d", code);
7411 charset = charset_big5;
7412 }
7413 c = DECODE_CHAR (charset, (unsigned )c);
7414 if (c < 0)
7415 error ("Invalid code: %d", code);
7416 return make_number (c);
7417 }
7418
7419 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7420 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7421 Return the corresponding character code in Big5. */)
7422 (ch)
7423 Lisp_Object ch;
7424 {
7425 Lisp_Object spec, attrs, charset_list;
7426 struct charset *charset;
7427 int c;
7428 unsigned code;
7429
7430 CHECK_CHARACTER (ch);
7431 c = XFASTINT (ch);
7432 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7433 attrs = AREF (spec, 0);
7434 if (ASCII_CHAR_P (c)
7435 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7436 return ch;
7437
7438 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7439 charset = char_charset (c, charset_list, &code);
7440 if (code == CHARSET_INVALID_CODE (charset))
7441 error ("Can't encode by Big5 encoding: %d", c);
7442
7443 return make_number (code);
7444 }
7445
7446 \f
7447 DEFUN ("set-terminal-coding-system-internal",
7448 Fset_terminal_coding_system_internal,
7449 Sset_terminal_coding_system_internal, 1, 1, 0,
7450 doc: /* Internal use only. */)
7451 (coding_system)
7452 Lisp_Object coding_system;
7453 {
7454 CHECK_SYMBOL (coding_system);
7455 setup_coding_system (Fcheck_coding_system (coding_system),
7456 &terminal_coding);
7457
7458 /* We had better not send unsafe characters to terminal. */
7459 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7460 /* Characer composition should be disabled. */
7461 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7462 terminal_coding.src_multibyte = 1;
7463 terminal_coding.dst_multibyte = 0;
7464 return Qnil;
7465 }
7466
7467 DEFUN ("set-safe-terminal-coding-system-internal",
7468 Fset_safe_terminal_coding_system_internal,
7469 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7470 doc: /* Internal use only. */)
7471 (coding_system)
7472 Lisp_Object coding_system;
7473 {
7474 CHECK_SYMBOL (coding_system);
7475 setup_coding_system (Fcheck_coding_system (coding_system),
7476 &safe_terminal_coding);
7477 /* Characer composition should be disabled. */
7478 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7479 safe_terminal_coding.src_multibyte = 1;
7480 safe_terminal_coding.dst_multibyte = 0;
7481 return Qnil;
7482 }
7483
7484 DEFUN ("terminal-coding-system",
7485 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7486 doc: /* Return coding system specified for terminal output. */)
7487 ()
7488 {
7489 return CODING_ID_NAME (terminal_coding.id);
7490 }
7491
7492 DEFUN ("set-keyboard-coding-system-internal",
7493 Fset_keyboard_coding_system_internal,
7494 Sset_keyboard_coding_system_internal, 1, 1, 0,
7495 doc: /* Internal use only. */)
7496 (coding_system)
7497 Lisp_Object coding_system;
7498 {
7499 CHECK_SYMBOL (coding_system);
7500 setup_coding_system (Fcheck_coding_system (coding_system),
7501 &keyboard_coding);
7502 /* Characer composition should be disabled. */
7503 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7504 return Qnil;
7505 }
7506
7507 DEFUN ("keyboard-coding-system",
7508 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7509 doc: /* Return coding system specified for decoding keyboard input. */)
7510 ()
7511 {
7512 return CODING_ID_NAME (keyboard_coding.id);
7513 }
7514
7515 \f
7516 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7517 Sfind_operation_coding_system, 1, MANY, 0,
7518 doc: /* Choose a coding system for an operation based on the target name.
7519 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7520 DECODING-SYSTEM is the coding system to use for decoding
7521 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7522 for encoding (in case OPERATION does encoding).
7523
7524 The first argument OPERATION specifies an I/O primitive:
7525 For file I/O, `insert-file-contents' or `write-region'.
7526 For process I/O, `call-process', `call-process-region', or `start-process'.
7527 For network I/O, `open-network-stream'.
7528
7529 The remaining arguments should be the same arguments that were passed
7530 to the primitive. Depending on which primitive, one of those arguments
7531 is selected as the TARGET. For example, if OPERATION does file I/O,
7532 whichever argument specifies the file name is TARGET.
7533
7534 TARGET has a meaning which depends on OPERATION:
7535 For file I/O, TARGET is a file name.
7536 For process I/O, TARGET is a process name.
7537 For network I/O, TARGET is a service name or a port number
7538
7539 This function looks up what specified for TARGET in,
7540 `file-coding-system-alist', `process-coding-system-alist',
7541 or `network-coding-system-alist' depending on OPERATION.
7542 They may specify a coding system, a cons of coding systems,
7543 or a function symbol to call.
7544 In the last case, we call the function with one argument,
7545 which is a list of all the arguments given to this function.
7546
7547 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7548 (nargs, args)
7549 int nargs;
7550 Lisp_Object *args;
7551 {
7552 Lisp_Object operation, target_idx, target, val;
7553 register Lisp_Object chain;
7554
7555 if (nargs < 2)
7556 error ("Too few arguments");
7557 operation = args[0];
7558 if (!SYMBOLP (operation)
7559 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7560 error ("Invalid first arguement");
7561 if (nargs < 1 + XINT (target_idx))
7562 error ("Too few arguments for operation: %s",
7563 XSYMBOL (operation)->name->data);
7564 target = args[XINT (target_idx) + 1];
7565 if (!(STRINGP (target)
7566 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7567 error ("Invalid %dth argument", XINT (target_idx) + 1);
7568
7569 chain = ((EQ (operation, Qinsert_file_contents)
7570 || EQ (operation, Qwrite_region))
7571 ? Vfile_coding_system_alist
7572 : (EQ (operation, Qopen_network_stream)
7573 ? Vnetwork_coding_system_alist
7574 : Vprocess_coding_system_alist));
7575 if (NILP (chain))
7576 return Qnil;
7577
7578 for (; CONSP (chain); chain = XCDR (chain))
7579 {
7580 Lisp_Object elt;
7581
7582 elt = XCAR (chain);
7583 if (CONSP (elt)
7584 && ((STRINGP (target)
7585 && STRINGP (XCAR (elt))
7586 && fast_string_match (XCAR (elt), target) >= 0)
7587 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7588 {
7589 val = XCDR (elt);
7590 /* Here, if VAL is both a valid coding system and a valid
7591 function symbol, we return VAL as a coding system. */
7592 if (CONSP (val))
7593 return val;
7594 if (! SYMBOLP (val))
7595 return Qnil;
7596 if (! NILP (Fcoding_system_p (val)))
7597 return Fcons (val, val);
7598 if (! NILP (Ffboundp (val)))
7599 {
7600 val = call1 (val, Flist (nargs, args));
7601 if (CONSP (val))
7602 return val;
7603 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7604 return Fcons (val, val);
7605 }
7606 return Qnil;
7607 }
7608 }
7609 return Qnil;
7610 }
7611
7612 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7613 Sset_coding_system_priority, 0, MANY, 0,
7614 doc: /* Assign higher priority to the coding systems given as arguments.
7615 usage: (set-coding-system-priority CODING-SYSTEM ...) */)
7616 (nargs, args)
7617 int nargs;
7618 Lisp_Object *args;
7619 {
7620 int i, j;
7621 int changed[coding_category_max];
7622 enum coding_category priorities[coding_category_max];
7623
7624 bzero (changed, sizeof changed);
7625
7626 for (i = j = 0; i < nargs; i++)
7627 {
7628 enum coding_category category;
7629 Lisp_Object spec, attrs;
7630
7631 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7632 attrs = AREF (spec, 0);
7633 category = XINT (CODING_ATTR_CATEGORY (attrs));
7634 if (changed[category])
7635 /* Ignore this coding system because a coding system of the
7636 same category already had a higher priority. */
7637 continue;
7638 changed[category] = 1;
7639 priorities[j++] = category;
7640 if (coding_categories[category].id >= 0
7641 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7642 setup_coding_system (args[i], &coding_categories[category]);
7643 }
7644
7645 /* Now we have decided top J priorities. Reflect the order of the
7646 original priorities to the remaining priorities. */
7647
7648 for (i = j, j = 0; i < coding_category_max; i++, j++)
7649 {
7650 while (j < coding_category_max
7651 && changed[coding_priorities[j]])
7652 j++;
7653 if (j == coding_category_max)
7654 abort ();
7655 priorities[i] = coding_priorities[j];
7656 }
7657
7658 bcopy (priorities, coding_priorities, sizeof priorities);
7659 return Qnil;
7660 }
7661
7662 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7663 Scoding_system_priority_list, 0, 1, 0,
7664 doc: /* Return a list of coding systems ordered by their priorities.
7665 HIGHESTP non-nil means just return the highest priority one. */)
7666 (highestp)
7667 Lisp_Object highestp;
7668 {
7669 int i;
7670 Lisp_Object val;
7671
7672 for (i = 0, val = Qnil; i < coding_category_max; i++)
7673 {
7674 enum coding_category category = coding_priorities[i];
7675 int id = coding_categories[category].id;
7676 Lisp_Object attrs;
7677
7678 if (id < 0)
7679 continue;
7680 attrs = CODING_ID_ATTRS (id);
7681 if (! NILP (highestp))
7682 return CODING_ATTR_BASE_NAME (attrs);
7683 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7684 }
7685 return Fnreverse (val);
7686 }
7687
7688 static char *suffixes[] = { "-unix", "-dos", "-mac" };
7689
7690 static Lisp_Object
7691 make_subsidiaries (base)
7692 Lisp_Object base;
7693 {
7694 Lisp_Object subsidiaries;
7695 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7696 char *buf = (char *) alloca (base_name_len + 6);
7697 int i;
7698
7699 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7700 subsidiaries = Fmake_vector (make_number (3), Qnil);
7701 for (i = 0; i < 3; i++)
7702 {
7703 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7704 ASET (subsidiaries, i, intern (buf));
7705 }
7706 return subsidiaries;
7707 }
7708
7709
7710 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7711 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7712 doc: /* For internal use only.
7713 usage: (define-coding-system-internal ...) */)
7714 (nargs, args)
7715 int nargs;
7716 Lisp_Object *args;
7717 {
7718 Lisp_Object name;
7719 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7720 Lisp_Object attrs; /* Vector of attributes. */
7721 Lisp_Object eol_type;
7722 Lisp_Object aliases;
7723 Lisp_Object coding_type, charset_list, safe_charsets;
7724 enum coding_category category;
7725 Lisp_Object tail, val;
7726 int max_charset_id = 0;
7727 int i;
7728
7729 if (nargs < coding_arg_max)
7730 goto short_args;
7731
7732 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7733
7734 name = args[coding_arg_name];
7735 CHECK_SYMBOL (name);
7736 CODING_ATTR_BASE_NAME (attrs) = name;
7737
7738 val = args[coding_arg_mnemonic];
7739 if (! STRINGP (val))
7740 CHECK_CHARACTER (val);
7741 CODING_ATTR_MNEMONIC (attrs) = val;
7742
7743 coding_type = args[coding_arg_coding_type];
7744 CHECK_SYMBOL (coding_type);
7745 CODING_ATTR_TYPE (attrs) = coding_type;
7746
7747 charset_list = args[coding_arg_charset_list];
7748 if (SYMBOLP (charset_list))
7749 {
7750 if (EQ (charset_list, Qiso_2022))
7751 {
7752 if (! EQ (coding_type, Qiso_2022))
7753 error ("Invalid charset-list");
7754 charset_list = Viso_2022_charset_list;
7755 }
7756 else if (EQ (charset_list, Qemacs_mule))
7757 {
7758 if (! EQ (coding_type, Qemacs_mule))
7759 error ("Invalid charset-list");
7760 charset_list = Vemacs_mule_charset_list;
7761 }
7762 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7763 if (max_charset_id < XFASTINT (XCAR (tail)))
7764 max_charset_id = XFASTINT (XCAR (tail));
7765 }
7766 else
7767 {
7768 charset_list = Fcopy_sequence (charset_list);
7769 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7770 {
7771 struct charset *charset;
7772
7773 val = Fcar (tail);
7774 CHECK_CHARSET_GET_CHARSET (val, charset);
7775 if (EQ (coding_type, Qiso_2022)
7776 ? CHARSET_ISO_FINAL (charset) < 0
7777 : EQ (coding_type, Qemacs_mule)
7778 ? CHARSET_EMACS_MULE_ID (charset) < 0
7779 : 0)
7780 error ("Can't handle charset `%s'",
7781 XSYMBOL (CHARSET_NAME (charset))->name->data);
7782
7783 XCAR (tail) = make_number (charset->id);
7784 if (max_charset_id < charset->id)
7785 max_charset_id = charset->id;
7786 }
7787 }
7788 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7789
7790 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7791 make_number (255));
7792 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7793 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7794 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7795
7796 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7797
7798 val = args[coding_arg_decode_translation_table];
7799 if (! NILP (val))
7800 CHECK_CHAR_TABLE (val);
7801 CODING_ATTR_DECODE_TBL (attrs) = val;
7802
7803 val = args[coding_arg_encode_translation_table];
7804 if (! NILP (val))
7805 CHECK_CHAR_TABLE (val);
7806 CODING_ATTR_ENCODE_TBL (attrs) = val;
7807
7808 val = args[coding_arg_post_read_conversion];
7809 CHECK_SYMBOL (val);
7810 CODING_ATTR_POST_READ (attrs) = val;
7811
7812 val = args[coding_arg_pre_write_conversion];
7813 CHECK_SYMBOL (val);
7814 CODING_ATTR_PRE_WRITE (attrs) = val;
7815
7816 val = args[coding_arg_default_char];
7817 if (NILP (val))
7818 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7819 else
7820 {
7821 CHECK_CHARACTER (val);
7822 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7823 }
7824
7825 val = args[coding_arg_plist];
7826 CHECK_LIST (val);
7827 CODING_ATTR_PLIST (attrs) = val;
7828
7829 if (EQ (coding_type, Qcharset))
7830 {
7831 /* Generate a lisp vector of 256 elements. Each element is nil,
7832 integer, or a list of charset IDs.
7833
7834 If Nth element is nil, the byte code N is invalid in this
7835 coding system.
7836
7837 If Nth element is a number NUM, N is the first byte of a
7838 charset whose ID is NUM.
7839
7840 If Nth element is a list of charset IDs, N is the first byte
7841 of one of them. The list is sorted by dimensions of the
7842 charsets. A charset of smaller dimension comes firtst.
7843 */
7844 val = Fmake_vector (make_number (256), Qnil);
7845
7846 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7847 {
7848 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7849 int dim = CHARSET_DIMENSION (charset);
7850 int idx = (dim - 1) * 4;
7851
7852 if (CHARSET_ASCII_COMPATIBLE_P (charset))
7853 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7854
7855 for (i = charset->code_space[idx];
7856 i <= charset->code_space[idx + 1]; i++)
7857 {
7858 Lisp_Object tmp, tmp2;
7859 int dim2;
7860
7861 tmp = AREF (val, i);
7862 if (NILP (tmp))
7863 tmp = XCAR (tail);
7864 else if (NUMBERP (tmp))
7865 {
7866 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7867 if (dim < dim2)
7868 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
7869 else
7870 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
7871 }
7872 else
7873 {
7874 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7875 {
7876 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7877 if (dim < dim2)
7878 break;
7879 }
7880 if (NILP (tmp2))
7881 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7882 else
7883 {
7884 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7885 XSETCAR (tmp2, XCAR (tail));
7886 }
7887 }
7888 ASET (val, i, tmp);
7889 }
7890 }
7891 ASET (attrs, coding_attr_charset_valids, val);
7892 category = coding_category_charset;
7893 }
7894 else if (EQ (coding_type, Qccl))
7895 {
7896 Lisp_Object valids;
7897
7898 if (nargs < coding_arg_ccl_max)
7899 goto short_args;
7900
7901 val = args[coding_arg_ccl_decoder];
7902 CHECK_CCL_PROGRAM (val);
7903 if (VECTORP (val))
7904 val = Fcopy_sequence (val);
7905 ASET (attrs, coding_attr_ccl_decoder, val);
7906
7907 val = args[coding_arg_ccl_encoder];
7908 CHECK_CCL_PROGRAM (val);
7909 if (VECTORP (val))
7910 val = Fcopy_sequence (val);
7911 ASET (attrs, coding_attr_ccl_encoder, val);
7912
7913 val = args[coding_arg_ccl_valids];
7914 valids = Fmake_string (make_number (256), make_number (0));
7915 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7916 {
7917 int from, to;
7918
7919 val = Fcar (tail);
7920 if (INTEGERP (val))
7921 {
7922 from = to = XINT (val);
7923 if (from < 0 || from > 255)
7924 args_out_of_range_3 (val, make_number (0), make_number (255));
7925 }
7926 else
7927 {
7928 CHECK_CONS (val);
7929 CHECK_NUMBER (XCAR (val));
7930 CHECK_NUMBER (XCDR (val));
7931 from = XINT (XCAR (val));
7932 if (from < 0 || from > 255)
7933 args_out_of_range_3 (XCAR (val),
7934 make_number (0), make_number (255));
7935 to = XINT (XCDR (val));
7936 if (to < from || to > 255)
7937 args_out_of_range_3 (XCDR (val),
7938 XCAR (val), make_number (255));
7939 }
7940 for (i = from; i <= to; i++)
7941 XSTRING (valids)->data[i] = 1;
7942 }
7943 ASET (attrs, coding_attr_ccl_valids, valids);
7944
7945 category = coding_category_ccl;
7946 }
7947 else if (EQ (coding_type, Qutf_16))
7948 {
7949 Lisp_Object bom, endian;
7950
7951 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
7952
7953 if (nargs < coding_arg_utf16_max)
7954 goto short_args;
7955
7956 bom = args[coding_arg_utf16_bom];
7957 if (! NILP (bom) && ! EQ (bom, Qt))
7958 {
7959 CHECK_CONS (bom);
7960 CHECK_CODING_SYSTEM (XCAR (bom));
7961 CHECK_CODING_SYSTEM (XCDR (bom));
7962 }
7963 ASET (attrs, coding_attr_utf_16_bom, bom);
7964
7965 endian = args[coding_arg_utf16_endian];
7966 ASET (attrs, coding_attr_utf_16_endian, endian);
7967
7968 category = (CONSP (bom)
7969 ? coding_category_utf_16_auto
7970 : NILP (bom)
7971 ? (NILP (endian)
7972 ? coding_category_utf_16_be_nosig
7973 : coding_category_utf_16_le_nosig)
7974 : (NILP (endian)
7975 ? coding_category_utf_16_be
7976 : coding_category_utf_16_le));
7977 }
7978 else if (EQ (coding_type, Qiso_2022))
7979 {
7980 Lisp_Object initial, reg_usage, request, flags;
7981 int i, id;
7982
7983 if (nargs < coding_arg_iso2022_max)
7984 goto short_args;
7985
7986 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7987 CHECK_VECTOR (initial);
7988 for (i = 0; i < 4; i++)
7989 {
7990 val = Faref (initial, make_number (i));
7991 if (! NILP (val))
7992 {
7993 struct charset *charset;
7994
7995 CHECK_CHARSET_GET_CHARSET (val, charset);
7996 ASET (initial, i, make_number (CHARSET_ID (charset)));
7997 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
7998 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7999 }
8000 else
8001 ASET (initial, i, make_number (-1));
8002 }
8003
8004 reg_usage = args[coding_arg_iso2022_reg_usage];
8005 CHECK_CONS (reg_usage);
8006 CHECK_NATNUM (XCAR (reg_usage));
8007 CHECK_NATNUM (XCDR (reg_usage));
8008
8009 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8010 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8011 {
8012 int id;
8013
8014 val = Fcar (tail);
8015 CHECK_CONS (val);
8016 CHECK_CHARSET_GET_ID (XCAR (val), id);
8017 CHECK_NATNUM (XCDR (val));
8018 if (XINT (XCDR (val)) >= 4)
8019 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8020 XCAR (val) = make_number (id);
8021 }
8022
8023 flags = args[coding_arg_iso2022_flags];
8024 CHECK_NATNUM (flags);
8025 i = XINT (flags);
8026 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8027 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8028
8029 ASET (attrs, coding_attr_iso_initial, initial);
8030 ASET (attrs, coding_attr_iso_usage, reg_usage);
8031 ASET (attrs, coding_attr_iso_request, request);
8032 ASET (attrs, coding_attr_iso_flags, flags);
8033 setup_iso_safe_charsets (attrs);
8034
8035 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8036 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8037 | CODING_ISO_FLAG_SINGLE_SHIFT))
8038 ? coding_category_iso_7_else
8039 : EQ (args[coding_arg_charset_list], Qiso_2022)
8040 ? coding_category_iso_7
8041 : coding_category_iso_7_tight);
8042 else
8043 {
8044 int id = XINT (AREF (initial, 1));
8045
8046 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8047 || EQ (args[coding_arg_charset_list], Qiso_2022)
8048 || id < 0)
8049 ? coding_category_iso_8_else
8050 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8051 ? coding_category_iso_8_1
8052 : coding_category_iso_8_2);
8053 }
8054 if (category != coding_category_iso_8_1
8055 && category != coding_category_iso_8_2)
8056 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8057 }
8058 else if (EQ (coding_type, Qemacs_mule))
8059 {
8060 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8061 ASET (attrs, coding_attr_emacs_mule_full, Qt);
8062 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8063 category = coding_category_emacs_mule;
8064 }
8065 else if (EQ (coding_type, Qshift_jis))
8066 {
8067
8068 struct charset *charset;
8069
8070 if (XINT (Flength (charset_list)) != 3)
8071 error ("There should be just three charsets");
8072
8073 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8074 if (CHARSET_DIMENSION (charset) != 1)
8075 error ("Dimension of charset %s is not one",
8076 XSYMBOL (CHARSET_NAME (charset))->name->data);
8077 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8078 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8079
8080 charset_list = XCDR (charset_list);
8081 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8082 if (CHARSET_DIMENSION (charset) != 1)
8083 error ("Dimension of charset %s is not one",
8084 XSYMBOL (CHARSET_NAME (charset))->name->data);
8085
8086 charset_list = XCDR (charset_list);
8087 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8088 if (CHARSET_DIMENSION (charset) != 2)
8089 error ("Dimension of charset %s is not two",
8090 XSYMBOL (CHARSET_NAME (charset))->name->data);
8091
8092 category = coding_category_sjis;
8093 Vsjis_coding_system = name;
8094 }
8095 else if (EQ (coding_type, Qbig5))
8096 {
8097 struct charset *charset;
8098
8099 if (XINT (Flength (charset_list)) != 2)
8100 error ("There should be just two charsets");
8101
8102 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8103 if (CHARSET_DIMENSION (charset) != 1)
8104 error ("Dimension of charset %s is not one",
8105 XSYMBOL (CHARSET_NAME (charset))->name->data);
8106 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8107 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8108
8109 charset_list = XCDR (charset_list);
8110 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8111 if (CHARSET_DIMENSION (charset) != 2)
8112 error ("Dimension of charset %s is not two",
8113 XSYMBOL (CHARSET_NAME (charset))->name->data);
8114
8115 category = coding_category_big5;
8116 Vbig5_coding_system = name;
8117 }
8118 else if (EQ (coding_type, Qraw_text))
8119 {
8120 category = coding_category_raw_text;
8121 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8122 }
8123 else if (EQ (coding_type, Qutf_8))
8124 {
8125 category = coding_category_utf_8;
8126 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8127 }
8128 else if (EQ (coding_type, Qundecided))
8129 category = coding_category_undecided;
8130 else
8131 error ("Invalid coding system type: %s",
8132 XSYMBOL (coding_type)->name->data);
8133
8134 CODING_ATTR_CATEGORY (attrs) = make_number (category);
8135
8136 eol_type = args[coding_arg_eol_type];
8137 if (! NILP (eol_type)
8138 && ! EQ (eol_type, Qunix)
8139 && ! EQ (eol_type, Qdos)
8140 && ! EQ (eol_type, Qmac))
8141 error ("Invalid eol-type");
8142
8143 aliases = Fcons (name, Qnil);
8144
8145 if (NILP (eol_type))
8146 {
8147 eol_type = make_subsidiaries (name);
8148 for (i = 0; i < 3; i++)
8149 {
8150 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8151
8152 this_name = AREF (eol_type, i);
8153 this_aliases = Fcons (this_name, Qnil);
8154 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8155 this_spec = Fmake_vector (make_number (3), attrs);
8156 ASET (this_spec, 1, this_aliases);
8157 ASET (this_spec, 2, this_eol_type);
8158 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8159 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8160 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8161 Vcoding_system_alist);
8162 }
8163 }
8164
8165 spec_vec = Fmake_vector (make_number (3), attrs);
8166 ASET (spec_vec, 1, aliases);
8167 ASET (spec_vec, 2, eol_type);
8168
8169 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8170 Vcoding_system_list = Fcons (name, Vcoding_system_list);
8171 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8172 Vcoding_system_alist);
8173
8174 {
8175 int id = coding_categories[category].id;
8176
8177 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8178 setup_coding_system (name, &coding_categories[category]);
8179 }
8180
8181 return Qnil;
8182
8183 short_args:
8184 return Fsignal (Qwrong_number_of_arguments,
8185 Fcons (intern ("define-coding-system-internal"),
8186 make_number (nargs)));
8187 }
8188
8189 /* Fixme: should this record the alias relationships for
8190 diagnostics? Should it update coding-system-list? */
8191 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8192 Sdefine_coding_system_alias, 2, 2, 0,
8193 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8194 (alias, coding_system)
8195 Lisp_Object alias, coding_system;
8196 {
8197 Lisp_Object spec, aliases, eol_type;
8198
8199 CHECK_SYMBOL (alias);
8200 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8201 aliases = AREF (spec, 1);
8202 while (!NILP (XCDR (aliases)))
8203 aliases = XCDR (aliases);
8204 XCDR (aliases) = Fcons (alias, Qnil);
8205
8206 eol_type = AREF (spec, 2);
8207 if (VECTORP (eol_type))
8208 {
8209 Lisp_Object subsidiaries;
8210 int i;
8211
8212 subsidiaries = make_subsidiaries (alias);
8213 for (i = 0; i < 3; i++)
8214 Fdefine_coding_system_alias (AREF (subsidiaries, i),
8215 AREF (eol_type, i));
8216
8217 ASET (spec, 2, subsidiaries);
8218 }
8219
8220 Fputhash (alias, spec, Vcoding_system_hash_table);
8221 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8222 Vcoding_system_alist);
8223
8224 return Qnil;
8225 }
8226
8227 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8228 1, 1, 0,
8229 doc: /* Return the base of CODING-SYSTEM.
8230 Any alias or subsidiary coding system is not a base coding system. */)
8231 (coding_system)
8232 Lisp_Object coding_system;
8233 {
8234 Lisp_Object spec, attrs;
8235
8236 if (NILP (coding_system))
8237 return (Qno_conversion);
8238 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8239 attrs = AREF (spec, 0);
8240 return CODING_ATTR_BASE_NAME (attrs);
8241 }
8242
8243 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8244 1, 1, 0,
8245 doc: "Return the property list of CODING-SYSTEM.")
8246 (coding_system)
8247 Lisp_Object coding_system;
8248 {
8249 Lisp_Object spec, attrs;
8250
8251 if (NILP (coding_system))
8252 coding_system = Qno_conversion;
8253 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8254 attrs = AREF (spec, 0);
8255 return CODING_ATTR_PLIST (attrs);
8256 }
8257
8258
8259 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8260 1, 1, 0,
8261 doc: /* Return the list of aliases of CODING-SYSTEM. */)
8262 (coding_system)
8263 Lisp_Object coding_system;
8264 {
8265 Lisp_Object spec;
8266
8267 if (NILP (coding_system))
8268 coding_system = Qno_conversion;
8269 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8270 return AREF (spec, 1);
8271 }
8272
8273 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8274 Scoding_system_eol_type, 1, 1, 0,
8275 doc: /* Return eol-type of CODING-SYSTEM.
8276 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8277
8278 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8279 and CR respectively.
8280
8281 A vector value indicates that a format of end-of-line should be
8282 detected automatically. Nth element of the vector is the subsidiary
8283 coding system whose eol-type is N. */)
8284 (coding_system)
8285 Lisp_Object coding_system;
8286 {
8287 Lisp_Object spec, eol_type;
8288 int n;
8289
8290 if (NILP (coding_system))
8291 coding_system = Qno_conversion;
8292 if (! CODING_SYSTEM_P (coding_system))
8293 return Qnil;
8294 spec = CODING_SYSTEM_SPEC (coding_system);
8295 eol_type = AREF (spec, 2);
8296 if (VECTORP (eol_type))
8297 return Fcopy_sequence (eol_type);
8298 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8299 return make_number (n);
8300 }
8301
8302 #endif /* emacs */
8303
8304 \f
8305 /*** 9. Post-amble ***/
8306
8307 void
8308 init_coding_once ()
8309 {
8310 int i;
8311
8312 for (i = 0; i < coding_category_max; i++)
8313 {
8314 coding_categories[i].id = -1;
8315 coding_priorities[i] = i;
8316 }
8317
8318 /* ISO2022 specific initialize routine. */
8319 for (i = 0; i < 0x20; i++)
8320 iso_code_class[i] = ISO_control_0;
8321 for (i = 0x21; i < 0x7F; i++)
8322 iso_code_class[i] = ISO_graphic_plane_0;
8323 for (i = 0x80; i < 0xA0; i++)
8324 iso_code_class[i] = ISO_control_1;
8325 for (i = 0xA1; i < 0xFF; i++)
8326 iso_code_class[i] = ISO_graphic_plane_1;
8327 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8328 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8329 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8330 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8331 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8332 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8333 iso_code_class[ISO_CODE_ESC] = ISO_escape;
8334 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8335 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8336 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8337
8338 inhibit_pre_post_conversion = 0;
8339
8340 for (i = 0; i < 256; i++)
8341 {
8342 emacs_mule_bytes[i] = 1;
8343 }
8344 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8345 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8346 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8347 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8348 }
8349
8350 #ifdef emacs
8351
8352 void
8353 syms_of_coding ()
8354 {
8355 staticpro (&Vcoding_system_hash_table);
8356 Vcoding_system_hash_table = Fmakehash (Qeq);
8357
8358 staticpro (&Vsjis_coding_system);
8359 Vsjis_coding_system = Qnil;
8360
8361 staticpro (&Vbig5_coding_system);
8362 Vbig5_coding_system = Qnil;
8363
8364 staticpro (&Vcode_conversion_work_buf_list);
8365 Vcode_conversion_work_buf_list = Qnil;
8366
8367 staticpro (&Vcode_conversion_reused_work_buf);
8368 Vcode_conversion_reused_work_buf = Qnil;
8369
8370 DEFSYM (Qcharset, "charset");
8371 DEFSYM (Qtarget_idx, "target-idx");
8372 DEFSYM (Qcoding_system_history, "coding-system-history");
8373 Fset (Qcoding_system_history, Qnil);
8374
8375 /* Target FILENAME is the first argument. */
8376 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8377 /* Target FILENAME is the third argument. */
8378 Fput (Qwrite_region, Qtarget_idx, make_number (2));
8379
8380 DEFSYM (Qcall_process, "call-process");
8381 /* Target PROGRAM is the first argument. */
8382 Fput (Qcall_process, Qtarget_idx, make_number (0));
8383
8384 DEFSYM (Qcall_process_region, "call-process-region");
8385 /* Target PROGRAM is the third argument. */
8386 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8387
8388 DEFSYM (Qstart_process, "start-process");
8389 /* Target PROGRAM is the third argument. */
8390 Fput (Qstart_process, Qtarget_idx, make_number (2));
8391
8392 DEFSYM (Qopen_network_stream, "open-network-stream");
8393 /* Target SERVICE is the fourth argument. */
8394 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8395
8396 DEFSYM (Qcoding_system, "coding-system");
8397 DEFSYM (Qcoding_aliases, "coding-aliases");
8398
8399 DEFSYM (Qeol_type, "eol-type");
8400 DEFSYM (Qunix, "unix");
8401 DEFSYM (Qdos, "dos");
8402
8403 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8404 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8405 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8406 DEFSYM (Qdefault_char, "default-char");
8407 DEFSYM (Qundecided, "undecided");
8408 DEFSYM (Qno_conversion, "no-conversion");
8409 DEFSYM (Qraw_text, "raw-text");
8410
8411 DEFSYM (Qiso_2022, "iso-2022");
8412
8413 DEFSYM (Qutf_8, "utf-8");
8414
8415 DEFSYM (Qutf_16, "utf-16");
8416 DEFSYM (Qutf_16_be, "utf-16-be");
8417 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
8418 DEFSYM (Qutf_16_le, "utf-16-l3");
8419 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
8420 DEFSYM (Qsignature, "signature");
8421 DEFSYM (Qendian, "endian");
8422 DEFSYM (Qbig, "big");
8423 DEFSYM (Qlittle, "little");
8424
8425 DEFSYM (Qshift_jis, "shift-jis");
8426 DEFSYM (Qbig5, "big5");
8427
8428 DEFSYM (Qcoding_system_p, "coding-system-p");
8429
8430 DEFSYM (Qcoding_system_error, "coding-system-error");
8431 Fput (Qcoding_system_error, Qerror_conditions,
8432 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8433 Fput (Qcoding_system_error, Qerror_message,
8434 build_string ("Invalid coding system"));
8435
8436 /* Intern this now in case it isn't already done.
8437 Setting this variable twice is harmless.
8438 But don't staticpro it here--that is done in alloc.c. */
8439 Qchar_table_extra_slots = intern ("char-table-extra-slots");
8440
8441 DEFSYM (Qtranslation_table, "translation-table");
8442 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8443 DEFSYM (Qtranslation_table_id, "translation-table-id");
8444 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8445 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8446
8447 DEFSYM (Qvalid_codes, "valid-codes");
8448
8449 DEFSYM (Qemacs_mule, "emacs-mule");
8450
8451 Vcoding_category_table
8452 = Fmake_vector (make_number (coding_category_max), Qnil);
8453 staticpro (&Vcoding_category_table);
8454 /* Followings are target of code detection. */
8455 ASET (Vcoding_category_table, coding_category_iso_7,
8456 intern ("coding-category-iso-7"));
8457 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8458 intern ("coding-category-iso-7-tight"));
8459 ASET (Vcoding_category_table, coding_category_iso_8_1,
8460 intern ("coding-category-iso-8-1"));
8461 ASET (Vcoding_category_table, coding_category_iso_8_2,
8462 intern ("coding-category-iso-8-2"));
8463 ASET (Vcoding_category_table, coding_category_iso_7_else,
8464 intern ("coding-category-iso-7-else"));
8465 ASET (Vcoding_category_table, coding_category_iso_8_else,
8466 intern ("coding-category-iso-8-else"));
8467 ASET (Vcoding_category_table, coding_category_utf_8,
8468 intern ("coding-category-utf-8"));
8469 ASET (Vcoding_category_table, coding_category_utf_16_be,
8470 intern ("coding-category-utf-16-be"));
8471 ASET (Vcoding_category_table, coding_category_utf_16_le,
8472 intern ("coding-category-utf-16-le"));
8473 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8474 intern ("coding-category-utf-16-be-nosig"));
8475 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8476 intern ("coding-category-utf-16-le-nosig"));
8477 ASET (Vcoding_category_table, coding_category_charset,
8478 intern ("coding-category-charset"));
8479 ASET (Vcoding_category_table, coding_category_sjis,
8480 intern ("coding-category-sjis"));
8481 ASET (Vcoding_category_table, coding_category_big5,
8482 intern ("coding-category-big5"));
8483 ASET (Vcoding_category_table, coding_category_ccl,
8484 intern ("coding-category-ccl"));
8485 ASET (Vcoding_category_table, coding_category_emacs_mule,
8486 intern ("coding-category-emacs-mule"));
8487 /* Followings are NOT target of code detection. */
8488 ASET (Vcoding_category_table, coding_category_raw_text,
8489 intern ("coding-category-raw-text"));
8490 ASET (Vcoding_category_table, coding_category_undecided,
8491 intern ("coding-category-undecided"));
8492
8493 defsubr (&Scoding_system_p);
8494 defsubr (&Sread_coding_system);
8495 defsubr (&Sread_non_nil_coding_system);
8496 defsubr (&Scheck_coding_system);
8497 defsubr (&Sdetect_coding_region);
8498 defsubr (&Sdetect_coding_string);
8499 defsubr (&Sfind_coding_systems_region_internal);
8500 defsubr (&Scheck_coding_systems_region);
8501 defsubr (&Sdecode_coding_region);
8502 defsubr (&Sencode_coding_region);
8503 defsubr (&Sdecode_coding_string);
8504 defsubr (&Sencode_coding_string);
8505 defsubr (&Sdecode_sjis_char);
8506 defsubr (&Sencode_sjis_char);
8507 defsubr (&Sdecode_big5_char);
8508 defsubr (&Sencode_big5_char);
8509 defsubr (&Sset_terminal_coding_system_internal);
8510 defsubr (&Sset_safe_terminal_coding_system_internal);
8511 defsubr (&Sterminal_coding_system);
8512 defsubr (&Sset_keyboard_coding_system_internal);
8513 defsubr (&Skeyboard_coding_system);
8514 defsubr (&Sfind_operation_coding_system);
8515 defsubr (&Sset_coding_system_priority);
8516 defsubr (&Sdefine_coding_system_internal);
8517 defsubr (&Sdefine_coding_system_alias);
8518 defsubr (&Scoding_system_base);
8519 defsubr (&Scoding_system_plist);
8520 defsubr (&Scoding_system_aliases);
8521 defsubr (&Scoding_system_eol_type);
8522 defsubr (&Scoding_system_priority_list);
8523
8524 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8525 doc: /* List of coding systems.
8526
8527 Do not alter the value of this variable manually. This variable should be
8528 updated by the functions `define-coding-system' and
8529 `define-coding-system-alias'. */);
8530 Vcoding_system_list = Qnil;
8531
8532 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8533 doc: /* Alist of coding system names.
8534 Each element is one element list of coding system name.
8535 This variable is given to `completing-read' as TABLE argument.
8536
8537 Do not alter the value of this variable manually. This variable should be
8538 updated by the functions `make-coding-system' and
8539 `define-coding-system-alias'. */);
8540 Vcoding_system_alist = Qnil;
8541
8542 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8543 doc: /* List of coding-categories (symbols) ordered by priority.
8544
8545 On detecting a coding system, Emacs tries code detection algorithms
8546 associated with each coding-category one by one in this order. When
8547 one algorithm agrees with a byte sequence of source text, the coding
8548 system bound to the corresponding coding-category is selected. */);
8549 {
8550 int i;
8551
8552 Vcoding_category_list = Qnil;
8553 for (i = coding_category_max - 1; i >= 0; i--)
8554 Vcoding_category_list
8555 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8556 Vcoding_category_list);
8557 }
8558
8559 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8560 doc: /* Specify the coding system for read operations.
8561 It is useful to bind this variable with `let', but do not set it globally.
8562 If the value is a coding system, it is used for decoding on read operation.
8563 If not, an appropriate element is used from one of the coding system alists:
8564 There are three such tables, `file-coding-system-alist',
8565 `process-coding-system-alist', and `network-coding-system-alist'. */);
8566 Vcoding_system_for_read = Qnil;
8567
8568 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8569 doc: /* Specify the coding system for write operations.
8570 Programs bind this variable with `let', but you should not set it globally.
8571 If the value is a coding system, it is used for encoding of output,
8572 when writing it to a file and when sending it to a file or subprocess.
8573
8574 If this does not specify a coding system, an appropriate element
8575 is used from one of the coding system alists:
8576 There are three such tables, `file-coding-system-alist',
8577 `process-coding-system-alist', and `network-coding-system-alist'.
8578 For output to files, if the above procedure does not specify a coding system,
8579 the value of `buffer-file-coding-system' is used. */);
8580 Vcoding_system_for_write = Qnil;
8581
8582 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8583 doc: /*
8584 Coding system used in the latest file or process I/O. */);
8585 Vlast_coding_system_used = Qnil;
8586
8587 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8588 doc: /*
8589 *Non-nil means always inhibit code conversion of end-of-line format.
8590 See info node `Coding Systems' and info node `Text and Binary' concerning
8591 such conversion. */);
8592 inhibit_eol_conversion = 0;
8593
8594 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8595 doc: /*
8596 Non-nil means process buffer inherits coding system of process output.
8597 Bind it to t if the process output is to be treated as if it were a file
8598 read from some filesystem. */);
8599 inherit_process_coding_system = 0;
8600
8601 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8602 doc: /*
8603 Alist to decide a coding system to use for a file I/O operation.
8604 The format is ((PATTERN . VAL) ...),
8605 where PATTERN is a regular expression matching a file name,
8606 VAL is a coding system, a cons of coding systems, or a function symbol.
8607 If VAL is a coding system, it is used for both decoding and encoding
8608 the file contents.
8609 If VAL is a cons of coding systems, the car part is used for decoding,
8610 and the cdr part is used for encoding.
8611 If VAL is a function symbol, the function must return a coding system
8612 or a cons of coding systems which are used as above. The function gets
8613 the arguments with which `find-operation-coding-systems' was called.
8614
8615 See also the function `find-operation-coding-system'
8616 and the variable `auto-coding-alist'. */);
8617 Vfile_coding_system_alist = Qnil;
8618
8619 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8620 doc: /*
8621 Alist to decide a coding system to use for a process I/O operation.
8622 The format is ((PATTERN . VAL) ...),
8623 where PATTERN is a regular expression matching a program name,
8624 VAL is a coding system, a cons of coding systems, or a function symbol.
8625 If VAL is a coding system, it is used for both decoding what received
8626 from the program and encoding what sent to the program.
8627 If VAL is a cons of coding systems, the car part is used for decoding,
8628 and the cdr part is used for encoding.
8629 If VAL is a function symbol, the function must return a coding system
8630 or a cons of coding systems which are used as above.
8631
8632 See also the function `find-operation-coding-system'. */);
8633 Vprocess_coding_system_alist = Qnil;
8634
8635 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8636 doc: /*
8637 Alist to decide a coding system to use for a network I/O operation.
8638 The format is ((PATTERN . VAL) ...),
8639 where PATTERN is a regular expression matching a network service name
8640 or is a port number to connect to,
8641 VAL is a coding system, a cons of coding systems, or a function symbol.
8642 If VAL is a coding system, it is used for both decoding what received
8643 from the network stream and encoding what sent to the network stream.
8644 If VAL is a cons of coding systems, the car part is used for decoding,
8645 and the cdr part is used for encoding.
8646 If VAL is a function symbol, the function must return a coding system
8647 or a cons of coding systems which are used as above.
8648
8649 See also the function `find-operation-coding-system'. */);
8650 Vnetwork_coding_system_alist = Qnil;
8651
8652 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8653 doc: /* Coding system to use with system messages.
8654 Also used for decoding keyboard input on X Window system. */);
8655 Vlocale_coding_system = Qnil;
8656
8657 /* The eol mnemonics are reset in startup.el system-dependently. */
8658 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8659 doc: /*
8660 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8661 eol_mnemonic_unix = build_string (":");
8662
8663 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8664 doc: /*
8665 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8666 eol_mnemonic_dos = build_string ("\\");
8667
8668 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8669 doc: /*
8670 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8671 eol_mnemonic_mac = build_string ("/");
8672
8673 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8674 doc: /*
8675 *String displayed in mode line when end-of-line format is not yet determined. */);
8676 eol_mnemonic_undecided = build_string (":");
8677
8678 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8679 doc: /*
8680 *Non-nil enables character translation while encoding and decoding. */);
8681 Venable_character_translation = Qt;
8682
8683 DEFVAR_LISP ("standard-translation-table-for-decode",
8684 &Vstandard_translation_table_for_decode,
8685 doc: /* Table for translating characters while decoding. */);
8686 Vstandard_translation_table_for_decode = Qnil;
8687
8688 DEFVAR_LISP ("standard-translation-table-for-encode",
8689 &Vstandard_translation_table_for_encode,
8690 doc: /* Table for translating characters while encoding. */);
8691 Vstandard_translation_table_for_encode = Qnil;
8692
8693 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8694 doc: /* Alist of charsets vs revision numbers.
8695 While encoding, if a charset (car part of an element) is found,
8696 designate it with the escape sequence identifying revision (cdr part
8697 of the element). */);
8698 Vcharset_revision_table = Qnil;
8699
8700 DEFVAR_LISP ("default-process-coding-system",
8701 &Vdefault_process_coding_system,
8702 doc: /* Cons of coding systems used for process I/O by default.
8703 The car part is used for decoding a process output,
8704 the cdr part is used for encoding a text to be sent to a process. */);
8705 Vdefault_process_coding_system = Qnil;
8706
8707 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8708 doc: /*
8709 Table of extra Latin codes in the range 128..159 (inclusive).
8710 This is a vector of length 256.
8711 If Nth element is non-nil, the existence of code N in a file
8712 \(or output of subprocess) doesn't prevent it to be detected as
8713 a coding system of ISO 2022 variant which has a flag
8714 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8715 or reading output of a subprocess.
8716 Only 128th through 159th elements has a meaning. */);
8717 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8718
8719 DEFVAR_LISP ("select-safe-coding-system-function",
8720 &Vselect_safe_coding_system_function,
8721 doc: /*
8722 Function to call to select safe coding system for encoding a text.
8723
8724 If set, this function is called to force a user to select a proper
8725 coding system which can encode the text in the case that a default
8726 coding system used in each operation can't encode the text.
8727
8728 The default value is `select-safe-coding-system' (which see). */);
8729 Vselect_safe_coding_system_function = Qnil;
8730
8731 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8732 &inhibit_iso_escape_detection,
8733 doc: /*
8734 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8735
8736 By default, on reading a file, Emacs tries to detect how the text is
8737 encoded. This code detection is sensitive to escape sequences. If
8738 the sequence is valid as ISO2022, the code is determined as one of
8739 the ISO2022 encodings, and the file is decoded by the corresponding
8740 coding system (e.g. `iso-2022-7bit').
8741
8742 However, there may be a case that you want to read escape sequences in
8743 a file as is. In such a case, you can set this variable to non-nil.
8744 Then, as the code detection ignores any escape sequences, no file is
8745 detected as encoded in some ISO2022 encoding. The result is that all
8746 escape sequences become visible in a buffer.
8747
8748 The default value is nil, and it is strongly recommended not to change
8749 it. That is because many Emacs Lisp source files that contain
8750 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8751 in Emacs's distribution, and they won't be decoded correctly on
8752 reading if you suppress escape sequence detection.
8753
8754 The other way to read escape sequences in a file without decoding is
8755 to explicitly specify some coding system that doesn't use ISO2022's
8756 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8757 inhibit_iso_escape_detection = 0;
8758
8759 {
8760 Lisp_Object args[coding_arg_max];
8761 Lisp_Object plist[14];
8762 int i;
8763
8764 for (i = 0; i < coding_arg_max; i++)
8765 args[i] = Qnil;
8766
8767 plist[0] = intern (":name");
8768 plist[1] = args[coding_arg_name] = Qno_conversion;
8769 plist[2] = intern (":mnemonic");
8770 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8771 plist[4] = intern (":coding-type");
8772 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8773 plist[6] = intern (":ascii-compatible-p");
8774 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8775 plist[8] = intern (":default-char");
8776 plist[9] = args[coding_arg_default_char] = make_number (0);
8777 plist[10] = intern (":docstring");
8778 plist[11] = build_string ("Do no conversion.\n\
8779 \n\
8780 When you visit a file with this coding, the file is read into a\n\
8781 unibyte buffer as is, thus each byte of a file is treated as a\n\
8782 character.");
8783 plist[12] = intern (":eol-type");
8784 plist[13] = args[coding_arg_eol_type] = Qunix;
8785 args[coding_arg_plist] = Flist (14, plist);
8786 Fdefine_coding_system_internal (coding_arg_max, args);
8787 }
8788
8789 setup_coding_system (Qno_conversion, &keyboard_coding);
8790 setup_coding_system (Qno_conversion, &terminal_coding);
8791 setup_coding_system (Qno_conversion, &safe_terminal_coding);
8792 }
8793
8794 char *
8795 emacs_strerror (error_number)
8796 int error_number;
8797 {
8798 char *str;
8799
8800 synchronize_system_messages_locale ();
8801 str = strerror (error_number);
8802
8803 if (! NILP (Vlocale_coding_system))
8804 {
8805 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8806 Vlocale_coding_system,
8807 0);
8808 str = (char *) XSTRING (dec)->data;
8809 }
8810
8811 return str;
8812 }
8813
8814 #endif /* emacs */