(costs_set): Declare static, non-initialized for pcc.
[bpt/emacs.git] / src / coding.c
... / ...
CommitLineData
1/* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
25
26/*** TABLE OF CONTENTS ***
27
28 0. General comments
29 1. Preamble
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
41
42*/
43
44/*** 0. General comments ***
45
46
47CODING SYSTEM
48
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcharset_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
75 character set.
76
77 o Old Emacs internal format (emacs-mule)
78
79 The coding system adopted by old versions of Emacs (20 and 21).
80
81 o ISO2022-base coding system
82
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
92 section 8.
93
94 o BIG5
95
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
101
102 o CCL
103
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
108
109 o Raw-text
110
111 A coding system for a text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
114
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
118
119
120END-OF-LINE FORMAT
121
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
127
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
131
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
138
139*/
140
141/* COMMON MACROS */
142
143
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
157#if 0
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
162{
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
197
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
202
203 Below is the template of these functions. */
204
205#if 0
206static void
207decode_coding_XXXX (coding)
208 struct coding_system *coding;
209{
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
252
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
257
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
261
262 Below is a template of these functions. */
263#if 0
264static void
265encode_coding_XXX (coding)
266 struct coding_system *coding;
267{
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
285}
286#endif
287
288\f
289/*** 1. Preamble ***/
290
291#include <config.h>
292#include <stdio.h>
293
294#include "lisp.h"
295#include "buffer.h"
296#include "character.h"
297#include "charset.h"
298#include "ccl.h"
299#include "composite.h"
300#include "coding.h"
301#include "window.h"
302
303Lisp_Object Vcoding_system_hash_table;
304
305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
306Lisp_Object Qunix, Qdos, Qmac;
307Lisp_Object Qbuffer_file_coding_system;
308Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
309Lisp_Object Qdefault_char;
310Lisp_Object Qno_conversion, Qundecided;
311Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
312Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
313Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
314Lisp_Object Qcoding_system_history;
315Lisp_Object Qvalid_codes;
316
317extern Lisp_Object Qinsert_file_contents, Qwrite_region;
318Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
319Lisp_Object Qstart_process, Qopen_network_stream;
320Lisp_Object Qtarget_idx;
321
322Lisp_Object Vselect_safe_coding_system_function;
323
324/* Mnemonic string for each format of end-of-line. */
325Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
326/* Mnemonic string to indicate format of end-of-line is not yet
327 decided. */
328Lisp_Object eol_mnemonic_undecided;
329
330#ifdef emacs
331
332Lisp_Object Vcoding_system_list, Vcoding_system_alist;
333
334Lisp_Object Qcoding_system_p, Qcoding_system_error;
335
336/* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338Lisp_Object Qemacs_mule, Qraw_text;
339
340/* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342/* Coding-system for reading files and receiving data from process. */
343Lisp_Object Vcoding_system_for_read;
344/* Coding-system for writing files and sending data to process. */
345Lisp_Object Vcoding_system_for_write;
346/* Coding-system actually used in the latest I/O. */
347Lisp_Object Vlast_coding_system_used;
348
349/* A vector of length 256 which contains information about special
350 Latin codes (especially for dealing with Microsoft codes). */
351Lisp_Object Vlatin_extra_code_table;
352
353/* Flag to inhibit code conversion of end-of-line format. */
354int inhibit_eol_conversion;
355
356/* Flag to inhibit ISO2022 escape sequence detection. */
357int inhibit_iso_escape_detection;
358
359/* Flag to make buffer-file-coding-system inherit from process-coding. */
360int inherit_process_coding_system;
361
362/* Coding system to be used to encode text for terminal display. */
363struct coding_system terminal_coding;
364
365/* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367struct coding_system safe_terminal_coding;
368
369/* Coding system of what is sent from terminal keyboard. */
370struct coding_system keyboard_coding;
371
372Lisp_Object Vfile_coding_system_alist;
373Lisp_Object Vprocess_coding_system_alist;
374Lisp_Object Vnetwork_coding_system_alist;
375
376Lisp_Object Vlocale_coding_system;
377
378#endif /* emacs */
379
380/* Flag to tell if we look up translation table on character code
381 conversion. */
382Lisp_Object Venable_character_translation;
383/* Standard translation table to look up on decoding (reading). */
384Lisp_Object Vstandard_translation_table_for_decode;
385/* Standard translation table to look up on encoding (writing). */
386Lisp_Object Vstandard_translation_table_for_encode;
387
388Lisp_Object Qtranslation_table;
389Lisp_Object Qtranslation_table_id;
390Lisp_Object Qtranslation_table_for_decode;
391Lisp_Object Qtranslation_table_for_encode;
392
393/* Alist of charsets vs revision number. */
394static Lisp_Object Vcharset_revision_table;
395
396/* Default coding systems used for process I/O. */
397Lisp_Object Vdefault_process_coding_system;
398
399/* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403static int inhibit_pre_post_conversion;
404
405/* Two special coding systems. */
406Lisp_Object Vsjis_coding_system;
407Lisp_Object Vbig5_coding_system;
408
409
410static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
411static void decode_coding_utf_8 P_ ((struct coding_system *));
412static int encode_coding_utf_8 P_ ((struct coding_system *));
413
414static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
415static void decode_coding_utf_16 P_ ((struct coding_system *));
416static int encode_coding_utf_16 P_ ((struct coding_system *));
417
418static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
419static void decode_coding_iso_2022 P_ ((struct coding_system *));
420static int encode_coding_iso_2022 P_ ((struct coding_system *));
421
422static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
423static void decode_coding_emacs_mule P_ ((struct coding_system *));
424static int encode_coding_emacs_mule P_ ((struct coding_system *));
425
426static int detect_coding_sjis P_ ((struct coding_system *, int *));
427static void decode_coding_sjis P_ ((struct coding_system *));
428static int encode_coding_sjis P_ ((struct coding_system *));
429
430static int detect_coding_big5 P_ ((struct coding_system *, int *));
431static void decode_coding_big5 P_ ((struct coding_system *));
432static int encode_coding_big5 P_ ((struct coding_system *));
433
434static int detect_coding_ccl P_ ((struct coding_system *, int *));
435static void decode_coding_ccl P_ ((struct coding_system *));
436static int encode_coding_ccl P_ ((struct coding_system *));
437
438static void decode_coding_raw_text P_ ((struct coding_system *));
439static int encode_coding_raw_text P_ ((struct coding_system *));
440
441
442/* ISO2022 section */
443
444#define CODING_ISO_INITIAL(coding, reg) \
445 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
446 coding_attr_iso_initial), \
447 reg)))
448
449
450#define CODING_ISO_REQUEST(coding, charset_id) \
451 ((charset_id <= (coding)->max_charset_id \
452 ? (coding)->safe_charsets[charset_id] \
453 : -1))
454
455
456#define CODING_ISO_FLAGS(coding) \
457 ((coding)->spec.iso_2022.flags)
458#define CODING_ISO_DESIGNATION(coding, reg) \
459 ((coding)->spec.iso_2022.current_designation[reg])
460#define CODING_ISO_INVOCATION(coding, plane) \
461 ((coding)->spec.iso_2022.current_invocation[plane])
462#define CODING_ISO_SINGLE_SHIFTING(coding) \
463 ((coding)->spec.iso_2022.single_shifting)
464#define CODING_ISO_BOL(coding) \
465 ((coding)->spec.iso_2022.bol)
466#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
467 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
468
469/* Control characters of ISO2022. */
470 /* code */ /* function */
471#define ISO_CODE_LF 0x0A /* line-feed */
472#define ISO_CODE_CR 0x0D /* carriage-return */
473#define ISO_CODE_SO 0x0E /* shift-out */
474#define ISO_CODE_SI 0x0F /* shift-in */
475#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
476#define ISO_CODE_ESC 0x1B /* escape */
477#define ISO_CODE_SS2 0x8E /* single-shift-2 */
478#define ISO_CODE_SS3 0x8F /* single-shift-3 */
479#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
480
481/* All code (1-byte) of ISO2022 is classified into one of the
482 followings. */
483enum iso_code_class_type
484 {
485 ISO_control_0, /* Control codes in the range
486 0x00..0x1F and 0x7F, except for the
487 following 5 codes. */
488 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
489 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
490 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
491 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
492 ISO_escape, /* ISO_CODE_SO (0x1B) */
493 ISO_control_1, /* Control codes in the range
494 0x80..0x9F, except for the
495 following 3 codes. */
496 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
497 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
498 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
499 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
500 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
501 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
502 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
503 };
504
505/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
506 `iso-flags' attribute of an iso2022 coding system. */
507
508/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
509 instead of the correct short-form sequence (e.g. ESC $ A). */
510#define CODING_ISO_FLAG_LONG_FORM 0x0001
511
512/* If set, reset graphic planes and registers at end-of-line to the
513 initial state. */
514#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
515
516/* If set, reset graphic planes and registers before any control
517 characters to the initial state. */
518#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
519
520/* If set, encode by 7-bit environment. */
521#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
522
523/* If set, use locking-shift function. */
524#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
525
526/* If set, use single-shift function. Overwrite
527 CODING_ISO_FLAG_LOCKING_SHIFT. */
528#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
529
530/* If set, use designation escape sequence. */
531#define CODING_ISO_FLAG_DESIGNATION 0x0040
532
533/* If set, produce revision number sequence. */
534#define CODING_ISO_FLAG_REVISION 0x0080
535
536/* If set, produce ISO6429's direction specifying sequence. */
537#define CODING_ISO_FLAG_DIRECTION 0x0100
538
539/* If set, assume designation states are reset at beginning of line on
540 output. */
541#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
542
543/* If set, designation sequence should be placed at beginning of line
544 on output. */
545#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
546
547/* If set, do not encode unsafe charactes on output. */
548#define CODING_ISO_FLAG_SAFE 0x0800
549
550/* If set, extra latin codes (128..159) are accepted as a valid code
551 on input. */
552#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
553
554#define CODING_ISO_FLAG_COMPOSITION 0x2000
555
556#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
557
558#define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
559
560/* A character to be produced on output if encoding of the original
561 character is prohibited by CODING_ISO_FLAG_SAFE. */
562#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
563
564
565/* UTF-16 section */
566#define CODING_UTF_16_BOM(coding) \
567 ((coding)->spec.utf_16.bom)
568
569#define CODING_UTF_16_ENDIAN(coding) \
570 ((coding)->spec.utf_16.endian)
571
572#define CODING_UTF_16_SURROGATE(coding) \
573 ((coding)->spec.utf_16.surrogate)
574
575
576/* CCL section */
577#define CODING_CCL_DECODER(coding) \
578 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
579#define CODING_CCL_ENCODER(coding) \
580 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
581#define CODING_CCL_VALIDS(coding) \
582 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
583 ->data)
584
585/* Index for each coding category in `coding_category_table' */
586
587enum coding_category
588 {
589 coding_category_iso_7,
590 coding_category_iso_7_tight,
591 coding_category_iso_8_1,
592 coding_category_iso_8_2,
593 coding_category_iso_7_else,
594 coding_category_iso_8_else,
595 coding_category_utf_8,
596 coding_category_utf_16_auto,
597 coding_category_utf_16_be,
598 coding_category_utf_16_le,
599 coding_category_utf_16_be_nosig,
600 coding_category_utf_16_le_nosig,
601 coding_category_charset,
602 coding_category_sjis,
603 coding_category_big5,
604 coding_category_ccl,
605 coding_category_emacs_mule,
606 /* All above are targets of code detection. */
607 coding_category_raw_text,
608 coding_category_undecided,
609 coding_category_max
610 };
611
612/* Definitions of flag bits used in detect_coding_XXXX. */
613#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
614#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
615#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
616#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
617#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
618#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
619#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
620#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
621#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
622#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
623#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
624#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
625#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
626#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
627#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
628#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
629
630/* This value is returned if detect_coding_mask () find nothing other
631 than ASCII characters. */
632#define CATEGORY_MASK_ANY \
633 (CATEGORY_MASK_ISO_7 \
634 | CATEGORY_MASK_ISO_7_TIGHT \
635 | CATEGORY_MASK_ISO_8_1 \
636 | CATEGORY_MASK_ISO_8_2 \
637 | CATEGORY_MASK_ISO_7_ELSE \
638 | CATEGORY_MASK_ISO_8_ELSE \
639 | CATEGORY_MASK_UTF_8 \
640 | CATEGORY_MASK_UTF_16_BE \
641 | CATEGORY_MASK_UTF_16_LE \
642 | CATEGORY_MASK_UTF_16_BE_NOSIG \
643 | CATEGORY_MASK_UTF_16_LE_NOSIG \
644 | CATEGORY_MASK_CHARSET \
645 | CATEGORY_MASK_SJIS \
646 | CATEGORY_MASK_BIG5 \
647 | CATEGORY_MASK_CCL \
648 | CATEGORY_MASK_EMACS_MULE)
649
650
651#define CATEGORY_MASK_ISO_7BIT \
652 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
653
654#define CATEGORY_MASK_ISO_8BIT \
655 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
656
657#define CATEGORY_MASK_ISO_ELSE \
658 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
659
660#define CATEGORY_MASK_ISO_ESCAPE \
661 (CATEGORY_MASK_ISO_7 \
662 | CATEGORY_MASK_ISO_7_TIGHT \
663 | CATEGORY_MASK_ISO_7_ELSE \
664 | CATEGORY_MASK_ISO_8_ELSE)
665
666#define CATEGORY_MASK_ISO \
667 ( CATEGORY_MASK_ISO_7BIT \
668 | CATEGORY_MASK_ISO_8BIT \
669 | CATEGORY_MASK_ISO_ELSE)
670
671#define CATEGORY_MASK_UTF_16 \
672 (CATEGORY_MASK_UTF_16_BE \
673 | CATEGORY_MASK_UTF_16_LE \
674 | CATEGORY_MASK_UTF_16_BE_NOSIG \
675 | CATEGORY_MASK_UTF_16_LE_NOSIG)
676
677
678/* List of symbols `coding-category-xxx' ordered by priority. This
679 variable is exposed to Emacs Lisp. */
680static Lisp_Object Vcoding_category_list;
681
682/* Table of coding categories (Lisp symbols). This variable is for
683 internal use oly. */
684static Lisp_Object Vcoding_category_table;
685
686/* Table of coding-categories ordered by priority. */
687static enum coding_category coding_priorities[coding_category_max];
688
689/* Nth element is a coding context for the coding system bound to the
690 Nth coding category. */
691static struct coding_system coding_categories[coding_category_max];
692
693static int detected_mask[coding_category_raw_text] =
694 { CATEGORY_MASK_ISO,
695 CATEGORY_MASK_ISO,
696 CATEGORY_MASK_ISO,
697 CATEGORY_MASK_ISO,
698 CATEGORY_MASK_ISO,
699 CATEGORY_MASK_ISO,
700 CATEGORY_MASK_UTF_8,
701 CATEGORY_MASK_UTF_16,
702 CATEGORY_MASK_UTF_16,
703 CATEGORY_MASK_UTF_16,
704 CATEGORY_MASK_UTF_16,
705 CATEGORY_MASK_UTF_16,
706 CATEGORY_MASK_CHARSET,
707 CATEGORY_MASK_SJIS,
708 CATEGORY_MASK_BIG5,
709 CATEGORY_MASK_CCL,
710 CATEGORY_MASK_EMACS_MULE
711 };
712
713/*** Commonly used macros and functions ***/
714
715#ifndef min
716#define min(a, b) ((a) < (b) ? (a) : (b))
717#endif
718#ifndef max
719#define max(a, b) ((a) > (b) ? (a) : (b))
720#endif
721
722#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
723 do { \
724 attrs = CODING_ID_ATTRS (coding->id); \
725 eol_type = CODING_ID_EOL_TYPE (coding->id); \
726 if (VECTORP (eol_type)) \
727 eol_type = Qunix; \
728 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
729 } while (0)
730
731
732/* Safely get one byte from the source text pointed by SRC which ends
733 at SRC_END, and set C to that byte. If there are not enough bytes
734 in the source, it jumps to `no_more_source'. The caller
735 should declare and set these variables appropriately in advance:
736 src, src_end, multibytep
737*/
738
739#define ONE_MORE_BYTE(c) \
740 do { \
741 if (src == src_end) \
742 { \
743 if (src_base < src) \
744 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
745 goto no_more_source; \
746 } \
747 c = *src++; \
748 if (multibytep && (c & 0x80)) \
749 { \
750 if ((c & 0xFE) != 0xC0) \
751 error ("Undecodable char found"); \
752 c = ((c & 1) << 6) | *src++; \
753 } \
754 consumed_chars++; \
755 } while (0)
756
757
758#define ONE_MORE_BYTE_NO_CHECK(c) \
759 do { \
760 c = *src++; \
761 if (multibytep && (c & 0x80)) \
762 { \
763 if ((c & 0xFE) != 0xC0) \
764 error ("Undecodable char found"); \
765 c = ((c & 1) << 6) | *src++; \
766 } \
767 consumed_chars++; \
768 } while (0)
769
770
771/* Store a byte C in the place pointed by DST and increment DST to the
772 next free point, and increment PRODUCED_CHARS. The caller should
773 assure that C is 0..127, and declare and set the variable `dst'
774 appropriately in advance.
775*/
776
777
778#define EMIT_ONE_ASCII_BYTE(c) \
779 do { \
780 produced_chars++; \
781 *dst++ = (c); \
782 } while (0)
783
784
785/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
786
787#define EMIT_TWO_ASCII_BYTES(c1, c2) \
788 do { \
789 produced_chars += 2; \
790 *dst++ = (c1), *dst++ = (c2); \
791 } while (0)
792
793
794/* Store a byte C in the place pointed by DST and increment DST to the
795 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
796 nonzero, store in an appropriate multibyte from. The caller should
797 declare and set the variables `dst' and `multibytep' appropriately
798 in advance. */
799
800#define EMIT_ONE_BYTE(c) \
801 do { \
802 produced_chars++; \
803 if (multibytep) \
804 { \
805 int ch = (c); \
806 if (ch >= 0x80) \
807 ch = BYTE8_TO_CHAR (ch); \
808 CHAR_STRING_ADVANCE (ch, dst); \
809 } \
810 else \
811 *dst++ = (c); \
812 } while (0)
813
814
815/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
816
817#define EMIT_TWO_BYTES(c1, c2) \
818 do { \
819 produced_chars += 2; \
820 if (multibytep) \
821 { \
822 int ch; \
823 \
824 ch = (c1); \
825 if (ch >= 0x80) \
826 ch = BYTE8_TO_CHAR (ch); \
827 CHAR_STRING_ADVANCE (ch, dst); \
828 ch = (c2); \
829 if (ch >= 0x80) \
830 ch = BYTE8_TO_CHAR (ch); \
831 CHAR_STRING_ADVANCE (ch, dst); \
832 } \
833 else \
834 { \
835 *dst++ = (c1); \
836 *dst++ = (c2); \
837 } \
838 } while (0)
839
840
841#define EMIT_THREE_BYTES(c1, c2, c3) \
842 do { \
843 EMIT_ONE_BYTE (c1); \
844 EMIT_TWO_BYTES (c2, c3); \
845 } while (0)
846
847
848#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
849 do { \
850 EMIT_TWO_BYTES (c1, c2); \
851 EMIT_TWO_BYTES (c3, c4); \
852 } while (0)
853
854
855#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
856 do { \
857 charset_map_loaded = 0; \
858 c = DECODE_CHAR (charset, code); \
859 if (charset_map_loaded) \
860 { \
861 unsigned char *orig = coding->source; \
862 EMACS_INT offset; \
863 \
864 coding_set_source (coding); \
865 offset = coding->source - orig; \
866 src += offset; \
867 src_base += offset; \
868 src_end += offset; \
869 } \
870 } while (0)
871
872
873#define ASSURE_DESTINATION(bytes) \
874 do { \
875 if (dst + (bytes) >= dst_end) \
876 { \
877 int more_bytes = charbuf_end - charbuf + (bytes); \
878 \
879 dst = alloc_destination (coding, more_bytes, dst); \
880 dst_end = coding->destination + coding->dst_bytes; \
881 } \
882 } while (0)
883
884
885
886static void
887coding_set_source (coding)
888 struct coding_system *coding;
889{
890 if (BUFFERP (coding->src_object))
891 {
892 if (coding->src_pos < 0)
893 coding->source = GAP_END_ADDR + coding->src_pos_byte;
894 else
895 {
896 struct buffer *buf = XBUFFER (coding->src_object);
897 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
898 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
899
900 coding->source = beg_addr + coding->src_pos_byte - 1;
901 if (coding->src_pos_byte >= gpt_byte)
902 coding->source += BUF_GAP_SIZE (buf);
903 }
904 }
905 else if (STRINGP (coding->src_object))
906 {
907 coding->source = (XSTRING (coding->src_object)->data
908 + coding->src_pos_byte);
909 }
910 else
911 /* Otherwise, the source is C string and is never relocated
912 automatically. Thus we don't have to update anything. */
913 ;
914}
915
916static void
917coding_set_destination (coding)
918 struct coding_system *coding;
919{
920 if (BUFFERP (coding->dst_object))
921 {
922 /* We are sure that coding->dst_pos_byte is before the gap of the
923 buffer. */
924 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
925 + coding->dst_pos_byte - 1);
926 if (coding->src_pos < 0)
927 coding->dst_bytes = (GAP_END_ADDR
928 - (coding->src_bytes - coding->consumed)
929 - coding->destination);
930 else
931 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
932 - coding->destination);
933 }
934 else
935 /* Otherwise, the destination is C string and is never relocated
936 automatically. Thus we don't have to update anything. */
937 ;
938}
939
940
941static void
942coding_alloc_by_realloc (coding, bytes)
943 struct coding_system *coding;
944 EMACS_INT bytes;
945{
946 coding->destination = (unsigned char *) xrealloc (coding->destination,
947 coding->dst_bytes + bytes);
948 coding->dst_bytes += bytes;
949}
950
951static void
952coding_alloc_by_making_gap (coding, bytes)
953 struct coding_system *coding;
954 EMACS_INT bytes;
955{
956 if (BUFFERP (coding->dst_object)
957 && EQ (coding->src_object, coding->dst_object))
958 {
959 EMACS_INT add = coding->src_bytes - coding->consumed;
960
961 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
962 make_gap (bytes);
963 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
964 }
965 else
966 {
967 Lisp_Object this_buffer;
968
969 this_buffer = Fcurrent_buffer ();
970 set_buffer_internal (XBUFFER (coding->dst_object));
971 make_gap (bytes);
972 set_buffer_internal (XBUFFER (this_buffer));
973 }
974}
975
976
977static unsigned char *
978alloc_destination (coding, nbytes, dst)
979 struct coding_system *coding;
980 int nbytes;
981 unsigned char *dst;
982{
983 EMACS_INT offset = dst - coding->destination;
984
985 if (BUFFERP (coding->dst_object))
986 coding_alloc_by_making_gap (coding, nbytes);
987 else
988 coding_alloc_by_realloc (coding, nbytes);
989 coding->result = CODING_RESULT_SUCCESS;
990 coding_set_destination (coding);
991 dst = coding->destination + offset;
992 return dst;
993}
994
995\f
996/*** 2. Emacs' internal format (emacs-utf-8) ***/
997
998
999
1000\f
1001/*** 3. UTF-8 ***/
1002
1003/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1004 Check if a text is encoded in UTF-8. If it is, return
1005 CATEGORY_MASK_UTF_8, else return 0. */
1006
1007#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1008#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1009#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1010#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1011#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1012#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1013
1014static int
1015detect_coding_utf_8 (coding, mask)
1016 struct coding_system *coding;
1017 int *mask;
1018{
1019 unsigned char *src = coding->source, *src_base = src;
1020 unsigned char *src_end = coding->source + coding->src_bytes;
1021 int multibytep = coding->src_multibyte;
1022 int consumed_chars = 0;
1023 int found = 0;
1024
1025 /* A coding system of this category is always ASCII compatible. */
1026 src += coding->head_ascii;
1027
1028 while (1)
1029 {
1030 int c, c1, c2, c3, c4;
1031
1032 ONE_MORE_BYTE (c);
1033 if (UTF_8_1_OCTET_P (c))
1034 continue;
1035 ONE_MORE_BYTE (c1);
1036 if (! UTF_8_EXTRA_OCTET_P (c1))
1037 break;
1038 if (UTF_8_2_OCTET_LEADING_P (c))
1039 {
1040 found++;
1041 continue;
1042 }
1043 ONE_MORE_BYTE (c2);
1044 if (! UTF_8_EXTRA_OCTET_P (c2))
1045 break;
1046 if (UTF_8_3_OCTET_LEADING_P (c))
1047 {
1048 found++;
1049 continue;
1050 }
1051 ONE_MORE_BYTE (c3);
1052 if (! UTF_8_EXTRA_OCTET_P (c3))
1053 break;
1054 if (UTF_8_4_OCTET_LEADING_P (c))
1055 {
1056 found++;
1057 continue;
1058 }
1059 ONE_MORE_BYTE (c4);
1060 if (! UTF_8_EXTRA_OCTET_P (c4))
1061 break;
1062 if (UTF_8_5_OCTET_LEADING_P (c))
1063 {
1064 found++;
1065 continue;
1066 }
1067 break;
1068 }
1069 *mask &= ~CATEGORY_MASK_UTF_8;
1070 return 0;
1071
1072 no_more_source:
1073 if (! found)
1074 return 0;
1075 *mask &= CATEGORY_MASK_UTF_8;
1076 return 1;
1077}
1078
1079
1080static void
1081decode_coding_utf_8 (coding)
1082 struct coding_system *coding;
1083{
1084 unsigned char *src = coding->source + coding->consumed;
1085 unsigned char *src_end = coding->source + coding->src_bytes;
1086 unsigned char *src_base;
1087 int *charbuf = coding->charbuf;
1088 int *charbuf_end = charbuf + coding->charbuf_size;
1089 int consumed_chars = 0, consumed_chars_base;
1090 int multibytep = coding->src_multibyte;
1091 Lisp_Object attr, eol_type, charset_list;
1092
1093 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1094
1095 while (1)
1096 {
1097 int c, c1, c2, c3, c4, c5;
1098
1099 src_base = src;
1100 consumed_chars_base = consumed_chars;
1101
1102 if (charbuf >= charbuf_end)
1103 break;
1104
1105 ONE_MORE_BYTE (c1);
1106 if (UTF_8_1_OCTET_P(c1))
1107 {
1108 c = c1;
1109 if (c == '\r')
1110 {
1111 if (EQ (eol_type, Qdos))
1112 {
1113 if (src == src_end)
1114 goto no_more_source;
1115 if (*src == '\n')
1116 ONE_MORE_BYTE (c);
1117 }
1118 else if (EQ (eol_type, Qmac))
1119 c = '\n';
1120 }
1121 }
1122 else
1123 {
1124 ONE_MORE_BYTE (c2);
1125 if (! UTF_8_EXTRA_OCTET_P (c2))
1126 goto invalid_code;
1127 if (UTF_8_2_OCTET_LEADING_P (c1))
1128 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1129 else
1130 {
1131 ONE_MORE_BYTE (c3);
1132 if (! UTF_8_EXTRA_OCTET_P (c3))
1133 goto invalid_code;
1134 if (UTF_8_3_OCTET_LEADING_P (c1))
1135 c = (((c1 & 0xF) << 12)
1136 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1137 else
1138 {
1139 ONE_MORE_BYTE (c4);
1140 if (! UTF_8_EXTRA_OCTET_P (c4))
1141 goto invalid_code;
1142 if (UTF_8_4_OCTET_LEADING_P (c1))
1143 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1144 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1145 else
1146 {
1147 ONE_MORE_BYTE (c5);
1148 if (! UTF_8_EXTRA_OCTET_P (c5))
1149 goto invalid_code;
1150 if (UTF_8_5_OCTET_LEADING_P (c1))
1151 {
1152 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1153 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1154 | (c5 & 0x3F));
1155 if (c > MAX_CHAR)
1156 goto invalid_code;
1157 }
1158 else
1159 goto invalid_code;
1160 }
1161 }
1162 }
1163 }
1164
1165 *charbuf++ = c;
1166 continue;
1167
1168 invalid_code:
1169 src = src_base;
1170 consumed_chars = consumed_chars_base;
1171 ONE_MORE_BYTE (c);
1172 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1173 coding->errors++;
1174 }
1175
1176 no_more_source:
1177 coding->consumed_char += consumed_chars_base;
1178 coding->consumed = src_base - coding->source;
1179 coding->charbuf_used = charbuf - coding->charbuf;
1180}
1181
1182
1183static int
1184encode_coding_utf_8 (coding)
1185 struct coding_system *coding;
1186{
1187 int multibytep = coding->dst_multibyte;
1188 int *charbuf = coding->charbuf;
1189 int *charbuf_end = charbuf + coding->charbuf_used;
1190 unsigned char *dst = coding->destination + coding->produced;
1191 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1192 int produced_chars = 0;
1193 int c;
1194
1195 if (multibytep)
1196 {
1197 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1198
1199 while (charbuf < charbuf_end)
1200 {
1201 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1202
1203 ASSURE_DESTINATION (safe_room);
1204 c = *charbuf++;
1205 CHAR_STRING_ADVANCE (c, pend);
1206 for (p = str; p < pend; p++)
1207 EMIT_ONE_BYTE (*p);
1208 }
1209 }
1210 else
1211 {
1212 int safe_room = MAX_MULTIBYTE_LENGTH;
1213
1214 while (charbuf < charbuf_end)
1215 {
1216 ASSURE_DESTINATION (safe_room);
1217 c = *charbuf++;
1218 dst += CHAR_STRING (c, dst);
1219 produced_chars++;
1220 }
1221 }
1222 coding->result = CODING_RESULT_SUCCESS;
1223 coding->produced_char += produced_chars;
1224 coding->produced = dst - coding->destination;
1225 return 0;
1226}
1227
1228
1229/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1230 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1231 Little Endian (otherwise). If it is, return
1232 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1233 else return 0. */
1234
1235#define UTF_16_HIGH_SURROGATE_P(val) \
1236 (((val) & 0xFC00) == 0xD800)
1237
1238#define UTF_16_LOW_SURROGATE_P(val) \
1239 (((val) & 0xFC00) == 0xDC00)
1240
1241#define UTF_16_INVALID_P(val) \
1242 (((val) == 0xFFFE) \
1243 || ((val) == 0xFFFF) \
1244 || UTF_16_LOW_SURROGATE_P (val))
1245
1246
1247static int
1248detect_coding_utf_16 (coding, mask)
1249 struct coding_system *coding;
1250 int *mask;
1251{
1252 unsigned char *src = coding->source, *src_base = src;
1253 unsigned char *src_end = coding->source + coding->src_bytes;
1254 int multibytep = coding->src_multibyte;
1255 int consumed_chars = 0;
1256 int c1, c2;
1257
1258 ONE_MORE_BYTE (c1);
1259 ONE_MORE_BYTE (c2);
1260
1261 if ((c1 == 0xFF) && (c2 == 0xFE))
1262 {
1263 *mask &= CATEGORY_MASK_UTF_16_LE;
1264 return 1;
1265 }
1266 else if ((c1 == 0xFE) && (c2 == 0xFF))
1267 {
1268 *mask &= CATEGORY_MASK_UTF_16_BE;
1269 return 1;
1270 }
1271 no_more_source:
1272 return 0;
1273}
1274
1275static void
1276decode_coding_utf_16 (coding)
1277 struct coding_system *coding;
1278{
1279 unsigned char *src = coding->source + coding->consumed;
1280 unsigned char *src_end = coding->source + coding->src_bytes;
1281 unsigned char *src_base;
1282 int *charbuf = coding->charbuf;
1283 int *charbuf_end = charbuf + coding->charbuf_size;
1284 int consumed_chars = 0, consumed_chars_base;
1285 int multibytep = coding->src_multibyte;
1286 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1287 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1288 int surrogate = CODING_UTF_16_SURROGATE (coding);
1289 Lisp_Object attr, eol_type, charset_list;
1290
1291 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1292
1293 if (bom != utf_16_without_bom)
1294 {
1295 int c, c1, c2;
1296
1297 src_base = src;
1298 ONE_MORE_BYTE (c1);
1299 ONE_MORE_BYTE (c2);
1300 c = (c1 << 8) | c2;
1301 if (bom == utf_16_with_bom)
1302 {
1303 if (endian == utf_16_big_endian
1304 ? c != 0xFFFE : c != 0xFEFF)
1305 {
1306 /* We are sure that there's enouph room at CHARBUF. */
1307 *charbuf++ = c1;
1308 *charbuf++ = c2;
1309 coding->errors++;
1310 }
1311 }
1312 else
1313 {
1314 if (c == 0xFFFE)
1315 CODING_UTF_16_ENDIAN (coding)
1316 = endian = utf_16_big_endian;
1317 else if (c == 0xFEFF)
1318 CODING_UTF_16_ENDIAN (coding)
1319 = endian = utf_16_little_endian;
1320 else
1321 {
1322 CODING_UTF_16_ENDIAN (coding)
1323 = endian = utf_16_big_endian;
1324 src = src_base;
1325 }
1326 }
1327 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1328 }
1329
1330 while (1)
1331 {
1332 int c, c1, c2;
1333
1334 src_base = src;
1335 consumed_chars_base = consumed_chars;
1336
1337 if (charbuf + 2 >= charbuf_end)
1338 break;
1339
1340 ONE_MORE_BYTE (c1);
1341 ONE_MORE_BYTE (c2);
1342 c = (endian == utf_16_big_endian
1343 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1344 if (surrogate)
1345 {
1346 if (! UTF_16_LOW_SURROGATE_P (c))
1347 {
1348 if (endian == utf_16_big_endian)
1349 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1350 else
1351 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1352 *charbuf++ = c1;
1353 *charbuf++ = c2;
1354 coding->errors++;
1355 if (UTF_16_HIGH_SURROGATE_P (c))
1356 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1357 else
1358 *charbuf++ = c;
1359 }
1360 else
1361 {
1362 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1363 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1364 *charbuf++ = c;
1365 }
1366 }
1367 else
1368 {
1369 if (UTF_16_HIGH_SURROGATE_P (c))
1370 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1371 else
1372 *charbuf++ = c;
1373 }
1374 }
1375
1376 no_more_source:
1377 coding->consumed_char += consumed_chars_base;
1378 coding->consumed = src_base - coding->source;
1379 coding->charbuf_used = charbuf - coding->charbuf;
1380}
1381
1382static int
1383encode_coding_utf_16 (coding)
1384 struct coding_system *coding;
1385{
1386 int multibytep = coding->dst_multibyte;
1387 int *charbuf = coding->charbuf;
1388 int *charbuf_end = charbuf + coding->charbuf_used;
1389 unsigned char *dst = coding->destination + coding->produced;
1390 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1391 int safe_room = 8;
1392 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1393 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1394 int produced_chars = 0;
1395 Lisp_Object attrs, eol_type, charset_list;
1396 int c;
1397
1398 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1399
1400 if (bom == utf_16_with_bom)
1401 {
1402 ASSURE_DESTINATION (safe_room);
1403 if (big_endian)
1404 EMIT_TWO_BYTES (0xFF, 0xFE);
1405 else
1406 EMIT_TWO_BYTES (0xFE, 0xFF);
1407 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1408 }
1409
1410 while (charbuf < charbuf_end)
1411 {
1412 ASSURE_DESTINATION (safe_room);
1413 c = *charbuf++;
1414 if (c >= MAX_UNICODE_CHAR)
1415 c = coding->default_char;
1416
1417 if (c < 0x10000)
1418 {
1419 if (big_endian)
1420 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1421 else
1422 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1423 }
1424 else
1425 {
1426 int c1, c2;
1427
1428 c -= 0x10000;
1429 c1 = (c >> 10) + 0xD800;
1430 c2 = (c & 0x3FF) + 0xDC00;
1431 if (big_endian)
1432 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1433 else
1434 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1435 }
1436 }
1437 coding->result = CODING_RESULT_SUCCESS;
1438 coding->produced = dst - coding->destination;
1439 coding->produced_char += produced_chars;
1440 return 0;
1441}
1442
1443\f
1444/*** 6. Old Emacs' internal format (emacs-mule) ***/
1445
1446/* Emacs' internal format for representation of multiple character
1447 sets is a kind of multi-byte encoding, i.e. characters are
1448 represented by variable-length sequences of one-byte codes.
1449
1450 ASCII characters and control characters (e.g. `tab', `newline') are
1451 represented by one-byte sequences which are their ASCII codes, in
1452 the range 0x00 through 0x7F.
1453
1454 8-bit characters of the range 0x80..0x9F are represented by
1455 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1456 code + 0x20).
1457
1458 8-bit characters of the range 0xA0..0xFF are represented by
1459 one-byte sequences which are their 8-bit code.
1460
1461 The other characters are represented by a sequence of `base
1462 leading-code', optional `extended leading-code', and one or two
1463 `position-code's. The length of the sequence is determined by the
1464 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1465 whereas extended leading-code and position-code take the range 0xA0
1466 through 0xFF. See `charset.h' for more details about leading-code
1467 and position-code.
1468
1469 --- CODE RANGE of Emacs' internal format ---
1470 character set range
1471 ------------- -----
1472 ascii 0x00..0x7F
1473 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1474 eight-bit-graphic 0xA0..0xBF
1475 ELSE 0x81..0x9D + [0xA0..0xFF]+
1476 ---------------------------------------------
1477
1478 As this is the internal character representation, the format is
1479 usually not used externally (i.e. in a file or in a data sent to a
1480 process). But, it is possible to have a text externally in this
1481 format (i.e. by encoding by the coding system `emacs-mule').
1482
1483 In that case, a sequence of one-byte codes has a slightly different
1484 form.
1485
1486 At first, all characters in eight-bit-control are represented by
1487 one-byte sequences which are their 8-bit code.
1488
1489 Next, character composition data are represented by the byte
1490 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1491 where,
1492 METHOD is 0xF0 plus one of composition method (enum
1493 composition_method),
1494
1495 BYTES is 0xA0 plus a byte length of this composition data,
1496
1497 CHARS is 0x20 plus a number of characters composed by this
1498 data,
1499
1500 COMPONENTs are characters of multibye form or composition
1501 rules encoded by two-byte of ASCII codes.
1502
1503 In addition, for backward compatibility, the following formats are
1504 also recognized as composition data on decoding.
1505
1506 0x80 MSEQ ...
1507 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1508
1509 Here,
1510 MSEQ is a multibyte form but in these special format:
1511 ASCII: 0xA0 ASCII_CODE+0x80,
1512 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1513 RULE is a one byte code of the range 0xA0..0xF0 that
1514 represents a composition rule.
1515 */
1516
1517char emacs_mule_bytes[256];
1518
1519/* Leading-code followed by extended leading-code. */
1520#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1521#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1522#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1523#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1524
1525
1526int
1527emacs_mule_char (coding, src, nbytes, nchars)
1528 struct coding_system *coding;
1529 unsigned char *src;
1530 int *nbytes, *nchars;
1531{
1532 unsigned char *src_end = coding->source + coding->src_bytes;
1533 int multibytep = coding->src_multibyte;
1534 unsigned char *src_base = src;
1535 struct charset *charset;
1536 unsigned code;
1537 int c;
1538 int consumed_chars = 0;
1539
1540 ONE_MORE_BYTE (c);
1541 switch (emacs_mule_bytes[c])
1542 {
1543 case 2:
1544 if (! (charset = emacs_mule_charset[c]))
1545 goto invalid_code;
1546 ONE_MORE_BYTE (c);
1547 code = c & 0x7F;
1548 break;
1549
1550 case 3:
1551 if (c == LEADING_CODE_PRIVATE_11
1552 || c == LEADING_CODE_PRIVATE_12)
1553 {
1554 ONE_MORE_BYTE (c);
1555 if (! (charset = emacs_mule_charset[c]))
1556 goto invalid_code;
1557 ONE_MORE_BYTE (c);
1558 code = c & 0x7F;
1559 }
1560 else
1561 {
1562 if (! (charset = emacs_mule_charset[c]))
1563 goto invalid_code;
1564 ONE_MORE_BYTE (c);
1565 code = (c & 0x7F) << 8;
1566 ONE_MORE_BYTE (c);
1567 code |= c & 0x7F;
1568 }
1569 break;
1570
1571 case 4:
1572 ONE_MORE_BYTE (c);
1573 if (! (charset = emacs_mule_charset[c]))
1574 goto invalid_code;
1575 ONE_MORE_BYTE (c);
1576 code = (c & 0x7F) << 8;
1577 ONE_MORE_BYTE (c);
1578 code |= c & 0x7F;
1579 break;
1580
1581 case 1:
1582 code = c;
1583 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1584 : code < 0xA0 ? charset_8_bit_control
1585 : charset_8_bit_graphic);
1586 break;
1587
1588 default:
1589 abort ();
1590 }
1591 c = DECODE_CHAR (charset, code);
1592 if (c < 0)
1593 goto invalid_code;
1594 *nbytes = src - src_base;
1595 *nchars = consumed_chars;
1596 return c;
1597
1598 no_more_source:
1599 return -2;
1600
1601 invalid_code:
1602 return -1;
1603}
1604
1605
1606/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1607 Check if a text is encoded in `emacs-mule'. */
1608
1609static int
1610detect_coding_emacs_mule (coding, mask)
1611 struct coding_system *coding;
1612 int *mask;
1613{
1614 unsigned char *src = coding->source, *src_base = src;
1615 unsigned char *src_end = coding->source + coding->src_bytes;
1616 int multibytep = coding->src_multibyte;
1617 int consumed_chars = 0;
1618 int c;
1619 int found = 0;
1620
1621 /* A coding system of this category is always ASCII compatible. */
1622 src += coding->head_ascii;
1623
1624 while (1)
1625 {
1626 ONE_MORE_BYTE (c);
1627
1628 if (c == 0x80)
1629 {
1630 /* Perhaps the start of composite character. We simple skip
1631 it because analyzing it is too heavy for detecting. But,
1632 at least, we check that the composite character
1633 constitues of more than 4 bytes. */
1634 unsigned char *src_base;
1635
1636 repeat:
1637 src_base = src;
1638 do
1639 {
1640 ONE_MORE_BYTE (c);
1641 }
1642 while (c >= 0xA0);
1643
1644 if (src - src_base <= 4)
1645 break;
1646 found = 1;
1647 if (c == 0x80)
1648 goto repeat;
1649 }
1650
1651 if (c < 0x80)
1652 {
1653 if (c < 0x20
1654 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1655 break;
1656 }
1657 else
1658 {
1659 unsigned char *src_base = src - 1;
1660
1661 do
1662 {
1663 ONE_MORE_BYTE (c);
1664 }
1665 while (c >= 0xA0);
1666 if (src - src_base != emacs_mule_bytes[*src_base])
1667 break;
1668 found = 1;
1669 }
1670 }
1671 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1672 return 0;
1673
1674 no_more_source:
1675 if (!found)
1676 return 0;
1677 *mask &= CATEGORY_MASK_EMACS_MULE;
1678 return 1;
1679}
1680
1681
1682/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1683
1684/* Decode a character represented as a component of composition
1685 sequence of Emacs 20/21 style at SRC. Set C to that character and
1686 update SRC to the head of next character (or an encoded composition
1687 rule). If SRC doesn't points a composition component, set C to -1.
1688 If SRC points an invalid byte sequence, global exit by a return
1689 value 0. */
1690
1691#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1692 if (1) \
1693 { \
1694 int c; \
1695 int nbytes, nchars; \
1696 \
1697 if (src == src_end) \
1698 break; \
1699 c = emacs_mule_char (coding, src, &nbytes, &nchars); \
1700 if (c < 0) \
1701 { \
1702 if (c == -2) \
1703 break; \
1704 goto invalid_code; \
1705 } \
1706 *buf++ = c; \
1707 src += nbytes; \
1708 consumed_chars += nchars; \
1709 } \
1710 else
1711
1712
1713/* Decode a composition rule represented as a component of composition
1714 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1715 and increment BUF. If SRC points an invalid byte sequence, set C
1716 to -1. */
1717
1718#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1719 do { \
1720 int c, gref, nref; \
1721 \
1722 if (src >= src_end) \
1723 goto invalid_code; \
1724 ONE_MORE_BYTE_NO_CHECK (c); \
1725 c -= 0x20; \
1726 if (c < 0 || c >= 81) \
1727 goto invalid_code; \
1728 \
1729 gref = c / 9, nref = c % 9; \
1730 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1731 } while (0)
1732
1733
1734/* Decode a composition rule represented as a component of composition
1735 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1736 and increment BUF. If SRC points an invalid byte sequence, set C
1737 to -1. */
1738
1739#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1740 do { \
1741 int gref, nref; \
1742 \
1743 if (src + 1>= src_end) \
1744 goto invalid_code; \
1745 ONE_MORE_BYTE_NO_CHECK (gref); \
1746 gref -= 0x20; \
1747 ONE_MORE_BYTE_NO_CHECK (nref); \
1748 nref -= 0x20; \
1749 if (gref < 0 || gref >= 81 \
1750 || nref < 0 || nref >= 81) \
1751 goto invalid_code; \
1752 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1753 } while (0)
1754
1755
1756#define ADD_COMPOSITION_DATA(buf, method, nchars) \
1757 do { \
1758 *buf++ = -5; \
1759 *buf++ = coding->produced_char + char_offset; \
1760 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1761 *buf++ = method; \
1762 *buf++ = nchars; \
1763 } while (0)
1764
1765
1766#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1767 do { \
1768 /* Emacs 21 style format. The first three bytes at SRC are \
1769 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1770 the byte length of this composition information, CHARS is the \
1771 number of characters composed by this composition. */ \
1772 enum composition_method method = c - 0xF2; \
1773 int *charbuf_base = charbuf; \
1774 int consumed_chars_limit; \
1775 int nbytes, nchars; \
1776 \
1777 ONE_MORE_BYTE (c); \
1778 nbytes = c - 0xA0; \
1779 if (nbytes < 3) \
1780 goto invalid_code; \
1781 ONE_MORE_BYTE (c); \
1782 nchars = c - 0xA0; \
1783 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1784 consumed_chars_limit = consumed_chars_base + nbytes; \
1785 if (method != COMPOSITION_RELATIVE) \
1786 { \
1787 int i = 0; \
1788 while (consumed_chars < consumed_chars_limit) \
1789 { \
1790 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1791 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1792 else \
1793 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1794 i++; \
1795 } \
1796 if (consumed_chars < consumed_chars_limit) \
1797 goto invalid_code; \
1798 charbuf_base[0] -= i; \
1799 } \
1800 } while (0)
1801
1802
1803#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1804 do { \
1805 /* Emacs 20 style format for relative composition. */ \
1806 /* Store multibyte form of characters to be composed. */ \
1807 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1808 int *buf = components; \
1809 int i, j; \
1810 \
1811 src = src_base; \
1812 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1813 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1814 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1815 if (i < 2) \
1816 goto invalid_code; \
1817 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1818 for (j = 0; j < i; j++) \
1819 *charbuf++ = components[j]; \
1820 } while (0)
1821
1822
1823#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1824 do { \
1825 /* Emacs 20 style format for rule-base composition. */ \
1826 /* Store multibyte form of characters to be composed. */ \
1827 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1828 int *buf = components; \
1829 int i, j; \
1830 \
1831 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1832 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1833 { \
1834 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1835 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1836 } \
1837 if (i < 1 || (buf - components) % 2 == 0) \
1838 goto invalid_code; \
1839 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1840 goto no_more_source; \
1841 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1842 for (j = 0; j < i; j++) \
1843 *charbuf++ = components[j]; \
1844 for (j = 0; j < i; j += 2) \
1845 *charbuf++ = components[j]; \
1846 } while (0)
1847
1848
1849static void
1850decode_coding_emacs_mule (coding)
1851 struct coding_system *coding;
1852{
1853 unsigned char *src = coding->source + coding->consumed;
1854 unsigned char *src_end = coding->source + coding->src_bytes;
1855 unsigned char *src_base;
1856 int *charbuf = coding->charbuf;
1857 int *charbuf_end = charbuf + coding->charbuf_size;
1858 int consumed_chars = 0, consumed_chars_base;
1859 int char_offset = 0;
1860 int multibytep = coding->src_multibyte;
1861 Lisp_Object attrs, eol_type, charset_list;
1862
1863 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1864
1865 while (1)
1866 {
1867 int c;
1868
1869 src_base = src;
1870 consumed_chars_base = consumed_chars;
1871
1872 if (charbuf >= charbuf_end)
1873 break;
1874
1875 ONE_MORE_BYTE (c);
1876
1877 if (c < 0x80)
1878 {
1879 if (c == '\r')
1880 {
1881 if (EQ (eol_type, Qdos))
1882 {
1883 if (src == src_end)
1884 goto no_more_source;
1885 if (*src == '\n')
1886 ONE_MORE_BYTE (c);
1887 }
1888 else if (EQ (eol_type, Qmac))
1889 c = '\n';
1890 }
1891 *charbuf++ = c;
1892 char_offset++;
1893 }
1894 else if (c == 0x80)
1895 {
1896 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1897 break;
1898 ONE_MORE_BYTE (c);
1899 if (c - 0xF2 >= COMPOSITION_RELATIVE
1900 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1901 DECODE_EMACS_MULE_21_COMPOSITION (c);
1902 else if (c < 0xC0)
1903 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1904 else if (c == 0xFF)
1905 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1906 else
1907 goto invalid_code;
1908 coding->annotated = 1;
1909 }
1910 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1911 {
1912 int nbytes, nchars;
1913 src = src_base;
1914 consumed_chars = consumed_chars_base;
1915 c = emacs_mule_char (coding, src, &nbytes, &nchars);
1916 if (c < 0)
1917 {
1918 if (c == -2)
1919 break;
1920 goto invalid_code;
1921 }
1922 *charbuf++ = c;
1923 src += nbytes;
1924 consumed_chars += nchars;
1925 char_offset++;
1926 }
1927 continue;
1928
1929 invalid_code:
1930 src = src_base;
1931 consumed_chars = consumed_chars_base;
1932 ONE_MORE_BYTE (c);
1933 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1934 coding->errors++;
1935 }
1936
1937 no_more_source:
1938 coding->consumed_char += consumed_chars_base;
1939 coding->consumed = src_base - coding->source;
1940 coding->charbuf_used = charbuf - coding->charbuf;
1941}
1942
1943
1944#define EMACS_MULE_LEADING_CODES(id, codes) \
1945 do { \
1946 if (id < 0xA0) \
1947 codes[0] = id, codes[1] = 0; \
1948 else if (id < 0xE0) \
1949 codes[0] = 0x9A, codes[1] = id; \
1950 else if (id < 0xF0) \
1951 codes[0] = 0x9B, codes[1] = id; \
1952 else if (id < 0xF5) \
1953 codes[0] = 0x9C, codes[1] = id; \
1954 else \
1955 codes[0] = 0x9D, codes[1] = id; \
1956 } while (0);
1957
1958
1959static int
1960encode_coding_emacs_mule (coding)
1961 struct coding_system *coding;
1962{
1963 int multibytep = coding->dst_multibyte;
1964 int *charbuf = coding->charbuf;
1965 int *charbuf_end = charbuf + coding->charbuf_used;
1966 unsigned char *dst = coding->destination + coding->produced;
1967 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1968 int safe_room = 8;
1969 int produced_chars = 0;
1970 Lisp_Object attrs, eol_type, charset_list;
1971 int c;
1972
1973 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1974
1975 while (charbuf < charbuf_end)
1976 {
1977 ASSURE_DESTINATION (safe_room);
1978 c = *charbuf++;
1979 if (ASCII_CHAR_P (c))
1980 EMIT_ONE_ASCII_BYTE (c);
1981 else
1982 {
1983 struct charset *charset;
1984 unsigned code;
1985 int dimension;
1986 int emacs_mule_id;
1987 unsigned char leading_codes[2];
1988
1989 charset = char_charset (c, charset_list, &code);
1990 if (! charset)
1991 {
1992 c = coding->default_char;
1993 if (ASCII_CHAR_P (c))
1994 {
1995 EMIT_ONE_ASCII_BYTE (c);
1996 continue;
1997 }
1998 charset = char_charset (c, charset_list, &code);
1999 }
2000 dimension = CHARSET_DIMENSION (charset);
2001 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2002 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2003 EMIT_ONE_BYTE (leading_codes[0]);
2004 if (leading_codes[1])
2005 EMIT_ONE_BYTE (leading_codes[1]);
2006 if (dimension == 1)
2007 EMIT_ONE_BYTE (code);
2008 else
2009 {
2010 EMIT_ONE_BYTE (code >> 8);
2011 EMIT_ONE_BYTE (code & 0xFF);
2012 }
2013 }
2014 }
2015 coding->result = CODING_RESULT_SUCCESS;
2016 coding->produced_char += produced_chars;
2017 coding->produced = dst - coding->destination;
2018 return 0;
2019}
2020
2021\f
2022/*** 7. ISO2022 handlers ***/
2023
2024/* The following note describes the coding system ISO2022 briefly.
2025 Since the intention of this note is to help understand the
2026 functions in this file, some parts are NOT ACCURATE or OVERLY
2027 SIMPLIFIED. For thorough understanding, please refer to the
2028 original document of ISO2022.
2029
2030 ISO2022 provides many mechanisms to encode several character sets
2031 in 7-bit and 8-bit environments. For 7-bite environments, all text
2032 is encoded using bytes less than 128. This may make the encoded
2033 text a little bit longer, but the text passes more easily through
2034 several gateways, some of which strip off MSB (Most Signigant Bit).
2035
2036 There are two kinds of character sets: control character set and
2037 graphic character set. The former contains control characters such
2038 as `newline' and `escape' to provide control functions (control
2039 functions are also provided by escape sequences). The latter
2040 contains graphic characters such as 'A' and '-'. Emacs recognizes
2041 two control character sets and many graphic character sets.
2042
2043 Graphic character sets are classified into one of the following
2044 four classes, according to the number of bytes (DIMENSION) and
2045 number of characters in one dimension (CHARS) of the set:
2046 - DIMENSION1_CHARS94
2047 - DIMENSION1_CHARS96
2048 - DIMENSION2_CHARS94
2049 - DIMENSION2_CHARS96
2050
2051 In addition, each character set is assigned an identification tag,
2052 unique for each set, called "final character" (denoted as <F>
2053 hereafter). The <F> of each character set is decided by ECMA(*)
2054 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2055 (0x30..0x3F are for private use only).
2056
2057 Note (*): ECMA = European Computer Manufacturers Association
2058
2059 Here are examples of graphic character set [NAME(<F>)]:
2060 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2061 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2062 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2063 o DIMENSION2_CHARS96 -- none for the moment
2064
2065 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2066 C0 [0x00..0x1F] -- control character plane 0
2067 GL [0x20..0x7F] -- graphic character plane 0
2068 C1 [0x80..0x9F] -- control character plane 1
2069 GR [0xA0..0xFF] -- graphic character plane 1
2070
2071 A control character set is directly designated and invoked to C0 or
2072 C1 by an escape sequence. The most common case is that:
2073 - ISO646's control character set is designated/invoked to C0, and
2074 - ISO6429's control character set is designated/invoked to C1,
2075 and usually these designations/invocations are omitted in encoded
2076 text. In a 7-bit environment, only C0 can be used, and a control
2077 character for C1 is encoded by an appropriate escape sequence to
2078 fit into the environment. All control characters for C1 are
2079 defined to have corresponding escape sequences.
2080
2081 A graphic character set is at first designated to one of four
2082 graphic registers (G0 through G3), then these graphic registers are
2083 invoked to GL or GR. These designations and invocations can be
2084 done independently. The most common case is that G0 is invoked to
2085 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2086 these invocations and designations are omitted in encoded text.
2087 In a 7-bit environment, only GL can be used.
2088
2089 When a graphic character set of CHARS94 is invoked to GL, codes
2090 0x20 and 0x7F of the GL area work as control characters SPACE and
2091 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2092 be used.
2093
2094 There are two ways of invocation: locking-shift and single-shift.
2095 With locking-shift, the invocation lasts until the next different
2096 invocation, whereas with single-shift, the invocation affects the
2097 following character only and doesn't affect the locking-shift
2098 state. Invocations are done by the following control characters or
2099 escape sequences:
2100
2101 ----------------------------------------------------------------------
2102 abbrev function cntrl escape seq description
2103 ----------------------------------------------------------------------
2104 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2105 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2106 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2107 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2108 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2109 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2110 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2111 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2112 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2113 ----------------------------------------------------------------------
2114 (*) These are not used by any known coding system.
2115
2116 Control characters for these functions are defined by macros
2117 ISO_CODE_XXX in `coding.h'.
2118
2119 Designations are done by the following escape sequences:
2120 ----------------------------------------------------------------------
2121 escape sequence description
2122 ----------------------------------------------------------------------
2123 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2124 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2125 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2126 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2127 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2128 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2129 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2130 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2131 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2132 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2133 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2134 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2135 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2136 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2137 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2138 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2139 ----------------------------------------------------------------------
2140
2141 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2142 of dimension 1, chars 94, and final character <F>, etc...
2143
2144 Note (*): Although these designations are not allowed in ISO2022,
2145 Emacs accepts them on decoding, and produces them on encoding
2146 CHARS96 character sets in a coding system which is characterized as
2147 7-bit environment, non-locking-shift, and non-single-shift.
2148
2149 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2150 '(' must be omitted. We refer to this as "short-form" hereafter.
2151
2152 Now you may notice that there are a lot of ways for encoding the
2153 same multilingual text in ISO2022. Actually, there exist many
2154 coding systems such as Compound Text (used in X11's inter client
2155 communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2156 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
2157 localized platforms), and all of these are variants of ISO2022.
2158
2159 In addition to the above, Emacs handles two more kinds of escape
2160 sequences: ISO6429's direction specification and Emacs' private
2161 sequence for specifying character composition.
2162
2163 ISO6429's direction specification takes the following form:
2164 o CSI ']' -- end of the current direction
2165 o CSI '0' ']' -- end of the current direction
2166 o CSI '1' ']' -- start of left-to-right text
2167 o CSI '2' ']' -- start of right-to-left text
2168 The control character CSI (0x9B: control sequence introducer) is
2169 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2170
2171 Character composition specification takes the following form:
2172 o ESC '0' -- start relative composition
2173 o ESC '1' -- end composition
2174 o ESC '2' -- start rule-base composition (*)
2175 o ESC '3' -- start relative composition with alternate chars (**)
2176 o ESC '4' -- start rule-base composition with alternate chars (**)
2177 Since these are not standard escape sequences of any ISO standard,
2178 the use of them for these meaning is restricted to Emacs only.
2179
2180 (*) This form is used only in Emacs 20.5 and the older versions,
2181 but the newer versions can safely decode it.
2182 (**) This form is used only in Emacs 21.1 and the newer versions,
2183 and the older versions can't decode it.
2184
2185 Here's a list of examples usages of these composition escape
2186 sequences (categorized by `enum composition_method').
2187
2188 COMPOSITION_RELATIVE:
2189 ESC 0 CHAR [ CHAR ] ESC 1
2190 COMPOSITOIN_WITH_RULE:
2191 ESC 2 CHAR [ RULE CHAR ] ESC 1
2192 COMPOSITION_WITH_ALTCHARS:
2193 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2194 COMPOSITION_WITH_RULE_ALTCHARS:
2195 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2196
2197enum iso_code_class_type iso_code_class[256];
2198
2199#define SAFE_CHARSET_P(coding, id) \
2200 ((id) <= (coding)->max_charset_id \
2201 && (coding)->safe_charsets[id] >= 0)
2202
2203
2204#define SHIFT_OUT_OK(category) \
2205 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2206
2207static void
2208setup_iso_safe_charsets (Lisp_Object attrs)
2209{
2210 Lisp_Object charset_list, safe_charsets;
2211 Lisp_Object request;
2212 Lisp_Object reg_usage;
2213 Lisp_Object tail;
2214 int reg94, reg96;
2215 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2216 int max_charset_id;
2217
2218 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2219 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2220 && ! EQ (charset_list, Viso_2022_charset_list))
2221 {
2222 CODING_ATTR_CHARSET_LIST (attrs)
2223 = charset_list = Viso_2022_charset_list;
2224 ASET (attrs, coding_attr_safe_charsets, Qnil);
2225 }
2226
2227 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2228 return;
2229
2230 max_charset_id = 0;
2231 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2232 {
2233 int id = XINT (XCAR (tail));
2234 if (max_charset_id < id)
2235 max_charset_id = id;
2236 }
2237
2238 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2239 make_number (255));
2240 request = AREF (attrs, coding_attr_iso_request);
2241 reg_usage = AREF (attrs, coding_attr_iso_usage);
2242 reg94 = XINT (XCAR (reg_usage));
2243 reg96 = XINT (XCDR (reg_usage));
2244
2245 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2246 {
2247 Lisp_Object id;
2248 Lisp_Object reg;
2249 struct charset *charset;
2250
2251 id = XCAR (tail);
2252 charset = CHARSET_FROM_ID (XINT (id));
2253 reg = Fcdr (Fassq (request, id));
2254 if (! NILP (reg))
2255 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2256 else if (charset->iso_chars_96)
2257 {
2258 if (reg96 < 4)
2259 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2260 }
2261 else
2262 {
2263 if (reg94 < 4)
2264 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2265 }
2266 }
2267 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2268}
2269
2270
2271/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2272 Check if a text is encoded in ISO2022. If it is, returns an
2273 integer in which appropriate flag bits any of:
2274 CATEGORY_MASK_ISO_7
2275 CATEGORY_MASK_ISO_7_TIGHT
2276 CATEGORY_MASK_ISO_8_1
2277 CATEGORY_MASK_ISO_8_2
2278 CATEGORY_MASK_ISO_7_ELSE
2279 CATEGORY_MASK_ISO_8_ELSE
2280 are set. If a code which should never appear in ISO2022 is found,
2281 returns 0. */
2282
2283static int
2284detect_coding_iso_2022 (coding, mask)
2285 struct coding_system *coding;
2286 int *mask;
2287{
2288 unsigned char *src = coding->source, *src_base = src;
2289 unsigned char *src_end = coding->source + coding->src_bytes;
2290 int multibytep = coding->src_multibyte;
2291 int mask_iso = CATEGORY_MASK_ISO;
2292 int mask_found = 0, mask_8bit_found = 0;
2293 int reg[4], shift_out = 0, single_shifting = 0;
2294 int id;
2295 int c, c1;
2296 int consumed_chars = 0;
2297 int i;
2298
2299 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2300 {
2301 struct coding_system *this = &(coding_categories[i]);
2302 Lisp_Object attrs, val;
2303
2304 attrs = CODING_ID_ATTRS (this->id);
2305 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2306 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2307 setup_iso_safe_charsets (attrs);
2308 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2309 this->max_charset_id = XSTRING (val)->size - 1;
2310 this->safe_charsets = (char *) XSTRING (val)->data;
2311 }
2312
2313 /* A coding system of this category is always ASCII compatible. */
2314 src += coding->head_ascii;
2315
2316 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2317 while (mask_iso && src < src_end)
2318 {
2319 ONE_MORE_BYTE (c);
2320 switch (c)
2321 {
2322 case ISO_CODE_ESC:
2323 if (inhibit_iso_escape_detection)
2324 break;
2325 single_shifting = 0;
2326 ONE_MORE_BYTE (c);
2327 if (c >= '(' && c <= '/')
2328 {
2329 /* Designation sequence for a charset of dimension 1. */
2330 ONE_MORE_BYTE (c1);
2331 if (c1 < ' ' || c1 >= 0x80
2332 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2333 /* Invalid designation sequence. Just ignore. */
2334 break;
2335 reg[(c - '(') % 4] = id;
2336 }
2337 else if (c == '$')
2338 {
2339 /* Designation sequence for a charset of dimension 2. */
2340 ONE_MORE_BYTE (c);
2341 if (c >= '@' && c <= 'B')
2342 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2343 reg[0] = id = iso_charset_table[1][0][c];
2344 else if (c >= '(' && c <= '/')
2345 {
2346 ONE_MORE_BYTE (c1);
2347 if (c1 < ' ' || c1 >= 0x80
2348 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2349 /* Invalid designation sequence. Just ignore. */
2350 break;
2351 reg[(c - '(') % 4] = id;
2352 }
2353 else
2354 /* Invalid designation sequence. Just ignore. */
2355 break;
2356 }
2357 else if (c == 'N' || c == 'O')
2358 {
2359 /* ESC <Fe> for SS2 or SS3. */
2360 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
2361 break;
2362 }
2363 else if (c >= '0' && c <= '4')
2364 {
2365 /* ESC <Fp> for start/end composition. */
2366 mask_found |= CATEGORY_MASK_ISO;
2367 break;
2368 }
2369 else
2370 {
2371 /* Invalid escape sequence. */
2372 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2373 break;
2374 }
2375
2376 /* We found a valid designation sequence for CHARSET. */
2377 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2378 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2379 id))
2380 mask_found |= CATEGORY_MASK_ISO_7;
2381 else
2382 mask_iso &= ~CATEGORY_MASK_ISO_7;
2383 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2384 id))
2385 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
2386 else
2387 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2388 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2389 id))
2390 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
2391 else
2392 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2393 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2394 id))
2395 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
2396 else
2397 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
2398 break;
2399
2400 case ISO_CODE_SO:
2401 if (inhibit_iso_escape_detection)
2402 break;
2403 single_shifting = 0;
2404 if (shift_out == 0
2405 && (reg[1] >= 0
2406 || SHIFT_OUT_OK (coding_category_iso_7_else)
2407 || SHIFT_OUT_OK (coding_category_iso_8_else)))
2408 {
2409 /* Locking shift out. */
2410 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2411 mask_found |= CATEGORY_MASK_ISO_ELSE;
2412 }
2413 break;
2414
2415 case ISO_CODE_SI:
2416 if (inhibit_iso_escape_detection)
2417 break;
2418 single_shifting = 0;
2419 if (shift_out == 1)
2420 {
2421 /* Locking shift in. */
2422 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2423 mask_found |= CATEGORY_MASK_ISO_ELSE;
2424 }
2425 break;
2426
2427 case ISO_CODE_CSI:
2428 single_shifting = 0;
2429 case ISO_CODE_SS2:
2430 case ISO_CODE_SS3:
2431 {
2432 int newmask = CATEGORY_MASK_ISO_8_ELSE;
2433
2434 if (inhibit_iso_escape_detection)
2435 break;
2436 if (c != ISO_CODE_CSI)
2437 {
2438 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2439 & CODING_ISO_FLAG_SINGLE_SHIFT)
2440 newmask |= CATEGORY_MASK_ISO_8_1;
2441 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2442 & CODING_ISO_FLAG_SINGLE_SHIFT)
2443 newmask |= CATEGORY_MASK_ISO_8_2;
2444 single_shifting = 1;
2445 }
2446 if (VECTORP (Vlatin_extra_code_table)
2447 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2448 {
2449 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2450 & CODING_ISO_FLAG_LATIN_EXTRA)
2451 newmask |= CATEGORY_MASK_ISO_8_1;
2452 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2453 & CODING_ISO_FLAG_LATIN_EXTRA)
2454 newmask |= CATEGORY_MASK_ISO_8_2;
2455 }
2456 mask_iso &= newmask;
2457 mask_found |= newmask;
2458 }
2459 break;
2460
2461 default:
2462 if (c < 0x80)
2463 {
2464 single_shifting = 0;
2465 break;
2466 }
2467 else if (c < 0xA0)
2468 {
2469 single_shifting = 0;
2470 mask_8bit_found = 1;
2471 if (VECTORP (Vlatin_extra_code_table)
2472 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2473 {
2474 int newmask = 0;
2475
2476 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2477 & CODING_ISO_FLAG_LATIN_EXTRA)
2478 newmask |= CATEGORY_MASK_ISO_8_1;
2479 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2480 & CODING_ISO_FLAG_LATIN_EXTRA)
2481 newmask |= CATEGORY_MASK_ISO_8_2;
2482 mask_iso &= newmask;
2483 mask_found |= newmask;
2484 }
2485 else
2486 return 0;
2487 }
2488 else
2489 {
2490 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2491 | CATEGORY_MASK_ISO_7_ELSE);
2492 mask_found |= CATEGORY_MASK_ISO_8_1;
2493 mask_8bit_found = 1;
2494 /* Check the length of succeeding codes of the range
2495 0xA0..0FF. If the byte length is odd, we exclude
2496 CATEGORY_MASK_ISO_8_2. We can check this only
2497 when we are not single shifting. */
2498 if (!single_shifting
2499 && mask_iso & CATEGORY_MASK_ISO_8_2)
2500 {
2501 int i = 1;
2502 while (src < src_end)
2503 {
2504 ONE_MORE_BYTE (c);
2505 if (c < 0xA0)
2506 break;
2507 i++;
2508 }
2509
2510 if (i & 1 && src < src_end)
2511 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
2512 else
2513 mask_found |= CATEGORY_MASK_ISO_8_2;
2514 }
2515 }
2516 break;
2517 }
2518 }
2519 no_more_source:
2520 if (!mask_iso)
2521 {
2522 *mask &= ~CATEGORY_MASK_ISO;
2523 return 0;
2524 }
2525 if (!mask_found)
2526 return 0;
2527 *mask &= mask_iso & mask_found;
2528 if (! mask_8bit_found)
2529 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2530 return 1;
2531}
2532
2533
2534/* Set designation state into CODING. */
2535#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2536 do { \
2537 int id, prev; \
2538 \
2539 if (final < '0' || final >= 128 \
2540 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2541 || !SAFE_CHARSET_P (coding, id)) \
2542 { \
2543 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2544 goto invalid_code; \
2545 } \
2546 prev = CODING_ISO_DESIGNATION (coding, reg); \
2547 CODING_ISO_DESIGNATION (coding, reg) = id; \
2548 /* If there was an invalid designation to REG previously, and this \
2549 designation is ASCII to REG, we should keep this designation \
2550 sequence. */ \
2551 if (prev == -2 && id == charset_ascii) \
2552 goto invalid_code; \
2553 } while (0)
2554
2555
2556#define MAYBE_FINISH_COMPOSITION() \
2557 do { \
2558 int i; \
2559 if (composition_state == COMPOSING_NO) \
2560 break; \
2561 /* It is assured that we have enough room for producing \
2562 characters stored in the table `components'. */ \
2563 if (charbuf + component_idx > charbuf_end) \
2564 goto no_more_source; \
2565 composition_state = COMPOSING_NO; \
2566 if (method == COMPOSITION_RELATIVE \
2567 || method == COMPOSITION_WITH_ALTCHARS) \
2568 { \
2569 for (i = 0; i < component_idx; i++) \
2570 *charbuf++ = components[i]; \
2571 char_offset += component_idx; \
2572 } \
2573 else \
2574 { \
2575 for (i = 0; i < component_idx; i += 2) \
2576 *charbuf++ = components[i]; \
2577 char_offset += (component_idx / 2) + 1; \
2578 } \
2579 } while (0)
2580
2581
2582/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2583 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2584 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2585 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2586 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2587 */
2588
2589#define DECODE_COMPOSITION_START(c1) \
2590 do { \
2591 if (c1 == '0' \
2592 && composition_state == COMPOSING_COMPONENT_RULE) \
2593 { \
2594 component_len = component_idx; \
2595 composition_state = COMPOSING_CHAR; \
2596 } \
2597 else \
2598 { \
2599 unsigned char *p; \
2600 \
2601 MAYBE_FINISH_COMPOSITION (); \
2602 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2603 goto no_more_source; \
2604 for (p = src; p < src_end - 1; p++) \
2605 if (*p == ISO_CODE_ESC && p[1] == '1') \
2606 break; \
2607 if (p == src_end - 1) \
2608 { \
2609 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2610 goto invalid_code; \
2611 goto no_more_source; \
2612 } \
2613 \
2614 /* This is surely the start of a composition. */ \
2615 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2616 : c1 == '2' ? COMPOSITION_WITH_RULE \
2617 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2618 : COMPOSITION_WITH_RULE_ALTCHARS); \
2619 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2620 : COMPOSING_COMPONENT_CHAR); \
2621 component_idx = component_len = 0; \
2622 } \
2623 } while (0)
2624
2625
2626/* Handle compositoin end sequence ESC 1. */
2627
2628#define DECODE_COMPOSITION_END() \
2629 do { \
2630 int nchars = (component_len > 0 ? component_idx - component_len \
2631 : method == COMPOSITION_RELATIVE ? component_idx \
2632 : (component_idx + 1) / 2); \
2633 int i; \
2634 int *saved_charbuf = charbuf; \
2635 \
2636 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2637 if (method != COMPOSITION_RELATIVE) \
2638 { \
2639 if (component_len == 0) \
2640 for (i = 0; i < component_idx; i++) \
2641 *charbuf++ = components[i]; \
2642 else \
2643 for (i = 0; i < component_len; i++) \
2644 *charbuf++ = components[i]; \
2645 *saved_charbuf = saved_charbuf - charbuf; \
2646 } \
2647 if (method == COMPOSITION_WITH_RULE) \
2648 for (i = 0; i < component_idx; i += 2, char_offset++) \
2649 *charbuf++ = components[i]; \
2650 else \
2651 for (i = component_len; i < component_idx; i++, char_offset++) \
2652 *charbuf++ = components[i]; \
2653 coding->annotated = 1; \
2654 composition_state = COMPOSING_NO; \
2655 } while (0)
2656
2657
2658/* Decode a composition rule from the byte C1 (and maybe one more byte
2659 from SRC) and store one encoded composition rule in
2660 coding->cmp_data. */
2661
2662#define DECODE_COMPOSITION_RULE(c1) \
2663 do { \
2664 (c1) -= 32; \
2665 if (c1 < 81) /* old format (before ver.21) */ \
2666 { \
2667 int gref = (c1) / 9; \
2668 int nref = (c1) % 9; \
2669 if (gref == 4) gref = 10; \
2670 if (nref == 4) nref = 10; \
2671 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2672 } \
2673 else if (c1 < 93) /* new format (after ver.21) */ \
2674 { \
2675 ONE_MORE_BYTE (c2); \
2676 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2677 } \
2678 else \
2679 c1 = 0; \
2680 } while (0)
2681
2682
2683/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2684
2685static void
2686decode_coding_iso_2022 (coding)
2687 struct coding_system *coding;
2688{
2689 unsigned char *src = coding->source + coding->consumed;
2690 unsigned char *src_end = coding->source + coding->src_bytes;
2691 unsigned char *src_base;
2692 int *charbuf = coding->charbuf;
2693 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2694 int consumed_chars = 0, consumed_chars_base;
2695 int char_offset = 0;
2696 int multibytep = coding->src_multibyte;
2697 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2698 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2699 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2700 struct charset *charset;
2701 int c;
2702 /* For handling composition sequence. */
2703#define COMPOSING_NO 0
2704#define COMPOSING_CHAR 1
2705#define COMPOSING_RULE 2
2706#define COMPOSING_COMPONENT_CHAR 3
2707#define COMPOSING_COMPONENT_RULE 4
2708
2709 int composition_state = COMPOSING_NO;
2710 enum composition_method method;
2711 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2712 int component_idx;
2713 int component_len;
2714 Lisp_Object attrs, eol_type, charset_list;
2715
2716 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2717 setup_iso_safe_charsets (attrs);
2718
2719 while (1)
2720 {
2721 int c1, c2;
2722
2723 src_base = src;
2724 consumed_chars_base = consumed_chars;
2725
2726 if (charbuf >= charbuf_end)
2727 break;
2728
2729 ONE_MORE_BYTE (c1);
2730
2731 /* We produce no character or one character. */
2732 switch (iso_code_class [c1])
2733 {
2734 case ISO_0x20_or_0x7F:
2735 if (composition_state != COMPOSING_NO)
2736 {
2737 if (composition_state == COMPOSING_RULE
2738 || composition_state == COMPOSING_COMPONENT_RULE)
2739 {
2740 DECODE_COMPOSITION_RULE (c1);
2741 components[component_idx++] = c1;
2742 composition_state--;
2743 continue;
2744 }
2745 }
2746 if (charset_id_0 < 0
2747 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2748 /* This is SPACE or DEL. */
2749 charset = CHARSET_FROM_ID (charset_ascii);
2750 else
2751 charset = CHARSET_FROM_ID (charset_id_0);
2752 break;
2753
2754 case ISO_graphic_plane_0:
2755 if (composition_state != COMPOSING_NO)
2756 {
2757 if (composition_state == COMPOSING_RULE
2758 || composition_state == COMPOSING_COMPONENT_RULE)
2759 {
2760 DECODE_COMPOSITION_RULE (c1);
2761 components[component_idx++] = c1;
2762 composition_state--;
2763 continue;
2764 }
2765 }
2766 charset = CHARSET_FROM_ID (charset_id_0);
2767 break;
2768
2769 case ISO_0xA0_or_0xFF:
2770 if (charset_id_1 < 0
2771 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2772 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2773 goto invalid_code;
2774 /* This is a graphic character, we fall down ... */
2775
2776 case ISO_graphic_plane_1:
2777 if (charset_id_1 < 0)
2778 goto invalid_code;
2779 charset = CHARSET_FROM_ID (charset_id_1);
2780 break;
2781
2782 case ISO_carriage_return:
2783 if (c1 == '\r')
2784 {
2785 if (EQ (eol_type, Qdos))
2786 {
2787 if (src == src_end)
2788 goto no_more_source;
2789 if (*src == '\n')
2790 ONE_MORE_BYTE (c1);
2791 }
2792 else if (EQ (eol_type, Qmac))
2793 c1 = '\n';
2794 }
2795 /* fall through */
2796
2797 case ISO_control_0:
2798 MAYBE_FINISH_COMPOSITION ();
2799 charset = CHARSET_FROM_ID (charset_ascii);
2800 break;
2801
2802 case ISO_control_1:
2803 MAYBE_FINISH_COMPOSITION ();
2804 goto invalid_code;
2805
2806 case ISO_shift_out:
2807 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2808 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2809 goto invalid_code;
2810 CODING_ISO_INVOCATION (coding, 0) = 1;
2811 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2812 continue;
2813
2814 case ISO_shift_in:
2815 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2816 goto invalid_code;
2817 CODING_ISO_INVOCATION (coding, 0) = 0;
2818 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2819 continue;
2820
2821 case ISO_single_shift_2_7:
2822 case ISO_single_shift_2:
2823 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2824 goto invalid_code;
2825 /* SS2 is handled as an escape sequence of ESC 'N' */
2826 c1 = 'N';
2827 goto label_escape_sequence;
2828
2829 case ISO_single_shift_3:
2830 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2831 goto invalid_code;
2832 /* SS2 is handled as an escape sequence of ESC 'O' */
2833 c1 = 'O';
2834 goto label_escape_sequence;
2835
2836 case ISO_control_sequence_introducer:
2837 /* CSI is handled as an escape sequence of ESC '[' ... */
2838 c1 = '[';
2839 goto label_escape_sequence;
2840
2841 case ISO_escape:
2842 ONE_MORE_BYTE (c1);
2843 label_escape_sequence:
2844 /* Escape sequences handled here are invocation,
2845 designation, direction specification, and character
2846 composition specification. */
2847 switch (c1)
2848 {
2849 case '&': /* revision of following character set */
2850 ONE_MORE_BYTE (c1);
2851 if (!(c1 >= '@' && c1 <= '~'))
2852 goto invalid_code;
2853 ONE_MORE_BYTE (c1);
2854 if (c1 != ISO_CODE_ESC)
2855 goto invalid_code;
2856 ONE_MORE_BYTE (c1);
2857 goto label_escape_sequence;
2858
2859 case '$': /* designation of 2-byte character set */
2860 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2861 goto invalid_code;
2862 ONE_MORE_BYTE (c1);
2863 if (c1 >= '@' && c1 <= 'B')
2864 { /* designation of JISX0208.1978, GB2312.1980,
2865 or JISX0208.1980 */
2866 DECODE_DESIGNATION (0, 2, 0, c1);
2867 }
2868 else if (c1 >= 0x28 && c1 <= 0x2B)
2869 { /* designation of DIMENSION2_CHARS94 character set */
2870 ONE_MORE_BYTE (c2);
2871 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2872 }
2873 else if (c1 >= 0x2C && c1 <= 0x2F)
2874 { /* designation of DIMENSION2_CHARS96 character set */
2875 ONE_MORE_BYTE (c2);
2876 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2877 }
2878 else
2879 goto invalid_code;
2880 /* We must update these variables now. */
2881 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2882 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2883 continue;
2884
2885 case 'n': /* invocation of locking-shift-2 */
2886 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2887 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2888 goto invalid_code;
2889 CODING_ISO_INVOCATION (coding, 0) = 2;
2890 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2891 continue;
2892
2893 case 'o': /* invocation of locking-shift-3 */
2894 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2895 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2896 goto invalid_code;
2897 CODING_ISO_INVOCATION (coding, 0) = 3;
2898 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2899 continue;
2900
2901 case 'N': /* invocation of single-shift-2 */
2902 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2903 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2904 goto invalid_code;
2905 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
2906 ONE_MORE_BYTE (c1);
2907 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2908 goto invalid_code;
2909 break;
2910
2911 case 'O': /* invocation of single-shift-3 */
2912 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2913 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2914 goto invalid_code;
2915 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
2916 ONE_MORE_BYTE (c1);
2917 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2918 goto invalid_code;
2919 break;
2920
2921 case '0': case '2': case '3': case '4': /* start composition */
2922 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2923 goto invalid_code;
2924 DECODE_COMPOSITION_START (c1);
2925 continue;
2926
2927 case '1': /* end composition */
2928 if (composition_state == COMPOSING_NO)
2929 goto invalid_code;
2930 DECODE_COMPOSITION_END ();
2931 continue;
2932
2933 case '[': /* specification of direction */
2934 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2935 goto invalid_code;
2936 /* For the moment, nested direction is not supported.
2937 So, `coding->mode & CODING_MODE_DIRECTION' zero means
2938 left-to-right, and nozero means right-to-left. */
2939 ONE_MORE_BYTE (c1);
2940 switch (c1)
2941 {
2942 case ']': /* end of the current direction */
2943 coding->mode &= ~CODING_MODE_DIRECTION;
2944
2945 case '0': /* end of the current direction */
2946 case '1': /* start of left-to-right direction */
2947 ONE_MORE_BYTE (c1);
2948 if (c1 == ']')
2949 coding->mode &= ~CODING_MODE_DIRECTION;
2950 else
2951 goto invalid_code;
2952 break;
2953
2954 case '2': /* start of right-to-left direction */
2955 ONE_MORE_BYTE (c1);
2956 if (c1 == ']')
2957 coding->mode |= CODING_MODE_DIRECTION;
2958 else
2959 goto invalid_code;
2960 break;
2961
2962 default:
2963 goto invalid_code;
2964 }
2965 continue;
2966
2967 default:
2968 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2969 goto invalid_code;
2970 if (c1 >= 0x28 && c1 <= 0x2B)
2971 { /* designation of DIMENSION1_CHARS94 character set */
2972 ONE_MORE_BYTE (c2);
2973 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
2974 }
2975 else if (c1 >= 0x2C && c1 <= 0x2F)
2976 { /* designation of DIMENSION1_CHARS96 character set */
2977 ONE_MORE_BYTE (c2);
2978 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
2979 }
2980 else
2981 goto invalid_code;
2982 /* We must update these variables now. */
2983 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2984 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2985 continue;
2986 }
2987 }
2988
2989 /* Now we know CHARSET and 1st position code C1 of a character.
2990 Produce a decoded character while getting 2nd position code
2991 C2 if necessary. */
2992 c1 &= 0x7F;
2993 if (CHARSET_DIMENSION (charset) > 1)
2994 {
2995 ONE_MORE_BYTE (c2);
2996 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
2997 /* C2 is not in a valid range. */
2998 goto invalid_code;
2999 c1 = (c1 << 8) | (c2 & 0x7F);
3000 if (CHARSET_DIMENSION (charset) > 2)
3001 {
3002 ONE_MORE_BYTE (c2);
3003 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3004 /* C2 is not in a valid range. */
3005 goto invalid_code;
3006 c1 = (c1 << 8) | (c2 & 0x7F);
3007 }
3008 }
3009
3010 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3011 if (c < 0)
3012 {
3013 MAYBE_FINISH_COMPOSITION ();
3014 for (; src_base < src; src_base++, char_offset++)
3015 {
3016 if (ASCII_BYTE_P (*src_base))
3017 *charbuf++ = *src_base;
3018 else
3019 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3020 }
3021 }
3022 else if (composition_state == COMPOSING_NO)
3023 {
3024 *charbuf++ = c;
3025 char_offset++;
3026 }
3027 else
3028 {
3029 components[component_idx++] = c;
3030 if (method == COMPOSITION_WITH_RULE
3031 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3032 && composition_state == COMPOSING_COMPONENT_CHAR))
3033 composition_state++;
3034 }
3035 continue;
3036
3037 invalid_code:
3038 MAYBE_FINISH_COMPOSITION ();
3039 src = src_base;
3040 consumed_chars = consumed_chars_base;
3041 ONE_MORE_BYTE (c);
3042 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3043 coding->errors++;
3044 }
3045
3046 no_more_source:
3047 coding->consumed_char += consumed_chars_base;
3048 coding->consumed = src_base - coding->source;
3049 coding->charbuf_used = charbuf - coding->charbuf;
3050}
3051
3052
3053/* ISO2022 encoding stuff. */
3054
3055/*
3056 It is not enough to say just "ISO2022" on encoding, we have to
3057 specify more details. In Emacs, each coding system of ISO2022
3058 variant has the following specifications:
3059 1. Initial designation to G0 thru G3.
3060 2. Allows short-form designation?
3061 3. ASCII should be designated to G0 before control characters?
3062 4. ASCII should be designated to G0 at end of line?
3063 5. 7-bit environment or 8-bit environment?
3064 6. Use locking-shift?
3065 7. Use Single-shift?
3066 And the following two are only for Japanese:
3067 8. Use ASCII in place of JIS0201-1976-Roman?
3068 9. Use JISX0208-1983 in place of JISX0208-1978?
3069 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3070 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3071 details.
3072*/
3073
3074/* Produce codes (escape sequence) for designating CHARSET to graphic
3075 register REG at DST, and increment DST. If <final-char> of CHARSET is
3076 '@', 'A', or 'B' and the coding system CODING allows, produce
3077 designation sequence of short-form. */
3078
3079#define ENCODE_DESIGNATION(charset, reg, coding) \
3080 do { \
3081 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3082 char *intermediate_char_94 = "()*+"; \
3083 char *intermediate_char_96 = ",-./"; \
3084 int revision = -1; \
3085 int c; \
3086 \
3087 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3088 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3089 \
3090 if (revision >= 0) \
3091 { \
3092 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3093 EMIT_ONE_BYTE ('@' + revision); \
3094 } \
3095 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3096 if (CHARSET_DIMENSION (charset) == 1) \
3097 { \
3098 if (! CHARSET_ISO_CHARS_96 (charset)) \
3099 c = intermediate_char_94[reg]; \
3100 else \
3101 c = intermediate_char_96[reg]; \
3102 EMIT_ONE_ASCII_BYTE (c); \
3103 } \
3104 else \
3105 { \
3106 EMIT_ONE_ASCII_BYTE ('$'); \
3107 if (! CHARSET_ISO_CHARS_96 (charset)) \
3108 { \
3109 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3110 || reg != 0 \
3111 || final_char < '@' || final_char > 'B') \
3112 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3113 } \
3114 else \
3115 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3116 } \
3117 EMIT_ONE_ASCII_BYTE (final_char); \
3118 \
3119 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3120 } while (0)
3121
3122
3123/* The following two macros produce codes (control character or escape
3124 sequence) for ISO2022 single-shift functions (single-shift-2 and
3125 single-shift-3). */
3126
3127#define ENCODE_SINGLE_SHIFT_2 \
3128 do { \
3129 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3130 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3131 else \
3132 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3133 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3134 } while (0)
3135
3136
3137#define ENCODE_SINGLE_SHIFT_3 \
3138 do { \
3139 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3140 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3141 else \
3142 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3143 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3144 } while (0)
3145
3146
3147/* The following four macros produce codes (control character or
3148 escape sequence) for ISO2022 locking-shift functions (shift-in,
3149 shift-out, locking-shift-2, and locking-shift-3). */
3150
3151#define ENCODE_SHIFT_IN \
3152 do { \
3153 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3154 CODING_ISO_INVOCATION (coding, 0) = 0; \
3155 } while (0)
3156
3157
3158#define ENCODE_SHIFT_OUT \
3159 do { \
3160 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3161 CODING_ISO_INVOCATION (coding, 0) = 1; \
3162 } while (0)
3163
3164
3165#define ENCODE_LOCKING_SHIFT_2 \
3166 do { \
3167 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3168 CODING_ISO_INVOCATION (coding, 0) = 2; \
3169 } while (0)
3170
3171
3172#define ENCODE_LOCKING_SHIFT_3 \
3173 do { \
3174 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3175 CODING_ISO_INVOCATION (coding, 0) = 3; \
3176 } while (0)
3177
3178
3179/* Produce codes for a DIMENSION1 character whose character set is
3180 CHARSET and whose position-code is C1. Designation and invocation
3181 sequences are also produced in advance if necessary. */
3182
3183#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3184 do { \
3185 int id = CHARSET_ID (charset); \
3186 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3187 { \
3188 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3189 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3190 else \
3191 EMIT_ONE_BYTE (c1 | 0x80); \
3192 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3193 break; \
3194 } \
3195 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3196 { \
3197 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3198 break; \
3199 } \
3200 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3201 { \
3202 EMIT_ONE_BYTE (c1 | 0x80); \
3203 break; \
3204 } \
3205 else \
3206 /* Since CHARSET is not yet invoked to any graphic planes, we \
3207 must invoke it, or, at first, designate it to some graphic \
3208 register. Then repeat the loop to actually produce the \
3209 character. */ \
3210 dst = encode_invocation_designation (charset, coding, dst, \
3211 &produced_chars); \
3212 } while (1)
3213
3214
3215/* Produce codes for a DIMENSION2 character whose character set is
3216 CHARSET and whose position-codes are C1 and C2. Designation and
3217 invocation codes are also produced in advance if necessary. */
3218
3219#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3220 do { \
3221 int id = CHARSET_ID (charset); \
3222 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3223 { \
3224 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3225 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3226 else \
3227 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3228 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3229 break; \
3230 } \
3231 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3232 { \
3233 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3234 break; \
3235 } \
3236 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3237 { \
3238 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3239 break; \
3240 } \
3241 else \
3242 /* Since CHARSET is not yet invoked to any graphic planes, we \
3243 must invoke it, or, at first, designate it to some graphic \
3244 register. Then repeat the loop to actually produce the \
3245 character. */ \
3246 dst = encode_invocation_designation (charset, coding, dst, \
3247 &produced_chars); \
3248 } while (1)
3249
3250
3251#define ENCODE_ISO_CHARACTER(charset, c) \
3252 do { \
3253 int code = ENCODE_CHAR ((charset),(c)); \
3254 \
3255 if (CHARSET_DIMENSION (charset) == 1) \
3256 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3257 else \
3258 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3259 } while (0)
3260
3261
3262/* Produce designation and invocation codes at a place pointed by DST
3263 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3264 Return new DST. */
3265
3266unsigned char *
3267encode_invocation_designation (charset, coding, dst, p_nchars)
3268 struct charset *charset;
3269 struct coding_system *coding;
3270 unsigned char *dst;
3271 int *p_nchars;
3272{
3273 int multibytep = coding->dst_multibyte;
3274 int produced_chars = *p_nchars;
3275 int reg; /* graphic register number */
3276 int id = CHARSET_ID (charset);
3277
3278 /* At first, check designations. */
3279 for (reg = 0; reg < 4; reg++)
3280 if (id == CODING_ISO_DESIGNATION (coding, reg))
3281 break;
3282
3283 if (reg >= 4)
3284 {
3285 /* CHARSET is not yet designated to any graphic registers. */
3286 /* At first check the requested designation. */
3287 reg = CODING_ISO_REQUEST (coding, id);
3288 if (reg < 0)
3289 /* Since CHARSET requests no special designation, designate it
3290 to graphic register 0. */
3291 reg = 0;
3292
3293 ENCODE_DESIGNATION (charset, reg, coding);
3294 }
3295
3296 if (CODING_ISO_INVOCATION (coding, 0) != reg
3297 && CODING_ISO_INVOCATION (coding, 1) != reg)
3298 {
3299 /* Since the graphic register REG is not invoked to any graphic
3300 planes, invoke it to graphic plane 0. */
3301 switch (reg)
3302 {
3303 case 0: /* graphic register 0 */
3304 ENCODE_SHIFT_IN;
3305 break;
3306
3307 case 1: /* graphic register 1 */
3308 ENCODE_SHIFT_OUT;
3309 break;
3310
3311 case 2: /* graphic register 2 */
3312 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3313 ENCODE_SINGLE_SHIFT_2;
3314 else
3315 ENCODE_LOCKING_SHIFT_2;
3316 break;
3317
3318 case 3: /* graphic register 3 */
3319 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3320 ENCODE_SINGLE_SHIFT_3;
3321 else
3322 ENCODE_LOCKING_SHIFT_3;
3323 break;
3324 }
3325 }
3326
3327 *p_nchars = produced_chars;
3328 return dst;
3329}
3330
3331/* The following three macros produce codes for indicating direction
3332 of text. */
3333#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
3334 do { \
3335 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3336 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
3337 else \
3338 EMIT_ONE_BYTE (ISO_CODE_CSI); \
3339 } while (0)
3340
3341
3342#define ENCODE_DIRECTION_R2L() \
3343 do { \
3344 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3345 EMIT_TWO_ASCII_BYTES ('2', ']'); \
3346 } while (0)
3347
3348
3349#define ENCODE_DIRECTION_L2R() \
3350 do { \
3351 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3352 EMIT_TWO_ASCII_BYTES ('0', ']'); \
3353 } while (0)
3354
3355
3356/* Produce codes for designation and invocation to reset the graphic
3357 planes and registers to initial state. */
3358#define ENCODE_RESET_PLANE_AND_REGISTER() \
3359 do { \
3360 int reg; \
3361 struct charset *charset; \
3362 \
3363 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3364 ENCODE_SHIFT_IN; \
3365 for (reg = 0; reg < 4; reg++) \
3366 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3367 && (CODING_ISO_DESIGNATION (coding, reg) \
3368 != CODING_ISO_INITIAL (coding, reg))) \
3369 { \
3370 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3371 ENCODE_DESIGNATION (charset, reg, coding); \
3372 } \
3373 } while (0)
3374
3375
3376/* Produce designation sequences of charsets in the line started from
3377 SRC to a place pointed by DST, and return updated DST.
3378
3379 If the current block ends before any end-of-line, we may fail to
3380 find all the necessary designations. */
3381
3382static unsigned char *
3383encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3384 struct coding_system *coding;
3385 int *charbuf, *charbuf_end;
3386 unsigned char *dst;
3387{
3388 struct charset *charset;
3389 /* Table of charsets to be designated to each graphic register. */
3390 int r[4];
3391 int c, found = 0, reg;
3392 int produced_chars = 0;
3393 int multibytep = coding->dst_multibyte;
3394 Lisp_Object attrs;
3395 Lisp_Object charset_list;
3396
3397 attrs = CODING_ID_ATTRS (coding->id);
3398 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3399 if (EQ (charset_list, Qiso_2022))
3400 charset_list = Viso_2022_charset_list;
3401
3402 for (reg = 0; reg < 4; reg++)
3403 r[reg] = -1;
3404
3405 while (found < 4)
3406 {
3407 int id;
3408
3409 c = *charbuf++;
3410 if (c == '\n')
3411 break;
3412 charset = char_charset (c, charset_list, NULL);
3413 id = CHARSET_ID (charset);
3414 reg = CODING_ISO_REQUEST (coding, id);
3415 if (reg >= 0 && r[reg] < 0)
3416 {
3417 found++;
3418 r[reg] = id;
3419 }
3420 }
3421
3422 if (found)
3423 {
3424 for (reg = 0; reg < 4; reg++)
3425 if (r[reg] >= 0
3426 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3427 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3428 }
3429
3430 return dst;
3431}
3432
3433/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3434
3435static int
3436encode_coding_iso_2022 (coding)
3437 struct coding_system *coding;
3438{
3439 int multibytep = coding->dst_multibyte;
3440 int *charbuf = coding->charbuf;
3441 int *charbuf_end = charbuf + coding->charbuf_used;
3442 unsigned char *dst = coding->destination + coding->produced;
3443 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3444 int safe_room = 16;
3445 int bol_designation
3446 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3447 && CODING_ISO_BOL (coding));
3448 int produced_chars = 0;
3449 Lisp_Object attrs, eol_type, charset_list;
3450 int ascii_compatible;
3451 int c;
3452
3453 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3454 setup_iso_safe_charsets (attrs);
3455 coding->safe_charsets
3456 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3457
3458 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3459
3460 while (charbuf < charbuf_end)
3461 {
3462 ASSURE_DESTINATION (safe_room);
3463
3464 if (bol_designation)
3465 {
3466 unsigned char *dst_prev = dst;
3467
3468 /* We have to produce designation sequences if any now. */
3469 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3470 bol_designation = 0;
3471 /* We are sure that designation sequences are all ASCII bytes. */
3472 produced_chars += dst - dst_prev;
3473 }
3474
3475 c = *charbuf++;
3476
3477 /* Now encode the character C. */
3478 if (c < 0x20 || c == 0x7F)
3479 {
3480 if (c == '\n'
3481 || (c == '\r' && EQ (eol_type, Qmac)))
3482 {
3483 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3484 ENCODE_RESET_PLANE_AND_REGISTER ();
3485 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3486 {
3487 int i;
3488
3489 for (i = 0; i < 4; i++)
3490 CODING_ISO_DESIGNATION (coding, i)
3491 = CODING_ISO_INITIAL (coding, i);
3492 }
3493 bol_designation
3494 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3495 }
3496 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3497 ENCODE_RESET_PLANE_AND_REGISTER ();
3498 EMIT_ONE_ASCII_BYTE (c);
3499 }
3500 else if (ASCII_CHAR_P (c))
3501 {
3502 if (ascii_compatible)
3503 EMIT_ONE_ASCII_BYTE (c);
3504 else
3505 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii), c);
3506 }
3507 else
3508 {
3509 struct charset *charset = char_charset (c, charset_list, NULL);
3510
3511 if (!charset)
3512 {
3513 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3514 {
3515 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3516 charset = CHARSET_FROM_ID (charset_ascii);
3517 }
3518 else
3519 {
3520 c = coding->default_char;
3521 charset = char_charset (c, charset_list, NULL);
3522 }
3523 }
3524 ENCODE_ISO_CHARACTER (charset, c);
3525 }
3526 }
3527
3528 if (coding->mode & CODING_MODE_LAST_BLOCK
3529 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3530 {
3531 ASSURE_DESTINATION (safe_room);
3532 ENCODE_RESET_PLANE_AND_REGISTER ();
3533 }
3534 coding->result = CODING_RESULT_SUCCESS;
3535 CODING_ISO_BOL (coding) = bol_designation;
3536 coding->produced_char += produced_chars;
3537 coding->produced = dst - coding->destination;
3538 return 0;
3539}
3540
3541\f
3542/*** 8,9. SJIS and BIG5 handlers ***/
3543
3544/* Although SJIS and BIG5 are not ISO's coding system, they are used
3545 quite widely. So, for the moment, Emacs supports them in the bare
3546 C code. But, in the future, they may be supported only by CCL. */
3547
3548/* SJIS is a coding system encoding three character sets: ASCII, right
3549 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3550 as is. A character of charset katakana-jisx0201 is encoded by
3551 "position-code + 0x80". A character of charset japanese-jisx0208
3552 is encoded in 2-byte but two position-codes are divided and shifted
3553 so that it fit in the range below.
3554
3555 --- CODE RANGE of SJIS ---
3556 (character set) (range)
3557 ASCII 0x00 .. 0x7F
3558 KATAKANA-JISX0201 0xA0 .. 0xDF
3559 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3560 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3561 -------------------------------
3562
3563*/
3564
3565/* BIG5 is a coding system encoding two character sets: ASCII and
3566 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3567 character set and is encoded in two-byte.
3568
3569 --- CODE RANGE of BIG5 ---
3570 (character set) (range)
3571 ASCII 0x00 .. 0x7F
3572 Big5 (1st byte) 0xA1 .. 0xFE
3573 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3574 --------------------------
3575
3576 */
3577
3578/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3579 Check if a text is encoded in SJIS. If it is, return
3580 CATEGORY_MASK_SJIS, else return 0. */
3581
3582static int
3583detect_coding_sjis (coding, mask)
3584 struct coding_system *coding;
3585 int *mask;
3586{
3587 unsigned char *src = coding->source, *src_base = src;
3588 unsigned char *src_end = coding->source + coding->src_bytes;
3589 int multibytep = coding->src_multibyte;
3590 int consumed_chars = 0;
3591 int found = 0;
3592 int c;
3593
3594 /* A coding system of this category is always ASCII compatible. */
3595 src += coding->head_ascii;
3596
3597 while (1)
3598 {
3599 ONE_MORE_BYTE (c);
3600 if (c < 0x80)
3601 continue;
3602 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3603 {
3604 ONE_MORE_BYTE (c);
3605 if (c < 0x40 || c == 0x7F || c > 0xFC)
3606 break;
3607 found = 1;
3608 }
3609 else if (c >= 0xA0 && c < 0xE0)
3610 found = 1;
3611 else
3612 break;
3613 }
3614 *mask &= ~CATEGORY_MASK_SJIS;
3615 return 0;
3616
3617 no_more_source:
3618 if (!found)
3619 return 0;
3620 *mask &= CATEGORY_MASK_SJIS;
3621 return 1;
3622}
3623
3624/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3625 Check if a text is encoded in BIG5. If it is, return
3626 CATEGORY_MASK_BIG5, else return 0. */
3627
3628static int
3629detect_coding_big5 (coding, mask)
3630 struct coding_system *coding;
3631 int *mask;
3632{
3633 unsigned char *src = coding->source, *src_base = src;
3634 unsigned char *src_end = coding->source + coding->src_bytes;
3635 int multibytep = coding->src_multibyte;
3636 int consumed_chars = 0;
3637 int found = 0;
3638 int c;
3639
3640 /* A coding system of this category is always ASCII compatible. */
3641 src += coding->head_ascii;
3642
3643 while (1)
3644 {
3645 ONE_MORE_BYTE (c);
3646 if (c < 0x80)
3647 continue;
3648 if (c >= 0xA1)
3649 {
3650 ONE_MORE_BYTE (c);
3651 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3652 return 0;
3653 found = 1;
3654 }
3655 else
3656 break;
3657 }
3658 *mask &= ~CATEGORY_MASK_BIG5;
3659 return 0;
3660
3661 no_more_source:
3662 if (!found)
3663 return 0;
3664 *mask &= CATEGORY_MASK_BIG5;
3665 return 1;
3666}
3667
3668/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3669 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3670
3671static void
3672decode_coding_sjis (coding)
3673 struct coding_system *coding;
3674{
3675 unsigned char *src = coding->source + coding->consumed;
3676 unsigned char *src_end = coding->source + coding->src_bytes;
3677 unsigned char *src_base;
3678 int *charbuf = coding->charbuf;
3679 int *charbuf_end = charbuf + coding->charbuf_size;
3680 int consumed_chars = 0, consumed_chars_base;
3681 int multibytep = coding->src_multibyte;
3682 struct charset *charset_roman, *charset_kanji, *charset_kana;
3683 Lisp_Object attrs, eol_type, charset_list, val;
3684
3685 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3686
3687 val = charset_list;
3688 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3689 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3690 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
3691
3692 while (1)
3693 {
3694 int c, c1;
3695
3696 src_base = src;
3697 consumed_chars_base = consumed_chars;
3698
3699 if (charbuf >= charbuf_end)
3700 break;
3701
3702 ONE_MORE_BYTE (c);
3703
3704 if (c == '\r')
3705 {
3706 if (EQ (eol_type, Qdos))
3707 {
3708 if (src == src_end)
3709 goto no_more_source;
3710 if (*src == '\n')
3711 ONE_MORE_BYTE (c);
3712 }
3713 else if (EQ (eol_type, Qmac))
3714 c = '\n';
3715 }
3716 else
3717 {
3718 struct charset *charset;
3719
3720 if (c < 0x80)
3721 charset = charset_roman;
3722 else
3723 {
3724 if (c >= 0xF0)
3725 goto invalid_code;
3726 if (c < 0xA0 || c >= 0xE0)
3727 {
3728 /* SJIS -> JISX0208 */
3729 ONE_MORE_BYTE (c1);
3730 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3731 goto invalid_code;
3732 c = (c << 8) | c1;
3733 SJIS_TO_JIS (c);
3734 charset = charset_kanji;
3735 }
3736 else
3737 /* SJIS -> JISX0201-Kana */
3738 charset = charset_kana;
3739 }
3740 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3741 }
3742 *charbuf++ = c;
3743 continue;
3744
3745 invalid_code:
3746 src = src_base;
3747 consumed_chars = consumed_chars_base;
3748 ONE_MORE_BYTE (c);
3749 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3750 coding->errors++;
3751 }
3752
3753 no_more_source:
3754 coding->consumed_char += consumed_chars_base;
3755 coding->consumed = src_base - coding->source;
3756 coding->charbuf_used = charbuf - coding->charbuf;
3757}
3758
3759static void
3760decode_coding_big5 (coding)
3761 struct coding_system *coding;
3762{
3763 unsigned char *src = coding->source + coding->consumed;
3764 unsigned char *src_end = coding->source + coding->src_bytes;
3765 unsigned char *src_base;
3766 int *charbuf = coding->charbuf;
3767 int *charbuf_end = charbuf + coding->charbuf_size;
3768 int consumed_chars = 0, consumed_chars_base;
3769 int multibytep = coding->src_multibyte;
3770 struct charset *charset_roman, *charset_big5;
3771 Lisp_Object attrs, eol_type, charset_list, val;
3772
3773 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3774 val = charset_list;
3775 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3776 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3777
3778 while (1)
3779 {
3780 int c, c1;
3781
3782 src_base = src;
3783 consumed_chars_base = consumed_chars;
3784
3785 if (charbuf >= charbuf_end)
3786 break;
3787
3788 ONE_MORE_BYTE (c);
3789
3790 if (c == '\r')
3791 {
3792 if (EQ (eol_type, Qdos))
3793 {
3794 if (src == src_end)
3795 goto no_more_source;
3796 if (*src == '\n')
3797 ONE_MORE_BYTE (c);
3798 }
3799 else if (EQ (eol_type, Qmac))
3800 c = '\n';
3801 }
3802 else
3803 {
3804 struct charset *charset;
3805 if (c < 0x80)
3806 charset = charset_roman;
3807 else
3808 {
3809 /* BIG5 -> Big5 */
3810 if (c < 0xA1 || c > 0xFE)
3811 goto invalid_code;
3812 ONE_MORE_BYTE (c1);
3813 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3814 goto invalid_code;
3815 c = c << 8 | c1;
3816 charset = charset_big5;
3817 }
3818 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3819 }
3820
3821 *charbuf++ = c;
3822 continue;
3823
3824 invalid_code:
3825 src = src_base;
3826 consumed_chars = consumed_chars_base;
3827 ONE_MORE_BYTE (c);
3828 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3829 coding->errors++;
3830 }
3831
3832 no_more_source:
3833 coding->consumed_char += consumed_chars_base;
3834 coding->consumed = src_base - coding->source;
3835 coding->charbuf_used = charbuf - coding->charbuf;
3836}
3837
3838/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3839 This function can encode charsets `ascii', `katakana-jisx0201',
3840 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3841 are sure that all these charsets are registered as official charset
3842 (i.e. do not have extended leading-codes). Characters of other
3843 charsets are produced without any encoding. If SJIS_P is 1, encode
3844 SJIS text, else encode BIG5 text. */
3845
3846static int
3847encode_coding_sjis (coding)
3848 struct coding_system *coding;
3849{
3850 int multibytep = coding->dst_multibyte;
3851 int *charbuf = coding->charbuf;
3852 int *charbuf_end = charbuf + coding->charbuf_used;
3853 unsigned char *dst = coding->destination + coding->produced;
3854 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3855 int safe_room = 4;
3856 int produced_chars = 0;
3857 Lisp_Object attrs, eol_type, charset_list, val;
3858 int ascii_compatible;
3859 struct charset *charset_roman, *charset_kanji, *charset_kana;
3860 int c;
3861
3862 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3863 val = charset_list;
3864 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3865 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3866 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3867
3868 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3869
3870 while (charbuf < charbuf_end)
3871 {
3872 ASSURE_DESTINATION (safe_room);
3873 c = *charbuf++;
3874 /* Now encode the character C. */
3875 if (ASCII_CHAR_P (c) && ascii_compatible)
3876 EMIT_ONE_ASCII_BYTE (c);
3877 else
3878 {
3879 unsigned code;
3880 struct charset *charset = char_charset (c, charset_list, &code);
3881
3882 if (!charset)
3883 {
3884 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3885 {
3886 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3887 charset = CHARSET_FROM_ID (charset_ascii);
3888 }
3889 else
3890 {
3891 c = coding->default_char;
3892 charset = char_charset (c, charset_list, &code);
3893 }
3894 }
3895 if (code == CHARSET_INVALID_CODE (charset))
3896 abort ();
3897 if (charset == charset_kanji)
3898 {
3899 int c1, c2;
3900 JIS_TO_SJIS (code);
3901 c1 = code >> 8, c2 = code & 0xFF;
3902 EMIT_TWO_BYTES (c1, c2);
3903 }
3904 else if (charset == charset_kana)
3905 EMIT_ONE_BYTE (code | 0x80);
3906 else
3907 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3908 }
3909 }
3910 coding->result = CODING_RESULT_SUCCESS;
3911 coding->produced_char += produced_chars;
3912 coding->produced = dst - coding->destination;
3913 return 0;
3914}
3915
3916static int
3917encode_coding_big5 (coding)
3918 struct coding_system *coding;
3919{
3920 int multibytep = coding->dst_multibyte;
3921 int *charbuf = coding->charbuf;
3922 int *charbuf_end = charbuf + coding->charbuf_used;
3923 unsigned char *dst = coding->destination + coding->produced;
3924 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3925 int safe_room = 4;
3926 int produced_chars = 0;
3927 Lisp_Object attrs, eol_type, charset_list, val;
3928 int ascii_compatible;
3929 struct charset *charset_roman, *charset_big5;
3930 int c;
3931
3932 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3933 val = charset_list;
3934 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3935 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3936 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3937
3938 while (charbuf < charbuf_end)
3939 {
3940 ASSURE_DESTINATION (safe_room);
3941 c = *charbuf++;
3942 /* Now encode the character C. */
3943 if (ASCII_CHAR_P (c) && ascii_compatible)
3944 EMIT_ONE_ASCII_BYTE (c);
3945 else
3946 {
3947 unsigned code;
3948 struct charset *charset = char_charset (c, charset_list, &code);
3949
3950 if (! charset)
3951 {
3952 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3953 {
3954 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3955 charset = CHARSET_FROM_ID (charset_ascii);
3956 }
3957 else
3958 {
3959 c = coding->default_char;
3960 charset = char_charset (c, charset_list, &code);
3961 }
3962 }
3963 if (code == CHARSET_INVALID_CODE (charset))
3964 abort ();
3965 if (charset == charset_big5)
3966 {
3967 int c1, c2;
3968
3969 c1 = code >> 8, c2 = code & 0xFF;
3970 EMIT_TWO_BYTES (c1, c2);
3971 }
3972 else
3973 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3974 }
3975 }
3976 coding->result = CODING_RESULT_SUCCESS;
3977 coding->produced_char += produced_chars;
3978 coding->produced = dst - coding->destination;
3979 return 0;
3980}
3981
3982\f
3983/*** 10. CCL handlers ***/
3984
3985/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3986 Check if a text is encoded in a coding system of which
3987 encoder/decoder are written in CCL program. If it is, return
3988 CATEGORY_MASK_CCL, else return 0. */
3989
3990static int
3991detect_coding_ccl (coding, mask)
3992 struct coding_system *coding;
3993 int *mask;
3994{
3995 unsigned char *src = coding->source, *src_base = src;
3996 unsigned char *src_end = coding->source + coding->src_bytes;
3997 int multibytep = coding->src_multibyte;
3998 int consumed_chars = 0;
3999 int found = 0;
4000 unsigned char *valids = CODING_CCL_VALIDS (coding);
4001 int head_ascii = coding->head_ascii;
4002 Lisp_Object attrs;
4003
4004 coding = &coding_categories[coding_category_ccl];
4005 attrs = CODING_ID_ATTRS (coding->id);
4006 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4007 src += head_ascii;
4008
4009 while (1)
4010 {
4011 int c;
4012 ONE_MORE_BYTE (c);
4013 if (! valids[c])
4014 break;
4015 if (!found && valids[c] > 1)
4016 found = 1;
4017 }
4018 *mask &= ~CATEGORY_MASK_CCL;
4019 return 0;
4020
4021 no_more_source:
4022 if (!found)
4023 return 0;
4024 *mask &= CATEGORY_MASK_CCL;
4025 return 1;
4026}
4027
4028static void
4029decode_coding_ccl (coding)
4030 struct coding_system *coding;
4031{
4032 unsigned char *src = coding->source + coding->consumed;
4033 unsigned char *src_end = coding->source + coding->src_bytes;
4034 int *charbuf = coding->charbuf;
4035 int *charbuf_end = charbuf + coding->charbuf_size;
4036 int consumed_chars = 0;
4037 int multibytep = coding->src_multibyte;
4038 struct ccl_program ccl;
4039 int source_charbuf[1024];
4040 int source_byteidx[1024];
4041
4042 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4043
4044 while (src < src_end)
4045 {
4046 unsigned char *p = src;
4047 int *source, *source_end;
4048 int i = 0;
4049
4050 if (multibytep)
4051 while (i < 1024 && p < src_end)
4052 {
4053 source_byteidx[i] = p - src;
4054 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4055 }
4056 else
4057 while (i < 1024 && p < src_end)
4058 source_charbuf[i++] = *p++;
4059
4060 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4061 ccl.last_block = 1;
4062
4063 source = source_charbuf;
4064 source_end = source + i;
4065 while (source < source_end)
4066 {
4067 ccl_driver (&ccl, source, charbuf,
4068 source_end - source, charbuf_end - charbuf);
4069 source += ccl.consumed;
4070 charbuf += ccl.produced;
4071 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4072 break;
4073 }
4074 if (source < source_end)
4075 src += source_byteidx[source - source_charbuf];
4076 else
4077 src = p;
4078 consumed_chars += source - source_charbuf;
4079
4080 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4081 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4082 break;
4083 }
4084
4085 switch (ccl.status)
4086 {
4087 case CCL_STAT_SUSPEND_BY_SRC:
4088 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4089 break;
4090 case CCL_STAT_SUSPEND_BY_DST:
4091 break;
4092 case CCL_STAT_QUIT:
4093 case CCL_STAT_INVALID_CMD:
4094 coding->result = CODING_RESULT_INTERRUPT;
4095 break;
4096 default:
4097 coding->result = CODING_RESULT_SUCCESS;
4098 break;
4099 }
4100 coding->consumed_char += consumed_chars;
4101 coding->consumed = src - coding->source;
4102 coding->charbuf_used = charbuf - coding->charbuf;
4103}
4104
4105static int
4106encode_coding_ccl (coding)
4107 struct coding_system *coding;
4108{
4109 struct ccl_program ccl;
4110 int multibytep = coding->dst_multibyte;
4111 int *charbuf = coding->charbuf;
4112 int *charbuf_end = charbuf + coding->charbuf_used;
4113 unsigned char *dst = coding->destination + coding->produced;
4114 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4115 unsigned char *adjusted_dst_end = dst_end - 1;
4116 int destination_charbuf[1024];
4117 int i, produced_chars = 0;
4118
4119 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4120
4121 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4122 ccl.dst_multibyte = coding->dst_multibyte;
4123
4124 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4125 {
4126 int dst_bytes = dst_end - dst;
4127 if (dst_bytes > 1024)
4128 dst_bytes = 1024;
4129
4130 ccl_driver (&ccl, charbuf, destination_charbuf,
4131 charbuf_end - charbuf, dst_bytes);
4132 charbuf += ccl.consumed;
4133 if (multibytep)
4134 for (i = 0; i < ccl.produced; i++)
4135 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4136 else
4137 {
4138 for (i = 0; i < ccl.produced; i++)
4139 *dst++ = destination_charbuf[i] & 0xFF;
4140 produced_chars += ccl.produced;
4141 }
4142 }
4143
4144 switch (ccl.status)
4145 {
4146 case CCL_STAT_SUSPEND_BY_SRC:
4147 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4148 break;
4149 case CCL_STAT_SUSPEND_BY_DST:
4150 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4151 break;
4152 case CCL_STAT_QUIT:
4153 case CCL_STAT_INVALID_CMD:
4154 coding->result = CODING_RESULT_INTERRUPT;
4155 break;
4156 default:
4157 coding->result = CODING_RESULT_SUCCESS;
4158 break;
4159 }
4160
4161 coding->produced_char += produced_chars;
4162 coding->produced = dst - coding->destination;
4163 return 0;
4164}
4165
4166
4167\f
4168/*** 10, 11. no-conversion handlers ***/
4169
4170/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4171
4172static void
4173decode_coding_raw_text (coding)
4174 struct coding_system *coding;
4175{
4176 coding->chars_at_source = 1;
4177 coding->consumed_char = 0;
4178 coding->consumed = 0;
4179 coding->result = CODING_RESULT_SUCCESS;
4180}
4181
4182static int
4183encode_coding_raw_text (coding)
4184 struct coding_system *coding;
4185{
4186 int multibytep = coding->dst_multibyte;
4187 int *charbuf = coding->charbuf;
4188 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4189 unsigned char *dst = coding->destination + coding->produced;
4190 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4191 int produced_chars = 0;
4192 int c;
4193
4194 if (multibytep)
4195 {
4196 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4197
4198 if (coding->src_multibyte)
4199 while (charbuf < charbuf_end)
4200 {
4201 ASSURE_DESTINATION (safe_room);
4202 c = *charbuf++;
4203 if (ASCII_CHAR_P (c))
4204 EMIT_ONE_ASCII_BYTE (c);
4205 else if (CHAR_BYTE8_P (c))
4206 {
4207 c = CHAR_TO_BYTE8 (c);
4208 EMIT_ONE_BYTE (c);
4209 }
4210 else
4211 {
4212 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4213
4214 CHAR_STRING_ADVANCE (c, p1);
4215 while (p0 < p1)
4216 EMIT_ONE_BYTE (*p0);
4217 }
4218 }
4219 else
4220 while (charbuf < charbuf_end)
4221 {
4222 ASSURE_DESTINATION (safe_room);
4223 c = *charbuf++;
4224 EMIT_ONE_BYTE (c);
4225 }
4226 }
4227 else
4228 {
4229 if (coding->src_multibyte)
4230 {
4231 int safe_room = MAX_MULTIBYTE_LENGTH;
4232
4233 while (charbuf < charbuf_end)
4234 {
4235 ASSURE_DESTINATION (safe_room);
4236 c = *charbuf++;
4237 if (ASCII_CHAR_P (c))
4238 *dst++ = c;
4239 else if (CHAR_BYTE8_P (c))
4240 *dst++ = CHAR_TO_BYTE8 (c);
4241 else
4242 CHAR_STRING_ADVANCE (c, dst);
4243 produced_chars++;
4244 }
4245 }
4246 else
4247 {
4248 ASSURE_DESTINATION (charbuf_end - charbuf);
4249 while (charbuf < charbuf_end && dst < dst_end)
4250 *dst++ = *charbuf++;
4251 produced_chars = dst - (coding->destination + coding->dst_bytes);
4252 }
4253 }
4254 coding->result = CODING_RESULT_SUCCESS;
4255 coding->produced_char += produced_chars;
4256 coding->produced = dst - coding->destination;
4257 return 0;
4258}
4259
4260static int
4261detect_coding_charset (coding, mask)
4262 struct coding_system *coding;
4263 int *mask;
4264{
4265 unsigned char *src = coding->source, *src_base = src;
4266 unsigned char *src_end = coding->source + coding->src_bytes;
4267 int multibytep = coding->src_multibyte;
4268 int consumed_chars = 0;
4269 Lisp_Object attrs, valids;
4270
4271 coding = &coding_categories[coding_category_charset];
4272 attrs = CODING_ID_ATTRS (coding->id);
4273 valids = AREF (attrs, coding_attr_charset_valids);
4274
4275 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4276 src += coding->head_ascii;
4277
4278 while (1)
4279 {
4280 int c;
4281
4282 ONE_MORE_BYTE (c);
4283 if (NILP (AREF (valids, c)))
4284 break;
4285 }
4286 *mask &= ~CATEGORY_MASK_CHARSET;
4287 return 0;
4288
4289 no_more_source:
4290 *mask &= CATEGORY_MASK_CHARSET;
4291 return 1;
4292}
4293
4294static void
4295decode_coding_charset (coding)
4296 struct coding_system *coding;
4297{
4298 unsigned char *src = coding->source + coding->consumed;
4299 unsigned char *src_end = coding->source + coding->src_bytes;
4300 unsigned char *src_base;
4301 int *charbuf = coding->charbuf;
4302 int *charbuf_end = charbuf + coding->charbuf_size;
4303 int consumed_chars = 0, consumed_chars_base;
4304 int multibytep = coding->src_multibyte;
4305 Lisp_Object attrs, eol_type, charset_list, valids;
4306
4307 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4308 valids = AREF (attrs, coding_attr_charset_valids);
4309
4310 while (1)
4311 {
4312 int c;
4313
4314 src_base = src;
4315 consumed_chars_base = consumed_chars;
4316
4317 if (charbuf >= charbuf_end)
4318 break;
4319
4320 ONE_MORE_BYTE (c);
4321 if (c == '\r')
4322 {
4323 /* Here we assume that no charset maps '\r' to something
4324 else. */
4325 if (EQ (eol_type, Qdos))
4326 {
4327 if (src < src_end
4328 && *src == '\n')
4329 ONE_MORE_BYTE (c);
4330 }
4331 else if (EQ (eol_type, Qmac))
4332 c = '\n';
4333 }
4334 else
4335 {
4336 Lisp_Object val;
4337 struct charset *charset;
4338 int dim;
4339 int len = 1;
4340 unsigned code = c;
4341
4342 val = AREF (valids, c);
4343 if (NILP (val))
4344 goto invalid_code;
4345 if (INTEGERP (val))
4346 {
4347 charset = CHARSET_FROM_ID (XFASTINT (val));
4348 dim = CHARSET_DIMENSION (charset);
4349 while (len < dim)
4350 {
4351 ONE_MORE_BYTE (c);
4352 code = (code << 8) | c;
4353 len++;
4354 }
4355 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4356 charset, code, c);
4357 }
4358 else
4359 {
4360 /* VAL is a list of charset IDs. It is assured that the
4361 list is sorted by charset dimensions (smaller one
4362 comes first). */
4363 while (CONSP (val))
4364 {
4365 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4366 dim = CHARSET_DIMENSION (charset);
4367 while (len < dim)
4368 {
4369 ONE_MORE_BYTE (c);
4370 code = (code << 8) | c;
4371 len++;
4372 }
4373 CODING_DECODE_CHAR (coding, src, src_base,
4374 src_end, charset, code, c);
4375 if (c >= 0)
4376 break;
4377 val = XCDR (val);
4378 }
4379 }
4380 if (c < 0)
4381 goto invalid_code;
4382 }
4383 *charbuf++ = c;
4384 continue;
4385
4386 invalid_code:
4387 src = src_base;
4388 consumed_chars = consumed_chars_base;
4389 ONE_MORE_BYTE (c);
4390 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4391 coding->errors++;
4392 }
4393
4394 no_more_source:
4395 coding->consumed_char += consumed_chars_base;
4396 coding->consumed = src_base - coding->source;
4397 coding->charbuf_used = charbuf - coding->charbuf;
4398}
4399
4400static int
4401encode_coding_charset (coding)
4402 struct coding_system *coding;
4403{
4404 int multibytep = coding->dst_multibyte;
4405 int *charbuf = coding->charbuf;
4406 int *charbuf_end = charbuf + coding->charbuf_used;
4407 unsigned char *dst = coding->destination + coding->produced;
4408 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4409 int safe_room = MAX_MULTIBYTE_LENGTH;
4410 int produced_chars = 0;
4411 Lisp_Object attrs, eol_type, charset_list;
4412 int ascii_compatible;
4413 int c;
4414
4415 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4416 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4417
4418 while (charbuf < charbuf_end)
4419 {
4420 struct charset *charset;
4421 unsigned code;
4422
4423 ASSURE_DESTINATION (safe_room);
4424 c = *charbuf++;
4425 if (ascii_compatible && ASCII_CHAR_P (c))
4426 EMIT_ONE_ASCII_BYTE (c);
4427 else
4428 {
4429 charset = char_charset (c, charset_list, &code);
4430 if (charset)
4431 {
4432 if (CHARSET_DIMENSION (charset) == 1)
4433 EMIT_ONE_BYTE (code);
4434 else if (CHARSET_DIMENSION (charset) == 2)
4435 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4436 else if (CHARSET_DIMENSION (charset) == 3)
4437 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4438 else
4439 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4440 (code >> 8) & 0xFF, code & 0xFF);
4441 }
4442 else
4443 {
4444 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4445 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4446 else
4447 c = coding->default_char;
4448 EMIT_ONE_BYTE (c);
4449 }
4450 }
4451 }
4452
4453 coding->result = CODING_RESULT_SUCCESS;
4454 coding->produced_char += produced_chars;
4455 coding->produced = dst - coding->destination;
4456 return 0;
4457}
4458
4459\f
4460/*** 7. C library functions ***/
4461
4462/* In Emacs Lisp, coding system is represented by a Lisp symbol which
4463 has a property `coding-system'. The value of this property is a
4464 vector of length 5 (called as coding-vector). Among elements of
4465 this vector, the first (element[0]) and the fifth (element[4])
4466 carry important information for decoding/encoding. Before
4467 decoding/encoding, this information should be set in fields of a
4468 structure of type `coding_system'.
4469
4470 A value of property `coding-system' can be a symbol of another
4471 subsidiary coding-system. In that case, Emacs gets coding-vector
4472 from that symbol.
4473
4474 `element[0]' contains information to be set in `coding->type'. The
4475 value and its meaning is as follows:
4476
4477 0 -- coding_type_emacs_mule
4478 1 -- coding_type_sjis
4479 2 -- coding_type_iso_2022
4480 3 -- coding_type_big5
4481 4 -- coding_type_ccl encoder/decoder written in CCL
4482 nil -- coding_type_no_conversion
4483 t -- coding_type_undecided (automatic conversion on decoding,
4484 no-conversion on encoding)
4485
4486 `element[4]' contains information to be set in `coding->flags' and
4487 `coding->spec'. The meaning varies by `coding->type'.
4488
4489 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4490 of length 32 (of which the first 13 sub-elements are used now).
4491 Meanings of these sub-elements are:
4492
4493 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4494 If the value is an integer of valid charset, the charset is
4495 assumed to be designated to graphic register N initially.
4496
4497 If the value is minus, it is a minus value of charset which
4498 reserves graphic register N, which means that the charset is
4499 not designated initially but should be designated to graphic
4500 register N just before encoding a character in that charset.
4501
4502 If the value is nil, graphic register N is never used on
4503 encoding.
4504
4505 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4506 Each value takes t or nil. See the section ISO2022 of
4507 `coding.h' for more information.
4508
4509 If `coding->type' is `coding_type_big5', element[4] is t to denote
4510 BIG5-ETen or nil to denote BIG5-HKU.
4511
4512 If `coding->type' takes the other value, element[4] is ignored.
4513
4514 Emacs Lisp's coding system also carries information about format of
4515 end-of-line in a value of property `eol-type'. If the value is
4516 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4517 it is not integer, it should be a vector of subsidiary coding
4518 systems of which property `eol-type' has one of above values.
4519
4520*/
4521
4522/* Setup coding context CODING from information about CODING_SYSTEM.
4523 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4524 CODING_SYSTEM is invalid, signal an error. */
4525
4526void
4527setup_coding_system (coding_system, coding)
4528 Lisp_Object coding_system;
4529 struct coding_system *coding;
4530{
4531 Lisp_Object attrs;
4532 Lisp_Object eol_type;
4533 Lisp_Object coding_type;
4534 Lisp_Object val;
4535
4536 if (NILP (coding_system))
4537 coding_system = Qno_conversion;
4538
4539 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4540
4541 attrs = CODING_ID_ATTRS (coding->id);
4542 eol_type = CODING_ID_EOL_TYPE (coding->id);
4543
4544 coding->mode = 0;
4545 coding->head_ascii = -1;
4546 coding->common_flags
4547 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4548
4549 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4550 coding->max_charset_id = XSTRING (val)->size - 1;
4551 coding->safe_charsets = (char *) XSTRING (val)->data;
4552 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4553
4554 coding_type = CODING_ATTR_TYPE (attrs);
4555 if (EQ (coding_type, Qundecided))
4556 {
4557 coding->detector = NULL;
4558 coding->decoder = decode_coding_raw_text;
4559 coding->encoder = encode_coding_raw_text;
4560 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4561 }
4562 else if (EQ (coding_type, Qiso_2022))
4563 {
4564 int i;
4565 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4566
4567 /* Invoke graphic register 0 to plane 0. */
4568 CODING_ISO_INVOCATION (coding, 0) = 0;
4569 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4570 CODING_ISO_INVOCATION (coding, 1)
4571 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4572 /* Setup the initial status of designation. */
4573 for (i = 0; i < 4; i++)
4574 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4575 /* Not single shifting initially. */
4576 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4577 /* Beginning of buffer should also be regarded as bol. */
4578 CODING_ISO_BOL (coding) = 1;
4579 coding->detector = detect_coding_iso_2022;
4580 coding->decoder = decode_coding_iso_2022;
4581 coding->encoder = encode_coding_iso_2022;
4582 if (flags & CODING_ISO_FLAG_SAFE)
4583 coding->mode |= CODING_MODE_SAFE_ENCODING;
4584 coding->common_flags
4585 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4586 | CODING_REQUIRE_FLUSHING_MASK);
4587 if (flags & CODING_ISO_FLAG_COMPOSITION)
4588 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4589 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4590 {
4591 setup_iso_safe_charsets (attrs);
4592 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4593 coding->max_charset_id = XSTRING (val)->size - 1;
4594 coding->safe_charsets = (char *) XSTRING (val)->data;
4595 }
4596 CODING_ISO_FLAGS (coding) = flags;
4597 }
4598 else if (EQ (coding_type, Qcharset))
4599 {
4600 coding->detector = detect_coding_charset;
4601 coding->decoder = decode_coding_charset;
4602 coding->encoder = encode_coding_charset;
4603 coding->common_flags
4604 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4605 }
4606 else if (EQ (coding_type, Qutf_8))
4607 {
4608 coding->detector = detect_coding_utf_8;
4609 coding->decoder = decode_coding_utf_8;
4610 coding->encoder = encode_coding_utf_8;
4611 coding->common_flags
4612 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4613 }
4614 else if (EQ (coding_type, Qutf_16))
4615 {
4616 val = AREF (attrs, coding_attr_utf_16_bom);
4617 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4618 : EQ (val, Qt) ? utf_16_with_bom
4619 : utf_16_without_bom);
4620 val = AREF (attrs, coding_attr_utf_16_endian);
4621 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4622 : utf_16_little_endian);
4623 CODING_UTF_16_SURROGATE (coding) = 0;
4624 coding->detector = detect_coding_utf_16;
4625 coding->decoder = decode_coding_utf_16;
4626 coding->encoder = encode_coding_utf_16;
4627 coding->common_flags
4628 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4629 }
4630 else if (EQ (coding_type, Qccl))
4631 {
4632 coding->detector = detect_coding_ccl;
4633 coding->decoder = decode_coding_ccl;
4634 coding->encoder = encode_coding_ccl;
4635 coding->common_flags
4636 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4637 | CODING_REQUIRE_FLUSHING_MASK);
4638 }
4639 else if (EQ (coding_type, Qemacs_mule))
4640 {
4641 coding->detector = detect_coding_emacs_mule;
4642 coding->decoder = decode_coding_emacs_mule;
4643 coding->encoder = encode_coding_emacs_mule;
4644 coding->common_flags
4645 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4646 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4647 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4648 {
4649 Lisp_Object tail, safe_charsets;
4650 int max_charset_id = 0;
4651
4652 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4653 tail = XCDR (tail))
4654 if (max_charset_id < XFASTINT (XCAR (tail)))
4655 max_charset_id = XFASTINT (XCAR (tail));
4656 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4657 make_number (255));
4658 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4659 tail = XCDR (tail))
4660 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4661 coding->max_charset_id = max_charset_id;
4662 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4663 }
4664 }
4665 else if (EQ (coding_type, Qshift_jis))
4666 {
4667 coding->detector = detect_coding_sjis;
4668 coding->decoder = decode_coding_sjis;
4669 coding->encoder = encode_coding_sjis;
4670 coding->common_flags
4671 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4672 }
4673 else if (EQ (coding_type, Qbig5))
4674 {
4675 coding->detector = detect_coding_big5;
4676 coding->decoder = decode_coding_big5;
4677 coding->encoder = encode_coding_big5;
4678 coding->common_flags
4679 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4680 }
4681 else /* EQ (coding_type, Qraw_text) */
4682 {
4683 coding->detector = NULL;
4684 coding->decoder = decode_coding_raw_text;
4685 coding->encoder = encode_coding_raw_text;
4686 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4687 }
4688
4689 return;
4690}
4691
4692/* Return raw-text or one of its subsidiaries that has the same
4693 eol_type as CODING-SYSTEM. */
4694
4695Lisp_Object
4696raw_text_coding_system (coding_system)
4697 Lisp_Object coding_system;
4698{
4699 Lisp_Object spec, attrs;
4700 Lisp_Object eol_type, raw_text_eol_type;
4701
4702 spec = CODING_SYSTEM_SPEC (coding_system);
4703 attrs = AREF (spec, 0);
4704
4705 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4706 return coding_system;
4707
4708 eol_type = AREF (spec, 2);
4709 if (VECTORP (eol_type))
4710 return Qraw_text;
4711 spec = CODING_SYSTEM_SPEC (Qraw_text);
4712 raw_text_eol_type = AREF (spec, 2);
4713 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4714 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4715 : AREF (raw_text_eol_type, 2));
4716}
4717
4718
4719/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4720 does, return one of the subsidiary that has the same eol-spec as
4721 PARENT. Otherwise, return CODING_SYSTEM. */
4722
4723Lisp_Object
4724coding_inherit_eol_type (coding_system, parent)
4725 Lisp_Object coding_system, parent;
4726{
4727 Lisp_Object spec, attrs, eol_type;
4728
4729 spec = CODING_SYSTEM_SPEC (coding_system);
4730 attrs = AREF (spec, 0);
4731 eol_type = AREF (spec, 2);
4732 if (VECTORP (eol_type))
4733 {
4734 Lisp_Object parent_spec;
4735 Lisp_Object parent_eol_type;
4736
4737 parent_spec
4738 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4739 parent_eol_type = AREF (parent_spec, 2);
4740 if (EQ (parent_eol_type, Qunix))
4741 coding_system = AREF (eol_type, 0);
4742 else if (EQ (parent_eol_type, Qdos))
4743 coding_system = AREF (eol_type, 1);
4744 else if (EQ (parent_eol_type, Qmac))
4745 coding_system = AREF (eol_type, 2);
4746 }
4747 return coding_system;
4748}
4749
4750/* Emacs has a mechanism to automatically detect a coding system if it
4751 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4752 it's impossible to distinguish some coding systems accurately
4753 because they use the same range of codes. So, at first, coding
4754 systems are categorized into 7, those are:
4755
4756 o coding-category-emacs-mule
4757
4758 The category for a coding system which has the same code range
4759 as Emacs' internal format. Assigned the coding-system (Lisp
4760 symbol) `emacs-mule' by default.
4761
4762 o coding-category-sjis
4763
4764 The category for a coding system which has the same code range
4765 as SJIS. Assigned the coding-system (Lisp
4766 symbol) `japanese-shift-jis' by default.
4767
4768 o coding-category-iso-7
4769
4770 The category for a coding system which has the same code range
4771 as ISO2022 of 7-bit environment. This doesn't use any locking
4772 shift and single shift functions. This can encode/decode all
4773 charsets. Assigned the coding-system (Lisp symbol)
4774 `iso-2022-7bit' by default.
4775
4776 o coding-category-iso-7-tight
4777
4778 Same as coding-category-iso-7 except that this can
4779 encode/decode only the specified charsets.
4780
4781 o coding-category-iso-8-1
4782
4783 The category for a coding system which has the same code range
4784 as ISO2022 of 8-bit environment and graphic plane 1 used only
4785 for DIMENSION1 charset. This doesn't use any locking shift
4786 and single shift functions. Assigned the coding-system (Lisp
4787 symbol) `iso-latin-1' by default.
4788
4789 o coding-category-iso-8-2
4790
4791 The category for a coding system which has the same code range
4792 as ISO2022 of 8-bit environment and graphic plane 1 used only
4793 for DIMENSION2 charset. This doesn't use any locking shift
4794 and single shift functions. Assigned the coding-system (Lisp
4795 symbol) `japanese-iso-8bit' by default.
4796
4797 o coding-category-iso-7-else
4798
4799 The category for a coding system which has the same code range
4800 as ISO2022 of 7-bit environemnt but uses locking shift or
4801 single shift functions. Assigned the coding-system (Lisp
4802 symbol) `iso-2022-7bit-lock' by default.
4803
4804 o coding-category-iso-8-else
4805
4806 The category for a coding system which has the same code range
4807 as ISO2022 of 8-bit environemnt but uses locking shift or
4808 single shift functions. Assigned the coding-system (Lisp
4809 symbol) `iso-2022-8bit-ss2' by default.
4810
4811 o coding-category-big5
4812
4813 The category for a coding system which has the same code range
4814 as BIG5. Assigned the coding-system (Lisp symbol)
4815 `cn-big5' by default.
4816
4817 o coding-category-utf-8
4818
4819 The category for a coding system which has the same code range
4820 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4821 symbol) `utf-8' by default.
4822
4823 o coding-category-utf-16-be
4824
4825 The category for a coding system in which a text has an
4826 Unicode signature (cf. Unicode Standard) in the order of BIG
4827 endian at the head. Assigned the coding-system (Lisp symbol)
4828 `utf-16-be' by default.
4829
4830 o coding-category-utf-16-le
4831
4832 The category for a coding system in which a text has an
4833 Unicode signature (cf. Unicode Standard) in the order of
4834 LITTLE endian at the head. Assigned the coding-system (Lisp
4835 symbol) `utf-16-le' by default.
4836
4837 o coding-category-ccl
4838
4839 The category for a coding system of which encoder/decoder is
4840 written in CCL programs. The default value is nil, i.e., no
4841 coding system is assigned.
4842
4843 o coding-category-binary
4844
4845 The category for a coding system not categorized in any of the
4846 above. Assigned the coding-system (Lisp symbol)
4847 `no-conversion' by default.
4848
4849 Each of them is a Lisp symbol and the value is an actual
4850 `coding-system's (this is also a Lisp symbol) assigned by a user.
4851 What Emacs does actually is to detect a category of coding system.
4852 Then, it uses a `coding-system' assigned to it. If Emacs can't
4853 decide only one possible category, it selects a category of the
4854 highest priority. Priorities of categories are also specified by a
4855 user in a Lisp variable `coding-category-list'.
4856
4857*/
4858
4859#define EOL_SEEN_NONE 0
4860#define EOL_SEEN_LF 1
4861#define EOL_SEEN_CR 2
4862#define EOL_SEEN_CRLF 4
4863
4864/* Detect how end-of-line of a text of length CODING->src_bytes
4865 pointed by CODING->source is encoded. Return one of
4866 EOL_SEEN_XXX. */
4867
4868#define MAX_EOL_CHECK_COUNT 3
4869
4870static int
4871detect_eol (coding, source, src_bytes)
4872 struct coding_system *coding;
4873 unsigned char *source;
4874 EMACS_INT src_bytes;
4875{
4876 Lisp_Object attrs, coding_type;
4877 unsigned char *src = source, *src_end = src + src_bytes;
4878 unsigned char c;
4879 int total = 0;
4880 int eol_seen = EOL_SEEN_NONE;
4881
4882 attrs = CODING_ID_ATTRS (coding->id);
4883 coding_type = CODING_ATTR_TYPE (attrs);
4884
4885 if (EQ (coding_type, Qccl))
4886 {
4887 int msb, lsb;
4888
4889 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4890 lsb = 1 - msb;
4891
4892 while (src + 1 < src_end)
4893 {
4894 c = src[lsb];
4895 if (src[msb] == 0 && (c == '\n' || c == '\r'))
4896 {
4897 int this_eol;
4898
4899 if (c == '\n')
4900 this_eol = EOL_SEEN_LF;
4901 else if (src + 3 >= src_end
4902 || src[msb + 2] != 0
4903 || src[lsb + 2] != '\n')
4904 this_eol = EOL_SEEN_CR;
4905 else
4906 this_eol = EOL_SEEN_CRLF;
4907
4908 if (eol_seen == EOL_SEEN_NONE)
4909 /* This is the first end-of-line. */
4910 eol_seen = this_eol;
4911 else if (eol_seen != this_eol)
4912 {
4913 /* The found type is different from what found before. */
4914 eol_seen = EOL_SEEN_LF;
4915 break;
4916 }
4917 if (++total == MAX_EOL_CHECK_COUNT)
4918 break;
4919 }
4920 src += 2;
4921 }
4922 }
4923 else
4924 {
4925 while (src < src_end)
4926 {
4927 c = *src++;
4928 if (c == '\n' || c == '\r')
4929 {
4930 int this_eol;
4931
4932 if (c == '\n')
4933 this_eol = EOL_SEEN_LF;
4934 else if (src >= src_end || *src != '\n')
4935 this_eol = EOL_SEEN_CR;
4936 else
4937 this_eol = EOL_SEEN_CRLF, src++;
4938
4939 if (eol_seen == EOL_SEEN_NONE)
4940 /* This is the first end-of-line. */
4941 eol_seen = this_eol;
4942 else if (eol_seen != this_eol)
4943 {
4944 /* The found type is different from what found before. */
4945 eol_seen = EOL_SEEN_LF;
4946 break;
4947 }
4948 if (++total == MAX_EOL_CHECK_COUNT)
4949 break;
4950 }
4951 }
4952 }
4953 return eol_seen;
4954}
4955
4956
4957static void
4958adjust_coding_eol_type (coding, eol_seen)
4959 struct coding_system *coding;
4960 int eol_seen;
4961{
4962 Lisp_Object eol_type;
4963
4964 eol_type = CODING_ID_EOL_TYPE (coding->id);
4965 if (eol_seen & EOL_SEEN_LF)
4966 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4967 else if (eol_type & EOL_SEEN_CRLF)
4968 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4969 else if (eol_type & EOL_SEEN_CR)
4970 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
4971}
4972
4973/* Detect how a text specified in CODING is encoded. If a coding
4974 system is detected, update fields of CODING by the detected coding
4975 system. */
4976
4977void
4978detect_coding (coding)
4979 struct coding_system *coding;
4980{
4981 unsigned char *src, *src_end;
4982 Lisp_Object attrs, coding_type;
4983
4984 coding->consumed = coding->consumed_char = 0;
4985 coding->produced = coding->produced_char = 0;
4986 coding_set_source (coding);
4987
4988 src_end = coding->source + coding->src_bytes;
4989
4990 /* If we have not yet decided the text encoding type, detect it
4991 now. */
4992 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
4993 {
4994 int mask = CATEGORY_MASK_ANY;
4995 int c, i;
4996
4997 for (src = coding->source; src < src_end; src++)
4998 {
4999 c = *src;
5000 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5001 || c == ISO_CODE_SI
5002 || c == ISO_CODE_SO)))
5003 break;
5004 }
5005 coding->head_ascii = src - (coding->source + coding->consumed);
5006
5007 if (coding->head_ascii < coding->src_bytes)
5008 {
5009 int detected = 0;
5010
5011 for (i = 0; i < coding_category_raw_text; i++)
5012 {
5013 enum coding_category category = coding_priorities[i];
5014 struct coding_system *this = coding_categories + category;
5015
5016 if (category >= coding_category_raw_text
5017 || detected & (1 << category))
5018 continue;
5019
5020 if (this->id < 0)
5021 {
5022 /* No coding system of this category is defined. */
5023 mask &= ~(1 << category);
5024 }
5025 else
5026 {
5027 detected |= detected_mask[category];
5028 if ((*(this->detector)) (coding, &mask))
5029 break;
5030 }
5031 }
5032 if (! mask)
5033 setup_coding_system (Qraw_text, coding);
5034 else if (mask != CATEGORY_MASK_ANY)
5035 for (i = 0; i < coding_category_raw_text; i++)
5036 {
5037 enum coding_category category = coding_priorities[i];
5038 struct coding_system *this = coding_categories + category;
5039
5040 if (mask & (1 << category))
5041 {
5042 setup_coding_system (CODING_ID_NAME (this->id), coding);
5043 break;
5044 }
5045 }
5046 }
5047 }
5048
5049 attrs = CODING_ID_ATTRS (coding->id);
5050 coding_type = CODING_ATTR_TYPE (attrs);
5051
5052 /* If we have not yet decided the EOL type, detect it now. But, the
5053 detection is impossible for a CCL based coding system, in which
5054 case, we detct the EOL type after decoding. */
5055 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5056 && ! EQ (coding_type, Qccl))
5057 {
5058 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5059
5060 if (eol_seen != EOL_SEEN_NONE)
5061 adjust_coding_eol_type (coding, eol_seen);
5062 }
5063}
5064
5065
5066static void
5067decode_eol (coding)
5068 struct coding_system *coding;
5069{
5070 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5071 {
5072 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5073 unsigned char *pend = p + coding->produced;
5074 int eol_seen = EOL_SEEN_NONE;
5075
5076 for (; p < pend; p++)
5077 {
5078 if (*p == '\n')
5079 eol_seen |= EOL_SEEN_LF;
5080 else if (*p == '\r')
5081 {
5082 if (p + 1 < pend && *(p + 1) == '\n')
5083 {
5084 eol_seen |= EOL_SEEN_CRLF;
5085 p++;
5086 }
5087 else
5088 eol_seen |= EOL_SEEN_CR;
5089 }
5090 }
5091 if (eol_seen != EOL_SEEN_NONE)
5092 adjust_coding_eol_type (coding, eol_seen);
5093 }
5094
5095 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5096 {
5097 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5098 unsigned char *pend = p + coding->produced;
5099
5100 for (; p < pend; p++)
5101 if (*p == '\r')
5102 *p = '\n';
5103 }
5104 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5105 {
5106 unsigned char *p, *pbeg, *pend;
5107 Lisp_Object undo_list;
5108
5109 move_gap_both (coding->dst_pos + coding->produced_char,
5110 coding->dst_pos_byte + coding->produced);
5111 undo_list = current_buffer->undo_list;
5112 current_buffer->undo_list = Qt;
5113 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5114 current_buffer->undo_list = undo_list;
5115 pbeg = GPT_ADDR;
5116 pend = pbeg + coding->produced;
5117
5118 for (p = pend - 1; p >= pbeg; p--)
5119 if (*p == '\r')
5120 {
5121 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5122 pend--;
5123 }
5124 coding->produced_char -= coding->produced - (pend - pbeg);
5125 coding->produced = pend - pbeg;
5126 insert_from_gap (coding->produced_char, coding->produced);
5127 }
5128}
5129
5130static void
5131translate_chars (coding, table)
5132 struct coding_system *coding;
5133 Lisp_Object table;
5134{
5135 int *charbuf = coding->charbuf;
5136 int *charbuf_end = charbuf + coding->charbuf_used;
5137 int c;
5138
5139 if (coding->chars_at_source)
5140 return;
5141
5142 while (charbuf < charbuf_end)
5143 {
5144 c = *charbuf;
5145 if (c < 0)
5146 charbuf += c;
5147 else
5148 *charbuf++ = translate_char (table, c);
5149 }
5150}
5151
5152static int
5153produce_chars (coding)
5154 struct coding_system *coding;
5155{
5156 unsigned char *dst = coding->destination + coding->produced;
5157 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5158 int produced;
5159 int produced_chars = 0;
5160
5161 if (! coding->chars_at_source)
5162 {
5163 /* Characters are in coding->charbuf. */
5164 int *buf = coding->charbuf;
5165 int *buf_end = buf + coding->charbuf_used;
5166 unsigned char *adjusted_dst_end;
5167
5168 if (BUFFERP (coding->src_object)
5169 && EQ (coding->src_object, coding->dst_object))
5170 dst_end = coding->source + coding->consumed;
5171 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5172
5173 while (buf < buf_end)
5174 {
5175 int c = *buf++;
5176
5177 if (dst >= adjusted_dst_end)
5178 {
5179 dst = alloc_destination (coding,
5180 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5181 dst);
5182 dst_end = coding->destination + coding->dst_bytes;
5183 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5184 }
5185 if (c >= 0)
5186 {
5187 if (coding->dst_multibyte
5188 || ! CHAR_BYTE8_P (c))
5189 CHAR_STRING_ADVANCE (c, dst);
5190 else
5191 *dst++ = CHAR_TO_BYTE8 (c);
5192 produced_chars++;
5193 }
5194 else
5195 /* This is an annotation data. */
5196 buf -= c + 1;
5197 }
5198 }
5199 else
5200 {
5201 unsigned char *src = coding->source;
5202 unsigned char *src_end = src + coding->src_bytes;
5203 Lisp_Object eol_type;
5204
5205 eol_type = CODING_ID_EOL_TYPE (coding->id);
5206
5207 if (coding->src_multibyte != coding->dst_multibyte)
5208 {
5209 if (coding->src_multibyte)
5210 {
5211 int multibytep = 1;
5212 int consumed_chars;
5213
5214 while (1)
5215 {
5216 unsigned char *src_base = src;
5217 int c;
5218
5219 ONE_MORE_BYTE (c);
5220 if (c == '\r')
5221 {
5222 if (EQ (eol_type, Qdos))
5223 {
5224 if (src < src_end
5225 && *src == '\n')
5226 c = *src++;
5227 }
5228 else if (EQ (eol_type, Qmac))
5229 c = '\n';
5230 }
5231 if (dst == dst_end)
5232 {
5233 coding->consumed = src - coding->source;
5234
5235 if (EQ (coding->src_object, coding->dst_object))
5236 dst_end = src;
5237 if (dst == dst_end)
5238 {
5239 dst = alloc_destination (coding, src_end - src + 1,
5240 dst);
5241 dst_end = coding->destination + coding->dst_bytes;
5242 coding_set_source (coding);
5243 src = coding->source + coding->consumed;
5244 src_end = coding->source + coding->src_bytes;
5245 }
5246 }
5247 *dst++ = c;
5248 produced_chars++;
5249 }
5250 no_more_source:
5251 ;
5252 }
5253 else
5254 while (src < src_end)
5255 {
5256 int multibytep = 1;
5257 int c = *src++;
5258
5259 if (c == '\r')
5260 {
5261 if (EQ (eol_type, Qdos))
5262 {
5263 if (src < src_end
5264 && *src == '\n')
5265 c = *src++;
5266 }
5267 else if (EQ (eol_type, Qmac))
5268 c = '\n';
5269 }
5270 if (dst >= dst_end - 1)
5271 {
5272 coding->consumed = src - coding->source;
5273
5274 if (EQ (coding->src_object, coding->dst_object))
5275 dst_end = src;
5276 if (dst >= dst_end - 1)
5277 {
5278 dst = alloc_destination (coding, src_end - src + 2,
5279 dst);
5280 dst_end = coding->destination + coding->dst_bytes;
5281 coding_set_source (coding);
5282 src = coding->source + coding->consumed;
5283 src_end = coding->source + coding->src_bytes;
5284 }
5285 }
5286 EMIT_ONE_BYTE (c);
5287 }
5288 }
5289 else
5290 {
5291 if (!EQ (coding->src_object, coding->dst_object))
5292 {
5293 int require = coding->src_bytes - coding->dst_bytes;
5294
5295 if (require > 0)
5296 {
5297 EMACS_INT offset = src - coding->source;
5298
5299 dst = alloc_destination (coding, require, dst);
5300 coding_set_source (coding);
5301 src = coding->source + offset;
5302 src_end = coding->source + coding->src_bytes;
5303 }
5304 }
5305 produced_chars = coding->src_chars;
5306 while (src < src_end)
5307 {
5308 int c = *src++;
5309
5310 if (c == '\r')
5311 {
5312 if (EQ (eol_type, Qdos))
5313 {
5314 if (src < src_end
5315 && *src == '\n')
5316 c = *src++;
5317 produced_chars--;
5318 }
5319 else if (EQ (eol_type, Qmac))
5320 c = '\n';
5321 }
5322 *dst++ = c;
5323 }
5324 }
5325 coding->consumed = coding->src_bytes;
5326 coding->consumed_char = coding->src_chars;
5327 }
5328
5329 produced = dst - (coding->destination + coding->produced);
5330 if (BUFFERP (coding->dst_object))
5331 insert_from_gap (produced_chars, produced);
5332 coding->produced += produced;
5333 coding->produced_char += produced_chars;
5334 return produced_chars;
5335}
5336
5337/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5338 or
5339 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5340 */
5341
5342static INLINE void
5343produce_composition (coding, charbuf)
5344 struct coding_system *coding;
5345 int *charbuf;
5346{
5347 Lisp_Object buffer;
5348 int len;
5349 EMACS_INT pos;
5350 enum composition_method method;
5351 int cmp_len;
5352 Lisp_Object components;
5353
5354 buffer = coding->dst_object;
5355 len = -charbuf[0];
5356 pos = coding->dst_pos + charbuf[1];
5357 method = (enum composition_method) (charbuf[3]);
5358 cmp_len = charbuf[4];
5359
5360 if (method == COMPOSITION_RELATIVE)
5361 components = Qnil;
5362 else
5363 {
5364 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5365 int i;
5366
5367 len -= 5;
5368 charbuf += 5;
5369 for (i = 0; i < len; i++)
5370 args[i] = make_number (charbuf[i]);
5371 components = (method == COMPOSITION_WITH_ALTCHARS
5372 ? Fstring (len, args) : Fvector (len, args));
5373 }
5374 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5375}
5376
5377static int *
5378save_composition_data (buf, buf_end, prop)
5379 int *buf, *buf_end;
5380 Lisp_Object prop;
5381{
5382 enum composition_method method = COMPOSITION_METHOD (prop);
5383 int cmp_len = COMPOSITION_LENGTH (prop);
5384
5385 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5386 return NULL;
5387
5388 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5389 buf[2] = method;
5390 buf[3] = cmp_len;
5391
5392 if (method == COMPOSITION_RELATIVE)
5393 buf[0] = 4;
5394 else
5395 {
5396 Lisp_Object components;
5397 int len, i;
5398
5399 components = COMPOSITION_COMPONENTS (prop);
5400 if (VECTORP (components))
5401 {
5402 len = XVECTOR (components)->size;
5403 for (i = 0; i < len; i++)
5404 buf[4 + i] = XINT (AREF (components, i));
5405 }
5406 else if (STRINGP (components))
5407 {
5408 int i_byte;
5409
5410 len = XSTRING (components)->size;
5411 i = i_byte = 0;
5412 while (i < len)
5413 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5414 }
5415 else if (INTEGERP (components))
5416 {
5417 len = 1;
5418 buf[4] = XINT (components);
5419 }
5420 else if (CONSP (components))
5421 {
5422 for (len = 0; CONSP (components);
5423 len++, components = XCDR (components))
5424 buf[4 + len] = XINT (XCAR (components));
5425 }
5426 else
5427 abort ();
5428 buf[0] = 4 + len;
5429 }
5430 return (buf + buf[0]);
5431}
5432
5433#define CHARBUF_SIZE 0x4000
5434
5435#define ALLOC_CONVERSION_WORK_AREA(coding) \
5436 do { \
5437 int size = CHARBUF_SIZE;; \
5438 \
5439 coding->charbuf = NULL; \
5440 while (size > 1024) \
5441 { \
5442 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5443 if (coding->charbuf) \
5444 break; \
5445 size >>= 1; \
5446 } \
5447 if (! coding->charbuf) \
5448 { \
5449 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5450 return coding->result; \
5451 } \
5452 coding->charbuf_size = size; \
5453 } while (0)
5454
5455
5456static void
5457produce_annotation (coding)
5458 struct coding_system *coding;
5459{
5460 int *charbuf = coding->charbuf;
5461 int *charbuf_end = charbuf + coding->charbuf_used;
5462
5463 while (charbuf < charbuf_end)
5464 {
5465 if (*charbuf >= 0)
5466 charbuf++;
5467 else
5468 {
5469 int len = -*charbuf;
5470 switch (charbuf[2])
5471 {
5472 case CODING_ANNOTATE_COMPOSITION_MASK:
5473 produce_composition (coding, charbuf);
5474 break;
5475 default:
5476 abort ();
5477 }
5478 charbuf += len;
5479 }
5480 }
5481}
5482
5483/* Decode the data at CODING->src_object into CODING->dst_object.
5484 CODING->src_object is a buffer, a string, or nil.
5485 CODING->dst_object is a buffer.
5486
5487 If CODING->src_object is a buffer, it must be the current buffer.
5488 In this case, if CODING->src_pos is positive, it is a position of
5489 the source text in the buffer, otherwise, the source text is in the
5490 gap area of the buffer, and CODING->src_pos specifies the offset of
5491 the text from GPT (which must be the same as PT). If this is the
5492 same buffer as CODING->dst_object, CODING->src_pos must be
5493 negative.
5494
5495 If CODING->src_object is a string, CODING->src_pos in an index to
5496 that string.
5497
5498 If CODING->src_object is nil, CODING->source must already point to
5499 the non-relocatable memory area. In this case, CODING->src_pos is
5500 an offset from CODING->source.
5501
5502 The decoded data is inserted at the current point of the buffer
5503 CODING->dst_object.
5504*/
5505
5506static int
5507decode_coding (coding)
5508 struct coding_system *coding;
5509{
5510 Lisp_Object attrs;
5511
5512 if (BUFFERP (coding->src_object)
5513 && coding->src_pos > 0
5514 && coding->src_pos < GPT
5515 && coding->src_pos + coding->src_chars > GPT)
5516 move_gap_both (coding->src_pos, coding->src_pos_byte);
5517
5518 if (BUFFERP (coding->dst_object))
5519 {
5520 if (current_buffer != XBUFFER (coding->dst_object))
5521 set_buffer_internal (XBUFFER (coding->dst_object));
5522 if (GPT != PT)
5523 move_gap_both (PT, PT_BYTE);
5524 }
5525
5526 coding->consumed = coding->consumed_char = 0;
5527 coding->produced = coding->produced_char = 0;
5528 coding->chars_at_source = 0;
5529 coding->result = CODING_RESULT_SUCCESS;
5530 coding->errors = 0;
5531
5532 ALLOC_CONVERSION_WORK_AREA (coding);
5533
5534 attrs = CODING_ID_ATTRS (coding->id);
5535
5536 do
5537 {
5538 coding_set_source (coding);
5539 coding->annotated = 0;
5540 (*(coding->decoder)) (coding);
5541 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5542 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5543 coding_set_destination (coding);
5544 produce_chars (coding);
5545 if (coding->annotated)
5546 produce_annotation (coding);
5547 }
5548 while (coding->consumed < coding->src_bytes
5549 && ! coding->result);
5550
5551 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5552 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5553 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5554 decode_eol (coding);
5555
5556 coding->carryover_bytes = 0;
5557 if (coding->consumed < coding->src_bytes)
5558 {
5559 int nbytes = coding->src_bytes - coding->consumed;
5560 unsigned char *src;
5561
5562 coding_set_source (coding);
5563 coding_set_destination (coding);
5564 src = coding->source + coding->consumed;
5565
5566 if (coding->mode & CODING_MODE_LAST_BLOCK)
5567 {
5568 /* Flush out unprocessed data as binary chars. We are sure
5569 that the number of data is less than the size of
5570 coding->charbuf. */
5571 int *charbuf = coding->charbuf;
5572
5573 while (nbytes-- > 0)
5574 {
5575 int c = *src++;
5576 *charbuf++ = (c & 0x80 ? - c : c);
5577 }
5578 produce_chars (coding);
5579 }
5580 else
5581 {
5582 /* Record unprocessed bytes in coding->carryover. We are
5583 sure that the number of data is less than the size of
5584 coding->carryover. */
5585 unsigned char *p = coding->carryover;
5586
5587 coding->carryover_bytes = nbytes;
5588 while (nbytes-- > 0)
5589 *p++ = *src++;
5590 }
5591 coding->consumed = coding->src_bytes;
5592 }
5593
5594 return coding->result;
5595}
5596
5597static void
5598consume_chars (coding)
5599 struct coding_system *coding;
5600{
5601 int *buf = coding->charbuf;
5602 /* -1 is to compensate for CRLF. */
5603 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5604 unsigned char *src = coding->source + coding->consumed;
5605 int pos = coding->src_pos + coding->consumed_char;
5606 int end_pos = coding->src_pos + coding->src_chars;
5607 int multibytep = coding->src_multibyte;
5608 Lisp_Object eol_type;
5609 int c;
5610 int start, end, stop;
5611 Lisp_Object object, prop;
5612
5613 eol_type = CODING_ID_EOL_TYPE (coding->id);
5614 if (VECTORP (eol_type))
5615 eol_type = Qunix;
5616
5617 object = coding->src_object;
5618
5619 /* Note: composition handling is not yet implemented. */
5620 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5621
5622 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5623 && find_composition (pos, end_pos, &start, &end, &prop, object)
5624 && end <= end_pos
5625 && (start >= pos
5626 || (find_composition (end, end_pos, &start, &end, &prop, object)
5627 && end <= end_pos)))
5628 stop = start;
5629 else
5630 stop = end_pos;
5631
5632 while (buf < buf_end)
5633 {
5634 if (pos == stop)
5635 {
5636 int *p;
5637
5638 if (pos == end_pos)
5639 break;
5640 p = save_composition_data (buf, buf_end, prop);
5641 if (p == NULL)
5642 break;
5643 buf = p;
5644 if (find_composition (end, end_pos, &start, &end, &prop, object)
5645 && end <= end_pos)
5646 stop = start;
5647 else
5648 stop = end_pos;
5649 }
5650
5651 if (! multibytep)
5652 c = *src++;
5653 else
5654 c = STRING_CHAR_ADVANCE (src);
5655 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5656 c = '\n';
5657 if (! EQ (eol_type, Qunix))
5658 {
5659 if (c == '\n')
5660 {
5661 if (EQ (eol_type, Qdos))
5662 *buf++ = '\r';
5663 else
5664 c = '\r';
5665 }
5666 }
5667 *buf++ = c;
5668 pos++;
5669 }
5670
5671 coding->consumed = src - coding->source;
5672 coding->consumed_char = pos - coding->src_pos;
5673 coding->charbuf_used = buf - coding->charbuf;
5674 coding->chars_at_source = 0;
5675}
5676
5677
5678/* Encode the text at CODING->src_object into CODING->dst_object.
5679 CODING->src_object is a buffer or a string.
5680 CODING->dst_object is a buffer or nil.
5681
5682 If CODING->src_object is a buffer, it must be the current buffer.
5683 In this case, if CODING->src_pos is positive, it is a position of
5684 the source text in the buffer, otherwise. the source text is in the
5685 gap area of the buffer, and coding->src_pos specifies the offset of
5686 the text from GPT (which must be the same as PT). If this is the
5687 same buffer as CODING->dst_object, CODING->src_pos must be
5688 negative and CODING should not have `pre-write-conversion'.
5689
5690 If CODING->src_object is a string, CODING should not have
5691 `pre-write-conversion'.
5692
5693 If CODING->dst_object is a buffer, the encoded data is inserted at
5694 the current point of that buffer.
5695
5696 If CODING->dst_object is nil, the encoded data is placed at the
5697 memory area specified by CODING->destination. */
5698
5699static int
5700encode_coding (coding)
5701 struct coding_system *coding;
5702{
5703 Lisp_Object attrs;
5704
5705 attrs = CODING_ID_ATTRS (coding->id);
5706
5707 if (BUFFERP (coding->dst_object))
5708 {
5709 set_buffer_internal (XBUFFER (coding->dst_object));
5710 coding->dst_multibyte
5711 = ! NILP (current_buffer->enable_multibyte_characters);
5712 }
5713
5714 coding->consumed = coding->consumed_char = 0;
5715 coding->produced = coding->produced_char = 0;
5716 coding->result = CODING_RESULT_SUCCESS;
5717 coding->errors = 0;
5718
5719 ALLOC_CONVERSION_WORK_AREA (coding);
5720
5721 do {
5722 coding_set_source (coding);
5723 consume_chars (coding);
5724
5725 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5726 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5727
5728 coding_set_destination (coding);
5729 (*(coding->encoder)) (coding);
5730 } while (coding->consumed_char < coding->src_chars);
5731
5732 if (BUFFERP (coding->dst_object))
5733 insert_from_gap (coding->produced_char, coding->produced);
5734
5735 return (coding->result);
5736}
5737
5738/* Work buffer */
5739
5740/* List of currently used working buffer. */
5741Lisp_Object Vcode_conversion_work_buf_list;
5742
5743/* A working buffer used by the top level conversion. */
5744Lisp_Object Vcode_conversion_reused_work_buf;
5745
5746
5747/* Return a working buffer that can be freely used by the following
5748 code conversion. MULTIBYTEP specifies the multibyteness of the
5749 buffer. */
5750
5751Lisp_Object
5752make_conversion_work_buffer (multibytep)
5753 int multibytep;
5754{
5755 struct buffer *current = current_buffer;
5756 Lisp_Object buf;
5757
5758 if (NILP (Vcode_conversion_work_buf_list))
5759 {
5760 if (NILP (Vcode_conversion_reused_work_buf))
5761 Vcode_conversion_reused_work_buf
5762 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5763 Vcode_conversion_work_buf_list
5764 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
5765 }
5766 else
5767 {
5768 int depth = Flength (Vcode_conversion_work_buf_list);
5769 char str[128];
5770
5771 sprintf (str, " *code-conversion-work*<%d>", depth);
5772 Vcode_conversion_work_buf_list
5773 = Fcons (Fget_buffer_create (build_string (str)),
5774 Vcode_conversion_work_buf_list);
5775 }
5776
5777 buf = XCAR (Vcode_conversion_work_buf_list);
5778 set_buffer_internal (XBUFFER (buf));
5779 current_buffer->undo_list = Qt;
5780 Ferase_buffer ();
5781 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5782 set_buffer_internal (current);
5783 return buf;
5784}
5785
5786static struct coding_system *saved_coding;
5787
5788Lisp_Object
5789code_conversion_restore (info)
5790 Lisp_Object info;
5791{
5792 int depth = Flength (Vcode_conversion_work_buf_list);
5793 Lisp_Object buf;
5794
5795 if (depth > 0)
5796 {
5797 buf = XCAR (Vcode_conversion_work_buf_list);
5798 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5799 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5800 Fkill_buffer (buf);
5801 }
5802
5803 if (saved_coding->dst_object == Qt
5804 && saved_coding->destination)
5805 xfree (saved_coding->destination);
5806
5807 return save_excursion_restore (info);
5808}
5809
5810
5811int
5812decode_coding_gap (coding, chars, bytes)
5813 struct coding_system *coding;
5814 EMACS_INT chars, bytes;
5815{
5816 int count = specpdl_ptr - specpdl;
5817
5818 saved_coding = coding;
5819 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5820
5821 coding->src_object = Fcurrent_buffer ();
5822 coding->src_chars = chars;
5823 coding->src_bytes = bytes;
5824 coding->src_pos = -chars;
5825 coding->src_pos_byte = -bytes;
5826 coding->src_multibyte = chars < bytes;
5827 coding->dst_object = coding->src_object;
5828 coding->dst_pos = PT;
5829 coding->dst_pos_byte = PT_BYTE;
5830 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
5831
5832 if (CODING_REQUIRE_DETECTION (coding))
5833 detect_coding (coding);
5834
5835 decode_coding (coding);
5836
5837 unbind_to (count, Qnil);
5838 return coding->result;
5839}
5840
5841int
5842encode_coding_gap (coding, chars, bytes)
5843 struct coding_system *coding;
5844 EMACS_INT chars, bytes;
5845{
5846 int count = specpdl_ptr - specpdl;
5847 Lisp_Object buffer;
5848
5849 saved_coding = coding;
5850 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5851
5852 buffer = Fcurrent_buffer ();
5853 coding->src_object = buffer;
5854 coding->src_chars = chars;
5855 coding->src_bytes = bytes;
5856 coding->src_pos = -chars;
5857 coding->src_pos_byte = -bytes;
5858 coding->src_multibyte = chars < bytes;
5859 coding->dst_object = coding->src_object;
5860 coding->dst_pos = PT;
5861 coding->dst_pos_byte = PT_BYTE;
5862
5863 encode_coding (coding);
5864
5865 unbind_to (count, Qnil);
5866 return coding->result;
5867}
5868
5869
5870/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5871 SRC_OBJECT into DST_OBJECT by coding context CODING.
5872
5873 SRC_OBJECT is a buffer, a string, or Qnil.
5874
5875 If it is a buffer, the text is at point of the buffer. FROM and TO
5876 are positions in the buffer.
5877
5878 If it is a string, the text is at the beginning of the string.
5879 FROM and TO are indices to the string.
5880
5881 If it is nil, the text is at coding->source. FROM and TO are
5882 indices to coding->source.
5883
5884 DST_OBJECT is a buffer, Qt, or Qnil.
5885
5886 If it is a buffer, the decoded text is inserted at point of the
5887 buffer. If the buffer is the same as SRC_OBJECT, the source text
5888 is deleted.
5889
5890 If it is Qt, a string is made from the decoded text, and
5891 set in CODING->dst_object.
5892
5893 If it is Qnil, the decoded text is stored at CODING->destination.
5894 The called must allocate CODING->dst_bytes bytes at
5895 CODING->destination by xmalloc. If the decoded text is longer than
5896 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5897 */
5898
5899void
5900decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5901 dst_object)
5902 struct coding_system *coding;
5903 Lisp_Object src_object;
5904 EMACS_INT from, from_byte, to, to_byte;
5905 Lisp_Object dst_object;
5906{
5907 int count = specpdl_ptr - specpdl;
5908 unsigned char *destination;
5909 EMACS_INT dst_bytes;
5910 EMACS_INT chars = to - from;
5911 EMACS_INT bytes = to_byte - from_byte;
5912 Lisp_Object attrs;
5913
5914 saved_coding = coding;
5915 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5916
5917 if (NILP (dst_object))
5918 {
5919 destination = coding->destination;
5920 dst_bytes = coding->dst_bytes;
5921 }
5922
5923 coding->src_object = src_object;
5924 coding->src_chars = chars;
5925 coding->src_bytes = bytes;
5926 coding->src_multibyte = chars < bytes;
5927
5928 if (STRINGP (src_object))
5929 {
5930 coding->src_pos = from;
5931 coding->src_pos_byte = from_byte;
5932 }
5933 else if (BUFFERP (src_object))
5934 {
5935 set_buffer_internal (XBUFFER (src_object));
5936 if (from != GPT)
5937 move_gap_both (from, from_byte);
5938 if (EQ (src_object, dst_object))
5939 {
5940 TEMP_SET_PT_BOTH (from, from_byte);
5941 del_range_both (from, from_byte, to, to_byte, 1);
5942 coding->src_pos = -chars;
5943 coding->src_pos_byte = -bytes;
5944 }
5945 else
5946 {
5947 coding->src_pos = from;
5948 coding->src_pos_byte = from_byte;
5949 }
5950 }
5951
5952 if (CODING_REQUIRE_DETECTION (coding))
5953 detect_coding (coding);
5954 attrs = CODING_ID_ATTRS (coding->id);
5955
5956 if (! NILP (CODING_ATTR_POST_READ (attrs))
5957 || EQ (dst_object, Qt))
5958 {
5959 coding->dst_object = make_conversion_work_buffer (1);
5960 coding->dst_pos = BEG;
5961 coding->dst_pos_byte = BEG_BYTE;
5962 coding->dst_multibyte = 1;
5963 }
5964 else if (BUFFERP (dst_object))
5965 {
5966 coding->dst_object = dst_object;
5967 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5968 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5969 coding->dst_multibyte
5970 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
5971 }
5972 else
5973 {
5974 coding->dst_object = Qnil;
5975 coding->dst_multibyte = 1;
5976 }
5977
5978 decode_coding (coding);
5979
5980 if (BUFFERP (coding->dst_object))
5981 set_buffer_internal (XBUFFER (coding->dst_object));
5982
5983 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5984 {
5985 struct gcpro gcpro1, gcpro2;
5986 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
5987 Lisp_Object val;
5988
5989 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5990 GCPRO2 (coding->src_object, coding->dst_object);
5991 val = call1 (CODING_ATTR_POST_READ (attrs),
5992 make_number (coding->produced_char));
5993 UNGCPRO;
5994 CHECK_NATNUM (val);
5995 coding->produced_char += Z - prev_Z;
5996 coding->produced += Z_BYTE - prev_Z_BYTE;
5997 }
5998
5999 if (EQ (dst_object, Qt))
6000 {
6001 coding->dst_object = Fbuffer_string ();
6002 }
6003 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6004 {
6005 set_buffer_internal (XBUFFER (coding->dst_object));
6006 if (dst_bytes < coding->produced)
6007 {
6008 destination
6009 = (unsigned char *) xrealloc (destination, coding->produced);
6010 if (! destination)
6011 {
6012 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6013 unbind_to (count, Qnil);
6014 return;
6015 }
6016 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6017 move_gap_both (BEGV, BEGV_BYTE);
6018 bcopy (BEGV_ADDR, destination, coding->produced);
6019 coding->destination = destination;
6020 }
6021 }
6022
6023 unbind_to (count, Qnil);
6024}
6025
6026
6027void
6028encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6029 dst_object)
6030 struct coding_system *coding;
6031 Lisp_Object src_object;
6032 EMACS_INT from, from_byte, to, to_byte;
6033 Lisp_Object dst_object;
6034{
6035 int count = specpdl_ptr - specpdl;
6036 EMACS_INT chars = to - from;
6037 EMACS_INT bytes = to_byte - from_byte;
6038 Lisp_Object attrs;
6039
6040 saved_coding = coding;
6041 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6042
6043 coding->src_object = src_object;
6044 coding->src_chars = chars;
6045 coding->src_bytes = bytes;
6046 coding->src_multibyte = chars < bytes;
6047
6048 attrs = CODING_ID_ATTRS (coding->id);
6049
6050 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6051 {
6052 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6053 set_buffer_internal (XBUFFER (coding->src_object));
6054 if (STRINGP (src_object))
6055 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6056 else if (BUFFERP (src_object))
6057 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6058 else
6059 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6060
6061 if (EQ (src_object, dst_object))
6062 {
6063 set_buffer_internal (XBUFFER (src_object));
6064 del_range_both (from, from_byte, to, to_byte, 1);
6065 set_buffer_internal (XBUFFER (coding->src_object));
6066 }
6067
6068 call2 (CODING_ATTR_PRE_WRITE (attrs),
6069 make_number (BEG), make_number (Z));
6070 coding->src_object = Fcurrent_buffer ();
6071 if (BEG != GPT)
6072 move_gap_both (BEG, BEG_BYTE);
6073 coding->src_chars = Z - BEG;
6074 coding->src_bytes = Z_BYTE - BEG_BYTE;
6075 coding->src_pos = BEG;
6076 coding->src_pos_byte = BEG_BYTE;
6077 coding->src_multibyte = Z < Z_BYTE;
6078 }
6079 else if (STRINGP (src_object))
6080 {
6081 coding->src_pos = from;
6082 coding->src_pos_byte = from_byte;
6083 }
6084 else if (BUFFERP (src_object))
6085 {
6086 set_buffer_internal (XBUFFER (src_object));
6087 if (from != GPT)
6088 move_gap_both (from, from_byte);
6089 if (EQ (src_object, dst_object))
6090 {
6091 del_range_both (from, from_byte, to, to_byte, 1);
6092 coding->src_pos = -chars;
6093 coding->src_pos_byte = -bytes;
6094 }
6095 else
6096 {
6097 coding->src_pos = from;
6098 coding->src_pos_byte = from_byte;
6099 }
6100 }
6101
6102 if (BUFFERP (dst_object))
6103 {
6104 coding->dst_object = dst_object;
6105 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6106 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6107 coding->dst_multibyte
6108 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6109 }
6110 else if (EQ (dst_object, Qt))
6111 {
6112 coding->dst_object = Qnil;
6113 coding->dst_bytes = coding->src_chars;
6114 if (coding->dst_bytes == 0)
6115 coding->dst_bytes = 1;
6116 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6117 coding->dst_multibyte = 0;
6118 }
6119 else
6120 {
6121 coding->dst_object = Qnil;
6122 coding->dst_multibyte = 0;
6123 }
6124
6125 encode_coding (coding);
6126
6127 if (EQ (dst_object, Qt))
6128 {
6129 if (BUFFERP (coding->dst_object))
6130 coding->dst_object = Fbuffer_string ();
6131 else
6132 {
6133 coding->dst_object
6134 = make_unibyte_string ((char *) coding->destination,
6135 coding->produced);
6136 xfree (coding->destination);
6137 }
6138 }
6139
6140 unbind_to (count, Qnil);
6141}
6142
6143
6144Lisp_Object
6145preferred_coding_system ()
6146{
6147 int id = coding_categories[coding_priorities[0]].id;
6148
6149 return CODING_ID_NAME (id);
6150}
6151
6152\f
6153#ifdef emacs
6154/*** 8. Emacs Lisp library functions ***/
6155
6156DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6157 doc: /* Return t if OBJECT is nil or a coding-system.
6158See the documentation of `define-coding-system' for information
6159about coding-system objects. */)
6160 (obj)
6161 Lisp_Object obj;
6162{
6163 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6164}
6165
6166DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6167 Sread_non_nil_coding_system, 1, 1, 0,
6168 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6169 (prompt)
6170 Lisp_Object prompt;
6171{
6172 Lisp_Object val;
6173 do
6174 {
6175 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6176 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6177 }
6178 while (XSTRING (val)->size == 0);
6179 return (Fintern (val, Qnil));
6180}
6181
6182DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6183 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6184If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6185 (prompt, default_coding_system)
6186 Lisp_Object prompt, default_coding_system;
6187{
6188 Lisp_Object val;
6189 if (SYMBOLP (default_coding_system))
6190 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6191 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6192 Qt, Qnil, Qcoding_system_history,
6193 default_coding_system, Qnil);
6194 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6195}
6196
6197DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6198 1, 1, 0,
6199 doc: /* Check validity of CODING-SYSTEM.
6200If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6201It is valid if it is a symbol with a non-nil `coding-system' property.
6202The value of property should be a vector of length 5. */)
6203 (coding_system)
6204 Lisp_Object coding_system;
6205{
6206 CHECK_SYMBOL (coding_system);
6207 if (!NILP (Fcoding_system_p (coding_system)))
6208 return coding_system;
6209 while (1)
6210 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6211}
6212
6213\f
6214Lisp_Object
6215detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6216 unsigned char *src;
6217 int src_bytes, highest;
6218 int multibytep;
6219 Lisp_Object coding_system;
6220{
6221 unsigned char *src_end = src + src_bytes;
6222 int mask = CATEGORY_MASK_ANY;
6223 int detected = 0;
6224 int c, i;
6225 Lisp_Object attrs, eol_type;
6226 Lisp_Object val;
6227 struct coding_system coding;
6228
6229 if (NILP (coding_system))
6230 coding_system = Qundecided;
6231 setup_coding_system (coding_system, &coding);
6232 attrs = CODING_ID_ATTRS (coding.id);
6233 eol_type = CODING_ID_EOL_TYPE (coding.id);
6234
6235 coding.source = src;
6236 coding.src_bytes = src_bytes;
6237 coding.src_multibyte = multibytep;
6238 coding.consumed = 0;
6239
6240 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
6241 {
6242 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6243 }
6244 else
6245 {
6246 coding_system = Qnil;
6247 for (; src < src_end; src++)
6248 {
6249 c = *src;
6250 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6251 || c == ISO_CODE_SI
6252 || c == ISO_CODE_SO)))
6253 break;
6254 }
6255 coding.head_ascii = src - coding.source;
6256
6257 if (src < src_end)
6258 for (i = 0; i < coding_category_raw_text; i++)
6259 {
6260 enum coding_category category = coding_priorities[i];
6261 struct coding_system *this = coding_categories + category;
6262
6263 if (category >= coding_category_raw_text
6264 || detected & (1 << category))
6265 continue;
6266
6267 if (this->id < 0)
6268 {
6269 /* No coding system of this category is defined. */
6270 mask &= ~(1 << category);
6271 }
6272 else
6273 {
6274 detected |= detected_mask[category];
6275 if ((*(coding_categories[category].detector)) (&coding, &mask)
6276 && highest)
6277 {
6278 mask &= detected_mask[category];
6279 break;
6280 }
6281 }
6282 }
6283 }
6284
6285 if (!mask)
6286 val = Fcons (make_number (coding_category_raw_text), Qnil);
6287 else if (mask == CATEGORY_MASK_ANY)
6288 val = Fcons (make_number (coding_category_undecided), Qnil);
6289 else if (highest)
6290 {
6291 for (i = 0; i < coding_category_raw_text; i++)
6292 if (mask & (1 << coding_priorities[i]))
6293 {
6294 val = Fcons (make_number (coding_priorities[i]), Qnil);
6295 break;
6296 }
6297 }
6298 else
6299 {
6300 val = Qnil;
6301 for (i = coding_category_raw_text - 1; i >= 0; i--)
6302 if (mask & (1 << coding_priorities[i]))
6303 val = Fcons (make_number (coding_priorities[i]), val);
6304 }
6305
6306 {
6307 int one_byte_eol = -1, two_byte_eol = -1;
6308 Lisp_Object tail;
6309
6310 for (tail = val; CONSP (tail); tail = XCDR (tail))
6311 {
6312 struct coding_system *this
6313 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6314 : &coding);
6315 int this_eol;
6316
6317 attrs = CODING_ID_ATTRS (this->id);
6318 eol_type = CODING_ID_EOL_TYPE (this->id);
6319 XSETCAR (tail, CODING_ID_NAME (this->id));
6320 if (VECTORP (eol_type))
6321 {
6322 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6323 {
6324 if (two_byte_eol < 0)
6325 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6326 this_eol = two_byte_eol;
6327 }
6328 else
6329 {
6330 if (one_byte_eol < 0)
6331 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6332 this_eol = one_byte_eol;
6333 }
6334 if (this_eol == EOL_SEEN_LF)
6335 XSETCAR (tail, AREF (eol_type, 0));
6336 else if (this_eol == EOL_SEEN_CRLF)
6337 XSETCAR (tail, AREF (eol_type, 1));
6338 else if (this_eol == EOL_SEEN_CR)
6339 XSETCAR (tail, AREF (eol_type, 2));
6340 }
6341 }
6342 }
6343
6344 return (highest ? XCAR (val) : val);
6345}
6346
6347
6348DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6349 2, 3, 0,
6350 doc: /* Detect coding system of the text in the region between START and END.
6351Return a list of possible coding systems ordered by priority.
6352
6353If only ASCII characters are found, it returns a list of single element
6354`undecided' or its subsidiary coding system according to a detected
6355end-of-line format.
6356
6357If optional argument HIGHEST is non-nil, return the coding system of
6358highest priority. */)
6359 (start, end, highest)
6360 Lisp_Object start, end, highest;
6361{
6362 int from, to;
6363 int from_byte, to_byte;
6364
6365 CHECK_NUMBER_COERCE_MARKER (start);
6366 CHECK_NUMBER_COERCE_MARKER (end);
6367
6368 validate_region (&start, &end);
6369 from = XINT (start), to = XINT (end);
6370 from_byte = CHAR_TO_BYTE (from);
6371 to_byte = CHAR_TO_BYTE (to);
6372
6373 if (from < GPT && to >= GPT)
6374 move_gap_both (to, to_byte);
6375
6376 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6377 to_byte - from_byte,
6378 !NILP (highest),
6379 !NILP (current_buffer
6380 ->enable_multibyte_characters),
6381 Qnil);
6382}
6383
6384DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6385 1, 2, 0,
6386 doc: /* Detect coding system of the text in STRING.
6387Return a list of possible coding systems ordered by priority.
6388
6389If only ASCII characters are found, it returns a list of single element
6390`undecided' or its subsidiary coding system according to a detected
6391end-of-line format.
6392
6393If optional argument HIGHEST is non-nil, return the coding system of
6394highest priority. */)
6395 (string, highest)
6396 Lisp_Object string, highest;
6397{
6398 CHECK_STRING (string);
6399
6400 return detect_coding_system (XSTRING (string)->data,
6401 STRING_BYTES (XSTRING (string)),
6402 !NILP (highest),
6403 STRING_MULTIBYTE (string),
6404 Qnil);
6405}
6406
6407
6408static INLINE int
6409char_encodable_p (c, attrs)
6410 int c;
6411 Lisp_Object attrs;
6412{
6413 Lisp_Object tail;
6414 struct charset *charset;
6415
6416 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6417 CONSP (tail); tail = XCDR (tail))
6418 {
6419 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6420 if (CHAR_CHARSET_P (c, charset))
6421 break;
6422 }
6423 return (! NILP (tail));
6424}
6425
6426
6427/* Return a list of coding systems that safely encode the text between
6428 START and END. If EXCLUDE is non-nil, it is a list of coding
6429 systems not to check. The returned list doesn't contain any such
6430 coding systems. In any case, If the text contains only ASCII or is
6431 unibyte, return t. */
6432
6433DEFUN ("find-coding-systems-region-internal",
6434 Ffind_coding_systems_region_internal,
6435 Sfind_coding_systems_region_internal, 2, 3, 0,
6436 doc: /* Internal use only. */)
6437 (start, end, exclude)
6438 Lisp_Object start, end, exclude;
6439{
6440 Lisp_Object coding_attrs_list, safe_codings;
6441 EMACS_INT start_byte, end_byte;
6442 unsigned char *p, *pbeg, *pend;
6443 int c;
6444 Lisp_Object tail, elt;
6445
6446 if (STRINGP (start))
6447 {
6448 if (!STRING_MULTIBYTE (start)
6449 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6450 return Qt;
6451 start_byte = 0;
6452 end_byte = STRING_BYTES (XSTRING (start));
6453 }
6454 else
6455 {
6456 CHECK_NUMBER_COERCE_MARKER (start);
6457 CHECK_NUMBER_COERCE_MARKER (end);
6458 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6459 args_out_of_range (start, end);
6460 if (NILP (current_buffer->enable_multibyte_characters))
6461 return Qt;
6462 start_byte = CHAR_TO_BYTE (XINT (start));
6463 end_byte = CHAR_TO_BYTE (XINT (end));
6464 if (XINT (end) - XINT (start) == end_byte - start_byte)
6465 return Qt;
6466
6467 if (start < GPT && end > GPT)
6468 {
6469 if ((GPT - start) < (end - GPT))
6470 move_gap_both (start, start_byte);
6471 else
6472 move_gap_both (end, end_byte);
6473 }
6474 }
6475
6476 coding_attrs_list = Qnil;
6477 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6478 if (NILP (exclude)
6479 || NILP (Fmemq (XCAR (tail), exclude)))
6480 {
6481 Lisp_Object attrs;
6482
6483 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6484 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6485 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6486 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6487 }
6488
6489 if (STRINGP (start))
6490 p = pbeg = XSTRING (start)->data;
6491 else
6492 p = pbeg = BYTE_POS_ADDR (start_byte);
6493 pend = p + (end_byte - start_byte);
6494
6495 while (p < pend && ASCII_BYTE_P (*p)) p++;
6496 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6497
6498 while (p < pend)
6499 {
6500 if (ASCII_BYTE_P (*p))
6501 p++;
6502 else
6503 {
6504 c = STRING_CHAR_ADVANCE (p);
6505
6506 charset_map_loaded = 0;
6507 for (tail = coding_attrs_list; CONSP (tail);)
6508 {
6509 elt = XCAR (tail);
6510 if (NILP (elt))
6511 tail = XCDR (tail);
6512 else if (char_encodable_p (c, elt))
6513 tail = XCDR (tail);
6514 else if (CONSP (XCDR (tail)))
6515 {
6516 XSETCAR (tail, XCAR (XCDR (tail)));
6517 XSETCDR (tail, XCDR (XCDR (tail)));
6518 }
6519 else
6520 {
6521 XSETCAR (tail, Qnil);
6522 tail = XCDR (tail);
6523 }
6524 }
6525 if (charset_map_loaded)
6526 {
6527 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6528
6529 if (STRINGP (start))
6530 pbeg = XSTRING (start)->data;
6531 else
6532 pbeg = BYTE_POS_ADDR (start_byte);
6533 p = pbeg + p_offset;
6534 pend = pbeg + pend_offset;
6535 }
6536 }
6537 }
6538
6539 safe_codings = Qnil;
6540 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6541 if (! NILP (XCAR (tail)))
6542 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6543
6544 return safe_codings;
6545}
6546
6547
6548DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6549 Scheck_coding_systems_region, 3, 3, 0,
6550 doc: /* Check if the region is encodable by coding systems.
6551
6552START and END are buffer positions specifying the region.
6553CODING-SYSTEM-LIST is a list of coding systems to check.
6554
6555The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6556CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6557whole region, POS0, POS1, ... are buffer positions where non-encodable
6558characters are found.
6559
6560If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6561value is nil.
6562
6563START may be a string. In that case, check if the string is
6564encodable, and the value contains indices to the string instead of
6565buffer positions. END is ignored. */)
6566 (start, end, coding_system_list)
6567 Lisp_Object start, end, coding_system_list;
6568{
6569 Lisp_Object list;
6570 EMACS_INT start_byte, end_byte;
6571 int pos;
6572 unsigned char *p, *pbeg, *pend;
6573 int c;
6574 Lisp_Object tail, elt;
6575
6576 if (STRINGP (start))
6577 {
6578 if (!STRING_MULTIBYTE (start)
6579 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6580 return Qnil;
6581 start_byte = 0;
6582 end_byte = STRING_BYTES (XSTRING (start));
6583 pos = 0;
6584 }
6585 else
6586 {
6587 CHECK_NUMBER_COERCE_MARKER (start);
6588 CHECK_NUMBER_COERCE_MARKER (end);
6589 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6590 args_out_of_range (start, end);
6591 if (NILP (current_buffer->enable_multibyte_characters))
6592 return Qnil;
6593 start_byte = CHAR_TO_BYTE (XINT (start));
6594 end_byte = CHAR_TO_BYTE (XINT (end));
6595 if (XINT (end) - XINT (start) == end_byte - start_byte)
6596 return Qt;
6597
6598 if (start < GPT && end > GPT)
6599 {
6600 if ((GPT - start) < (end - GPT))
6601 move_gap_both (start, start_byte);
6602 else
6603 move_gap_both (end, end_byte);
6604 }
6605 pos = start;
6606 }
6607
6608 list = Qnil;
6609 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6610 {
6611 elt = XCAR (tail);
6612 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6613 Qnil)),
6614 list);
6615 }
6616
6617 if (STRINGP (start))
6618 p = pbeg = XSTRING (start)->data;
6619 else
6620 p = pbeg = BYTE_POS_ADDR (start_byte);
6621 pend = p + (end_byte - start_byte);
6622
6623 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6624 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6625
6626 while (p < pend)
6627 {
6628 if (ASCII_BYTE_P (*p))
6629 p++;
6630 else
6631 {
6632 c = STRING_CHAR_ADVANCE (p);
6633
6634 charset_map_loaded = 0;
6635 for (tail = list; CONSP (tail); tail = XCDR (tail))
6636 {
6637 elt = XCDR (XCAR (tail));
6638 if (! char_encodable_p (c, XCAR (elt)))
6639 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6640 }
6641 if (charset_map_loaded)
6642 {
6643 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6644
6645 if (STRINGP (start))
6646 pbeg = XSTRING (start)->data;
6647 else
6648 pbeg = BYTE_POS_ADDR (start_byte);
6649 p = pbeg + p_offset;
6650 pend = pbeg + pend_offset;
6651 }
6652 }
6653 pos++;
6654 }
6655
6656 tail = list;
6657 list = Qnil;
6658 for (; CONSP (tail); tail = XCDR (tail))
6659 {
6660 elt = XCAR (tail);
6661 if (CONSP (XCDR (XCDR (elt))))
6662 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6663 list);
6664 }
6665
6666 return list;
6667}
6668
6669
6670
6671Lisp_Object
6672code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6673 Lisp_Object start, end, coding_system, dst_object;
6674 int encodep, norecord;
6675{
6676 struct coding_system coding;
6677 EMACS_INT from, from_byte, to, to_byte;
6678 Lisp_Object src_object;
6679
6680 CHECK_NUMBER_COERCE_MARKER (start);
6681 CHECK_NUMBER_COERCE_MARKER (end);
6682 if (NILP (coding_system))
6683 coding_system = Qno_conversion;
6684 else
6685 CHECK_CODING_SYSTEM (coding_system);
6686 src_object = Fcurrent_buffer ();
6687 if (NILP (dst_object))
6688 dst_object = src_object;
6689 else if (! EQ (dst_object, Qt))
6690 CHECK_BUFFER (dst_object);
6691
6692 validate_region (&start, &end);
6693 from = XFASTINT (start);
6694 from_byte = CHAR_TO_BYTE (from);
6695 to = XFASTINT (end);
6696 to_byte = CHAR_TO_BYTE (to);
6697
6698 setup_coding_system (coding_system, &coding);
6699 coding.mode |= CODING_MODE_LAST_BLOCK;
6700
6701 if (encodep)
6702 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6703 dst_object);
6704 else
6705 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6706 dst_object);
6707 if (! norecord)
6708 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6709
6710 if (coding.result != CODING_RESULT_SUCCESS)
6711 error ("Code conversion error: %d", coding.result);
6712
6713 return (BUFFERP (dst_object)
6714 ? make_number (coding.produced_char)
6715 : coding.dst_object);
6716}
6717
6718
6719DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6720 3, 4, "r\nzCoding system: ",
6721 doc: /* Decode the current region from the specified coding system.
6722When called from a program, takes four arguments:
6723 START, END, CODING-SYSTEM, and DESTINATION.
6724START and END are buffer positions.
6725
6726Optional 4th arguments DESTINATION specifies where the decoded text goes.
6727If nil, the region between START and END is replace by the decoded text.
6728If buffer, the decoded text is inserted in the buffer.
6729If t, the decoded text is returned.
6730
6731This function sets `last-coding-system-used' to the precise coding system
6732used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6733not fully specified.)
6734It returns the length of the decoded text. */)
6735 (start, end, coding_system, destination)
6736 Lisp_Object start, end, coding_system, destination;
6737{
6738 return code_convert_region (start, end, coding_system, destination, 0, 0);
6739}
6740
6741DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6742 3, 4, "r\nzCoding system: ",
6743 doc: /* Encode the current region by specified coding system.
6744When called from a program, takes three arguments:
6745START, END, and CODING-SYSTEM. START and END are buffer positions.
6746
6747Optional 4th arguments DESTINATION specifies where the encoded text goes.
6748If nil, the region between START and END is replace by the encoded text.
6749If buffer, the encoded text is inserted in the buffer.
6750If t, the encoded text is returned.
6751
6752This function sets `last-coding-system-used' to the precise coding system
6753used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6754not fully specified.)
6755It returns the length of the encoded text. */)
6756 (start, end, coding_system, destination)
6757 Lisp_Object start, end, coding_system, destination;
6758{
6759 return code_convert_region (start, end, coding_system, destination, 1, 0);
6760}
6761
6762Lisp_Object
6763code_convert_string (string, coding_system, dst_object,
6764 encodep, nocopy, norecord)
6765 Lisp_Object string, coding_system, dst_object;
6766 int encodep, nocopy, norecord;
6767{
6768 struct coding_system coding;
6769 EMACS_INT chars, bytes;
6770
6771 CHECK_STRING (string);
6772 if (NILP (coding_system))
6773 {
6774 if (! norecord)
6775 Vlast_coding_system_used = Qno_conversion;
6776 if (NILP (dst_object))
6777 return (nocopy ? Fcopy_sequence (string) : string);
6778 }
6779
6780 if (NILP (coding_system))
6781 coding_system = Qno_conversion;
6782 else
6783 CHECK_CODING_SYSTEM (coding_system);
6784 if (NILP (dst_object))
6785 dst_object = Qt;
6786 else if (! EQ (dst_object, Qt))
6787 CHECK_BUFFER (dst_object);
6788
6789 setup_coding_system (coding_system, &coding);
6790 coding.mode |= CODING_MODE_LAST_BLOCK;
6791 chars = XSTRING (string)->size;
6792 bytes = STRING_BYTES (XSTRING (string));
6793 if (encodep)
6794 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6795 else
6796 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6797 if (! norecord)
6798 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6799
6800 if (coding.result != CODING_RESULT_SUCCESS)
6801 error ("Code conversion error: %d", coding.result);
6802
6803 return (BUFFERP (dst_object)
6804 ? make_number (coding.produced_char)
6805 : coding.dst_object);
6806}
6807
6808
6809/* Encode or decode STRING according to CODING_SYSTEM.
6810 Do not set Vlast_coding_system_used.
6811
6812 This function is called only from macros DECODE_FILE and
6813 ENCODE_FILE, thus we ignore character composition. */
6814
6815Lisp_Object
6816code_convert_string_norecord (string, coding_system, encodep)
6817 Lisp_Object string, coding_system;
6818 int encodep;
6819{
6820 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
6821}
6822
6823
6824DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6825 2, 4, 0,
6826 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6827
6828Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6829if the decoding operation is trivial.
6830
6831Optional fourth arg BUFFER non-nil meant that the decoded text is
6832inserted in BUFFER instead of returned as a astring. In this case,
6833the return value is BUFFER.
6834
6835This function sets `last-coding-system-used' to the precise coding system
6836used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6837not fully specified. */)
6838 (string, coding_system, nocopy, buffer)
6839 Lisp_Object string, coding_system, nocopy, buffer;
6840{
6841 return code_convert_string (string, coding_system, buffer,
6842 0, ! NILP (nocopy), 0);
6843}
6844
6845DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6846 2, 4, 0,
6847 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6848
6849Optional third arg NOCOPY non-nil means it is OK to return STRING
6850itself if the encoding operation is trivial.
6851
6852Optional fourth arg BUFFER non-nil meant that the encoded text is
6853inserted in BUFFER instead of returned as a astring. In this case,
6854the return value is BUFFER.
6855
6856This function sets `last-coding-system-used' to the precise coding system
6857used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6858not fully specified.) */)
6859 (string, coding_system, nocopy, buffer)
6860 Lisp_Object string, coding_system, nocopy, buffer;
6861{
6862 return code_convert_string (string, coding_system, buffer,
6863 nocopy, ! NILP (nocopy), 1);
6864}
6865
6866\f
6867DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6868 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6869Return the corresponding character. */)
6870 (code)
6871 Lisp_Object code;
6872{
6873 Lisp_Object spec, attrs, val;
6874 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6875 int c;
6876
6877 CHECK_NATNUM (code);
6878 c = XFASTINT (code);
6879 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6880 attrs = AREF (spec, 0);
6881
6882 if (ASCII_BYTE_P (c)
6883 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6884 return code;
6885
6886 val = CODING_ATTR_CHARSET_LIST (attrs);
6887 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6888 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6889 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
6890
6891 if (c <= 0x7F)
6892 charset = charset_roman;
6893 else if (c >= 0xA0 && c < 0xDF)
6894 {
6895 charset = charset_kana;
6896 c -= 0x80;
6897 }
6898 else
6899 {
6900 int s1 = c >> 8, s2 = c & 0xFF;
6901
6902 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6903 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6904 error ("Invalid code: %d", code);
6905 SJIS_TO_JIS (c);
6906 charset = charset_kanji;
6907 }
6908 c = DECODE_CHAR (charset, c);
6909 if (c < 0)
6910 error ("Invalid code: %d", code);
6911 return make_number (c);
6912}
6913
6914
6915DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6916 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6917Return the corresponding code in SJIS. */)
6918 (ch)
6919 Lisp_Object ch;
6920{
6921 Lisp_Object spec, attrs, charset_list;
6922 int c;
6923 struct charset *charset;
6924 unsigned code;
6925
6926 CHECK_CHARACTER (ch);
6927 c = XFASTINT (ch);
6928 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6929 attrs = AREF (spec, 0);
6930
6931 if (ASCII_CHAR_P (c)
6932 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6933 return ch;
6934
6935 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6936 charset = char_charset (c, charset_list, &code);
6937 if (code == CHARSET_INVALID_CODE (charset))
6938 error ("Can't encode by shift_jis encoding: %d", c);
6939 JIS_TO_SJIS (code);
6940
6941 return make_number (code);
6942}
6943
6944DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6945 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6946Return the corresponding character. */)
6947 (code)
6948 Lisp_Object code;
6949{
6950 Lisp_Object spec, attrs, val;
6951 struct charset *charset_roman, *charset_big5, *charset;
6952 int c;
6953
6954 CHECK_NATNUM (code);
6955 c = XFASTINT (code);
6956 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6957 attrs = AREF (spec, 0);
6958
6959 if (ASCII_BYTE_P (c)
6960 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6961 return code;
6962
6963 val = CODING_ATTR_CHARSET_LIST (attrs);
6964 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6965 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6966
6967 if (c <= 0x7F)
6968 charset = charset_roman;
6969 else
6970 {
6971 int b1 = c >> 8, b2 = c & 0x7F;
6972 if (b1 < 0xA1 || b1 > 0xFE
6973 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6974 error ("Invalid code: %d", code);
6975 charset = charset_big5;
6976 }
6977 c = DECODE_CHAR (charset, (unsigned )c);
6978 if (c < 0)
6979 error ("Invalid code: %d", code);
6980 return make_number (c);
6981}
6982
6983DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6984 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
6985Return the corresponding character code in Big5. */)
6986 (ch)
6987 Lisp_Object ch;
6988{
6989 Lisp_Object spec, attrs, charset_list;
6990 struct charset *charset;
6991 int c;
6992 unsigned code;
6993
6994 CHECK_CHARACTER (ch);
6995 c = XFASTINT (ch);
6996 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6997 attrs = AREF (spec, 0);
6998 if (ASCII_CHAR_P (c)
6999 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7000 return ch;
7001
7002 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7003 charset = char_charset (c, charset_list, &code);
7004 if (code == CHARSET_INVALID_CODE (charset))
7005 error ("Can't encode by Big5 encoding: %d", c);
7006
7007 return make_number (code);
7008}
7009
7010\f
7011DEFUN ("set-terminal-coding-system-internal",
7012 Fset_terminal_coding_system_internal,
7013 Sset_terminal_coding_system_internal, 1, 1, 0,
7014 doc: /* Internal use only. */)
7015 (coding_system)
7016 Lisp_Object coding_system;
7017{
7018 CHECK_SYMBOL (coding_system);
7019 setup_coding_system (Fcheck_coding_system (coding_system),
7020 &terminal_coding);
7021
7022 /* We had better not send unsafe characters to terminal. */
7023 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7024 /* Characer composition should be disabled. */
7025 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7026 terminal_coding.src_multibyte = 1;
7027 terminal_coding.dst_multibyte = 0;
7028 return Qnil;
7029}
7030
7031DEFUN ("set-safe-terminal-coding-system-internal",
7032 Fset_safe_terminal_coding_system_internal,
7033 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7034 doc: /* Internal use only. */)
7035 (coding_system)
7036 Lisp_Object coding_system;
7037{
7038 CHECK_SYMBOL (coding_system);
7039 setup_coding_system (Fcheck_coding_system (coding_system),
7040 &safe_terminal_coding);
7041 /* Characer composition should be disabled. */
7042 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7043 safe_terminal_coding.src_multibyte = 1;
7044 safe_terminal_coding.dst_multibyte = 0;
7045 return Qnil;
7046}
7047
7048DEFUN ("terminal-coding-system",
7049 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7050 doc: /* Return coding system specified for terminal output. */)
7051 ()
7052{
7053 return CODING_ID_NAME (terminal_coding.id);
7054}
7055
7056DEFUN ("set-keyboard-coding-system-internal",
7057 Fset_keyboard_coding_system_internal,
7058 Sset_keyboard_coding_system_internal, 1, 1, 0,
7059 doc: /* Internal use only. */)
7060 (coding_system)
7061 Lisp_Object coding_system;
7062{
7063 CHECK_SYMBOL (coding_system);
7064 setup_coding_system (Fcheck_coding_system (coding_system),
7065 &keyboard_coding);
7066 /* Characer composition should be disabled. */
7067 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7068 return Qnil;
7069}
7070
7071DEFUN ("keyboard-coding-system",
7072 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7073 doc: /* Return coding system specified for decoding keyboard input. */)
7074 ()
7075{
7076 return CODING_ID_NAME (keyboard_coding.id);
7077}
7078
7079\f
7080DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7081 Sfind_operation_coding_system, 1, MANY, 0,
7082 doc: /* Choose a coding system for an operation based on the target name.
7083The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7084DECODING-SYSTEM is the coding system to use for decoding
7085\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7086for encoding (in case OPERATION does encoding).
7087
7088The first argument OPERATION specifies an I/O primitive:
7089 For file I/O, `insert-file-contents' or `write-region'.
7090 For process I/O, `call-process', `call-process-region', or `start-process'.
7091 For network I/O, `open-network-stream'.
7092
7093The remaining arguments should be the same arguments that were passed
7094to the primitive. Depending on which primitive, one of those arguments
7095is selected as the TARGET. For example, if OPERATION does file I/O,
7096whichever argument specifies the file name is TARGET.
7097
7098TARGET has a meaning which depends on OPERATION:
7099 For file I/O, TARGET is a file name.
7100 For process I/O, TARGET is a process name.
7101 For network I/O, TARGET is a service name or a port number
7102
7103This function looks up what specified for TARGET in,
7104`file-coding-system-alist', `process-coding-system-alist',
7105or `network-coding-system-alist' depending on OPERATION.
7106They may specify a coding system, a cons of coding systems,
7107or a function symbol to call.
7108In the last case, we call the function with one argument,
7109which is a list of all the arguments given to this function.
7110
7111usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7112 (nargs, args)
7113 int nargs;
7114 Lisp_Object *args;
7115{
7116 Lisp_Object operation, target_idx, target, val;
7117 register Lisp_Object chain;
7118
7119 if (nargs < 2)
7120 error ("Too few arguments");
7121 operation = args[0];
7122 if (!SYMBOLP (operation)
7123 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7124 error ("Invalid first arguement");
7125 if (nargs < 1 + XINT (target_idx))
7126 error ("Too few arguments for operation: %s",
7127 XSYMBOL (operation)->name->data);
7128 target = args[XINT (target_idx) + 1];
7129 if (!(STRINGP (target)
7130 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7131 error ("Invalid %dth argument", XINT (target_idx) + 1);
7132
7133 chain = ((EQ (operation, Qinsert_file_contents)
7134 || EQ (operation, Qwrite_region))
7135 ? Vfile_coding_system_alist
7136 : (EQ (operation, Qopen_network_stream)
7137 ? Vnetwork_coding_system_alist
7138 : Vprocess_coding_system_alist));
7139 if (NILP (chain))
7140 return Qnil;
7141
7142 for (; CONSP (chain); chain = XCDR (chain))
7143 {
7144 Lisp_Object elt;
7145
7146 elt = XCAR (chain);
7147 if (CONSP (elt)
7148 && ((STRINGP (target)
7149 && STRINGP (XCAR (elt))
7150 && fast_string_match (XCAR (elt), target) >= 0)
7151 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7152 {
7153 val = XCDR (elt);
7154 /* Here, if VAL is both a valid coding system and a valid
7155 function symbol, we return VAL as a coding system. */
7156 if (CONSP (val))
7157 return val;
7158 if (! SYMBOLP (val))
7159 return Qnil;
7160 if (! NILP (Fcoding_system_p (val)))
7161 return Fcons (val, val);
7162 if (! NILP (Ffboundp (val)))
7163 {
7164 val = call1 (val, Flist (nargs, args));
7165 if (CONSP (val))
7166 return val;
7167 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7168 return Fcons (val, val);
7169 }
7170 return Qnil;
7171 }
7172 }
7173 return Qnil;
7174}
7175
7176DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7177 Sset_coding_system_priority, 1, MANY, 0,
7178 doc: /* Assign higher priority to coding systems given as arguments.
7179usage: (set-coding-system-priority CODING-SYSTEM ...) */)
7180 (nargs, args)
7181 int nargs;
7182 Lisp_Object *args;
7183{
7184 int i, j;
7185 int changed[coding_category_max];
7186 enum coding_category priorities[coding_category_max];
7187
7188 bzero (changed, sizeof changed);
7189
7190 for (i = j = 0; i < nargs; i++)
7191 {
7192 enum coding_category category;
7193 Lisp_Object spec, attrs;
7194
7195 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7196 attrs = AREF (spec, 0);
7197 category = XINT (CODING_ATTR_CATEGORY (attrs));
7198 if (changed[category])
7199 /* Ignore this coding system because a coding system of the
7200 same category already had a higher priority. */
7201 continue;
7202 changed[category] = 1;
7203 priorities[j++] = category;
7204 if (coding_categories[category].id >= 0
7205 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7206 setup_coding_system (args[i], &coding_categories[category]);
7207 }
7208
7209 /* Now we have decided top J priorities. Reflect the order of the
7210 original priorities to the remaining priorities. */
7211
7212 for (i = j, j = 0; i < coding_category_max; i++, j++)
7213 {
7214 while (j < coding_category_max
7215 && changed[coding_priorities[j]])
7216 j++;
7217 if (j == coding_category_max)
7218 abort ();
7219 priorities[i] = coding_priorities[j];
7220 }
7221
7222 bcopy (priorities, coding_priorities, sizeof priorities);
7223 return Qnil;
7224}
7225
7226DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7227 Scoding_system_priority_list, 0, 1, 0,
7228 doc: /* Return a list of coding systems ordered by their priorities. */)
7229 (highestp)
7230 Lisp_Object highestp;
7231{
7232 int i;
7233 Lisp_Object val;
7234
7235 for (i = 0, val = Qnil; i < coding_category_max; i++)
7236 {
7237 enum coding_category category = coding_priorities[i];
7238 int id = coding_categories[category].id;
7239 Lisp_Object attrs;
7240
7241 if (id < 0)
7242 continue;
7243 attrs = CODING_ID_ATTRS (id);
7244 if (! NILP (highestp))
7245 return CODING_ATTR_BASE_NAME (attrs);
7246 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7247 }
7248 return Fnreverse (val);
7249}
7250
7251static Lisp_Object
7252make_subsidiaries (base)
7253 Lisp_Object base;
7254{
7255 Lisp_Object subsidiaries;
7256 char *suffixes[] = { "-unix", "-dos", "-mac" };
7257 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7258 char *buf = (char *) alloca (base_name_len + 6);
7259 int i;
7260
7261 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7262 subsidiaries = Fmake_vector (make_number (3), Qnil);
7263 for (i = 0; i < 3; i++)
7264 {
7265 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7266 ASET (subsidiaries, i, intern (buf));
7267 }
7268 return subsidiaries;
7269}
7270
7271
7272DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7273 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7274 doc: /* For internal use only.
7275usage: (define-coding-system-internal ...) */)
7276 (nargs, args)
7277 int nargs;
7278 Lisp_Object *args;
7279{
7280 Lisp_Object name;
7281 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7282 Lisp_Object attrs; /* Vector of attributes. */
7283 Lisp_Object eol_type;
7284 Lisp_Object aliases;
7285 Lisp_Object coding_type, charset_list, safe_charsets;
7286 enum coding_category category;
7287 Lisp_Object tail, val;
7288 int max_charset_id = 0;
7289 int i;
7290
7291 if (nargs < coding_arg_max)
7292 goto short_args;
7293
7294 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7295
7296 name = args[coding_arg_name];
7297 CHECK_SYMBOL (name);
7298 CODING_ATTR_BASE_NAME (attrs) = name;
7299
7300 val = args[coding_arg_mnemonic];
7301 if (! STRINGP (val))
7302 CHECK_CHARACTER (val);
7303 CODING_ATTR_MNEMONIC (attrs) = val;
7304
7305 coding_type = args[coding_arg_coding_type];
7306 CHECK_SYMBOL (coding_type);
7307 CODING_ATTR_TYPE (attrs) = coding_type;
7308
7309 charset_list = args[coding_arg_charset_list];
7310 if (SYMBOLP (charset_list))
7311 {
7312 if (EQ (charset_list, Qiso_2022))
7313 {
7314 if (! EQ (coding_type, Qiso_2022))
7315 error ("Invalid charset-list");
7316 charset_list = Viso_2022_charset_list;
7317 }
7318 else if (EQ (charset_list, Qemacs_mule))
7319 {
7320 if (! EQ (coding_type, Qemacs_mule))
7321 error ("Invalid charset-list");
7322 charset_list = Vemacs_mule_charset_list;
7323 }
7324 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7325 if (max_charset_id < XFASTINT (XCAR (tail)))
7326 max_charset_id = XFASTINT (XCAR (tail));
7327 }
7328 else
7329 {
7330 charset_list = Fcopy_sequence (charset_list);
7331 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7332 {
7333 struct charset *charset;
7334
7335 val = Fcar (tail);
7336 CHECK_CHARSET_GET_CHARSET (val, charset);
7337 if (EQ (coding_type, Qiso_2022)
7338 ? CHARSET_ISO_FINAL (charset) < 0
7339 : EQ (coding_type, Qemacs_mule)
7340 ? CHARSET_EMACS_MULE_ID (charset) < 0
7341 : 0)
7342 error ("Can't handle charset `%s'",
7343 XSYMBOL (CHARSET_NAME (charset))->name->data);
7344
7345 XCAR (tail) = make_number (charset->id);
7346 if (max_charset_id < charset->id)
7347 max_charset_id = charset->id;
7348 }
7349 }
7350 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7351
7352 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7353 make_number (255));
7354 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7355 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7356 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7357
7358 val = args[coding_arg_decode_translation_table];
7359 if (! NILP (val))
7360 CHECK_CHAR_TABLE (val);
7361 CODING_ATTR_DECODE_TBL (attrs) = val;
7362
7363 val = args[coding_arg_encode_translation_table];
7364 if (! NILP (val))
7365 CHECK_CHAR_TABLE (val);
7366 CODING_ATTR_ENCODE_TBL (attrs) = val;
7367
7368 val = args[coding_arg_post_read_conversion];
7369 CHECK_SYMBOL (val);
7370 CODING_ATTR_POST_READ (attrs) = val;
7371
7372 val = args[coding_arg_pre_write_conversion];
7373 CHECK_SYMBOL (val);
7374 CODING_ATTR_PRE_WRITE (attrs) = val;
7375
7376 val = args[coding_arg_default_char];
7377 if (NILP (val))
7378 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7379 else
7380 {
7381 CHECK_CHARACTER (val);
7382 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7383 }
7384
7385 val = args[coding_arg_plist];
7386 CHECK_LIST (val);
7387 CODING_ATTR_PLIST (attrs) = val;
7388
7389 if (EQ (coding_type, Qcharset))
7390 {
7391 /* Generate a lisp vector of 256 elements. Each element is nil,
7392 integer, or a list of charset IDs.
7393
7394 If Nth element is nil, the byte code N is invalid in this
7395 coding system.
7396
7397 If Nth element is a number NUM, N is the first byte of a
7398 charset whose ID is NUM.
7399
7400 If Nth element is a list of charset IDs, N is the first byte
7401 of one of them. The list is sorted by dimensions of the
7402 charsets. A charset of smaller dimension comes firtst.
7403 */
7404 val = Fmake_vector (make_number (256), Qnil);
7405
7406 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7407 {
7408 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7409 int dim = CHARSET_DIMENSION (charset);
7410 int idx = (dim - 1) * 4;
7411
7412 for (i = charset->code_space[idx];
7413 i <= charset->code_space[idx + 1]; i++)
7414 {
7415 Lisp_Object tmp, tmp2;
7416 int dim2;
7417
7418 tmp = AREF (val, i);
7419 if (NILP (tmp))
7420 tmp = XCAR (tail);
7421 else if (NUMBERP (tmp))
7422 {
7423 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7424 if (dim < dim2)
7425 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
7426 else
7427 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
7428 }
7429 else
7430 {
7431 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7432 {
7433 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7434 if (dim < dim2)
7435 break;
7436 }
7437 if (NILP (tmp2))
7438 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7439 else
7440 {
7441 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7442 XSETCAR (tmp2, XCAR (tail));
7443 }
7444 }
7445 ASET (val, i, tmp);
7446 }
7447 }
7448 ASET (attrs, coding_attr_charset_valids, val);
7449 category = coding_category_charset;
7450 }
7451 else if (EQ (coding_type, Qccl))
7452 {
7453 Lisp_Object valids;
7454
7455 if (nargs < coding_arg_ccl_max)
7456 goto short_args;
7457
7458 val = args[coding_arg_ccl_decoder];
7459 CHECK_CCL_PROGRAM (val);
7460 if (VECTORP (val))
7461 val = Fcopy_sequence (val);
7462 ASET (attrs, coding_attr_ccl_decoder, val);
7463
7464 val = args[coding_arg_ccl_encoder];
7465 CHECK_CCL_PROGRAM (val);
7466 if (VECTORP (val))
7467 val = Fcopy_sequence (val);
7468 ASET (attrs, coding_attr_ccl_encoder, val);
7469
7470 val = args[coding_arg_ccl_valids];
7471 valids = Fmake_string (make_number (256), make_number (0));
7472 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7473 {
7474 val = Fcar (tail);
7475 if (INTEGERP (val))
7476 ASET (valids, XINT (val), 1);
7477 else
7478 {
7479 int from, to;
7480
7481 CHECK_CONS (val);
7482 CHECK_NUMBER (XCAR (val));
7483 CHECK_NUMBER (XCDR (val));
7484 from = XINT (XCAR (val));
7485 to = XINT (XCDR (val));
7486 for (i = from; i <= to; i++)
7487 ASET (valids, i, 1);
7488 }
7489 }
7490 ASET (attrs, coding_attr_ccl_valids, valids);
7491
7492 category = coding_category_ccl;
7493 }
7494 else if (EQ (coding_type, Qutf_16))
7495 {
7496 Lisp_Object bom, endian;
7497
7498 if (nargs < coding_arg_utf16_max)
7499 goto short_args;
7500
7501 bom = args[coding_arg_utf16_bom];
7502 if (! NILP (bom) && ! EQ (bom, Qt))
7503 {
7504 CHECK_CONS (bom);
7505 CHECK_CODING_SYSTEM (XCAR (bom));
7506 CHECK_CODING_SYSTEM (XCDR (bom));
7507 }
7508 ASET (attrs, coding_attr_utf_16_bom, bom);
7509
7510 endian = args[coding_arg_utf16_endian];
7511 ASET (attrs, coding_attr_utf_16_endian, endian);
7512
7513 category = (CONSP (bom)
7514 ? coding_category_utf_16_auto
7515 : NILP (bom)
7516 ? (NILP (endian)
7517 ? coding_category_utf_16_be_nosig
7518 : coding_category_utf_16_le_nosig)
7519 : (NILP (endian)
7520 ? coding_category_utf_16_be
7521 : coding_category_utf_16_le));
7522 }
7523 else if (EQ (coding_type, Qiso_2022))
7524 {
7525 Lisp_Object initial, reg_usage, request, flags;
7526 int i, id;
7527
7528 if (nargs < coding_arg_iso2022_max)
7529 goto short_args;
7530
7531 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7532 CHECK_VECTOR (initial);
7533 for (i = 0; i < 4; i++)
7534 {
7535 val = Faref (initial, make_number (i));
7536 if (! NILP (val))
7537 {
7538 CHECK_CHARSET_GET_ID (val, id);
7539 ASET (initial, i, make_number (id));
7540 }
7541 else
7542 ASET (initial, i, make_number (-1));
7543 }
7544
7545 reg_usage = args[coding_arg_iso2022_reg_usage];
7546 CHECK_CONS (reg_usage);
7547 CHECK_NATNUM (XCAR (reg_usage));
7548 CHECK_NATNUM (XCDR (reg_usage));
7549
7550 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7551 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
7552 {
7553 int id;
7554
7555 val = Fcar (tail);
7556 CHECK_CONS (val);
7557 CHECK_CHARSET_GET_ID (XCAR (val), id);
7558 CHECK_NATNUM (XCDR (val));
7559 if (XINT (XCDR (val)) >= 4)
7560 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7561 XCAR (val) = make_number (id);
7562 }
7563
7564 flags = args[coding_arg_iso2022_flags];
7565 CHECK_NATNUM (flags);
7566 i = XINT (flags);
7567 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7568 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7569
7570 ASET (attrs, coding_attr_iso_initial, initial);
7571 ASET (attrs, coding_attr_iso_usage, reg_usage);
7572 ASET (attrs, coding_attr_iso_request, request);
7573 ASET (attrs, coding_attr_iso_flags, flags);
7574 setup_iso_safe_charsets (attrs);
7575
7576 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7577 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7578 | CODING_ISO_FLAG_SINGLE_SHIFT))
7579 ? coding_category_iso_7_else
7580 : EQ (args[coding_arg_charset_list], Qiso_2022)
7581 ? coding_category_iso_7
7582 : coding_category_iso_7_tight);
7583 else
7584 {
7585 int id = XINT (AREF (initial, 1));
7586
7587 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7588 | CODING_ISO_FLAG_SINGLE_SHIFT))
7589 || EQ (args[coding_arg_charset_list], Qiso_2022)
7590 || id < 0)
7591 ? coding_category_iso_8_else
7592 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7593 ? coding_category_iso_8_1
7594 : coding_category_iso_8_2);
7595 }
7596 }
7597 else if (EQ (coding_type, Qemacs_mule))
7598 {
7599 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7600 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7601
7602 category = coding_category_emacs_mule;
7603 }
7604 else if (EQ (coding_type, Qshift_jis))
7605 {
7606
7607 struct charset *charset;
7608
7609 if (XINT (Flength (charset_list)) != 3)
7610 error ("There should be just three charsets");
7611
7612 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7613 if (CHARSET_DIMENSION (charset) != 1)
7614 error ("Dimension of charset %s is not one",
7615 XSYMBOL (CHARSET_NAME (charset))->name->data);
7616
7617 charset_list = XCDR (charset_list);
7618 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7619 if (CHARSET_DIMENSION (charset) != 1)
7620 error ("Dimension of charset %s is not one",
7621 XSYMBOL (CHARSET_NAME (charset))->name->data);
7622
7623 charset_list = XCDR (charset_list);
7624 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7625 if (CHARSET_DIMENSION (charset) != 2)
7626 error ("Dimension of charset %s is not two",
7627 XSYMBOL (CHARSET_NAME (charset))->name->data);
7628
7629 category = coding_category_sjis;
7630 Vsjis_coding_system = name;
7631 }
7632 else if (EQ (coding_type, Qbig5))
7633 {
7634 struct charset *charset;
7635
7636 if (XINT (Flength (charset_list)) != 2)
7637 error ("There should be just two charsets");
7638
7639 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7640 if (CHARSET_DIMENSION (charset) != 1)
7641 error ("Dimension of charset %s is not one",
7642 XSYMBOL (CHARSET_NAME (charset))->name->data);
7643
7644 charset_list = XCDR (charset_list);
7645 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7646 if (CHARSET_DIMENSION (charset) != 2)
7647 error ("Dimension of charset %s is not two",
7648 XSYMBOL (CHARSET_NAME (charset))->name->data);
7649
7650 category = coding_category_big5;
7651 Vbig5_coding_system = name;
7652 }
7653 else if (EQ (coding_type, Qraw_text))
7654 category = coding_category_raw_text;
7655 else if (EQ (coding_type, Qutf_8))
7656 category = coding_category_utf_8;
7657 else if (EQ (coding_type, Qundecided))
7658 category = coding_category_undecided;
7659 else
7660 error ("Invalid coding system type: %s",
7661 XSYMBOL (coding_type)->name->data);
7662
7663 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7664
7665 eol_type = args[coding_arg_eol_type];
7666 if (! NILP (eol_type)
7667 && ! EQ (eol_type, Qunix)
7668 && ! EQ (eol_type, Qdos)
7669 && ! EQ (eol_type, Qmac))
7670 error ("Invalid eol-type");
7671
7672 aliases = Fcons (name, Qnil);
7673
7674 if (NILP (eol_type))
7675 {
7676 eol_type = make_subsidiaries (name);
7677 for (i = 0; i < 3; i++)
7678 {
7679 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7680
7681 this_name = AREF (eol_type, i);
7682 this_aliases = Fcons (this_name, Qnil);
7683 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7684 this_spec = Fmake_vector (make_number (3), attrs);
7685 ASET (this_spec, 1, this_aliases);
7686 ASET (this_spec, 2, this_eol_type);
7687 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7688 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7689 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7690 Vcoding_system_alist);
7691 }
7692 }
7693
7694 spec_vec = Fmake_vector (make_number (3), attrs);
7695 ASET (spec_vec, 1, aliases);
7696 ASET (spec_vec, 2, eol_type);
7697
7698 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7699 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7700 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7701 Vcoding_system_alist);
7702
7703 {
7704 int id = coding_categories[category].id;
7705
7706 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7707 setup_coding_system (name, &coding_categories[category]);
7708 }
7709
7710 return Qnil;
7711
7712 short_args:
7713 return Fsignal (Qwrong_number_of_arguments,
7714 Fcons (intern ("define-coding-system-internal"),
7715 make_number (nargs)));
7716}
7717
7718DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7719 Sdefine_coding_system_alias, 2, 2, 0,
7720 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7721 (alias, coding_system)
7722 Lisp_Object alias, coding_system;
7723{
7724 Lisp_Object spec, aliases, eol_type;
7725
7726 CHECK_SYMBOL (alias);
7727 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7728 aliases = AREF (spec, 1);
7729 while (!NILP (XCDR (aliases)))
7730 aliases = XCDR (aliases);
7731 XCDR (aliases) = Fcons (alias, Qnil);
7732
7733 eol_type = AREF (spec, 2);
7734 if (VECTORP (eol_type))
7735 {
7736 Lisp_Object subsidiaries;
7737 int i;
7738
7739 subsidiaries = make_subsidiaries (alias);
7740 for (i = 0; i < 3; i++)
7741 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7742 AREF (eol_type, i));
7743
7744 ASET (spec, 2, subsidiaries);
7745 }
7746
7747 Fputhash (alias, spec, Vcoding_system_hash_table);
7748 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7749 Vcoding_system_alist);
7750
7751 return Qnil;
7752}
7753
7754DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7755 1, 1, 0,
7756 doc: /* Return the base of CODING-SYSTEM.
7757Any alias or subsidiary coding systems are not base coding system. */)
7758 (coding_system)
7759 Lisp_Object coding_system;
7760{
7761 Lisp_Object spec, attrs;
7762
7763 if (NILP (coding_system))
7764 return (Qno_conversion);
7765 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7766 attrs = AREF (spec, 0);
7767 return CODING_ATTR_BASE_NAME (attrs);
7768}
7769
7770DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7771 1, 1, 0,
7772 doc: "Return the property list of CODING-SYSTEM.")
7773 (coding_system)
7774 Lisp_Object coding_system;
7775{
7776 Lisp_Object spec, attrs;
7777
7778 if (NILP (coding_system))
7779 coding_system = Qno_conversion;
7780 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7781 attrs = AREF (spec, 0);
7782 return CODING_ATTR_PLIST (attrs);
7783}
7784
7785
7786DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7787 1, 1, 0,
7788 doc: /* Return the list of aliases of CODING-SYSTEM.
7789A base coding system is what made by `define-coding-system'.
7790Any alias nor subsidiary coding systems are not base coding system. */)
7791 (coding_system)
7792 Lisp_Object coding_system;
7793{
7794 Lisp_Object spec;
7795
7796 if (NILP (coding_system))
7797 coding_system = Qno_conversion;
7798 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7799 return AREF (spec, 2);
7800}
7801
7802DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7803 Scoding_system_eol_type, 1, 1, 0,
7804 doc: /* Return eol-type of CODING-SYSTEM.
7805An eol-type is integer 0, 1, 2, or a vector of coding systems.
7806
7807Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7808and CR respectively.
7809
7810A vector value indicates that a format of end-of-line should be
7811detected automatically. Nth element of the vector is the subsidiary
7812coding system whose eol-type is N. */)
7813 (coding_system)
7814 Lisp_Object coding_system;
7815{
7816 Lisp_Object spec, eol_type;
7817 int n;
7818
7819 if (NILP (coding_system))
7820 coding_system = Qno_conversion;
7821 if (! CODING_SYSTEM_P (coding_system))
7822 return Qnil;
7823 spec = CODING_SYSTEM_SPEC (coding_system);
7824 eol_type = AREF (spec, 2);
7825 if (VECTORP (eol_type))
7826 return Fcopy_sequence (eol_type);
7827 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7828 return make_number (n);
7829}
7830
7831#endif /* emacs */
7832
7833\f
7834/*** 9. Post-amble ***/
7835
7836void
7837init_coding_once ()
7838{
7839 int i;
7840
7841 for (i = 0; i < coding_category_max; i++)
7842 {
7843 coding_categories[i].id = -1;
7844 coding_priorities[i] = i;
7845 }
7846
7847 /* ISO2022 specific initialize routine. */
7848 for (i = 0; i < 0x20; i++)
7849 iso_code_class[i] = ISO_control_0;
7850 for (i = 0x21; i < 0x7F; i++)
7851 iso_code_class[i] = ISO_graphic_plane_0;
7852 for (i = 0x80; i < 0xA0; i++)
7853 iso_code_class[i] = ISO_control_1;
7854 for (i = 0xA1; i < 0xFF; i++)
7855 iso_code_class[i] = ISO_graphic_plane_1;
7856 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7857 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7858 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7859 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7860 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7861 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7862 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7863 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7864 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7865 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7866
7867 inhibit_pre_post_conversion = 0;
7868
7869 for (i = 0; i < 256; i++)
7870 {
7871 emacs_mule_bytes[i] = 1;
7872 }
7873 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
7874 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
7875 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
7876 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
7877}
7878
7879#ifdef emacs
7880
7881void
7882syms_of_coding ()
7883{
7884 staticpro (&Vcoding_system_hash_table);
7885 Vcoding_system_hash_table = Fmakehash (Qeq);
7886
7887 staticpro (&Vsjis_coding_system);
7888 Vsjis_coding_system = Qnil;
7889
7890 staticpro (&Vbig5_coding_system);
7891 Vbig5_coding_system = Qnil;
7892
7893 staticpro (&Vcode_conversion_work_buf_list);
7894 Vcode_conversion_work_buf_list = Qnil;
7895
7896 staticpro (&Vcode_conversion_reused_work_buf);
7897 Vcode_conversion_reused_work_buf = Qnil;
7898
7899 DEFSYM (Qcharset, "charset");
7900 DEFSYM (Qtarget_idx, "target-idx");
7901 DEFSYM (Qcoding_system_history, "coding-system-history");
7902 Fset (Qcoding_system_history, Qnil);
7903
7904 /* Target FILENAME is the first argument. */
7905 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7906 /* Target FILENAME is the third argument. */
7907 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7908
7909 DEFSYM (Qcall_process, "call-process");
7910 /* Target PROGRAM is the first argument. */
7911 Fput (Qcall_process, Qtarget_idx, make_number (0));
7912
7913 DEFSYM (Qcall_process_region, "call-process-region");
7914 /* Target PROGRAM is the third argument. */
7915 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7916
7917 DEFSYM (Qstart_process, "start-process");
7918 /* Target PROGRAM is the third argument. */
7919 Fput (Qstart_process, Qtarget_idx, make_number (2));
7920
7921 DEFSYM (Qopen_network_stream, "open-network-stream");
7922 /* Target SERVICE is the fourth argument. */
7923 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7924
7925 DEFSYM (Qcoding_system, "coding-system");
7926 DEFSYM (Qcoding_aliases, "coding-aliases");
7927
7928 DEFSYM (Qeol_type, "eol-type");
7929 DEFSYM (Qunix, "unix");
7930 DEFSYM (Qdos, "dos");
7931 DEFSYM (Qmac, "mac");
7932
7933 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7934 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7935 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7936 DEFSYM (Qdefault_char, "default-char");
7937 DEFSYM (Qundecided, "undecided");
7938 DEFSYM (Qno_conversion, "no-conversion");
7939 DEFSYM (Qraw_text, "raw-text");
7940
7941 DEFSYM (Qiso_2022, "iso-2022");
7942
7943 DEFSYM (Qutf_8, "utf-8");
7944
7945 DEFSYM (Qutf_16, "utf-16");
7946 DEFSYM (Qutf_16_be, "utf-16-be");
7947 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7948 DEFSYM (Qutf_16_le, "utf-16-l3");
7949 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7950 DEFSYM (Qsignature, "signature");
7951 DEFSYM (Qendian, "endian");
7952 DEFSYM (Qbig, "big");
7953 DEFSYM (Qlittle, "little");
7954
7955 DEFSYM (Qshift_jis, "shift-jis");
7956 DEFSYM (Qbig5, "big5");
7957
7958 DEFSYM (Qcoding_system_p, "coding-system-p");
7959
7960 DEFSYM (Qcoding_system_error, "coding-system-error");
7961 Fput (Qcoding_system_error, Qerror_conditions,
7962 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7963 Fput (Qcoding_system_error, Qerror_message,
7964 build_string ("Invalid coding system"));
7965
7966 /* Intern this now in case it isn't already done.
7967 Setting this variable twice is harmless.
7968 But don't staticpro it here--that is done in alloc.c. */
7969 Qchar_table_extra_slots = intern ("char-table-extra-slots");
7970
7971 DEFSYM (Qtranslation_table, "translation-table");
7972 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7973 DEFSYM (Qtranslation_table_id, "translation-table-id");
7974 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7975 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
7976
7977 DEFSYM (Qvalid_codes, "valid-codes");
7978
7979 DEFSYM (Qemacs_mule, "emacs-mule");
7980
7981 Vcoding_category_table
7982 = Fmake_vector (make_number (coding_category_max), Qnil);
7983 staticpro (&Vcoding_category_table);
7984 /* Followings are target of code detection. */
7985 ASET (Vcoding_category_table, coding_category_iso_7,
7986 intern ("coding-category-iso-7"));
7987 ASET (Vcoding_category_table, coding_category_iso_7_tight,
7988 intern ("coding-category-iso-7-tight"));
7989 ASET (Vcoding_category_table, coding_category_iso_8_1,
7990 intern ("coding-category-iso-8-1"));
7991 ASET (Vcoding_category_table, coding_category_iso_8_2,
7992 intern ("coding-category-iso-8-2"));
7993 ASET (Vcoding_category_table, coding_category_iso_7_else,
7994 intern ("coding-category-iso-7-else"));
7995 ASET (Vcoding_category_table, coding_category_iso_8_else,
7996 intern ("coding-category-iso-8-else"));
7997 ASET (Vcoding_category_table, coding_category_utf_8,
7998 intern ("coding-category-utf-8"));
7999 ASET (Vcoding_category_table, coding_category_utf_16_be,
8000 intern ("coding-category-utf-16-be"));
8001 ASET (Vcoding_category_table, coding_category_utf_16_le,
8002 intern ("coding-category-utf-16-le"));
8003 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8004 intern ("coding-category-utf-16-be-nosig"));
8005 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8006 intern ("coding-category-utf-16-le-nosig"));
8007 ASET (Vcoding_category_table, coding_category_charset,
8008 intern ("coding-category-charset"));
8009 ASET (Vcoding_category_table, coding_category_sjis,
8010 intern ("coding-category-sjis"));
8011 ASET (Vcoding_category_table, coding_category_big5,
8012 intern ("coding-category-big5"));
8013 ASET (Vcoding_category_table, coding_category_ccl,
8014 intern ("coding-category-ccl"));
8015 ASET (Vcoding_category_table, coding_category_emacs_mule,
8016 intern ("coding-category-emacs-mule"));
8017 /* Followings are NOT target of code detection. */
8018 ASET (Vcoding_category_table, coding_category_raw_text,
8019 intern ("coding-category-raw-text"));
8020 ASET (Vcoding_category_table, coding_category_undecided,
8021 intern ("coding-category-undecided"));
8022
8023 defsubr (&Scoding_system_p);
8024 defsubr (&Sread_coding_system);
8025 defsubr (&Sread_non_nil_coding_system);
8026 defsubr (&Scheck_coding_system);
8027 defsubr (&Sdetect_coding_region);
8028 defsubr (&Sdetect_coding_string);
8029 defsubr (&Sfind_coding_systems_region_internal);
8030 defsubr (&Scheck_coding_systems_region);
8031 defsubr (&Sdecode_coding_region);
8032 defsubr (&Sencode_coding_region);
8033 defsubr (&Sdecode_coding_string);
8034 defsubr (&Sencode_coding_string);
8035 defsubr (&Sdecode_sjis_char);
8036 defsubr (&Sencode_sjis_char);
8037 defsubr (&Sdecode_big5_char);
8038 defsubr (&Sencode_big5_char);
8039 defsubr (&Sset_terminal_coding_system_internal);
8040 defsubr (&Sset_safe_terminal_coding_system_internal);
8041 defsubr (&Sterminal_coding_system);
8042 defsubr (&Sset_keyboard_coding_system_internal);
8043 defsubr (&Skeyboard_coding_system);
8044 defsubr (&Sfind_operation_coding_system);
8045 defsubr (&Sset_coding_system_priority);
8046 defsubr (&Sdefine_coding_system_internal);
8047 defsubr (&Sdefine_coding_system_alias);
8048 defsubr (&Scoding_system_base);
8049 defsubr (&Scoding_system_plist);
8050 defsubr (&Scoding_system_aliases);
8051 defsubr (&Scoding_system_eol_type);
8052 defsubr (&Scoding_system_priority_list);
8053
8054 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8055 doc: /* List of coding systems.
8056
8057Do not alter the value of this variable manually. This variable should be
8058updated by the functions `define-coding-system' and
8059`define-coding-system-alias'. */);
8060 Vcoding_system_list = Qnil;
8061
8062 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8063 doc: /* Alist of coding system names.
8064Each element is one element list of coding system name.
8065This variable is given to `completing-read' as TABLE argument.
8066
8067Do not alter the value of this variable manually. This variable should be
8068updated by the functions `make-coding-system' and
8069`define-coding-system-alias'. */);
8070 Vcoding_system_alist = Qnil;
8071
8072 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8073 doc: /* List of coding-categories (symbols) ordered by priority.
8074
8075On detecting a coding system, Emacs tries code detection algorithms
8076associated with each coding-category one by one in this order. When
8077one algorithm agrees with a byte sequence of source text, the coding
8078system bound to the corresponding coding-category is selected. */);
8079 {
8080 int i;
8081
8082 Vcoding_category_list = Qnil;
8083 for (i = coding_category_max - 1; i >= 0; i--)
8084 Vcoding_category_list
8085 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8086 Vcoding_category_list);
8087 }
8088
8089 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8090 doc: /* Specify the coding system for read operations.
8091It is useful to bind this variable with `let', but do not set it globally.
8092If the value is a coding system, it is used for decoding on read operation.
8093If not, an appropriate element is used from one of the coding system alists:
8094There are three such tables, `file-coding-system-alist',
8095`process-coding-system-alist', and `network-coding-system-alist'. */);
8096 Vcoding_system_for_read = Qnil;
8097
8098 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8099 doc: /* Specify the coding system for write operations.
8100Programs bind this variable with `let', but you should not set it globally.
8101If the value is a coding system, it is used for encoding of output,
8102when writing it to a file and when sending it to a file or subprocess.
8103
8104If this does not specify a coding system, an appropriate element
8105is used from one of the coding system alists:
8106There are three such tables, `file-coding-system-alist',
8107`process-coding-system-alist', and `network-coding-system-alist'.
8108For output to files, if the above procedure does not specify a coding system,
8109the value of `buffer-file-coding-system' is used. */);
8110 Vcoding_system_for_write = Qnil;
8111
8112 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8113 doc: /*
8114Coding system used in the latest file or process I/O. */);
8115 Vlast_coding_system_used = Qnil;
8116
8117 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8118 doc: /*
8119*Non-nil means always inhibit code conversion of end-of-line format.
8120See info node `Coding Systems' and info node `Text and Binary' concerning
8121such conversion. */);
8122 inhibit_eol_conversion = 0;
8123
8124 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8125 doc: /*
8126Non-nil means process buffer inherits coding system of process output.
8127Bind it to t if the process output is to be treated as if it were a file
8128read from some filesystem. */);
8129 inherit_process_coding_system = 0;
8130
8131 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8132 doc: /*
8133Alist to decide a coding system to use for a file I/O operation.
8134The format is ((PATTERN . VAL) ...),
8135where PATTERN is a regular expression matching a file name,
8136VAL is a coding system, a cons of coding systems, or a function symbol.
8137If VAL is a coding system, it is used for both decoding and encoding
8138the file contents.
8139If VAL is a cons of coding systems, the car part is used for decoding,
8140and the cdr part is used for encoding.
8141If VAL is a function symbol, the function must return a coding system
8142or a cons of coding systems which are used as above. The function gets
8143the arguments with which `find-operation-coding-systems' was called.
8144
8145See also the function `find-operation-coding-system'
8146and the variable `auto-coding-alist'. */);
8147 Vfile_coding_system_alist = Qnil;
8148
8149 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8150 doc: /*
8151Alist to decide a coding system to use for a process I/O operation.
8152The format is ((PATTERN . VAL) ...),
8153where PATTERN is a regular expression matching a program name,
8154VAL is a coding system, a cons of coding systems, or a function symbol.
8155If VAL is a coding system, it is used for both decoding what received
8156from the program and encoding what sent to the program.
8157If VAL is a cons of coding systems, the car part is used for decoding,
8158and the cdr part is used for encoding.
8159If VAL is a function symbol, the function must return a coding system
8160or a cons of coding systems which are used as above.
8161
8162See also the function `find-operation-coding-system'. */);
8163 Vprocess_coding_system_alist = Qnil;
8164
8165 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8166 doc: /*
8167Alist to decide a coding system to use for a network I/O operation.
8168The format is ((PATTERN . VAL) ...),
8169where PATTERN is a regular expression matching a network service name
8170or is a port number to connect to,
8171VAL is a coding system, a cons of coding systems, or a function symbol.
8172If VAL is a coding system, it is used for both decoding what received
8173from the network stream and encoding what sent to the network stream.
8174If VAL is a cons of coding systems, the car part is used for decoding,
8175and the cdr part is used for encoding.
8176If VAL is a function symbol, the function must return a coding system
8177or a cons of coding systems which are used as above.
8178
8179See also the function `find-operation-coding-system'. */);
8180 Vnetwork_coding_system_alist = Qnil;
8181
8182 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8183 doc: /* Coding system to use with system messages.
8184Also used for decoding keyboard input on X Window system. */);
8185 Vlocale_coding_system = Qnil;
8186
8187 /* The eol mnemonics are reset in startup.el system-dependently. */
8188 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8189 doc: /*
8190*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8191 eol_mnemonic_unix = build_string (":");
8192
8193 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8194 doc: /*
8195*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8196 eol_mnemonic_dos = build_string ("\\");
8197
8198 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8199 doc: /*
8200*String displayed in mode line for MAC-like (CR) end-of-line format. */);
8201 eol_mnemonic_mac = build_string ("/");
8202
8203 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8204 doc: /*
8205*String displayed in mode line when end-of-line format is not yet determined. */);
8206 eol_mnemonic_undecided = build_string (":");
8207
8208 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8209 doc: /*
8210*Non-nil enables character translation while encoding and decoding. */);
8211 Venable_character_translation = Qt;
8212
8213 DEFVAR_LISP ("standard-translation-table-for-decode",
8214 &Vstandard_translation_table_for_decode,
8215 doc: /* Table for translating characters while decoding. */);
8216 Vstandard_translation_table_for_decode = Qnil;
8217
8218 DEFVAR_LISP ("standard-translation-table-for-encode",
8219 &Vstandard_translation_table_for_encode,
8220 doc: /* Table for translating characters while encoding. */);
8221 Vstandard_translation_table_for_encode = Qnil;
8222
8223 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8224 doc: /* Alist of charsets vs revision numbers.
8225While encoding, if a charset (car part of an element) is found,
8226designate it with the escape sequence identifying revision (cdr part
8227of the element). */);
8228 Vcharset_revision_table = Qnil;
8229
8230 DEFVAR_LISP ("default-process-coding-system",
8231 &Vdefault_process_coding_system,
8232 doc: /* Cons of coding systems used for process I/O by default.
8233The car part is used for decoding a process output,
8234the cdr part is used for encoding a text to be sent to a process. */);
8235 Vdefault_process_coding_system = Qnil;
8236
8237 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8238 doc: /*
8239Table of extra Latin codes in the range 128..159 (inclusive).
8240This is a vector of length 256.
8241If Nth element is non-nil, the existence of code N in a file
8242\(or output of subprocess) doesn't prevent it to be detected as
8243a coding system of ISO 2022 variant which has a flag
8244`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8245or reading output of a subprocess.
8246Only 128th through 159th elements has a meaning. */);
8247 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8248
8249 DEFVAR_LISP ("select-safe-coding-system-function",
8250 &Vselect_safe_coding_system_function,
8251 doc: /*
8252Function to call to select safe coding system for encoding a text.
8253
8254If set, this function is called to force a user to select a proper
8255coding system which can encode the text in the case that a default
8256coding system used in each operation can't encode the text.
8257
8258The default value is `select-safe-coding-system' (which see). */);
8259 Vselect_safe_coding_system_function = Qnil;
8260
8261 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8262 &inhibit_iso_escape_detection,
8263 doc: /*
8264If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8265
8266By default, on reading a file, Emacs tries to detect how the text is
8267encoded. This code detection is sensitive to escape sequences. If
8268the sequence is valid as ISO2022, the code is determined as one of
8269the ISO2022 encodings, and the file is decoded by the corresponding
8270coding system (e.g. `iso-2022-7bit').
8271
8272However, there may be a case that you want to read escape sequences in
8273a file as is. In such a case, you can set this variable to non-nil.
8274Then, as the code detection ignores any escape sequences, no file is
8275detected as encoded in some ISO2022 encoding. The result is that all
8276escape sequences become visible in a buffer.
8277
8278The default value is nil, and it is strongly recommended not to change
8279it. That is because many Emacs Lisp source files that contain
8280non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8281in Emacs's distribution, and they won't be decoded correctly on
8282reading if you suppress escape sequence detection.
8283
8284The other way to read escape sequences in a file without decoding is
8285to explicitly specify some coding system that doesn't use ISO2022's
8286escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8287 inhibit_iso_escape_detection = 0;
8288
8289 {
8290 Lisp_Object args[coding_arg_max];
8291 Lisp_Object plist[14];
8292 int i;
8293
8294 for (i = 0; i < coding_arg_max; i++)
8295 args[i] = Qnil;
8296
8297 plist[0] = intern (":name");
8298 plist[1] = args[coding_arg_name] = Qno_conversion;
8299 plist[2] = intern (":mnemonic");
8300 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8301 plist[4] = intern (":coding-type");
8302 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8303 plist[6] = intern (":ascii-compatible-p");
8304 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8305 plist[8] = intern (":default-char");
8306 plist[9] = args[coding_arg_default_char] = make_number (0);
8307 plist[10] = intern (":docstring");
8308 plist[11] = build_string ("Do no conversion.\n\
8309\n\
8310When you visit a file with this coding, the file is read into a\n\
8311unibyte buffer as is, thus each byte of a file is treated as a\n\
8312character.");
8313 plist[12] = intern (":eol-type");
8314 plist[13] = args[coding_arg_eol_type] = Qunix;
8315 args[coding_arg_plist] = Flist (14, plist);
8316 Fdefine_coding_system_internal (coding_arg_max, args);
8317 }
8318
8319 setup_coding_system (Qno_conversion, &keyboard_coding);
8320 setup_coding_system (Qno_conversion, &terminal_coding);
8321 setup_coding_system (Qno_conversion, &safe_terminal_coding);
8322}
8323
8324char *
8325emacs_strerror (error_number)
8326 int error_number;
8327{
8328 char *str;
8329
8330 synchronize_system_messages_locale ();
8331 str = strerror (error_number);
8332
8333 if (! NILP (Vlocale_coding_system))
8334 {
8335 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8336 Vlocale_coding_system,
8337 0);
8338 str = (char *) XSTRING (dec)->data;
8339 }
8340
8341 return str;
8342}
8343
8344#endif /* emacs */