(define-charset): New args :min-code and :max-code.
[bpt/emacs.git] / src / coding.c
... / ...
CommitLineData
1/* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
25
26/*** TABLE OF CONTENTS ***
27
28 0. General comments
29 1. Preamble
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
41
42*/
43
44/*** 0. General comments ***
45
46
47CODING SYSTEM
48
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcharset_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
75 character set.
76
77 o Old Emacs internal format (emacs-mule)
78
79 The coding system adopted by old versions of Emacs (20 and 21).
80
81 o ISO2022-base coding system
82
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
92 section 8.
93
94 o BIG5
95
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
101
102 o CCL
103
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
108
109 o Raw-text
110
111 A coding system for a text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
114
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
118
119
120END-OF-LINE FORMAT
121
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
127
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
131
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
138
139*/
140
141/* COMMON MACROS */
142
143
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
157#if 0
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
162{
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
197
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
202
203 Below is the template of these functions. */
204
205#if 0
206static void
207decode_coding_XXXX (coding)
208 struct coding_system *coding;
209{
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
252
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
257
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
261
262 Below is a template of these functions. */
263#if 0
264static void
265encode_coding_XXX (coding)
266 struct coding_system *coding;
267{
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
285}
286#endif
287
288\f
289/*** 1. Preamble ***/
290
291#include <config.h>
292#include <stdio.h>
293
294#include "lisp.h"
295#include "buffer.h"
296#include "character.h"
297#include "charset.h"
298#include "ccl.h"
299#include "composite.h"
300#include "coding.h"
301#include "window.h"
302
303Lisp_Object Vcoding_system_hash_table;
304
305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
306Lisp_Object Qunix, Qdos;
307extern Lisp_Object Qmac; /* frame.c */
308Lisp_Object Qbuffer_file_coding_system;
309Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
310Lisp_Object Qdefault_char;
311Lisp_Object Qno_conversion, Qundecided;
312Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
313Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
314Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
315Lisp_Object Qcoding_system_history;
316Lisp_Object Qvalid_codes;
317
318extern Lisp_Object Qinsert_file_contents, Qwrite_region;
319Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
320Lisp_Object Qstart_process, Qopen_network_stream;
321Lisp_Object Qtarget_idx;
322
323Lisp_Object Vselect_safe_coding_system_function;
324
325/* Mnemonic string for each format of end-of-line. */
326Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
327/* Mnemonic string to indicate format of end-of-line is not yet
328 decided. */
329Lisp_Object eol_mnemonic_undecided;
330
331#ifdef emacs
332
333Lisp_Object Vcoding_system_list, Vcoding_system_alist;
334
335Lisp_Object Qcoding_system_p, Qcoding_system_error;
336
337/* Coding system emacs-mule and raw-text are for converting only
338 end-of-line format. */
339Lisp_Object Qemacs_mule, Qraw_text;
340
341/* Coding-systems are handed between Emacs Lisp programs and C internal
342 routines by the following three variables. */
343/* Coding-system for reading files and receiving data from process. */
344Lisp_Object Vcoding_system_for_read;
345/* Coding-system for writing files and sending data to process. */
346Lisp_Object Vcoding_system_for_write;
347/* Coding-system actually used in the latest I/O. */
348Lisp_Object Vlast_coding_system_used;
349
350/* A vector of length 256 which contains information about special
351 Latin codes (especially for dealing with Microsoft codes). */
352Lisp_Object Vlatin_extra_code_table;
353
354/* Flag to inhibit code conversion of end-of-line format. */
355int inhibit_eol_conversion;
356
357/* Flag to inhibit ISO2022 escape sequence detection. */
358int inhibit_iso_escape_detection;
359
360/* Flag to make buffer-file-coding-system inherit from process-coding. */
361int inherit_process_coding_system;
362
363/* Coding system to be used to encode text for terminal display. */
364struct coding_system terminal_coding;
365
366/* Coding system to be used to encode text for terminal display when
367 terminal coding system is nil. */
368struct coding_system safe_terminal_coding;
369
370/* Coding system of what is sent from terminal keyboard. */
371struct coding_system keyboard_coding;
372
373Lisp_Object Vfile_coding_system_alist;
374Lisp_Object Vprocess_coding_system_alist;
375Lisp_Object Vnetwork_coding_system_alist;
376
377Lisp_Object Vlocale_coding_system;
378
379#endif /* emacs */
380
381/* Flag to tell if we look up translation table on character code
382 conversion. */
383Lisp_Object Venable_character_translation;
384/* Standard translation table to look up on decoding (reading). */
385Lisp_Object Vstandard_translation_table_for_decode;
386/* Standard translation table to look up on encoding (writing). */
387Lisp_Object Vstandard_translation_table_for_encode;
388
389Lisp_Object Qtranslation_table;
390Lisp_Object Qtranslation_table_id;
391Lisp_Object Qtranslation_table_for_decode;
392Lisp_Object Qtranslation_table_for_encode;
393
394/* Alist of charsets vs revision number. */
395static Lisp_Object Vcharset_revision_table;
396
397/* Default coding systems used for process I/O. */
398Lisp_Object Vdefault_process_coding_system;
399
400/* Global flag to tell that we can't call post-read-conversion and
401 pre-write-conversion functions. Usually the value is zero, but it
402 is set to 1 temporarily while such functions are running. This is
403 to avoid infinite recursive call. */
404static int inhibit_pre_post_conversion;
405
406/* Two special coding systems. */
407Lisp_Object Vsjis_coding_system;
408Lisp_Object Vbig5_coding_system;
409
410
411static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
412static void decode_coding_utf_8 P_ ((struct coding_system *));
413static int encode_coding_utf_8 P_ ((struct coding_system *));
414
415static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
419static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
423static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
424static void decode_coding_emacs_mule P_ ((struct coding_system *));
425static int encode_coding_emacs_mule P_ ((struct coding_system *));
426
427static int detect_coding_sjis P_ ((struct coding_system *, int *));
428static void decode_coding_sjis P_ ((struct coding_system *));
429static int encode_coding_sjis P_ ((struct coding_system *));
430
431static int detect_coding_big5 P_ ((struct coding_system *, int *));
432static void decode_coding_big5 P_ ((struct coding_system *));
433static int encode_coding_big5 P_ ((struct coding_system *));
434
435static int detect_coding_ccl P_ ((struct coding_system *, int *));
436static void decode_coding_ccl P_ ((struct coding_system *));
437static int encode_coding_ccl P_ ((struct coding_system *));
438
439static void decode_coding_raw_text P_ ((struct coding_system *));
440static int encode_coding_raw_text P_ ((struct coding_system *));
441
442
443/* ISO2022 section */
444
445#define CODING_ISO_INITIAL(coding, reg) \
446 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
447 coding_attr_iso_initial), \
448 reg)))
449
450
451#define CODING_ISO_REQUEST(coding, charset_id) \
452 ((charset_id <= (coding)->max_charset_id \
453 ? (coding)->safe_charsets[charset_id] \
454 : -1))
455
456
457#define CODING_ISO_FLAGS(coding) \
458 ((coding)->spec.iso_2022.flags)
459#define CODING_ISO_DESIGNATION(coding, reg) \
460 ((coding)->spec.iso_2022.current_designation[reg])
461#define CODING_ISO_INVOCATION(coding, plane) \
462 ((coding)->spec.iso_2022.current_invocation[plane])
463#define CODING_ISO_SINGLE_SHIFTING(coding) \
464 ((coding)->spec.iso_2022.single_shifting)
465#define CODING_ISO_BOL(coding) \
466 ((coding)->spec.iso_2022.bol)
467#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
468 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
469
470/* Control characters of ISO2022. */
471 /* code */ /* function */
472#define ISO_CODE_LF 0x0A /* line-feed */
473#define ISO_CODE_CR 0x0D /* carriage-return */
474#define ISO_CODE_SO 0x0E /* shift-out */
475#define ISO_CODE_SI 0x0F /* shift-in */
476#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
477#define ISO_CODE_ESC 0x1B /* escape */
478#define ISO_CODE_SS2 0x8E /* single-shift-2 */
479#define ISO_CODE_SS3 0x8F /* single-shift-3 */
480#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
481
482/* All code (1-byte) of ISO2022 is classified into one of the
483 followings. */
484enum iso_code_class_type
485 {
486 ISO_control_0, /* Control codes in the range
487 0x00..0x1F and 0x7F, except for the
488 following 5 codes. */
489 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
490 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
491 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
492 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
493 ISO_escape, /* ISO_CODE_SO (0x1B) */
494 ISO_control_1, /* Control codes in the range
495 0x80..0x9F, except for the
496 following 3 codes. */
497 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
498 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
499 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
500 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
501 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
502 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
503 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
504 };
505
506/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
507 `iso-flags' attribute of an iso2022 coding system. */
508
509/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
510 instead of the correct short-form sequence (e.g. ESC $ A). */
511#define CODING_ISO_FLAG_LONG_FORM 0x0001
512
513/* If set, reset graphic planes and registers at end-of-line to the
514 initial state. */
515#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
516
517/* If set, reset graphic planes and registers before any control
518 characters to the initial state. */
519#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
520
521/* If set, encode by 7-bit environment. */
522#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
523
524/* If set, use locking-shift function. */
525#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
526
527/* If set, use single-shift function. Overwrite
528 CODING_ISO_FLAG_LOCKING_SHIFT. */
529#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
530
531/* If set, use designation escape sequence. */
532#define CODING_ISO_FLAG_DESIGNATION 0x0040
533
534/* If set, produce revision number sequence. */
535#define CODING_ISO_FLAG_REVISION 0x0080
536
537/* If set, produce ISO6429's direction specifying sequence. */
538#define CODING_ISO_FLAG_DIRECTION 0x0100
539
540/* If set, assume designation states are reset at beginning of line on
541 output. */
542#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
543
544/* If set, designation sequence should be placed at beginning of line
545 on output. */
546#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
547
548/* If set, do not encode unsafe charactes on output. */
549#define CODING_ISO_FLAG_SAFE 0x0800
550
551/* If set, extra latin codes (128..159) are accepted as a valid code
552 on input. */
553#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
554
555#define CODING_ISO_FLAG_COMPOSITION 0x2000
556
557#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
558
559#define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
560
561/* A character to be produced on output if encoding of the original
562 character is prohibited by CODING_ISO_FLAG_SAFE. */
563#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
564
565
566/* UTF-16 section */
567#define CODING_UTF_16_BOM(coding) \
568 ((coding)->spec.utf_16.bom)
569
570#define CODING_UTF_16_ENDIAN(coding) \
571 ((coding)->spec.utf_16.endian)
572
573#define CODING_UTF_16_SURROGATE(coding) \
574 ((coding)->spec.utf_16.surrogate)
575
576
577/* CCL section */
578#define CODING_CCL_DECODER(coding) \
579 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
580#define CODING_CCL_ENCODER(coding) \
581 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
582#define CODING_CCL_VALIDS(coding) \
583 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
584 ->data)
585
586/* Index for each coding category in `coding_category_table' */
587
588enum coding_category
589 {
590 coding_category_iso_7,
591 coding_category_iso_7_tight,
592 coding_category_iso_8_1,
593 coding_category_iso_8_2,
594 coding_category_iso_7_else,
595 coding_category_iso_8_else,
596 coding_category_utf_8,
597 coding_category_utf_16_auto,
598 coding_category_utf_16_be,
599 coding_category_utf_16_le,
600 coding_category_utf_16_be_nosig,
601 coding_category_utf_16_le_nosig,
602 coding_category_charset,
603 coding_category_sjis,
604 coding_category_big5,
605 coding_category_ccl,
606 coding_category_emacs_mule,
607 /* All above are targets of code detection. */
608 coding_category_raw_text,
609 coding_category_undecided,
610 coding_category_max
611 };
612
613/* Definitions of flag bits used in detect_coding_XXXX. */
614#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
615#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
616#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
617#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
618#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
619#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
620#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
621#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
622#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
623#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
624#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
625#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
626#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
627#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
628#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
629#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
630
631/* This value is returned if detect_coding_mask () find nothing other
632 than ASCII characters. */
633#define CATEGORY_MASK_ANY \
634 (CATEGORY_MASK_ISO_7 \
635 | CATEGORY_MASK_ISO_7_TIGHT \
636 | CATEGORY_MASK_ISO_8_1 \
637 | CATEGORY_MASK_ISO_8_2 \
638 | CATEGORY_MASK_ISO_7_ELSE \
639 | CATEGORY_MASK_ISO_8_ELSE \
640 | CATEGORY_MASK_UTF_8 \
641 | CATEGORY_MASK_UTF_16_BE \
642 | CATEGORY_MASK_UTF_16_LE \
643 | CATEGORY_MASK_UTF_16_BE_NOSIG \
644 | CATEGORY_MASK_UTF_16_LE_NOSIG \
645 | CATEGORY_MASK_CHARSET \
646 | CATEGORY_MASK_SJIS \
647 | CATEGORY_MASK_BIG5 \
648 | CATEGORY_MASK_CCL \
649 | CATEGORY_MASK_EMACS_MULE)
650
651
652#define CATEGORY_MASK_ISO_7BIT \
653 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
654
655#define CATEGORY_MASK_ISO_8BIT \
656 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
657
658#define CATEGORY_MASK_ISO_ELSE \
659 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
660
661#define CATEGORY_MASK_ISO_ESCAPE \
662 (CATEGORY_MASK_ISO_7 \
663 | CATEGORY_MASK_ISO_7_TIGHT \
664 | CATEGORY_MASK_ISO_7_ELSE \
665 | CATEGORY_MASK_ISO_8_ELSE)
666
667#define CATEGORY_MASK_ISO \
668 ( CATEGORY_MASK_ISO_7BIT \
669 | CATEGORY_MASK_ISO_8BIT \
670 | CATEGORY_MASK_ISO_ELSE)
671
672#define CATEGORY_MASK_UTF_16 \
673 (CATEGORY_MASK_UTF_16_BE \
674 | CATEGORY_MASK_UTF_16_LE \
675 | CATEGORY_MASK_UTF_16_BE_NOSIG \
676 | CATEGORY_MASK_UTF_16_LE_NOSIG)
677
678
679/* List of symbols `coding-category-xxx' ordered by priority. This
680 variable is exposed to Emacs Lisp. */
681static Lisp_Object Vcoding_category_list;
682
683/* Table of coding categories (Lisp symbols). This variable is for
684 internal use oly. */
685static Lisp_Object Vcoding_category_table;
686
687/* Table of coding-categories ordered by priority. */
688static enum coding_category coding_priorities[coding_category_max];
689
690/* Nth element is a coding context for the coding system bound to the
691 Nth coding category. */
692static struct coding_system coding_categories[coding_category_max];
693
694static int detected_mask[coding_category_raw_text] =
695 { CATEGORY_MASK_ISO,
696 CATEGORY_MASK_ISO,
697 CATEGORY_MASK_ISO,
698 CATEGORY_MASK_ISO,
699 CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_UTF_8,
702 CATEGORY_MASK_UTF_16,
703 CATEGORY_MASK_UTF_16,
704 CATEGORY_MASK_UTF_16,
705 CATEGORY_MASK_UTF_16,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_CHARSET,
708 CATEGORY_MASK_SJIS,
709 CATEGORY_MASK_BIG5,
710 CATEGORY_MASK_CCL,
711 CATEGORY_MASK_EMACS_MULE
712 };
713
714/*** Commonly used macros and functions ***/
715
716#ifndef min
717#define min(a, b) ((a) < (b) ? (a) : (b))
718#endif
719#ifndef max
720#define max(a, b) ((a) > (b) ? (a) : (b))
721#endif
722
723#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
724 do { \
725 attrs = CODING_ID_ATTRS (coding->id); \
726 eol_type = CODING_ID_EOL_TYPE (coding->id); \
727 if (VECTORP (eol_type)) \
728 eol_type = Qunix; \
729 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
730 } while (0)
731
732
733/* Safely get one byte from the source text pointed by SRC which ends
734 at SRC_END, and set C to that byte. If there are not enough bytes
735 in the source, it jumps to `no_more_source'. The caller
736 should declare and set these variables appropriately in advance:
737 src, src_end, multibytep
738*/
739
740#define ONE_MORE_BYTE(c) \
741 do { \
742 if (src == src_end) \
743 { \
744 if (src_base < src) \
745 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
746 goto no_more_source; \
747 } \
748 c = *src++; \
749 if (multibytep && (c & 0x80)) \
750 { \
751 if ((c & 0xFE) != 0xC0) \
752 error ("Undecodable char found"); \
753 c = ((c & 1) << 6) | *src++; \
754 } \
755 consumed_chars++; \
756 } while (0)
757
758
759#define ONE_MORE_BYTE_NO_CHECK(c) \
760 do { \
761 c = *src++; \
762 if (multibytep && (c & 0x80)) \
763 { \
764 if ((c & 0xFE) != 0xC0) \
765 error ("Undecodable char found"); \
766 c = ((c & 1) << 6) | *src++; \
767 } \
768 consumed_chars++; \
769 } while (0)
770
771
772/* Store a byte C in the place pointed by DST and increment DST to the
773 next free point, and increment PRODUCED_CHARS. The caller should
774 assure that C is 0..127, and declare and set the variable `dst'
775 appropriately in advance.
776*/
777
778
779#define EMIT_ONE_ASCII_BYTE(c) \
780 do { \
781 produced_chars++; \
782 *dst++ = (c); \
783 } while (0)
784
785
786/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
787
788#define EMIT_TWO_ASCII_BYTES(c1, c2) \
789 do { \
790 produced_chars += 2; \
791 *dst++ = (c1), *dst++ = (c2); \
792 } while (0)
793
794
795/* Store a byte C in the place pointed by DST and increment DST to the
796 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
797 nonzero, store in an appropriate multibyte from. The caller should
798 declare and set the variables `dst' and `multibytep' appropriately
799 in advance. */
800
801#define EMIT_ONE_BYTE(c) \
802 do { \
803 produced_chars++; \
804 if (multibytep) \
805 { \
806 int ch = (c); \
807 if (ch >= 0x80) \
808 ch = BYTE8_TO_CHAR (ch); \
809 CHAR_STRING_ADVANCE (ch, dst); \
810 } \
811 else \
812 *dst++ = (c); \
813 } while (0)
814
815
816/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
817
818#define EMIT_TWO_BYTES(c1, c2) \
819 do { \
820 produced_chars += 2; \
821 if (multibytep) \
822 { \
823 int ch; \
824 \
825 ch = (c1); \
826 if (ch >= 0x80) \
827 ch = BYTE8_TO_CHAR (ch); \
828 CHAR_STRING_ADVANCE (ch, dst); \
829 ch = (c2); \
830 if (ch >= 0x80) \
831 ch = BYTE8_TO_CHAR (ch); \
832 CHAR_STRING_ADVANCE (ch, dst); \
833 } \
834 else \
835 { \
836 *dst++ = (c1); \
837 *dst++ = (c2); \
838 } \
839 } while (0)
840
841
842#define EMIT_THREE_BYTES(c1, c2, c3) \
843 do { \
844 EMIT_ONE_BYTE (c1); \
845 EMIT_TWO_BYTES (c2, c3); \
846 } while (0)
847
848
849#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
850 do { \
851 EMIT_TWO_BYTES (c1, c2); \
852 EMIT_TWO_BYTES (c3, c4); \
853 } while (0)
854
855
856#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
857 do { \
858 charset_map_loaded = 0; \
859 c = DECODE_CHAR (charset, code); \
860 if (charset_map_loaded) \
861 { \
862 unsigned char *orig = coding->source; \
863 EMACS_INT offset; \
864 \
865 coding_set_source (coding); \
866 offset = coding->source - orig; \
867 src += offset; \
868 src_base += offset; \
869 src_end += offset; \
870 } \
871 } while (0)
872
873
874#define ASSURE_DESTINATION(bytes) \
875 do { \
876 if (dst + (bytes) >= dst_end) \
877 { \
878 int more_bytes = charbuf_end - charbuf + (bytes); \
879 \
880 dst = alloc_destination (coding, more_bytes, dst); \
881 dst_end = coding->destination + coding->dst_bytes; \
882 } \
883 } while (0)
884
885
886
887static void
888coding_set_source (coding)
889 struct coding_system *coding;
890{
891 if (BUFFERP (coding->src_object))
892 {
893 if (coding->src_pos < 0)
894 coding->source = GAP_END_ADDR + coding->src_pos_byte;
895 else
896 {
897 struct buffer *buf = XBUFFER (coding->src_object);
898 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
899 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
900
901 coding->source = beg_addr + coding->src_pos_byte - 1;
902 if (coding->src_pos_byte >= gpt_byte)
903 coding->source += BUF_GAP_SIZE (buf);
904 }
905 }
906 else if (STRINGP (coding->src_object))
907 {
908 coding->source = (XSTRING (coding->src_object)->data
909 + coding->src_pos_byte);
910 }
911 else
912 /* Otherwise, the source is C string and is never relocated
913 automatically. Thus we don't have to update anything. */
914 ;
915}
916
917static void
918coding_set_destination (coding)
919 struct coding_system *coding;
920{
921 if (BUFFERP (coding->dst_object))
922 {
923 /* We are sure that coding->dst_pos_byte is before the gap of the
924 buffer. */
925 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
926 + coding->dst_pos_byte - 1);
927 if (coding->src_pos < 0)
928 coding->dst_bytes = (GAP_END_ADDR
929 - (coding->src_bytes - coding->consumed)
930 - coding->destination);
931 else
932 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
933 - coding->destination);
934 }
935 else
936 /* Otherwise, the destination is C string and is never relocated
937 automatically. Thus we don't have to update anything. */
938 ;
939}
940
941
942static void
943coding_alloc_by_realloc (coding, bytes)
944 struct coding_system *coding;
945 EMACS_INT bytes;
946{
947 coding->destination = (unsigned char *) xrealloc (coding->destination,
948 coding->dst_bytes + bytes);
949 coding->dst_bytes += bytes;
950}
951
952static void
953coding_alloc_by_making_gap (coding, bytes)
954 struct coding_system *coding;
955 EMACS_INT bytes;
956{
957 if (BUFFERP (coding->dst_object)
958 && EQ (coding->src_object, coding->dst_object))
959 {
960 EMACS_INT add = coding->src_bytes - coding->consumed;
961
962 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
963 make_gap (bytes);
964 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
965 }
966 else
967 {
968 Lisp_Object this_buffer;
969
970 this_buffer = Fcurrent_buffer ();
971 set_buffer_internal (XBUFFER (coding->dst_object));
972 make_gap (bytes);
973 set_buffer_internal (XBUFFER (this_buffer));
974 }
975}
976
977
978static unsigned char *
979alloc_destination (coding, nbytes, dst)
980 struct coding_system *coding;
981 int nbytes;
982 unsigned char *dst;
983{
984 EMACS_INT offset = dst - coding->destination;
985
986 if (BUFFERP (coding->dst_object))
987 coding_alloc_by_making_gap (coding, nbytes);
988 else
989 coding_alloc_by_realloc (coding, nbytes);
990 coding->result = CODING_RESULT_SUCCESS;
991 coding_set_destination (coding);
992 dst = coding->destination + offset;
993 return dst;
994}
995
996\f
997/*** 2. Emacs' internal format (emacs-utf-8) ***/
998
999
1000
1001\f
1002/*** 3. UTF-8 ***/
1003
1004/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1005 Check if a text is encoded in UTF-8. If it is, return
1006 CATEGORY_MASK_UTF_8, else return 0. */
1007
1008#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1009#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1010#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1011#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1012#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1013#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1014
1015static int
1016detect_coding_utf_8 (coding, mask)
1017 struct coding_system *coding;
1018 int *mask;
1019{
1020 unsigned char *src = coding->source, *src_base = src;
1021 unsigned char *src_end = coding->source + coding->src_bytes;
1022 int multibytep = coding->src_multibyte;
1023 int consumed_chars = 0;
1024 int found = 0;
1025
1026 /* A coding system of this category is always ASCII compatible. */
1027 src += coding->head_ascii;
1028
1029 while (1)
1030 {
1031 int c, c1, c2, c3, c4;
1032
1033 ONE_MORE_BYTE (c);
1034 if (UTF_8_1_OCTET_P (c))
1035 continue;
1036 ONE_MORE_BYTE (c1);
1037 if (! UTF_8_EXTRA_OCTET_P (c1))
1038 break;
1039 if (UTF_8_2_OCTET_LEADING_P (c))
1040 {
1041 found++;
1042 continue;
1043 }
1044 ONE_MORE_BYTE (c2);
1045 if (! UTF_8_EXTRA_OCTET_P (c2))
1046 break;
1047 if (UTF_8_3_OCTET_LEADING_P (c))
1048 {
1049 found++;
1050 continue;
1051 }
1052 ONE_MORE_BYTE (c3);
1053 if (! UTF_8_EXTRA_OCTET_P (c3))
1054 break;
1055 if (UTF_8_4_OCTET_LEADING_P (c))
1056 {
1057 found++;
1058 continue;
1059 }
1060 ONE_MORE_BYTE (c4);
1061 if (! UTF_8_EXTRA_OCTET_P (c4))
1062 break;
1063 if (UTF_8_5_OCTET_LEADING_P (c))
1064 {
1065 found++;
1066 continue;
1067 }
1068 break;
1069 }
1070 *mask &= ~CATEGORY_MASK_UTF_8;
1071 return 0;
1072
1073 no_more_source:
1074 if (! found)
1075 return 0;
1076 *mask &= CATEGORY_MASK_UTF_8;
1077 return 1;
1078}
1079
1080
1081/* Fixme: deal with surrogates? */
1082static void
1083decode_coding_utf_8 (coding)
1084 struct coding_system *coding;
1085{
1086 unsigned char *src = coding->source + coding->consumed;
1087 unsigned char *src_end = coding->source + coding->src_bytes;
1088 unsigned char *src_base;
1089 int *charbuf = coding->charbuf;
1090 int *charbuf_end = charbuf + coding->charbuf_size;
1091 int consumed_chars = 0, consumed_chars_base;
1092 int multibytep = coding->src_multibyte;
1093 Lisp_Object attr, eol_type, charset_list;
1094
1095 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1096
1097 while (1)
1098 {
1099 int c, c1, c2, c3, c4, c5;
1100
1101 src_base = src;
1102 consumed_chars_base = consumed_chars;
1103
1104 if (charbuf >= charbuf_end)
1105 break;
1106
1107 ONE_MORE_BYTE (c1);
1108 if (UTF_8_1_OCTET_P(c1))
1109 {
1110 c = c1;
1111 if (c == '\r')
1112 {
1113 if (EQ (eol_type, Qdos))
1114 {
1115 if (src == src_end)
1116 goto no_more_source;
1117 if (*src == '\n')
1118 ONE_MORE_BYTE (c);
1119 }
1120 else if (EQ (eol_type, Qmac))
1121 c = '\n';
1122 }
1123 }
1124 else
1125 {
1126 ONE_MORE_BYTE (c2);
1127 if (! UTF_8_EXTRA_OCTET_P (c2))
1128 goto invalid_code;
1129 if (UTF_8_2_OCTET_LEADING_P (c1))
1130 {
1131 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1132 /* Reject overlong sequences here and below. Encoders
1133 producing them are incorrect, they can be misleading,
1134 and they mess up read/write invariance. */
1135 if (c < 128)
1136 goto invalid_code;
1137 }
1138 else
1139 {
1140 ONE_MORE_BYTE (c3);
1141 if (! UTF_8_EXTRA_OCTET_P (c3))
1142 goto invalid_code;
1143 if (UTF_8_3_OCTET_LEADING_P (c1))
1144 {
1145 c = (((c1 & 0xF) << 12)
1146 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1147 if (c < 0x800)
1148 goto invalid_code;
1149 }
1150 else
1151 {
1152 ONE_MORE_BYTE (c4);
1153 if (! UTF_8_EXTRA_OCTET_P (c4))
1154 goto invalid_code;
1155 if (UTF_8_4_OCTET_LEADING_P (c1))
1156 {
1157 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1158 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1159 if (c < 0x10000)
1160 goto invalid_code;
1161 }
1162 else
1163 {
1164 ONE_MORE_BYTE (c5);
1165 if (! UTF_8_EXTRA_OCTET_P (c5))
1166 goto invalid_code;
1167 if (UTF_8_5_OCTET_LEADING_P (c1))
1168 {
1169 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1170 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1171 | (c5 & 0x3F));
1172 if ((c > MAX_CHAR) || (c < 0x200000))
1173 goto invalid_code;
1174 }
1175 else
1176 goto invalid_code;
1177 }
1178 }
1179 }
1180 }
1181
1182 *charbuf++ = c;
1183 continue;
1184
1185 invalid_code:
1186 src = src_base;
1187 consumed_chars = consumed_chars_base;
1188 ONE_MORE_BYTE (c);
1189 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1190 coding->errors++;
1191 }
1192
1193 no_more_source:
1194 coding->consumed_char += consumed_chars_base;
1195 coding->consumed = src_base - coding->source;
1196 coding->charbuf_used = charbuf - coding->charbuf;
1197}
1198
1199
1200static int
1201encode_coding_utf_8 (coding)
1202 struct coding_system *coding;
1203{
1204 int multibytep = coding->dst_multibyte;
1205 int *charbuf = coding->charbuf;
1206 int *charbuf_end = charbuf + coding->charbuf_used;
1207 unsigned char *dst = coding->destination + coding->produced;
1208 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1209 int produced_chars = 0;
1210 int c;
1211
1212 if (multibytep)
1213 {
1214 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1215
1216 while (charbuf < charbuf_end)
1217 {
1218 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1219
1220 ASSURE_DESTINATION (safe_room);
1221 c = *charbuf++;
1222 CHAR_STRING_ADVANCE (c, pend);
1223 for (p = str; p < pend; p++)
1224 EMIT_ONE_BYTE (*p);
1225 }
1226 }
1227 else
1228 {
1229 int safe_room = MAX_MULTIBYTE_LENGTH;
1230
1231 while (charbuf < charbuf_end)
1232 {
1233 ASSURE_DESTINATION (safe_room);
1234 c = *charbuf++;
1235 dst += CHAR_STRING (c, dst);
1236 produced_chars++;
1237 }
1238 }
1239 coding->result = CODING_RESULT_SUCCESS;
1240 coding->produced_char += produced_chars;
1241 coding->produced = dst - coding->destination;
1242 return 0;
1243}
1244
1245
1246/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1247 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1248 Little Endian (otherwise). If it is, return
1249 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1250 else return 0. */
1251
1252#define UTF_16_HIGH_SURROGATE_P(val) \
1253 (((val) & 0xFC00) == 0xD800)
1254
1255#define UTF_16_LOW_SURROGATE_P(val) \
1256 (((val) & 0xFC00) == 0xDC00)
1257
1258#define UTF_16_INVALID_P(val) \
1259 (((val) == 0xFFFE) \
1260 || ((val) == 0xFFFF) \
1261 || UTF_16_LOW_SURROGATE_P (val))
1262
1263
1264static int
1265detect_coding_utf_16 (coding, mask)
1266 struct coding_system *coding;
1267 int *mask;
1268{
1269 unsigned char *src = coding->source, *src_base = src;
1270 unsigned char *src_end = coding->source + coding->src_bytes;
1271 int multibytep = coding->src_multibyte;
1272 int consumed_chars = 0;
1273 int c1, c2;
1274
1275 ONE_MORE_BYTE (c1);
1276 ONE_MORE_BYTE (c2);
1277
1278 if ((c1 == 0xFF) && (c2 == 0xFE))
1279 {
1280 *mask &= CATEGORY_MASK_UTF_16_LE;
1281 return 1;
1282 }
1283 else if ((c1 == 0xFE) && (c2 == 0xFF))
1284 {
1285 *mask &= CATEGORY_MASK_UTF_16_BE;
1286 return 1;
1287 }
1288 no_more_source:
1289 return 0;
1290}
1291
1292static void
1293decode_coding_utf_16 (coding)
1294 struct coding_system *coding;
1295{
1296 unsigned char *src = coding->source + coding->consumed;
1297 unsigned char *src_end = coding->source + coding->src_bytes;
1298 unsigned char *src_base;
1299 int *charbuf = coding->charbuf;
1300 int *charbuf_end = charbuf + coding->charbuf_size;
1301 int consumed_chars = 0, consumed_chars_base;
1302 int multibytep = coding->src_multibyte;
1303 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1304 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1305 int surrogate = CODING_UTF_16_SURROGATE (coding);
1306 Lisp_Object attr, eol_type, charset_list;
1307
1308 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1309
1310 if (bom != utf_16_without_bom)
1311 {
1312 int c, c1, c2;
1313
1314 src_base = src;
1315 ONE_MORE_BYTE (c1);
1316 ONE_MORE_BYTE (c2);
1317 c = (c1 << 8) | c2;
1318 if (bom == utf_16_with_bom)
1319 {
1320 if (endian == utf_16_big_endian
1321 ? c != 0xFFFE : c != 0xFEFF)
1322 {
1323 /* We are sure that there's enouph room at CHARBUF. */
1324 *charbuf++ = c1;
1325 *charbuf++ = c2;
1326 coding->errors++;
1327 }
1328 }
1329 else
1330 {
1331 if (c == 0xFFFE)
1332 CODING_UTF_16_ENDIAN (coding)
1333 = endian = utf_16_big_endian;
1334 else if (c == 0xFEFF)
1335 CODING_UTF_16_ENDIAN (coding)
1336 = endian = utf_16_little_endian;
1337 else
1338 {
1339 CODING_UTF_16_ENDIAN (coding)
1340 = endian = utf_16_big_endian;
1341 src = src_base;
1342 }
1343 }
1344 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1345 }
1346
1347 while (1)
1348 {
1349 int c, c1, c2;
1350
1351 src_base = src;
1352 consumed_chars_base = consumed_chars;
1353
1354 if (charbuf + 2 >= charbuf_end)
1355 break;
1356
1357 ONE_MORE_BYTE (c1);
1358 ONE_MORE_BYTE (c2);
1359 c = (endian == utf_16_big_endian
1360 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1361 if (surrogate)
1362 {
1363 if (! UTF_16_LOW_SURROGATE_P (c))
1364 {
1365 if (endian == utf_16_big_endian)
1366 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1367 else
1368 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1369 *charbuf++ = c1;
1370 *charbuf++ = c2;
1371 coding->errors++;
1372 if (UTF_16_HIGH_SURROGATE_P (c))
1373 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1374 else
1375 *charbuf++ = c;
1376 }
1377 else
1378 {
1379 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1380 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1381 *charbuf++ = c;
1382 }
1383 }
1384 else
1385 {
1386 if (UTF_16_HIGH_SURROGATE_P (c))
1387 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1388 else
1389 *charbuf++ = c;
1390 }
1391 }
1392
1393 no_more_source:
1394 coding->consumed_char += consumed_chars_base;
1395 coding->consumed = src_base - coding->source;
1396 coding->charbuf_used = charbuf - coding->charbuf;
1397}
1398
1399static int
1400encode_coding_utf_16 (coding)
1401 struct coding_system *coding;
1402{
1403 int multibytep = coding->dst_multibyte;
1404 int *charbuf = coding->charbuf;
1405 int *charbuf_end = charbuf + coding->charbuf_used;
1406 unsigned char *dst = coding->destination + coding->produced;
1407 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1408 int safe_room = 8;
1409 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1410 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1411 int produced_chars = 0;
1412 Lisp_Object attrs, eol_type, charset_list;
1413 int c;
1414
1415 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1416
1417 if (bom == utf_16_with_bom)
1418 {
1419 ASSURE_DESTINATION (safe_room);
1420 if (big_endian)
1421 EMIT_TWO_BYTES (0xFF, 0xFE);
1422 else
1423 EMIT_TWO_BYTES (0xFE, 0xFF);
1424 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1425 }
1426
1427 while (charbuf < charbuf_end)
1428 {
1429 ASSURE_DESTINATION (safe_room);
1430 c = *charbuf++;
1431 if (c >= MAX_UNICODE_CHAR)
1432 c = coding->default_char;
1433
1434 if (c < 0x10000)
1435 {
1436 if (big_endian)
1437 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1438 else
1439 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1440 }
1441 else
1442 {
1443 int c1, c2;
1444
1445 c -= 0x10000;
1446 c1 = (c >> 10) + 0xD800;
1447 c2 = (c & 0x3FF) + 0xDC00;
1448 if (big_endian)
1449 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1450 else
1451 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1452 }
1453 }
1454 coding->result = CODING_RESULT_SUCCESS;
1455 coding->produced = dst - coding->destination;
1456 coding->produced_char += produced_chars;
1457 return 0;
1458}
1459
1460\f
1461/*** 6. Old Emacs' internal format (emacs-mule) ***/
1462
1463/* Emacs' internal format for representation of multiple character
1464 sets is a kind of multi-byte encoding, i.e. characters are
1465 represented by variable-length sequences of one-byte codes.
1466
1467 ASCII characters and control characters (e.g. `tab', `newline') are
1468 represented by one-byte sequences which are their ASCII codes, in
1469 the range 0x00 through 0x7F.
1470
1471 8-bit characters of the range 0x80..0x9F are represented by
1472 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1473 code + 0x20).
1474
1475 8-bit characters of the range 0xA0..0xFF are represented by
1476 one-byte sequences which are their 8-bit code.
1477
1478 The other characters are represented by a sequence of `base
1479 leading-code', optional `extended leading-code', and one or two
1480 `position-code's. The length of the sequence is determined by the
1481 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1482 whereas extended leading-code and position-code take the range 0xA0
1483 through 0xFF. See `charset.h' for more details about leading-code
1484 and position-code.
1485
1486 --- CODE RANGE of Emacs' internal format ---
1487 character set range
1488 ------------- -----
1489 ascii 0x00..0x7F
1490 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1491 eight-bit-graphic 0xA0..0xBF
1492 ELSE 0x81..0x9D + [0xA0..0xFF]+
1493 ---------------------------------------------
1494
1495 As this is the internal character representation, the format is
1496 usually not used externally (i.e. in a file or in a data sent to a
1497 process). But, it is possible to have a text externally in this
1498 format (i.e. by encoding by the coding system `emacs-mule').
1499
1500 In that case, a sequence of one-byte codes has a slightly different
1501 form.
1502
1503 At first, all characters in eight-bit-control are represented by
1504 one-byte sequences which are their 8-bit code.
1505
1506 Next, character composition data are represented by the byte
1507 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1508 where,
1509 METHOD is 0xF0 plus one of composition method (enum
1510 composition_method),
1511
1512 BYTES is 0xA0 plus a byte length of this composition data,
1513
1514 CHARS is 0x20 plus a number of characters composed by this
1515 data,
1516
1517 COMPONENTs are characters of multibye form or composition
1518 rules encoded by two-byte of ASCII codes.
1519
1520 In addition, for backward compatibility, the following formats are
1521 also recognized as composition data on decoding.
1522
1523 0x80 MSEQ ...
1524 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1525
1526 Here,
1527 MSEQ is a multibyte form but in these special format:
1528 ASCII: 0xA0 ASCII_CODE+0x80,
1529 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1530 RULE is a one byte code of the range 0xA0..0xF0 that
1531 represents a composition rule.
1532 */
1533
1534char emacs_mule_bytes[256];
1535
1536/* Leading-code followed by extended leading-code. */
1537#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1538#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1539#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1540#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1541
1542
1543int
1544emacs_mule_char (coding, src, nbytes, nchars)
1545 struct coding_system *coding;
1546 unsigned char *src;
1547 int *nbytes, *nchars;
1548{
1549 unsigned char *src_end = coding->source + coding->src_bytes;
1550 int multibytep = coding->src_multibyte;
1551 unsigned char *src_base = src;
1552 struct charset *charset;
1553 unsigned code;
1554 int c;
1555 int consumed_chars = 0;
1556
1557 ONE_MORE_BYTE (c);
1558 switch (emacs_mule_bytes[c])
1559 {
1560 case 2:
1561 if (! (charset = emacs_mule_charset[c]))
1562 goto invalid_code;
1563 ONE_MORE_BYTE (c);
1564 code = c & 0x7F;
1565 break;
1566
1567 case 3:
1568 if (c == LEADING_CODE_PRIVATE_11
1569 || c == LEADING_CODE_PRIVATE_12)
1570 {
1571 ONE_MORE_BYTE (c);
1572 if (! (charset = emacs_mule_charset[c]))
1573 goto invalid_code;
1574 ONE_MORE_BYTE (c);
1575 code = c & 0x7F;
1576 }
1577 else
1578 {
1579 if (! (charset = emacs_mule_charset[c]))
1580 goto invalid_code;
1581 ONE_MORE_BYTE (c);
1582 code = (c & 0x7F) << 8;
1583 ONE_MORE_BYTE (c);
1584 code |= c & 0x7F;
1585 }
1586 break;
1587
1588 case 4:
1589 ONE_MORE_BYTE (c);
1590 if (! (charset = emacs_mule_charset[c]))
1591 goto invalid_code;
1592 ONE_MORE_BYTE (c);
1593 code = (c & 0x7F) << 8;
1594 ONE_MORE_BYTE (c);
1595 code |= c & 0x7F;
1596 break;
1597
1598 case 1:
1599 code = c;
1600 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1601 : code < 0xA0 ? charset_8_bit_control
1602 : charset_8_bit_graphic);
1603 break;
1604
1605 default:
1606 abort ();
1607 }
1608 c = DECODE_CHAR (charset, code);
1609 if (c < 0)
1610 goto invalid_code;
1611 *nbytes = src - src_base;
1612 *nchars = consumed_chars;
1613 return c;
1614
1615 no_more_source:
1616 return -2;
1617
1618 invalid_code:
1619 return -1;
1620}
1621
1622
1623/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1624 Check if a text is encoded in `emacs-mule'. */
1625
1626static int
1627detect_coding_emacs_mule (coding, mask)
1628 struct coding_system *coding;
1629 int *mask;
1630{
1631 unsigned char *src = coding->source, *src_base = src;
1632 unsigned char *src_end = coding->source + coding->src_bytes;
1633 int multibytep = coding->src_multibyte;
1634 int consumed_chars = 0;
1635 int c;
1636 int found = 0;
1637
1638 /* A coding system of this category is always ASCII compatible. */
1639 src += coding->head_ascii;
1640
1641 while (1)
1642 {
1643 ONE_MORE_BYTE (c);
1644
1645 if (c == 0x80)
1646 {
1647 /* Perhaps the start of composite character. We simple skip
1648 it because analyzing it is too heavy for detecting. But,
1649 at least, we check that the composite character
1650 constitues of more than 4 bytes. */
1651 unsigned char *src_base;
1652
1653 repeat:
1654 src_base = src;
1655 do
1656 {
1657 ONE_MORE_BYTE (c);
1658 }
1659 while (c >= 0xA0);
1660
1661 if (src - src_base <= 4)
1662 break;
1663 found = 1;
1664 if (c == 0x80)
1665 goto repeat;
1666 }
1667
1668 if (c < 0x80)
1669 {
1670 if (c < 0x20
1671 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1672 break;
1673 }
1674 else
1675 {
1676 unsigned char *src_base = src - 1;
1677
1678 do
1679 {
1680 ONE_MORE_BYTE (c);
1681 }
1682 while (c >= 0xA0);
1683 if (src - src_base != emacs_mule_bytes[*src_base])
1684 break;
1685 found = 1;
1686 }
1687 }
1688 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1689 return 0;
1690
1691 no_more_source:
1692 if (!found)
1693 return 0;
1694 *mask &= CATEGORY_MASK_EMACS_MULE;
1695 return 1;
1696}
1697
1698
1699/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1700
1701/* Decode a character represented as a component of composition
1702 sequence of Emacs 20/21 style at SRC. Set C to that character and
1703 update SRC to the head of next character (or an encoded composition
1704 rule). If SRC doesn't points a composition component, set C to -1.
1705 If SRC points an invalid byte sequence, global exit by a return
1706 value 0. */
1707
1708#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1709 if (1) \
1710 { \
1711 int c; \
1712 int nbytes, nchars; \
1713 \
1714 if (src == src_end) \
1715 break; \
1716 c = emacs_mule_char (coding, src, &nbytes, &nchars); \
1717 if (c < 0) \
1718 { \
1719 if (c == -2) \
1720 break; \
1721 goto invalid_code; \
1722 } \
1723 *buf++ = c; \
1724 src += nbytes; \
1725 consumed_chars += nchars; \
1726 } \
1727 else
1728
1729
1730/* Decode a composition rule represented as a component of composition
1731 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1732 and increment BUF. If SRC points an invalid byte sequence, set C
1733 to -1. */
1734
1735#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1736 do { \
1737 int c, gref, nref; \
1738 \
1739 if (src >= src_end) \
1740 goto invalid_code; \
1741 ONE_MORE_BYTE_NO_CHECK (c); \
1742 c -= 0x20; \
1743 if (c < 0 || c >= 81) \
1744 goto invalid_code; \
1745 \
1746 gref = c / 9, nref = c % 9; \
1747 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1748 } while (0)
1749
1750
1751/* Decode a composition rule represented as a component of composition
1752 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1753 and increment BUF. If SRC points an invalid byte sequence, set C
1754 to -1. */
1755
1756#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1757 do { \
1758 int gref, nref; \
1759 \
1760 if (src + 1>= src_end) \
1761 goto invalid_code; \
1762 ONE_MORE_BYTE_NO_CHECK (gref); \
1763 gref -= 0x20; \
1764 ONE_MORE_BYTE_NO_CHECK (nref); \
1765 nref -= 0x20; \
1766 if (gref < 0 || gref >= 81 \
1767 || nref < 0 || nref >= 81) \
1768 goto invalid_code; \
1769 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1770 } while (0)
1771
1772
1773#define ADD_COMPOSITION_DATA(buf, method, nchars) \
1774 do { \
1775 *buf++ = -5; \
1776 *buf++ = coding->produced_char + char_offset; \
1777 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1778 *buf++ = method; \
1779 *buf++ = nchars; \
1780 } while (0)
1781
1782
1783#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1784 do { \
1785 /* Emacs 21 style format. The first three bytes at SRC are \
1786 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1787 the byte length of this composition information, CHARS is the \
1788 number of characters composed by this composition. */ \
1789 enum composition_method method = c - 0xF2; \
1790 int *charbuf_base = charbuf; \
1791 int consumed_chars_limit; \
1792 int nbytes, nchars; \
1793 \
1794 ONE_MORE_BYTE (c); \
1795 nbytes = c - 0xA0; \
1796 if (nbytes < 3) \
1797 goto invalid_code; \
1798 ONE_MORE_BYTE (c); \
1799 nchars = c - 0xA0; \
1800 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1801 consumed_chars_limit = consumed_chars_base + nbytes; \
1802 if (method != COMPOSITION_RELATIVE) \
1803 { \
1804 int i = 0; \
1805 while (consumed_chars < consumed_chars_limit) \
1806 { \
1807 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1808 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1809 else \
1810 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1811 i++; \
1812 } \
1813 if (consumed_chars < consumed_chars_limit) \
1814 goto invalid_code; \
1815 charbuf_base[0] -= i; \
1816 } \
1817 } while (0)
1818
1819
1820#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1821 do { \
1822 /* Emacs 20 style format for relative composition. */ \
1823 /* Store multibyte form of characters to be composed. */ \
1824 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1825 int *buf = components; \
1826 int i, j; \
1827 \
1828 src = src_base; \
1829 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1830 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1831 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1832 if (i < 2) \
1833 goto invalid_code; \
1834 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1835 for (j = 0; j < i; j++) \
1836 *charbuf++ = components[j]; \
1837 } while (0)
1838
1839
1840#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1841 do { \
1842 /* Emacs 20 style format for rule-base composition. */ \
1843 /* Store multibyte form of characters to be composed. */ \
1844 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1845 int *buf = components; \
1846 int i, j; \
1847 \
1848 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1849 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1850 { \
1851 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1852 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1853 } \
1854 if (i < 1 || (buf - components) % 2 == 0) \
1855 goto invalid_code; \
1856 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1857 goto no_more_source; \
1858 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1859 for (j = 0; j < i; j++) \
1860 *charbuf++ = components[j]; \
1861 for (j = 0; j < i; j += 2) \
1862 *charbuf++ = components[j]; \
1863 } while (0)
1864
1865
1866static void
1867decode_coding_emacs_mule (coding)
1868 struct coding_system *coding;
1869{
1870 unsigned char *src = coding->source + coding->consumed;
1871 unsigned char *src_end = coding->source + coding->src_bytes;
1872 unsigned char *src_base;
1873 int *charbuf = coding->charbuf;
1874 int *charbuf_end = charbuf + coding->charbuf_size;
1875 int consumed_chars = 0, consumed_chars_base;
1876 int char_offset = 0;
1877 int multibytep = coding->src_multibyte;
1878 Lisp_Object attrs, eol_type, charset_list;
1879
1880 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1881
1882 while (1)
1883 {
1884 int c;
1885
1886 src_base = src;
1887 consumed_chars_base = consumed_chars;
1888
1889 if (charbuf >= charbuf_end)
1890 break;
1891
1892 ONE_MORE_BYTE (c);
1893
1894 if (c < 0x80)
1895 {
1896 if (c == '\r')
1897 {
1898 if (EQ (eol_type, Qdos))
1899 {
1900 if (src == src_end)
1901 goto no_more_source;
1902 if (*src == '\n')
1903 ONE_MORE_BYTE (c);
1904 }
1905 else if (EQ (eol_type, Qmac))
1906 c = '\n';
1907 }
1908 *charbuf++ = c;
1909 char_offset++;
1910 }
1911 else if (c == 0x80)
1912 {
1913 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1914 break;
1915 ONE_MORE_BYTE (c);
1916 if (c - 0xF2 >= COMPOSITION_RELATIVE
1917 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1918 DECODE_EMACS_MULE_21_COMPOSITION (c);
1919 else if (c < 0xC0)
1920 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1921 else if (c == 0xFF)
1922 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1923 else
1924 goto invalid_code;
1925 coding->annotated = 1;
1926 }
1927 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1928 {
1929 int nbytes, nchars;
1930 src = src_base;
1931 consumed_chars = consumed_chars_base;
1932 c = emacs_mule_char (coding, src, &nbytes, &nchars);
1933 if (c < 0)
1934 {
1935 if (c == -2)
1936 break;
1937 goto invalid_code;
1938 }
1939 *charbuf++ = c;
1940 src += nbytes;
1941 consumed_chars += nchars;
1942 char_offset++;
1943 }
1944 continue;
1945
1946 invalid_code:
1947 src = src_base;
1948 consumed_chars = consumed_chars_base;
1949 ONE_MORE_BYTE (c);
1950 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1951 coding->errors++;
1952 }
1953
1954 no_more_source:
1955 coding->consumed_char += consumed_chars_base;
1956 coding->consumed = src_base - coding->source;
1957 coding->charbuf_used = charbuf - coding->charbuf;
1958}
1959
1960
1961#define EMACS_MULE_LEADING_CODES(id, codes) \
1962 do { \
1963 if (id < 0xA0) \
1964 codes[0] = id, codes[1] = 0; \
1965 else if (id < 0xE0) \
1966 codes[0] = 0x9A, codes[1] = id; \
1967 else if (id < 0xF0) \
1968 codes[0] = 0x9B, codes[1] = id; \
1969 else if (id < 0xF5) \
1970 codes[0] = 0x9C, codes[1] = id; \
1971 else \
1972 codes[0] = 0x9D, codes[1] = id; \
1973 } while (0);
1974
1975
1976static int
1977encode_coding_emacs_mule (coding)
1978 struct coding_system *coding;
1979{
1980 int multibytep = coding->dst_multibyte;
1981 int *charbuf = coding->charbuf;
1982 int *charbuf_end = charbuf + coding->charbuf_used;
1983 unsigned char *dst = coding->destination + coding->produced;
1984 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1985 int safe_room = 8;
1986 int produced_chars = 0;
1987 Lisp_Object attrs, eol_type, charset_list;
1988 int c;
1989
1990 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1991
1992 while (charbuf < charbuf_end)
1993 {
1994 ASSURE_DESTINATION (safe_room);
1995 c = *charbuf++;
1996 if (ASCII_CHAR_P (c))
1997 EMIT_ONE_ASCII_BYTE (c);
1998 else
1999 {
2000 struct charset *charset;
2001 unsigned code;
2002 int dimension;
2003 int emacs_mule_id;
2004 unsigned char leading_codes[2];
2005
2006 charset = char_charset (c, charset_list, &code);
2007 if (! charset)
2008 {
2009 c = coding->default_char;
2010 if (ASCII_CHAR_P (c))
2011 {
2012 EMIT_ONE_ASCII_BYTE (c);
2013 continue;
2014 }
2015 charset = char_charset (c, charset_list, &code);
2016 }
2017 dimension = CHARSET_DIMENSION (charset);
2018 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2019 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2020 EMIT_ONE_BYTE (leading_codes[0]);
2021 if (leading_codes[1])
2022 EMIT_ONE_BYTE (leading_codes[1]);
2023 if (dimension == 1)
2024 EMIT_ONE_BYTE (code);
2025 else
2026 {
2027 EMIT_ONE_BYTE (code >> 8);
2028 EMIT_ONE_BYTE (code & 0xFF);
2029 }
2030 }
2031 }
2032 coding->result = CODING_RESULT_SUCCESS;
2033 coding->produced_char += produced_chars;
2034 coding->produced = dst - coding->destination;
2035 return 0;
2036}
2037
2038\f
2039/*** 7. ISO2022 handlers ***/
2040
2041/* The following note describes the coding system ISO2022 briefly.
2042 Since the intention of this note is to help understand the
2043 functions in this file, some parts are NOT ACCURATE or OVERLY
2044 SIMPLIFIED. For thorough understanding, please refer to the
2045 original document of ISO2022.
2046
2047 ISO2022 provides many mechanisms to encode several character sets
2048 in 7-bit and 8-bit environments. For 7-bite environments, all text
2049 is encoded using bytes less than 128. This may make the encoded
2050 text a little bit longer, but the text passes more easily through
2051 several gateways, some of which strip off MSB (Most Signigant Bit).
2052
2053 There are two kinds of character sets: control character set and
2054 graphic character set. The former contains control characters such
2055 as `newline' and `escape' to provide control functions (control
2056 functions are also provided by escape sequences). The latter
2057 contains graphic characters such as 'A' and '-'. Emacs recognizes
2058 two control character sets and many graphic character sets.
2059
2060 Graphic character sets are classified into one of the following
2061 four classes, according to the number of bytes (DIMENSION) and
2062 number of characters in one dimension (CHARS) of the set:
2063 - DIMENSION1_CHARS94
2064 - DIMENSION1_CHARS96
2065 - DIMENSION2_CHARS94
2066 - DIMENSION2_CHARS96
2067
2068 In addition, each character set is assigned an identification tag,
2069 unique for each set, called "final character" (denoted as <F>
2070 hereafter). The <F> of each character set is decided by ECMA(*)
2071 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2072 (0x30..0x3F are for private use only).
2073
2074 Note (*): ECMA = European Computer Manufacturers Association
2075
2076 Here are examples of graphic character set [NAME(<F>)]:
2077 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2078 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2079 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2080 o DIMENSION2_CHARS96 -- none for the moment
2081
2082 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2083 C0 [0x00..0x1F] -- control character plane 0
2084 GL [0x20..0x7F] -- graphic character plane 0
2085 C1 [0x80..0x9F] -- control character plane 1
2086 GR [0xA0..0xFF] -- graphic character plane 1
2087
2088 A control character set is directly designated and invoked to C0 or
2089 C1 by an escape sequence. The most common case is that:
2090 - ISO646's control character set is designated/invoked to C0, and
2091 - ISO6429's control character set is designated/invoked to C1,
2092 and usually these designations/invocations are omitted in encoded
2093 text. In a 7-bit environment, only C0 can be used, and a control
2094 character for C1 is encoded by an appropriate escape sequence to
2095 fit into the environment. All control characters for C1 are
2096 defined to have corresponding escape sequences.
2097
2098 A graphic character set is at first designated to one of four
2099 graphic registers (G0 through G3), then these graphic registers are
2100 invoked to GL or GR. These designations and invocations can be
2101 done independently. The most common case is that G0 is invoked to
2102 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2103 these invocations and designations are omitted in encoded text.
2104 In a 7-bit environment, only GL can be used.
2105
2106 When a graphic character set of CHARS94 is invoked to GL, codes
2107 0x20 and 0x7F of the GL area work as control characters SPACE and
2108 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2109 be used.
2110
2111 There are two ways of invocation: locking-shift and single-shift.
2112 With locking-shift, the invocation lasts until the next different
2113 invocation, whereas with single-shift, the invocation affects the
2114 following character only and doesn't affect the locking-shift
2115 state. Invocations are done by the following control characters or
2116 escape sequences:
2117
2118 ----------------------------------------------------------------------
2119 abbrev function cntrl escape seq description
2120 ----------------------------------------------------------------------
2121 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2122 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2123 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2124 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2125 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2126 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2127 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2128 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2129 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2130 ----------------------------------------------------------------------
2131 (*) These are not used by any known coding system.
2132
2133 Control characters for these functions are defined by macros
2134 ISO_CODE_XXX in `coding.h'.
2135
2136 Designations are done by the following escape sequences:
2137 ----------------------------------------------------------------------
2138 escape sequence description
2139 ----------------------------------------------------------------------
2140 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2141 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2142 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2143 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2144 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2145 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2146 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2147 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2148 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2149 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2150 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2151 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2152 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2153 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2154 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2155 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2156 ----------------------------------------------------------------------
2157
2158 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2159 of dimension 1, chars 94, and final character <F>, etc...
2160
2161 Note (*): Although these designations are not allowed in ISO2022,
2162 Emacs accepts them on decoding, and produces them on encoding
2163 CHARS96 character sets in a coding system which is characterized as
2164 7-bit environment, non-locking-shift, and non-single-shift.
2165
2166 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2167 '(' must be omitted. We refer to this as "short-form" hereafter.
2168
2169 Now you may notice that there are a lot of ways for encoding the
2170 same multilingual text in ISO2022. Actually, there exist many
2171 coding systems such as Compound Text (used in X11's inter client
2172 communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2173 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
2174 localized platforms), and all of these are variants of ISO2022.
2175
2176 In addition to the above, Emacs handles two more kinds of escape
2177 sequences: ISO6429's direction specification and Emacs' private
2178 sequence for specifying character composition.
2179
2180 ISO6429's direction specification takes the following form:
2181 o CSI ']' -- end of the current direction
2182 o CSI '0' ']' -- end of the current direction
2183 o CSI '1' ']' -- start of left-to-right text
2184 o CSI '2' ']' -- start of right-to-left text
2185 The control character CSI (0x9B: control sequence introducer) is
2186 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2187
2188 Character composition specification takes the following form:
2189 o ESC '0' -- start relative composition
2190 o ESC '1' -- end composition
2191 o ESC '2' -- start rule-base composition (*)
2192 o ESC '3' -- start relative composition with alternate chars (**)
2193 o ESC '4' -- start rule-base composition with alternate chars (**)
2194 Since these are not standard escape sequences of any ISO standard,
2195 the use of them for these meaning is restricted to Emacs only.
2196
2197 (*) This form is used only in Emacs 20.5 and the older versions,
2198 but the newer versions can safely decode it.
2199 (**) This form is used only in Emacs 21.1 and the newer versions,
2200 and the older versions can't decode it.
2201
2202 Here's a list of examples usages of these composition escape
2203 sequences (categorized by `enum composition_method').
2204
2205 COMPOSITION_RELATIVE:
2206 ESC 0 CHAR [ CHAR ] ESC 1
2207 COMPOSITOIN_WITH_RULE:
2208 ESC 2 CHAR [ RULE CHAR ] ESC 1
2209 COMPOSITION_WITH_ALTCHARS:
2210 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2211 COMPOSITION_WITH_RULE_ALTCHARS:
2212 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2213
2214enum iso_code_class_type iso_code_class[256];
2215
2216#define SAFE_CHARSET_P(coding, id) \
2217 ((id) <= (coding)->max_charset_id \
2218 && (coding)->safe_charsets[id] >= 0)
2219
2220
2221#define SHIFT_OUT_OK(category) \
2222 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2223
2224static void
2225setup_iso_safe_charsets (attrs)
2226 Lisp_Object attrs;
2227{
2228 Lisp_Object charset_list, safe_charsets;
2229 Lisp_Object request;
2230 Lisp_Object reg_usage;
2231 Lisp_Object tail;
2232 int reg94, reg96;
2233 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2234 int max_charset_id;
2235
2236 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2237 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2238 && ! EQ (charset_list, Viso_2022_charset_list))
2239 {
2240 CODING_ATTR_CHARSET_LIST (attrs)
2241 = charset_list = Viso_2022_charset_list;
2242 ASET (attrs, coding_attr_safe_charsets, Qnil);
2243 }
2244
2245 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2246 return;
2247
2248 max_charset_id = 0;
2249 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2250 {
2251 int id = XINT (XCAR (tail));
2252 if (max_charset_id < id)
2253 max_charset_id = id;
2254 }
2255
2256 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2257 make_number (255));
2258 request = AREF (attrs, coding_attr_iso_request);
2259 reg_usage = AREF (attrs, coding_attr_iso_usage);
2260 reg94 = XINT (XCAR (reg_usage));
2261 reg96 = XINT (XCDR (reg_usage));
2262
2263 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2264 {
2265 Lisp_Object id;
2266 Lisp_Object reg;
2267 struct charset *charset;
2268
2269 id = XCAR (tail);
2270 charset = CHARSET_FROM_ID (XINT (id));
2271 reg = Fcdr (Fassq (request, id));
2272 if (! NILP (reg))
2273 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2274 else if (charset->iso_chars_96)
2275 {
2276 if (reg96 < 4)
2277 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2278 }
2279 else
2280 {
2281 if (reg94 < 4)
2282 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2283 }
2284 }
2285 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2286}
2287
2288
2289/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2290 Check if a text is encoded in ISO2022. If it is, returns an
2291 integer in which appropriate flag bits any of:
2292 CATEGORY_MASK_ISO_7
2293 CATEGORY_MASK_ISO_7_TIGHT
2294 CATEGORY_MASK_ISO_8_1
2295 CATEGORY_MASK_ISO_8_2
2296 CATEGORY_MASK_ISO_7_ELSE
2297 CATEGORY_MASK_ISO_8_ELSE
2298 are set. If a code which should never appear in ISO2022 is found,
2299 returns 0. */
2300
2301static int
2302detect_coding_iso_2022 (coding, mask)
2303 struct coding_system *coding;
2304 int *mask;
2305{
2306 unsigned char *src = coding->source, *src_base = src;
2307 unsigned char *src_end = coding->source + coding->src_bytes;
2308 int multibytep = coding->src_multibyte;
2309 int mask_iso = CATEGORY_MASK_ISO;
2310 int mask_found = 0, mask_8bit_found = 0;
2311 int reg[4], shift_out = 0, single_shifting = 0;
2312 int id;
2313 int c, c1;
2314 int consumed_chars = 0;
2315 int i;
2316
2317 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2318 {
2319 struct coding_system *this = &(coding_categories[i]);
2320 Lisp_Object attrs, val;
2321
2322 attrs = CODING_ID_ATTRS (this->id);
2323 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2324 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2325 setup_iso_safe_charsets (attrs);
2326 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2327 this->max_charset_id = XSTRING (val)->size - 1;
2328 this->safe_charsets = (char *) XSTRING (val)->data;
2329 }
2330
2331 /* A coding system of this category is always ASCII compatible. */
2332 src += coding->head_ascii;
2333
2334 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2335 while (mask_iso && src < src_end)
2336 {
2337 ONE_MORE_BYTE (c);
2338 switch (c)
2339 {
2340 case ISO_CODE_ESC:
2341 if (inhibit_iso_escape_detection)
2342 break;
2343 single_shifting = 0;
2344 ONE_MORE_BYTE (c);
2345 if (c >= '(' && c <= '/')
2346 {
2347 /* Designation sequence for a charset of dimension 1. */
2348 ONE_MORE_BYTE (c1);
2349 if (c1 < ' ' || c1 >= 0x80
2350 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2351 /* Invalid designation sequence. Just ignore. */
2352 break;
2353 reg[(c - '(') % 4] = id;
2354 }
2355 else if (c == '$')
2356 {
2357 /* Designation sequence for a charset of dimension 2. */
2358 ONE_MORE_BYTE (c);
2359 if (c >= '@' && c <= 'B')
2360 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2361 reg[0] = id = iso_charset_table[1][0][c];
2362 else if (c >= '(' && c <= '/')
2363 {
2364 ONE_MORE_BYTE (c1);
2365 if (c1 < ' ' || c1 >= 0x80
2366 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2367 /* Invalid designation sequence. Just ignore. */
2368 break;
2369 reg[(c - '(') % 4] = id;
2370 }
2371 else
2372 /* Invalid designation sequence. Just ignore. */
2373 break;
2374 }
2375 else if (c == 'N' || c == 'O')
2376 {
2377 /* ESC <Fe> for SS2 or SS3. */
2378 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
2379 break;
2380 }
2381 else if (c >= '0' && c <= '4')
2382 {
2383 /* ESC <Fp> for start/end composition. */
2384 mask_found |= CATEGORY_MASK_ISO;
2385 break;
2386 }
2387 else
2388 {
2389 /* Invalid escape sequence. */
2390 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2391 break;
2392 }
2393
2394 /* We found a valid designation sequence for CHARSET. */
2395 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2396 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2397 id))
2398 mask_found |= CATEGORY_MASK_ISO_7;
2399 else
2400 mask_iso &= ~CATEGORY_MASK_ISO_7;
2401 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2402 id))
2403 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
2404 else
2405 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2406 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2407 id))
2408 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
2409 else
2410 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2411 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2412 id))
2413 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
2414 else
2415 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
2416 break;
2417
2418 case ISO_CODE_SO:
2419 if (inhibit_iso_escape_detection)
2420 break;
2421 single_shifting = 0;
2422 if (shift_out == 0
2423 && (reg[1] >= 0
2424 || SHIFT_OUT_OK (coding_category_iso_7_else)
2425 || SHIFT_OUT_OK (coding_category_iso_8_else)))
2426 {
2427 /* Locking shift out. */
2428 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2429 mask_found |= CATEGORY_MASK_ISO_ELSE;
2430 }
2431 break;
2432
2433 case ISO_CODE_SI:
2434 if (inhibit_iso_escape_detection)
2435 break;
2436 single_shifting = 0;
2437 if (shift_out == 1)
2438 {
2439 /* Locking shift in. */
2440 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2441 mask_found |= CATEGORY_MASK_ISO_ELSE;
2442 }
2443 break;
2444
2445 case ISO_CODE_CSI:
2446 single_shifting = 0;
2447 case ISO_CODE_SS2:
2448 case ISO_CODE_SS3:
2449 {
2450 int newmask = CATEGORY_MASK_ISO_8_ELSE;
2451
2452 if (inhibit_iso_escape_detection)
2453 break;
2454 if (c != ISO_CODE_CSI)
2455 {
2456 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2457 & CODING_ISO_FLAG_SINGLE_SHIFT)
2458 newmask |= CATEGORY_MASK_ISO_8_1;
2459 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2460 & CODING_ISO_FLAG_SINGLE_SHIFT)
2461 newmask |= CATEGORY_MASK_ISO_8_2;
2462 single_shifting = 1;
2463 }
2464 if (VECTORP (Vlatin_extra_code_table)
2465 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2466 {
2467 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2468 & CODING_ISO_FLAG_LATIN_EXTRA)
2469 newmask |= CATEGORY_MASK_ISO_8_1;
2470 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2471 & CODING_ISO_FLAG_LATIN_EXTRA)
2472 newmask |= CATEGORY_MASK_ISO_8_2;
2473 }
2474 mask_iso &= newmask;
2475 mask_found |= newmask;
2476 }
2477 break;
2478
2479 default:
2480 if (c < 0x80)
2481 {
2482 single_shifting = 0;
2483 break;
2484 }
2485 else if (c < 0xA0)
2486 {
2487 single_shifting = 0;
2488 mask_8bit_found = 1;
2489 if (VECTORP (Vlatin_extra_code_table)
2490 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2491 {
2492 int newmask = 0;
2493
2494 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2495 & CODING_ISO_FLAG_LATIN_EXTRA)
2496 newmask |= CATEGORY_MASK_ISO_8_1;
2497 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2498 & CODING_ISO_FLAG_LATIN_EXTRA)
2499 newmask |= CATEGORY_MASK_ISO_8_2;
2500 mask_iso &= newmask;
2501 mask_found |= newmask;
2502 }
2503 else
2504 return 0;
2505 }
2506 else
2507 {
2508 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2509 | CATEGORY_MASK_ISO_7_ELSE);
2510 mask_found |= CATEGORY_MASK_ISO_8_1;
2511 mask_8bit_found = 1;
2512 /* Check the length of succeeding codes of the range
2513 0xA0..0FF. If the byte length is odd, we exclude
2514 CATEGORY_MASK_ISO_8_2. We can check this only
2515 when we are not single shifting. */
2516 if (!single_shifting
2517 && mask_iso & CATEGORY_MASK_ISO_8_2)
2518 {
2519 int i = 1;
2520 while (src < src_end)
2521 {
2522 ONE_MORE_BYTE (c);
2523 if (c < 0xA0)
2524 break;
2525 i++;
2526 }
2527
2528 if (i & 1 && src < src_end)
2529 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
2530 else
2531 mask_found |= CATEGORY_MASK_ISO_8_2;
2532 }
2533 }
2534 break;
2535 }
2536 }
2537 no_more_source:
2538 if (!mask_iso)
2539 {
2540 *mask &= ~CATEGORY_MASK_ISO;
2541 return 0;
2542 }
2543 if (!mask_found)
2544 return 0;
2545 *mask &= mask_iso & mask_found;
2546 if (! mask_8bit_found)
2547 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2548 return 1;
2549}
2550
2551
2552/* Set designation state into CODING. */
2553#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2554 do { \
2555 int id, prev; \
2556 \
2557 if (final < '0' || final >= 128 \
2558 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2559 || !SAFE_CHARSET_P (coding, id)) \
2560 { \
2561 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2562 goto invalid_code; \
2563 } \
2564 prev = CODING_ISO_DESIGNATION (coding, reg); \
2565 CODING_ISO_DESIGNATION (coding, reg) = id; \
2566 /* If there was an invalid designation to REG previously, and this \
2567 designation is ASCII to REG, we should keep this designation \
2568 sequence. */ \
2569 if (prev == -2 && id == charset_ascii) \
2570 goto invalid_code; \
2571 } while (0)
2572
2573
2574#define MAYBE_FINISH_COMPOSITION() \
2575 do { \
2576 int i; \
2577 if (composition_state == COMPOSING_NO) \
2578 break; \
2579 /* It is assured that we have enough room for producing \
2580 characters stored in the table `components'. */ \
2581 if (charbuf + component_idx > charbuf_end) \
2582 goto no_more_source; \
2583 composition_state = COMPOSING_NO; \
2584 if (method == COMPOSITION_RELATIVE \
2585 || method == COMPOSITION_WITH_ALTCHARS) \
2586 { \
2587 for (i = 0; i < component_idx; i++) \
2588 *charbuf++ = components[i]; \
2589 char_offset += component_idx; \
2590 } \
2591 else \
2592 { \
2593 for (i = 0; i < component_idx; i += 2) \
2594 *charbuf++ = components[i]; \
2595 char_offset += (component_idx / 2) + 1; \
2596 } \
2597 } while (0)
2598
2599
2600/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2601 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2602 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2603 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2604 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2605 */
2606
2607#define DECODE_COMPOSITION_START(c1) \
2608 do { \
2609 if (c1 == '0' \
2610 && composition_state == COMPOSING_COMPONENT_RULE) \
2611 { \
2612 component_len = component_idx; \
2613 composition_state = COMPOSING_CHAR; \
2614 } \
2615 else \
2616 { \
2617 unsigned char *p; \
2618 \
2619 MAYBE_FINISH_COMPOSITION (); \
2620 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2621 goto no_more_source; \
2622 for (p = src; p < src_end - 1; p++) \
2623 if (*p == ISO_CODE_ESC && p[1] == '1') \
2624 break; \
2625 if (p == src_end - 1) \
2626 { \
2627 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2628 goto invalid_code; \
2629 goto no_more_source; \
2630 } \
2631 \
2632 /* This is surely the start of a composition. */ \
2633 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2634 : c1 == '2' ? COMPOSITION_WITH_RULE \
2635 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2636 : COMPOSITION_WITH_RULE_ALTCHARS); \
2637 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2638 : COMPOSING_COMPONENT_CHAR); \
2639 component_idx = component_len = 0; \
2640 } \
2641 } while (0)
2642
2643
2644/* Handle compositoin end sequence ESC 1. */
2645
2646#define DECODE_COMPOSITION_END() \
2647 do { \
2648 int nchars = (component_len > 0 ? component_idx - component_len \
2649 : method == COMPOSITION_RELATIVE ? component_idx \
2650 : (component_idx + 1) / 2); \
2651 int i; \
2652 int *saved_charbuf = charbuf; \
2653 \
2654 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2655 if (method != COMPOSITION_RELATIVE) \
2656 { \
2657 if (component_len == 0) \
2658 for (i = 0; i < component_idx; i++) \
2659 *charbuf++ = components[i]; \
2660 else \
2661 for (i = 0; i < component_len; i++) \
2662 *charbuf++ = components[i]; \
2663 *saved_charbuf = saved_charbuf - charbuf; \
2664 } \
2665 if (method == COMPOSITION_WITH_RULE) \
2666 for (i = 0; i < component_idx; i += 2, char_offset++) \
2667 *charbuf++ = components[i]; \
2668 else \
2669 for (i = component_len; i < component_idx; i++, char_offset++) \
2670 *charbuf++ = components[i]; \
2671 coding->annotated = 1; \
2672 composition_state = COMPOSING_NO; \
2673 } while (0)
2674
2675
2676/* Decode a composition rule from the byte C1 (and maybe one more byte
2677 from SRC) and store one encoded composition rule in
2678 coding->cmp_data. */
2679
2680#define DECODE_COMPOSITION_RULE(c1) \
2681 do { \
2682 (c1) -= 32; \
2683 if (c1 < 81) /* old format (before ver.21) */ \
2684 { \
2685 int gref = (c1) / 9; \
2686 int nref = (c1) % 9; \
2687 if (gref == 4) gref = 10; \
2688 if (nref == 4) nref = 10; \
2689 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2690 } \
2691 else if (c1 < 93) /* new format (after ver.21) */ \
2692 { \
2693 ONE_MORE_BYTE (c2); \
2694 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2695 } \
2696 else \
2697 c1 = 0; \
2698 } while (0)
2699
2700
2701/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2702
2703static void
2704decode_coding_iso_2022 (coding)
2705 struct coding_system *coding;
2706{
2707 unsigned char *src = coding->source + coding->consumed;
2708 unsigned char *src_end = coding->source + coding->src_bytes;
2709 unsigned char *src_base;
2710 int *charbuf = coding->charbuf;
2711 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2712 int consumed_chars = 0, consumed_chars_base;
2713 int char_offset = 0;
2714 int multibytep = coding->src_multibyte;
2715 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2716 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2717 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2718 struct charset *charset;
2719 int c;
2720 /* For handling composition sequence. */
2721#define COMPOSING_NO 0
2722#define COMPOSING_CHAR 1
2723#define COMPOSING_RULE 2
2724#define COMPOSING_COMPONENT_CHAR 3
2725#define COMPOSING_COMPONENT_RULE 4
2726
2727 int composition_state = COMPOSING_NO;
2728 enum composition_method method;
2729 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2730 int component_idx;
2731 int component_len;
2732 Lisp_Object attrs, eol_type, charset_list;
2733
2734 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2735 setup_iso_safe_charsets (attrs);
2736
2737 while (1)
2738 {
2739 int c1, c2;
2740
2741 src_base = src;
2742 consumed_chars_base = consumed_chars;
2743
2744 if (charbuf >= charbuf_end)
2745 break;
2746
2747 ONE_MORE_BYTE (c1);
2748
2749 /* We produce no character or one character. */
2750 switch (iso_code_class [c1])
2751 {
2752 case ISO_0x20_or_0x7F:
2753 if (composition_state != COMPOSING_NO)
2754 {
2755 if (composition_state == COMPOSING_RULE
2756 || composition_state == COMPOSING_COMPONENT_RULE)
2757 {
2758 DECODE_COMPOSITION_RULE (c1);
2759 components[component_idx++] = c1;
2760 composition_state--;
2761 continue;
2762 }
2763 }
2764 if (charset_id_0 < 0
2765 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2766 /* This is SPACE or DEL. */
2767 charset = CHARSET_FROM_ID (charset_ascii);
2768 else
2769 charset = CHARSET_FROM_ID (charset_id_0);
2770 break;
2771
2772 case ISO_graphic_plane_0:
2773 if (composition_state != COMPOSING_NO)
2774 {
2775 if (composition_state == COMPOSING_RULE
2776 || composition_state == COMPOSING_COMPONENT_RULE)
2777 {
2778 DECODE_COMPOSITION_RULE (c1);
2779 components[component_idx++] = c1;
2780 composition_state--;
2781 continue;
2782 }
2783 }
2784 charset = CHARSET_FROM_ID (charset_id_0);
2785 break;
2786
2787 case ISO_0xA0_or_0xFF:
2788 if (charset_id_1 < 0
2789 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2790 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2791 goto invalid_code;
2792 /* This is a graphic character, we fall down ... */
2793
2794 case ISO_graphic_plane_1:
2795 if (charset_id_1 < 0)
2796 goto invalid_code;
2797 charset = CHARSET_FROM_ID (charset_id_1);
2798 break;
2799
2800 case ISO_carriage_return:
2801 if (c1 == '\r')
2802 {
2803 if (EQ (eol_type, Qdos))
2804 {
2805 if (src == src_end)
2806 goto no_more_source;
2807 if (*src == '\n')
2808 ONE_MORE_BYTE (c1);
2809 }
2810 else if (EQ (eol_type, Qmac))
2811 c1 = '\n';
2812 }
2813 /* fall through */
2814
2815 case ISO_control_0:
2816 MAYBE_FINISH_COMPOSITION ();
2817 charset = CHARSET_FROM_ID (charset_ascii);
2818 break;
2819
2820 case ISO_control_1:
2821 MAYBE_FINISH_COMPOSITION ();
2822 goto invalid_code;
2823
2824 case ISO_shift_out:
2825 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2826 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2827 goto invalid_code;
2828 CODING_ISO_INVOCATION (coding, 0) = 1;
2829 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2830 continue;
2831
2832 case ISO_shift_in:
2833 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2834 goto invalid_code;
2835 CODING_ISO_INVOCATION (coding, 0) = 0;
2836 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2837 continue;
2838
2839 case ISO_single_shift_2_7:
2840 case ISO_single_shift_2:
2841 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2842 goto invalid_code;
2843 /* SS2 is handled as an escape sequence of ESC 'N' */
2844 c1 = 'N';
2845 goto label_escape_sequence;
2846
2847 case ISO_single_shift_3:
2848 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2849 goto invalid_code;
2850 /* SS2 is handled as an escape sequence of ESC 'O' */
2851 c1 = 'O';
2852 goto label_escape_sequence;
2853
2854 case ISO_control_sequence_introducer:
2855 /* CSI is handled as an escape sequence of ESC '[' ... */
2856 c1 = '[';
2857 goto label_escape_sequence;
2858
2859 case ISO_escape:
2860 ONE_MORE_BYTE (c1);
2861 label_escape_sequence:
2862 /* Escape sequences handled here are invocation,
2863 designation, direction specification, and character
2864 composition specification. */
2865 switch (c1)
2866 {
2867 case '&': /* revision of following character set */
2868 ONE_MORE_BYTE (c1);
2869 if (!(c1 >= '@' && c1 <= '~'))
2870 goto invalid_code;
2871 ONE_MORE_BYTE (c1);
2872 if (c1 != ISO_CODE_ESC)
2873 goto invalid_code;
2874 ONE_MORE_BYTE (c1);
2875 goto label_escape_sequence;
2876
2877 case '$': /* designation of 2-byte character set */
2878 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2879 goto invalid_code;
2880 ONE_MORE_BYTE (c1);
2881 if (c1 >= '@' && c1 <= 'B')
2882 { /* designation of JISX0208.1978, GB2312.1980,
2883 or JISX0208.1980 */
2884 DECODE_DESIGNATION (0, 2, 0, c1);
2885 }
2886 else if (c1 >= 0x28 && c1 <= 0x2B)
2887 { /* designation of DIMENSION2_CHARS94 character set */
2888 ONE_MORE_BYTE (c2);
2889 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2890 }
2891 else if (c1 >= 0x2C && c1 <= 0x2F)
2892 { /* designation of DIMENSION2_CHARS96 character set */
2893 ONE_MORE_BYTE (c2);
2894 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2895 }
2896 else
2897 goto invalid_code;
2898 /* We must update these variables now. */
2899 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2900 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2901 continue;
2902
2903 case 'n': /* invocation of locking-shift-2 */
2904 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2905 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2906 goto invalid_code;
2907 CODING_ISO_INVOCATION (coding, 0) = 2;
2908 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2909 continue;
2910
2911 case 'o': /* invocation of locking-shift-3 */
2912 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2913 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2914 goto invalid_code;
2915 CODING_ISO_INVOCATION (coding, 0) = 3;
2916 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2917 continue;
2918
2919 case 'N': /* invocation of single-shift-2 */
2920 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2921 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2922 goto invalid_code;
2923 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
2924 ONE_MORE_BYTE (c1);
2925 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2926 goto invalid_code;
2927 break;
2928
2929 case 'O': /* invocation of single-shift-3 */
2930 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2931 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2932 goto invalid_code;
2933 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
2934 ONE_MORE_BYTE (c1);
2935 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2936 goto invalid_code;
2937 break;
2938
2939 case '0': case '2': case '3': case '4': /* start composition */
2940 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2941 goto invalid_code;
2942 DECODE_COMPOSITION_START (c1);
2943 continue;
2944
2945 case '1': /* end composition */
2946 if (composition_state == COMPOSING_NO)
2947 goto invalid_code;
2948 DECODE_COMPOSITION_END ();
2949 continue;
2950
2951 case '[': /* specification of direction */
2952 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2953 goto invalid_code;
2954 /* For the moment, nested direction is not supported.
2955 So, `coding->mode & CODING_MODE_DIRECTION' zero means
2956 left-to-right, and nozero means right-to-left. */
2957 ONE_MORE_BYTE (c1);
2958 switch (c1)
2959 {
2960 case ']': /* end of the current direction */
2961 coding->mode &= ~CODING_MODE_DIRECTION;
2962
2963 case '0': /* end of the current direction */
2964 case '1': /* start of left-to-right direction */
2965 ONE_MORE_BYTE (c1);
2966 if (c1 == ']')
2967 coding->mode &= ~CODING_MODE_DIRECTION;
2968 else
2969 goto invalid_code;
2970 break;
2971
2972 case '2': /* start of right-to-left direction */
2973 ONE_MORE_BYTE (c1);
2974 if (c1 == ']')
2975 coding->mode |= CODING_MODE_DIRECTION;
2976 else
2977 goto invalid_code;
2978 break;
2979
2980 default:
2981 goto invalid_code;
2982 }
2983 continue;
2984
2985 default:
2986 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2987 goto invalid_code;
2988 if (c1 >= 0x28 && c1 <= 0x2B)
2989 { /* designation of DIMENSION1_CHARS94 character set */
2990 ONE_MORE_BYTE (c2);
2991 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
2992 }
2993 else if (c1 >= 0x2C && c1 <= 0x2F)
2994 { /* designation of DIMENSION1_CHARS96 character set */
2995 ONE_MORE_BYTE (c2);
2996 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
2997 }
2998 else
2999 goto invalid_code;
3000 /* We must update these variables now. */
3001 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3002 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3003 continue;
3004 }
3005 }
3006
3007 /* Now we know CHARSET and 1st position code C1 of a character.
3008 Produce a decoded character while getting 2nd position code
3009 C2 if necessary. */
3010 c1 &= 0x7F;
3011 if (CHARSET_DIMENSION (charset) > 1)
3012 {
3013 ONE_MORE_BYTE (c2);
3014 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3015 /* C2 is not in a valid range. */
3016 goto invalid_code;
3017 c1 = (c1 << 8) | (c2 & 0x7F);
3018 if (CHARSET_DIMENSION (charset) > 2)
3019 {
3020 ONE_MORE_BYTE (c2);
3021 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3022 /* C2 is not in a valid range. */
3023 goto invalid_code;
3024 c1 = (c1 << 8) | (c2 & 0x7F);
3025 }
3026 }
3027
3028 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3029 if (c < 0)
3030 {
3031 MAYBE_FINISH_COMPOSITION ();
3032 for (; src_base < src; src_base++, char_offset++)
3033 {
3034 if (ASCII_BYTE_P (*src_base))
3035 *charbuf++ = *src_base;
3036 else
3037 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3038 }
3039 }
3040 else if (composition_state == COMPOSING_NO)
3041 {
3042 *charbuf++ = c;
3043 char_offset++;
3044 }
3045 else
3046 {
3047 components[component_idx++] = c;
3048 if (method == COMPOSITION_WITH_RULE
3049 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3050 && composition_state == COMPOSING_COMPONENT_CHAR))
3051 composition_state++;
3052 }
3053 continue;
3054
3055 invalid_code:
3056 MAYBE_FINISH_COMPOSITION ();
3057 src = src_base;
3058 consumed_chars = consumed_chars_base;
3059 ONE_MORE_BYTE (c);
3060 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3061 coding->errors++;
3062 }
3063
3064 no_more_source:
3065 coding->consumed_char += consumed_chars_base;
3066 coding->consumed = src_base - coding->source;
3067 coding->charbuf_used = charbuf - coding->charbuf;
3068}
3069
3070
3071/* ISO2022 encoding stuff. */
3072
3073/*
3074 It is not enough to say just "ISO2022" on encoding, we have to
3075 specify more details. In Emacs, each coding system of ISO2022
3076 variant has the following specifications:
3077 1. Initial designation to G0 thru G3.
3078 2. Allows short-form designation?
3079 3. ASCII should be designated to G0 before control characters?
3080 4. ASCII should be designated to G0 at end of line?
3081 5. 7-bit environment or 8-bit environment?
3082 6. Use locking-shift?
3083 7. Use Single-shift?
3084 And the following two are only for Japanese:
3085 8. Use ASCII in place of JIS0201-1976-Roman?
3086 9. Use JISX0208-1983 in place of JISX0208-1978?
3087 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3088 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3089 details.
3090*/
3091
3092/* Produce codes (escape sequence) for designating CHARSET to graphic
3093 register REG at DST, and increment DST. If <final-char> of CHARSET is
3094 '@', 'A', or 'B' and the coding system CODING allows, produce
3095 designation sequence of short-form. */
3096
3097#define ENCODE_DESIGNATION(charset, reg, coding) \
3098 do { \
3099 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3100 char *intermediate_char_94 = "()*+"; \
3101 char *intermediate_char_96 = ",-./"; \
3102 int revision = -1; \
3103 int c; \
3104 \
3105 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3106 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3107 \
3108 if (revision >= 0) \
3109 { \
3110 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3111 EMIT_ONE_BYTE ('@' + revision); \
3112 } \
3113 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3114 if (CHARSET_DIMENSION (charset) == 1) \
3115 { \
3116 if (! CHARSET_ISO_CHARS_96 (charset)) \
3117 c = intermediate_char_94[reg]; \
3118 else \
3119 c = intermediate_char_96[reg]; \
3120 EMIT_ONE_ASCII_BYTE (c); \
3121 } \
3122 else \
3123 { \
3124 EMIT_ONE_ASCII_BYTE ('$'); \
3125 if (! CHARSET_ISO_CHARS_96 (charset)) \
3126 { \
3127 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3128 || reg != 0 \
3129 || final_char < '@' || final_char > 'B') \
3130 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3131 } \
3132 else \
3133 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3134 } \
3135 EMIT_ONE_ASCII_BYTE (final_char); \
3136 \
3137 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3138 } while (0)
3139
3140
3141/* The following two macros produce codes (control character or escape
3142 sequence) for ISO2022 single-shift functions (single-shift-2 and
3143 single-shift-3). */
3144
3145#define ENCODE_SINGLE_SHIFT_2 \
3146 do { \
3147 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3148 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3149 else \
3150 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3151 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3152 } while (0)
3153
3154
3155#define ENCODE_SINGLE_SHIFT_3 \
3156 do { \
3157 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3158 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3159 else \
3160 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3161 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3162 } while (0)
3163
3164
3165/* The following four macros produce codes (control character or
3166 escape sequence) for ISO2022 locking-shift functions (shift-in,
3167 shift-out, locking-shift-2, and locking-shift-3). */
3168
3169#define ENCODE_SHIFT_IN \
3170 do { \
3171 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3172 CODING_ISO_INVOCATION (coding, 0) = 0; \
3173 } while (0)
3174
3175
3176#define ENCODE_SHIFT_OUT \
3177 do { \
3178 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3179 CODING_ISO_INVOCATION (coding, 0) = 1; \
3180 } while (0)
3181
3182
3183#define ENCODE_LOCKING_SHIFT_2 \
3184 do { \
3185 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3186 CODING_ISO_INVOCATION (coding, 0) = 2; \
3187 } while (0)
3188
3189
3190#define ENCODE_LOCKING_SHIFT_3 \
3191 do { \
3192 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3193 CODING_ISO_INVOCATION (coding, 0) = 3; \
3194 } while (0)
3195
3196
3197/* Produce codes for a DIMENSION1 character whose character set is
3198 CHARSET and whose position-code is C1. Designation and invocation
3199 sequences are also produced in advance if necessary. */
3200
3201#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3202 do { \
3203 int id = CHARSET_ID (charset); \
3204 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3205 { \
3206 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3207 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3208 else \
3209 EMIT_ONE_BYTE (c1 | 0x80); \
3210 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3211 break; \
3212 } \
3213 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3214 { \
3215 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3216 break; \
3217 } \
3218 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3219 { \
3220 EMIT_ONE_BYTE (c1 | 0x80); \
3221 break; \
3222 } \
3223 else \
3224 /* Since CHARSET is not yet invoked to any graphic planes, we \
3225 must invoke it, or, at first, designate it to some graphic \
3226 register. Then repeat the loop to actually produce the \
3227 character. */ \
3228 dst = encode_invocation_designation (charset, coding, dst, \
3229 &produced_chars); \
3230 } while (1)
3231
3232
3233/* Produce codes for a DIMENSION2 character whose character set is
3234 CHARSET and whose position-codes are C1 and C2. Designation and
3235 invocation codes are also produced in advance if necessary. */
3236
3237#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3238 do { \
3239 int id = CHARSET_ID (charset); \
3240 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3241 { \
3242 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3243 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3244 else \
3245 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3246 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3247 break; \
3248 } \
3249 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3250 { \
3251 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3252 break; \
3253 } \
3254 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3255 { \
3256 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3257 break; \
3258 } \
3259 else \
3260 /* Since CHARSET is not yet invoked to any graphic planes, we \
3261 must invoke it, or, at first, designate it to some graphic \
3262 register. Then repeat the loop to actually produce the \
3263 character. */ \
3264 dst = encode_invocation_designation (charset, coding, dst, \
3265 &produced_chars); \
3266 } while (1)
3267
3268
3269#define ENCODE_ISO_CHARACTER(charset, c) \
3270 do { \
3271 int code = ENCODE_CHAR ((charset),(c)); \
3272 \
3273 if (CHARSET_DIMENSION (charset) == 1) \
3274 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3275 else \
3276 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3277 } while (0)
3278
3279
3280/* Produce designation and invocation codes at a place pointed by DST
3281 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3282 Return new DST. */
3283
3284unsigned char *
3285encode_invocation_designation (charset, coding, dst, p_nchars)
3286 struct charset *charset;
3287 struct coding_system *coding;
3288 unsigned char *dst;
3289 int *p_nchars;
3290{
3291 int multibytep = coding->dst_multibyte;
3292 int produced_chars = *p_nchars;
3293 int reg; /* graphic register number */
3294 int id = CHARSET_ID (charset);
3295
3296 /* At first, check designations. */
3297 for (reg = 0; reg < 4; reg++)
3298 if (id == CODING_ISO_DESIGNATION (coding, reg))
3299 break;
3300
3301 if (reg >= 4)
3302 {
3303 /* CHARSET is not yet designated to any graphic registers. */
3304 /* At first check the requested designation. */
3305 reg = CODING_ISO_REQUEST (coding, id);
3306 if (reg < 0)
3307 /* Since CHARSET requests no special designation, designate it
3308 to graphic register 0. */
3309 reg = 0;
3310
3311 ENCODE_DESIGNATION (charset, reg, coding);
3312 }
3313
3314 if (CODING_ISO_INVOCATION (coding, 0) != reg
3315 && CODING_ISO_INVOCATION (coding, 1) != reg)
3316 {
3317 /* Since the graphic register REG is not invoked to any graphic
3318 planes, invoke it to graphic plane 0. */
3319 switch (reg)
3320 {
3321 case 0: /* graphic register 0 */
3322 ENCODE_SHIFT_IN;
3323 break;
3324
3325 case 1: /* graphic register 1 */
3326 ENCODE_SHIFT_OUT;
3327 break;
3328
3329 case 2: /* graphic register 2 */
3330 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3331 ENCODE_SINGLE_SHIFT_2;
3332 else
3333 ENCODE_LOCKING_SHIFT_2;
3334 break;
3335
3336 case 3: /* graphic register 3 */
3337 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3338 ENCODE_SINGLE_SHIFT_3;
3339 else
3340 ENCODE_LOCKING_SHIFT_3;
3341 break;
3342 }
3343 }
3344
3345 *p_nchars = produced_chars;
3346 return dst;
3347}
3348
3349/* The following three macros produce codes for indicating direction
3350 of text. */
3351#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
3352 do { \
3353 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3354 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
3355 else \
3356 EMIT_ONE_BYTE (ISO_CODE_CSI); \
3357 } while (0)
3358
3359
3360#define ENCODE_DIRECTION_R2L() \
3361 do { \
3362 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3363 EMIT_TWO_ASCII_BYTES ('2', ']'); \
3364 } while (0)
3365
3366
3367#define ENCODE_DIRECTION_L2R() \
3368 do { \
3369 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3370 EMIT_TWO_ASCII_BYTES ('0', ']'); \
3371 } while (0)
3372
3373
3374/* Produce codes for designation and invocation to reset the graphic
3375 planes and registers to initial state. */
3376#define ENCODE_RESET_PLANE_AND_REGISTER() \
3377 do { \
3378 int reg; \
3379 struct charset *charset; \
3380 \
3381 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3382 ENCODE_SHIFT_IN; \
3383 for (reg = 0; reg < 4; reg++) \
3384 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3385 && (CODING_ISO_DESIGNATION (coding, reg) \
3386 != CODING_ISO_INITIAL (coding, reg))) \
3387 { \
3388 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3389 ENCODE_DESIGNATION (charset, reg, coding); \
3390 } \
3391 } while (0)
3392
3393
3394/* Produce designation sequences of charsets in the line started from
3395 SRC to a place pointed by DST, and return updated DST.
3396
3397 If the current block ends before any end-of-line, we may fail to
3398 find all the necessary designations. */
3399
3400static unsigned char *
3401encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3402 struct coding_system *coding;
3403 int *charbuf, *charbuf_end;
3404 unsigned char *dst;
3405{
3406 struct charset *charset;
3407 /* Table of charsets to be designated to each graphic register. */
3408 int r[4];
3409 int c, found = 0, reg;
3410 int produced_chars = 0;
3411 int multibytep = coding->dst_multibyte;
3412 Lisp_Object attrs;
3413 Lisp_Object charset_list;
3414
3415 attrs = CODING_ID_ATTRS (coding->id);
3416 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3417 if (EQ (charset_list, Qiso_2022))
3418 charset_list = Viso_2022_charset_list;
3419
3420 for (reg = 0; reg < 4; reg++)
3421 r[reg] = -1;
3422
3423 while (found < 4)
3424 {
3425 int id;
3426
3427 c = *charbuf++;
3428 if (c == '\n')
3429 break;
3430 charset = char_charset (c, charset_list, NULL);
3431 id = CHARSET_ID (charset);
3432 reg = CODING_ISO_REQUEST (coding, id);
3433 if (reg >= 0 && r[reg] < 0)
3434 {
3435 found++;
3436 r[reg] = id;
3437 }
3438 }
3439
3440 if (found)
3441 {
3442 for (reg = 0; reg < 4; reg++)
3443 if (r[reg] >= 0
3444 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3445 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3446 }
3447
3448 return dst;
3449}
3450
3451/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3452
3453static int
3454encode_coding_iso_2022 (coding)
3455 struct coding_system *coding;
3456{
3457 int multibytep = coding->dst_multibyte;
3458 int *charbuf = coding->charbuf;
3459 int *charbuf_end = charbuf + coding->charbuf_used;
3460 unsigned char *dst = coding->destination + coding->produced;
3461 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3462 int safe_room = 16;
3463 int bol_designation
3464 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3465 && CODING_ISO_BOL (coding));
3466 int produced_chars = 0;
3467 Lisp_Object attrs, eol_type, charset_list;
3468 int ascii_compatible;
3469 int c;
3470
3471 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3472 setup_iso_safe_charsets (attrs);
3473 coding->safe_charsets
3474 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3475
3476 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3477
3478 while (charbuf < charbuf_end)
3479 {
3480 ASSURE_DESTINATION (safe_room);
3481
3482 if (bol_designation)
3483 {
3484 unsigned char *dst_prev = dst;
3485
3486 /* We have to produce designation sequences if any now. */
3487 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3488 bol_designation = 0;
3489 /* We are sure that designation sequences are all ASCII bytes. */
3490 produced_chars += dst - dst_prev;
3491 }
3492
3493 c = *charbuf++;
3494
3495 /* Now encode the character C. */
3496 if (c < 0x20 || c == 0x7F)
3497 {
3498 if (c == '\n'
3499 || (c == '\r' && EQ (eol_type, Qmac)))
3500 {
3501 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3502 ENCODE_RESET_PLANE_AND_REGISTER ();
3503 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3504 {
3505 int i;
3506
3507 for (i = 0; i < 4; i++)
3508 CODING_ISO_DESIGNATION (coding, i)
3509 = CODING_ISO_INITIAL (coding, i);
3510 }
3511 bol_designation
3512 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3513 }
3514 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3515 ENCODE_RESET_PLANE_AND_REGISTER ();
3516 EMIT_ONE_ASCII_BYTE (c);
3517 }
3518 else if (ASCII_CHAR_P (c))
3519 {
3520 if (ascii_compatible)
3521 EMIT_ONE_ASCII_BYTE (c);
3522 else
3523 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii), c);
3524 }
3525 else
3526 {
3527 struct charset *charset = char_charset (c, charset_list, NULL);
3528
3529 if (!charset)
3530 {
3531 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3532 {
3533 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3534 charset = CHARSET_FROM_ID (charset_ascii);
3535 }
3536 else
3537 {
3538 c = coding->default_char;
3539 charset = char_charset (c, charset_list, NULL);
3540 }
3541 }
3542 ENCODE_ISO_CHARACTER (charset, c);
3543 }
3544 }
3545
3546 if (coding->mode & CODING_MODE_LAST_BLOCK
3547 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3548 {
3549 ASSURE_DESTINATION (safe_room);
3550 ENCODE_RESET_PLANE_AND_REGISTER ();
3551 }
3552 coding->result = CODING_RESULT_SUCCESS;
3553 CODING_ISO_BOL (coding) = bol_designation;
3554 coding->produced_char += produced_chars;
3555 coding->produced = dst - coding->destination;
3556 return 0;
3557}
3558
3559\f
3560/*** 8,9. SJIS and BIG5 handlers ***/
3561
3562/* Although SJIS and BIG5 are not ISO's coding system, they are used
3563 quite widely. So, for the moment, Emacs supports them in the bare
3564 C code. But, in the future, they may be supported only by CCL. */
3565
3566/* SJIS is a coding system encoding three character sets: ASCII, right
3567 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3568 as is. A character of charset katakana-jisx0201 is encoded by
3569 "position-code + 0x80". A character of charset japanese-jisx0208
3570 is encoded in 2-byte but two position-codes are divided and shifted
3571 so that it fit in the range below.
3572
3573 --- CODE RANGE of SJIS ---
3574 (character set) (range)
3575 ASCII 0x00 .. 0x7F
3576 KATAKANA-JISX0201 0xA0 .. 0xDF
3577 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3578 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3579 -------------------------------
3580
3581*/
3582
3583/* BIG5 is a coding system encoding two character sets: ASCII and
3584 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3585 character set and is encoded in two-byte.
3586
3587 --- CODE RANGE of BIG5 ---
3588 (character set) (range)
3589 ASCII 0x00 .. 0x7F
3590 Big5 (1st byte) 0xA1 .. 0xFE
3591 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3592 --------------------------
3593
3594 */
3595
3596/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3597 Check if a text is encoded in SJIS. If it is, return
3598 CATEGORY_MASK_SJIS, else return 0. */
3599
3600static int
3601detect_coding_sjis (coding, mask)
3602 struct coding_system *coding;
3603 int *mask;
3604{
3605 unsigned char *src = coding->source, *src_base = src;
3606 unsigned char *src_end = coding->source + coding->src_bytes;
3607 int multibytep = coding->src_multibyte;
3608 int consumed_chars = 0;
3609 int found = 0;
3610 int c;
3611
3612 /* A coding system of this category is always ASCII compatible. */
3613 src += coding->head_ascii;
3614
3615 while (1)
3616 {
3617 ONE_MORE_BYTE (c);
3618 if (c < 0x80)
3619 continue;
3620 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3621 {
3622 ONE_MORE_BYTE (c);
3623 if (c < 0x40 || c == 0x7F || c > 0xFC)
3624 break;
3625 found = 1;
3626 }
3627 else if (c >= 0xA0 && c < 0xE0)
3628 found = 1;
3629 else
3630 break;
3631 }
3632 *mask &= ~CATEGORY_MASK_SJIS;
3633 return 0;
3634
3635 no_more_source:
3636 if (!found)
3637 return 0;
3638 *mask &= CATEGORY_MASK_SJIS;
3639 return 1;
3640}
3641
3642/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3643 Check if a text is encoded in BIG5. If it is, return
3644 CATEGORY_MASK_BIG5, else return 0. */
3645
3646static int
3647detect_coding_big5 (coding, mask)
3648 struct coding_system *coding;
3649 int *mask;
3650{
3651 unsigned char *src = coding->source, *src_base = src;
3652 unsigned char *src_end = coding->source + coding->src_bytes;
3653 int multibytep = coding->src_multibyte;
3654 int consumed_chars = 0;
3655 int found = 0;
3656 int c;
3657
3658 /* A coding system of this category is always ASCII compatible. */
3659 src += coding->head_ascii;
3660
3661 while (1)
3662 {
3663 ONE_MORE_BYTE (c);
3664 if (c < 0x80)
3665 continue;
3666 if (c >= 0xA1)
3667 {
3668 ONE_MORE_BYTE (c);
3669 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3670 return 0;
3671 found = 1;
3672 }
3673 else
3674 break;
3675 }
3676 *mask &= ~CATEGORY_MASK_BIG5;
3677 return 0;
3678
3679 no_more_source:
3680 if (!found)
3681 return 0;
3682 *mask &= CATEGORY_MASK_BIG5;
3683 return 1;
3684}
3685
3686/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3687 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3688
3689static void
3690decode_coding_sjis (coding)
3691 struct coding_system *coding;
3692{
3693 unsigned char *src = coding->source + coding->consumed;
3694 unsigned char *src_end = coding->source + coding->src_bytes;
3695 unsigned char *src_base;
3696 int *charbuf = coding->charbuf;
3697 int *charbuf_end = charbuf + coding->charbuf_size;
3698 int consumed_chars = 0, consumed_chars_base;
3699 int multibytep = coding->src_multibyte;
3700 struct charset *charset_roman, *charset_kanji, *charset_kana;
3701 Lisp_Object attrs, eol_type, charset_list, val;
3702
3703 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3704
3705 val = charset_list;
3706 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3707 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3708 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
3709
3710 while (1)
3711 {
3712 int c, c1;
3713
3714 src_base = src;
3715 consumed_chars_base = consumed_chars;
3716
3717 if (charbuf >= charbuf_end)
3718 break;
3719
3720 ONE_MORE_BYTE (c);
3721
3722 if (c == '\r')
3723 {
3724 if (EQ (eol_type, Qdos))
3725 {
3726 if (src == src_end)
3727 goto no_more_source;
3728 if (*src == '\n')
3729 ONE_MORE_BYTE (c);
3730 }
3731 else if (EQ (eol_type, Qmac))
3732 c = '\n';
3733 }
3734 else
3735 {
3736 struct charset *charset;
3737
3738 if (c < 0x80)
3739 charset = charset_roman;
3740 else
3741 {
3742 if (c >= 0xF0)
3743 goto invalid_code;
3744 if (c < 0xA0 || c >= 0xE0)
3745 {
3746 /* SJIS -> JISX0208 */
3747 ONE_MORE_BYTE (c1);
3748 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3749 goto invalid_code;
3750 c = (c << 8) | c1;
3751 SJIS_TO_JIS (c);
3752 charset = charset_kanji;
3753 }
3754 else
3755 /* SJIS -> JISX0201-Kana */
3756 charset = charset_kana;
3757 }
3758 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3759 }
3760 *charbuf++ = c;
3761 continue;
3762
3763 invalid_code:
3764 src = src_base;
3765 consumed_chars = consumed_chars_base;
3766 ONE_MORE_BYTE (c);
3767 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3768 coding->errors++;
3769 }
3770
3771 no_more_source:
3772 coding->consumed_char += consumed_chars_base;
3773 coding->consumed = src_base - coding->source;
3774 coding->charbuf_used = charbuf - coding->charbuf;
3775}
3776
3777static void
3778decode_coding_big5 (coding)
3779 struct coding_system *coding;
3780{
3781 unsigned char *src = coding->source + coding->consumed;
3782 unsigned char *src_end = coding->source + coding->src_bytes;
3783 unsigned char *src_base;
3784 int *charbuf = coding->charbuf;
3785 int *charbuf_end = charbuf + coding->charbuf_size;
3786 int consumed_chars = 0, consumed_chars_base;
3787 int multibytep = coding->src_multibyte;
3788 struct charset *charset_roman, *charset_big5;
3789 Lisp_Object attrs, eol_type, charset_list, val;
3790
3791 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3792 val = charset_list;
3793 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3794 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3795
3796 while (1)
3797 {
3798 int c, c1;
3799
3800 src_base = src;
3801 consumed_chars_base = consumed_chars;
3802
3803 if (charbuf >= charbuf_end)
3804 break;
3805
3806 ONE_MORE_BYTE (c);
3807
3808 if (c == '\r')
3809 {
3810 if (EQ (eol_type, Qdos))
3811 {
3812 if (src == src_end)
3813 goto no_more_source;
3814 if (*src == '\n')
3815 ONE_MORE_BYTE (c);
3816 }
3817 else if (EQ (eol_type, Qmac))
3818 c = '\n';
3819 }
3820 else
3821 {
3822 struct charset *charset;
3823 if (c < 0x80)
3824 charset = charset_roman;
3825 else
3826 {
3827 /* BIG5 -> Big5 */
3828 if (c < 0xA1 || c > 0xFE)
3829 goto invalid_code;
3830 ONE_MORE_BYTE (c1);
3831 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3832 goto invalid_code;
3833 c = c << 8 | c1;
3834 charset = charset_big5;
3835 }
3836 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3837 }
3838
3839 *charbuf++ = c;
3840 continue;
3841
3842 invalid_code:
3843 src = src_base;
3844 consumed_chars = consumed_chars_base;
3845 ONE_MORE_BYTE (c);
3846 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3847 coding->errors++;
3848 }
3849
3850 no_more_source:
3851 coding->consumed_char += consumed_chars_base;
3852 coding->consumed = src_base - coding->source;
3853 coding->charbuf_used = charbuf - coding->charbuf;
3854}
3855
3856/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3857 This function can encode charsets `ascii', `katakana-jisx0201',
3858 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3859 are sure that all these charsets are registered as official charset
3860 (i.e. do not have extended leading-codes). Characters of other
3861 charsets are produced without any encoding. If SJIS_P is 1, encode
3862 SJIS text, else encode BIG5 text. */
3863
3864static int
3865encode_coding_sjis (coding)
3866 struct coding_system *coding;
3867{
3868 int multibytep = coding->dst_multibyte;
3869 int *charbuf = coding->charbuf;
3870 int *charbuf_end = charbuf + coding->charbuf_used;
3871 unsigned char *dst = coding->destination + coding->produced;
3872 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3873 int safe_room = 4;
3874 int produced_chars = 0;
3875 Lisp_Object attrs, eol_type, charset_list, val;
3876 int ascii_compatible;
3877 struct charset *charset_roman, *charset_kanji, *charset_kana;
3878 int c;
3879
3880 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3881 val = charset_list;
3882 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3883 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3884 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3885
3886 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3887
3888 while (charbuf < charbuf_end)
3889 {
3890 ASSURE_DESTINATION (safe_room);
3891 c = *charbuf++;
3892 /* Now encode the character C. */
3893 if (ASCII_CHAR_P (c) && ascii_compatible)
3894 EMIT_ONE_ASCII_BYTE (c);
3895 else
3896 {
3897 unsigned code;
3898 struct charset *charset = char_charset (c, charset_list, &code);
3899
3900 if (!charset)
3901 {
3902 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3903 {
3904 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3905 charset = CHARSET_FROM_ID (charset_ascii);
3906 }
3907 else
3908 {
3909 c = coding->default_char;
3910 charset = char_charset (c, charset_list, &code);
3911 }
3912 }
3913 if (code == CHARSET_INVALID_CODE (charset))
3914 abort ();
3915 if (charset == charset_kanji)
3916 {
3917 int c1, c2;
3918 JIS_TO_SJIS (code);
3919 c1 = code >> 8, c2 = code & 0xFF;
3920 EMIT_TWO_BYTES (c1, c2);
3921 }
3922 else if (charset == charset_kana)
3923 EMIT_ONE_BYTE (code | 0x80);
3924 else
3925 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3926 }
3927 }
3928 coding->result = CODING_RESULT_SUCCESS;
3929 coding->produced_char += produced_chars;
3930 coding->produced = dst - coding->destination;
3931 return 0;
3932}
3933
3934static int
3935encode_coding_big5 (coding)
3936 struct coding_system *coding;
3937{
3938 int multibytep = coding->dst_multibyte;
3939 int *charbuf = coding->charbuf;
3940 int *charbuf_end = charbuf + coding->charbuf_used;
3941 unsigned char *dst = coding->destination + coding->produced;
3942 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3943 int safe_room = 4;
3944 int produced_chars = 0;
3945 Lisp_Object attrs, eol_type, charset_list, val;
3946 int ascii_compatible;
3947 struct charset *charset_roman, *charset_big5;
3948 int c;
3949
3950 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3951 val = charset_list;
3952 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3953 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3954 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3955
3956 while (charbuf < charbuf_end)
3957 {
3958 ASSURE_DESTINATION (safe_room);
3959 c = *charbuf++;
3960 /* Now encode the character C. */
3961 if (ASCII_CHAR_P (c) && ascii_compatible)
3962 EMIT_ONE_ASCII_BYTE (c);
3963 else
3964 {
3965 unsigned code;
3966 struct charset *charset = char_charset (c, charset_list, &code);
3967
3968 if (! charset)
3969 {
3970 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3971 {
3972 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3973 charset = CHARSET_FROM_ID (charset_ascii);
3974 }
3975 else
3976 {
3977 c = coding->default_char;
3978 charset = char_charset (c, charset_list, &code);
3979 }
3980 }
3981 if (code == CHARSET_INVALID_CODE (charset))
3982 abort ();
3983 if (charset == charset_big5)
3984 {
3985 int c1, c2;
3986
3987 c1 = code >> 8, c2 = code & 0xFF;
3988 EMIT_TWO_BYTES (c1, c2);
3989 }
3990 else
3991 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3992 }
3993 }
3994 coding->result = CODING_RESULT_SUCCESS;
3995 coding->produced_char += produced_chars;
3996 coding->produced = dst - coding->destination;
3997 return 0;
3998}
3999
4000\f
4001/*** 10. CCL handlers ***/
4002
4003/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4004 Check if a text is encoded in a coding system of which
4005 encoder/decoder are written in CCL program. If it is, return
4006 CATEGORY_MASK_CCL, else return 0. */
4007
4008static int
4009detect_coding_ccl (coding, mask)
4010 struct coding_system *coding;
4011 int *mask;
4012{
4013 unsigned char *src = coding->source, *src_base = src;
4014 unsigned char *src_end = coding->source + coding->src_bytes;
4015 int multibytep = coding->src_multibyte;
4016 int consumed_chars = 0;
4017 int found = 0;
4018 unsigned char *valids = CODING_CCL_VALIDS (coding);
4019 int head_ascii = coding->head_ascii;
4020 Lisp_Object attrs;
4021
4022 coding = &coding_categories[coding_category_ccl];
4023 attrs = CODING_ID_ATTRS (coding->id);
4024 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4025 src += head_ascii;
4026
4027 while (1)
4028 {
4029 int c;
4030 ONE_MORE_BYTE (c);
4031 if (! valids[c])
4032 break;
4033 if (!found && valids[c] > 1)
4034 found = 1;
4035 }
4036 *mask &= ~CATEGORY_MASK_CCL;
4037 return 0;
4038
4039 no_more_source:
4040 if (!found)
4041 return 0;
4042 *mask &= CATEGORY_MASK_CCL;
4043 return 1;
4044}
4045
4046static void
4047decode_coding_ccl (coding)
4048 struct coding_system *coding;
4049{
4050 unsigned char *src = coding->source + coding->consumed;
4051 unsigned char *src_end = coding->source + coding->src_bytes;
4052 int *charbuf = coding->charbuf;
4053 int *charbuf_end = charbuf + coding->charbuf_size;
4054 int consumed_chars = 0;
4055 int multibytep = coding->src_multibyte;
4056 struct ccl_program ccl;
4057 int source_charbuf[1024];
4058 int source_byteidx[1024];
4059
4060 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4061
4062 while (src < src_end)
4063 {
4064 unsigned char *p = src;
4065 int *source, *source_end;
4066 int i = 0;
4067
4068 if (multibytep)
4069 while (i < 1024 && p < src_end)
4070 {
4071 source_byteidx[i] = p - src;
4072 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4073 }
4074 else
4075 while (i < 1024 && p < src_end)
4076 source_charbuf[i++] = *p++;
4077
4078 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4079 ccl.last_block = 1;
4080
4081 source = source_charbuf;
4082 source_end = source + i;
4083 while (source < source_end)
4084 {
4085 ccl_driver (&ccl, source, charbuf,
4086 source_end - source, charbuf_end - charbuf);
4087 source += ccl.consumed;
4088 charbuf += ccl.produced;
4089 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4090 break;
4091 }
4092 if (source < source_end)
4093 src += source_byteidx[source - source_charbuf];
4094 else
4095 src = p;
4096 consumed_chars += source - source_charbuf;
4097
4098 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4099 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4100 break;
4101 }
4102
4103 switch (ccl.status)
4104 {
4105 case CCL_STAT_SUSPEND_BY_SRC:
4106 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4107 break;
4108 case CCL_STAT_SUSPEND_BY_DST:
4109 break;
4110 case CCL_STAT_QUIT:
4111 case CCL_STAT_INVALID_CMD:
4112 coding->result = CODING_RESULT_INTERRUPT;
4113 break;
4114 default:
4115 coding->result = CODING_RESULT_SUCCESS;
4116 break;
4117 }
4118 coding->consumed_char += consumed_chars;
4119 coding->consumed = src - coding->source;
4120 coding->charbuf_used = charbuf - coding->charbuf;
4121}
4122
4123static int
4124encode_coding_ccl (coding)
4125 struct coding_system *coding;
4126{
4127 struct ccl_program ccl;
4128 int multibytep = coding->dst_multibyte;
4129 int *charbuf = coding->charbuf;
4130 int *charbuf_end = charbuf + coding->charbuf_used;
4131 unsigned char *dst = coding->destination + coding->produced;
4132 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4133 unsigned char *adjusted_dst_end = dst_end - 1;
4134 int destination_charbuf[1024];
4135 int i, produced_chars = 0;
4136
4137 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4138
4139 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4140 ccl.dst_multibyte = coding->dst_multibyte;
4141
4142 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4143 {
4144 int dst_bytes = dst_end - dst;
4145 if (dst_bytes > 1024)
4146 dst_bytes = 1024;
4147
4148 ccl_driver (&ccl, charbuf, destination_charbuf,
4149 charbuf_end - charbuf, dst_bytes);
4150 charbuf += ccl.consumed;
4151 if (multibytep)
4152 for (i = 0; i < ccl.produced; i++)
4153 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4154 else
4155 {
4156 for (i = 0; i < ccl.produced; i++)
4157 *dst++ = destination_charbuf[i] & 0xFF;
4158 produced_chars += ccl.produced;
4159 }
4160 }
4161
4162 switch (ccl.status)
4163 {
4164 case CCL_STAT_SUSPEND_BY_SRC:
4165 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4166 break;
4167 case CCL_STAT_SUSPEND_BY_DST:
4168 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4169 break;
4170 case CCL_STAT_QUIT:
4171 case CCL_STAT_INVALID_CMD:
4172 coding->result = CODING_RESULT_INTERRUPT;
4173 break;
4174 default:
4175 coding->result = CODING_RESULT_SUCCESS;
4176 break;
4177 }
4178
4179 coding->produced_char += produced_chars;
4180 coding->produced = dst - coding->destination;
4181 return 0;
4182}
4183
4184
4185\f
4186/*** 10, 11. no-conversion handlers ***/
4187
4188/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4189
4190static void
4191decode_coding_raw_text (coding)
4192 struct coding_system *coding;
4193{
4194 coding->chars_at_source = 1;
4195 coding->consumed_char = 0;
4196 coding->consumed = 0;
4197 coding->result = CODING_RESULT_SUCCESS;
4198}
4199
4200static int
4201encode_coding_raw_text (coding)
4202 struct coding_system *coding;
4203{
4204 int multibytep = coding->dst_multibyte;
4205 int *charbuf = coding->charbuf;
4206 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4207 unsigned char *dst = coding->destination + coding->produced;
4208 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4209 int produced_chars = 0;
4210 int c;
4211
4212 if (multibytep)
4213 {
4214 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4215
4216 if (coding->src_multibyte)
4217 while (charbuf < charbuf_end)
4218 {
4219 ASSURE_DESTINATION (safe_room);
4220 c = *charbuf++;
4221 if (ASCII_CHAR_P (c))
4222 EMIT_ONE_ASCII_BYTE (c);
4223 else if (CHAR_BYTE8_P (c))
4224 {
4225 c = CHAR_TO_BYTE8 (c);
4226 EMIT_ONE_BYTE (c);
4227 }
4228 else
4229 {
4230 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4231
4232 CHAR_STRING_ADVANCE (c, p1);
4233 while (p0 < p1)
4234 EMIT_ONE_BYTE (*p0);
4235 }
4236 }
4237 else
4238 while (charbuf < charbuf_end)
4239 {
4240 ASSURE_DESTINATION (safe_room);
4241 c = *charbuf++;
4242 EMIT_ONE_BYTE (c);
4243 }
4244 }
4245 else
4246 {
4247 if (coding->src_multibyte)
4248 {
4249 int safe_room = MAX_MULTIBYTE_LENGTH;
4250
4251 while (charbuf < charbuf_end)
4252 {
4253 ASSURE_DESTINATION (safe_room);
4254 c = *charbuf++;
4255 if (ASCII_CHAR_P (c))
4256 *dst++ = c;
4257 else if (CHAR_BYTE8_P (c))
4258 *dst++ = CHAR_TO_BYTE8 (c);
4259 else
4260 CHAR_STRING_ADVANCE (c, dst);
4261 produced_chars++;
4262 }
4263 }
4264 else
4265 {
4266 ASSURE_DESTINATION (charbuf_end - charbuf);
4267 while (charbuf < charbuf_end && dst < dst_end)
4268 *dst++ = *charbuf++;
4269 produced_chars = dst - (coding->destination + coding->dst_bytes);
4270 }
4271 }
4272 coding->result = CODING_RESULT_SUCCESS;
4273 coding->produced_char += produced_chars;
4274 coding->produced = dst - coding->destination;
4275 return 0;
4276}
4277
4278static int
4279detect_coding_charset (coding, mask)
4280 struct coding_system *coding;
4281 int *mask;
4282{
4283 unsigned char *src = coding->source, *src_base = src;
4284 unsigned char *src_end = coding->source + coding->src_bytes;
4285 int multibytep = coding->src_multibyte;
4286 int consumed_chars = 0;
4287 Lisp_Object attrs, valids;
4288
4289 coding = &coding_categories[coding_category_charset];
4290 attrs = CODING_ID_ATTRS (coding->id);
4291 valids = AREF (attrs, coding_attr_charset_valids);
4292
4293 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4294 src += coding->head_ascii;
4295
4296 while (1)
4297 {
4298 int c;
4299
4300 ONE_MORE_BYTE (c);
4301 if (NILP (AREF (valids, c)))
4302 break;
4303 }
4304 *mask &= ~CATEGORY_MASK_CHARSET;
4305 return 0;
4306
4307 no_more_source:
4308 *mask &= CATEGORY_MASK_CHARSET;
4309 return 1;
4310}
4311
4312static void
4313decode_coding_charset (coding)
4314 struct coding_system *coding;
4315{
4316 unsigned char *src = coding->source + coding->consumed;
4317 unsigned char *src_end = coding->source + coding->src_bytes;
4318 unsigned char *src_base;
4319 int *charbuf = coding->charbuf;
4320 int *charbuf_end = charbuf + coding->charbuf_size;
4321 int consumed_chars = 0, consumed_chars_base;
4322 int multibytep = coding->src_multibyte;
4323 Lisp_Object attrs, eol_type, charset_list, valids;
4324
4325 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4326 valids = AREF (attrs, coding_attr_charset_valids);
4327
4328 while (1)
4329 {
4330 int c;
4331
4332 src_base = src;
4333 consumed_chars_base = consumed_chars;
4334
4335 if (charbuf >= charbuf_end)
4336 break;
4337
4338 ONE_MORE_BYTE (c);
4339 if (c == '\r')
4340 {
4341 /* Here we assume that no charset maps '\r' to something
4342 else. */
4343 if (EQ (eol_type, Qdos))
4344 {
4345 if (src < src_end
4346 && *src == '\n')
4347 ONE_MORE_BYTE (c);
4348 }
4349 else if (EQ (eol_type, Qmac))
4350 c = '\n';
4351 }
4352 else
4353 {
4354 Lisp_Object val;
4355 struct charset *charset;
4356 int dim;
4357 int len = 1;
4358 unsigned code = c;
4359
4360 val = AREF (valids, c);
4361 if (NILP (val))
4362 goto invalid_code;
4363 if (INTEGERP (val))
4364 {
4365 charset = CHARSET_FROM_ID (XFASTINT (val));
4366 dim = CHARSET_DIMENSION (charset);
4367 while (len < dim)
4368 {
4369 ONE_MORE_BYTE (c);
4370 code = (code << 8) | c;
4371 len++;
4372 }
4373 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4374 charset, code, c);
4375 }
4376 else
4377 {
4378 /* VAL is a list of charset IDs. It is assured that the
4379 list is sorted by charset dimensions (smaller one
4380 comes first). */
4381 while (CONSP (val))
4382 {
4383 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4384 dim = CHARSET_DIMENSION (charset);
4385 while (len < dim)
4386 {
4387 ONE_MORE_BYTE (c);
4388 code = (code << 8) | c;
4389 len++;
4390 }
4391 CODING_DECODE_CHAR (coding, src, src_base,
4392 src_end, charset, code, c);
4393 if (c >= 0)
4394 break;
4395 val = XCDR (val);
4396 }
4397 }
4398 if (c < 0)
4399 goto invalid_code;
4400 }
4401 *charbuf++ = c;
4402 continue;
4403
4404 invalid_code:
4405 src = src_base;
4406 consumed_chars = consumed_chars_base;
4407 ONE_MORE_BYTE (c);
4408 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4409 coding->errors++;
4410 }
4411
4412 no_more_source:
4413 coding->consumed_char += consumed_chars_base;
4414 coding->consumed = src_base - coding->source;
4415 coding->charbuf_used = charbuf - coding->charbuf;
4416}
4417
4418static int
4419encode_coding_charset (coding)
4420 struct coding_system *coding;
4421{
4422 int multibytep = coding->dst_multibyte;
4423 int *charbuf = coding->charbuf;
4424 int *charbuf_end = charbuf + coding->charbuf_used;
4425 unsigned char *dst = coding->destination + coding->produced;
4426 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4427 int safe_room = MAX_MULTIBYTE_LENGTH;
4428 int produced_chars = 0;
4429 Lisp_Object attrs, eol_type, charset_list;
4430 int ascii_compatible;
4431 int c;
4432
4433 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4434 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4435
4436 while (charbuf < charbuf_end)
4437 {
4438 struct charset *charset;
4439 unsigned code;
4440
4441 ASSURE_DESTINATION (safe_room);
4442 c = *charbuf++;
4443 if (ascii_compatible && ASCII_CHAR_P (c))
4444 EMIT_ONE_ASCII_BYTE (c);
4445 else
4446 {
4447 charset = char_charset (c, charset_list, &code);
4448 if (charset)
4449 {
4450 if (CHARSET_DIMENSION (charset) == 1)
4451 EMIT_ONE_BYTE (code);
4452 else if (CHARSET_DIMENSION (charset) == 2)
4453 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4454 else if (CHARSET_DIMENSION (charset) == 3)
4455 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4456 else
4457 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4458 (code >> 8) & 0xFF, code & 0xFF);
4459 }
4460 else
4461 {
4462 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4463 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4464 else
4465 c = coding->default_char;
4466 EMIT_ONE_BYTE (c);
4467 }
4468 }
4469 }
4470
4471 coding->result = CODING_RESULT_SUCCESS;
4472 coding->produced_char += produced_chars;
4473 coding->produced = dst - coding->destination;
4474 return 0;
4475}
4476
4477\f
4478/*** 7. C library functions ***/
4479
4480/* In Emacs Lisp, coding system is represented by a Lisp symbol which
4481 has a property `coding-system'. The value of this property is a
4482 vector of length 5 (called as coding-vector). Among elements of
4483 this vector, the first (element[0]) and the fifth (element[4])
4484 carry important information for decoding/encoding. Before
4485 decoding/encoding, this information should be set in fields of a
4486 structure of type `coding_system'.
4487
4488 A value of property `coding-system' can be a symbol of another
4489 subsidiary coding-system. In that case, Emacs gets coding-vector
4490 from that symbol.
4491
4492 `element[0]' contains information to be set in `coding->type'. The
4493 value and its meaning is as follows:
4494
4495 0 -- coding_type_emacs_mule
4496 1 -- coding_type_sjis
4497 2 -- coding_type_iso_2022
4498 3 -- coding_type_big5
4499 4 -- coding_type_ccl encoder/decoder written in CCL
4500 nil -- coding_type_no_conversion
4501 t -- coding_type_undecided (automatic conversion on decoding,
4502 no-conversion on encoding)
4503
4504 `element[4]' contains information to be set in `coding->flags' and
4505 `coding->spec'. The meaning varies by `coding->type'.
4506
4507 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4508 of length 32 (of which the first 13 sub-elements are used now).
4509 Meanings of these sub-elements are:
4510
4511 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4512 If the value is an integer of valid charset, the charset is
4513 assumed to be designated to graphic register N initially.
4514
4515 If the value is minus, it is a minus value of charset which
4516 reserves graphic register N, which means that the charset is
4517 not designated initially but should be designated to graphic
4518 register N just before encoding a character in that charset.
4519
4520 If the value is nil, graphic register N is never used on
4521 encoding.
4522
4523 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4524 Each value takes t or nil. See the section ISO2022 of
4525 `coding.h' for more information.
4526
4527 If `coding->type' is `coding_type_big5', element[4] is t to denote
4528 BIG5-ETen or nil to denote BIG5-HKU.
4529
4530 If `coding->type' takes the other value, element[4] is ignored.
4531
4532 Emacs Lisp's coding system also carries information about format of
4533 end-of-line in a value of property `eol-type'. If the value is
4534 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4535 it is not integer, it should be a vector of subsidiary coding
4536 systems of which property `eol-type' has one of above values.
4537
4538*/
4539
4540/* Setup coding context CODING from information about CODING_SYSTEM.
4541 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4542 CODING_SYSTEM is invalid, signal an error. */
4543
4544void
4545setup_coding_system (coding_system, coding)
4546 Lisp_Object coding_system;
4547 struct coding_system *coding;
4548{
4549 Lisp_Object attrs;
4550 Lisp_Object eol_type;
4551 Lisp_Object coding_type;
4552 Lisp_Object val;
4553
4554 if (NILP (coding_system))
4555 coding_system = Qno_conversion;
4556
4557 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4558
4559 attrs = CODING_ID_ATTRS (coding->id);
4560 eol_type = CODING_ID_EOL_TYPE (coding->id);
4561
4562 coding->mode = 0;
4563 coding->head_ascii = -1;
4564 coding->common_flags
4565 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4566
4567 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4568 coding->max_charset_id = XSTRING (val)->size - 1;
4569 coding->safe_charsets = (char *) XSTRING (val)->data;
4570 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4571
4572 coding_type = CODING_ATTR_TYPE (attrs);
4573 if (EQ (coding_type, Qundecided))
4574 {
4575 coding->detector = NULL;
4576 coding->decoder = decode_coding_raw_text;
4577 coding->encoder = encode_coding_raw_text;
4578 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4579 }
4580 else if (EQ (coding_type, Qiso_2022))
4581 {
4582 int i;
4583 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4584
4585 /* Invoke graphic register 0 to plane 0. */
4586 CODING_ISO_INVOCATION (coding, 0) = 0;
4587 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4588 CODING_ISO_INVOCATION (coding, 1)
4589 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4590 /* Setup the initial status of designation. */
4591 for (i = 0; i < 4; i++)
4592 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4593 /* Not single shifting initially. */
4594 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4595 /* Beginning of buffer should also be regarded as bol. */
4596 CODING_ISO_BOL (coding) = 1;
4597 coding->detector = detect_coding_iso_2022;
4598 coding->decoder = decode_coding_iso_2022;
4599 coding->encoder = encode_coding_iso_2022;
4600 if (flags & CODING_ISO_FLAG_SAFE)
4601 coding->mode |= CODING_MODE_SAFE_ENCODING;
4602 coding->common_flags
4603 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4604 | CODING_REQUIRE_FLUSHING_MASK);
4605 if (flags & CODING_ISO_FLAG_COMPOSITION)
4606 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4607 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4608 {
4609 setup_iso_safe_charsets (attrs);
4610 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4611 coding->max_charset_id = XSTRING (val)->size - 1;
4612 coding->safe_charsets = (char *) XSTRING (val)->data;
4613 }
4614 CODING_ISO_FLAGS (coding) = flags;
4615 }
4616 else if (EQ (coding_type, Qcharset))
4617 {
4618 coding->detector = detect_coding_charset;
4619 coding->decoder = decode_coding_charset;
4620 coding->encoder = encode_coding_charset;
4621 coding->common_flags
4622 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4623 }
4624 else if (EQ (coding_type, Qutf_8))
4625 {
4626 coding->detector = detect_coding_utf_8;
4627 coding->decoder = decode_coding_utf_8;
4628 coding->encoder = encode_coding_utf_8;
4629 coding->common_flags
4630 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4631 }
4632 else if (EQ (coding_type, Qutf_16))
4633 {
4634 val = AREF (attrs, coding_attr_utf_16_bom);
4635 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4636 : EQ (val, Qt) ? utf_16_with_bom
4637 : utf_16_without_bom);
4638 val = AREF (attrs, coding_attr_utf_16_endian);
4639 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4640 : utf_16_little_endian);
4641 CODING_UTF_16_SURROGATE (coding) = 0;
4642 coding->detector = detect_coding_utf_16;
4643 coding->decoder = decode_coding_utf_16;
4644 coding->encoder = encode_coding_utf_16;
4645 coding->common_flags
4646 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4647 }
4648 else if (EQ (coding_type, Qccl))
4649 {
4650 coding->detector = detect_coding_ccl;
4651 coding->decoder = decode_coding_ccl;
4652 coding->encoder = encode_coding_ccl;
4653 coding->common_flags
4654 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4655 | CODING_REQUIRE_FLUSHING_MASK);
4656 }
4657 else if (EQ (coding_type, Qemacs_mule))
4658 {
4659 coding->detector = detect_coding_emacs_mule;
4660 coding->decoder = decode_coding_emacs_mule;
4661 coding->encoder = encode_coding_emacs_mule;
4662 coding->common_flags
4663 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4664 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4665 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4666 {
4667 Lisp_Object tail, safe_charsets;
4668 int max_charset_id = 0;
4669
4670 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4671 tail = XCDR (tail))
4672 if (max_charset_id < XFASTINT (XCAR (tail)))
4673 max_charset_id = XFASTINT (XCAR (tail));
4674 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4675 make_number (255));
4676 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4677 tail = XCDR (tail))
4678 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4679 coding->max_charset_id = max_charset_id;
4680 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4681 }
4682 }
4683 else if (EQ (coding_type, Qshift_jis))
4684 {
4685 coding->detector = detect_coding_sjis;
4686 coding->decoder = decode_coding_sjis;
4687 coding->encoder = encode_coding_sjis;
4688 coding->common_flags
4689 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4690 }
4691 else if (EQ (coding_type, Qbig5))
4692 {
4693 coding->detector = detect_coding_big5;
4694 coding->decoder = decode_coding_big5;
4695 coding->encoder = encode_coding_big5;
4696 coding->common_flags
4697 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4698 }
4699 else /* EQ (coding_type, Qraw_text) */
4700 {
4701 coding->detector = NULL;
4702 coding->decoder = decode_coding_raw_text;
4703 coding->encoder = encode_coding_raw_text;
4704 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4705 }
4706
4707 return;
4708}
4709
4710/* Return raw-text or one of its subsidiaries that has the same
4711 eol_type as CODING-SYSTEM. */
4712
4713Lisp_Object
4714raw_text_coding_system (coding_system)
4715 Lisp_Object coding_system;
4716{
4717 Lisp_Object spec, attrs;
4718 Lisp_Object eol_type, raw_text_eol_type;
4719
4720 spec = CODING_SYSTEM_SPEC (coding_system);
4721 attrs = AREF (spec, 0);
4722
4723 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4724 return coding_system;
4725
4726 eol_type = AREF (spec, 2);
4727 if (VECTORP (eol_type))
4728 return Qraw_text;
4729 spec = CODING_SYSTEM_SPEC (Qraw_text);
4730 raw_text_eol_type = AREF (spec, 2);
4731 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4732 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4733 : AREF (raw_text_eol_type, 2));
4734}
4735
4736
4737/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4738 does, return one of the subsidiary that has the same eol-spec as
4739 PARENT. Otherwise, return CODING_SYSTEM. */
4740
4741Lisp_Object
4742coding_inherit_eol_type (coding_system, parent)
4743 Lisp_Object coding_system, parent;
4744{
4745 Lisp_Object spec, attrs, eol_type;
4746
4747 spec = CODING_SYSTEM_SPEC (coding_system);
4748 attrs = AREF (spec, 0);
4749 eol_type = AREF (spec, 2);
4750 if (VECTORP (eol_type))
4751 {
4752 Lisp_Object parent_spec;
4753 Lisp_Object parent_eol_type;
4754
4755 parent_spec
4756 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4757 parent_eol_type = AREF (parent_spec, 2);
4758 if (EQ (parent_eol_type, Qunix))
4759 coding_system = AREF (eol_type, 0);
4760 else if (EQ (parent_eol_type, Qdos))
4761 coding_system = AREF (eol_type, 1);
4762 else if (EQ (parent_eol_type, Qmac))
4763 coding_system = AREF (eol_type, 2);
4764 }
4765 return coding_system;
4766}
4767
4768/* Emacs has a mechanism to automatically detect a coding system if it
4769 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4770 it's impossible to distinguish some coding systems accurately
4771 because they use the same range of codes. So, at first, coding
4772 systems are categorized into 7, those are:
4773
4774 o coding-category-emacs-mule
4775
4776 The category for a coding system which has the same code range
4777 as Emacs' internal format. Assigned the coding-system (Lisp
4778 symbol) `emacs-mule' by default.
4779
4780 o coding-category-sjis
4781
4782 The category for a coding system which has the same code range
4783 as SJIS. Assigned the coding-system (Lisp
4784 symbol) `japanese-shift-jis' by default.
4785
4786 o coding-category-iso-7
4787
4788 The category for a coding system which has the same code range
4789 as ISO2022 of 7-bit environment. This doesn't use any locking
4790 shift and single shift functions. This can encode/decode all
4791 charsets. Assigned the coding-system (Lisp symbol)
4792 `iso-2022-7bit' by default.
4793
4794 o coding-category-iso-7-tight
4795
4796 Same as coding-category-iso-7 except that this can
4797 encode/decode only the specified charsets.
4798
4799 o coding-category-iso-8-1
4800
4801 The category for a coding system which has the same code range
4802 as ISO2022 of 8-bit environment and graphic plane 1 used only
4803 for DIMENSION1 charset. This doesn't use any locking shift
4804 and single shift functions. Assigned the coding-system (Lisp
4805 symbol) `iso-latin-1' by default.
4806
4807 o coding-category-iso-8-2
4808
4809 The category for a coding system which has the same code range
4810 as ISO2022 of 8-bit environment and graphic plane 1 used only
4811 for DIMENSION2 charset. This doesn't use any locking shift
4812 and single shift functions. Assigned the coding-system (Lisp
4813 symbol) `japanese-iso-8bit' by default.
4814
4815 o coding-category-iso-7-else
4816
4817 The category for a coding system which has the same code range
4818 as ISO2022 of 7-bit environemnt but uses locking shift or
4819 single shift functions. Assigned the coding-system (Lisp
4820 symbol) `iso-2022-7bit-lock' by default.
4821
4822 o coding-category-iso-8-else
4823
4824 The category for a coding system which has the same code range
4825 as ISO2022 of 8-bit environemnt but uses locking shift or
4826 single shift functions. Assigned the coding-system (Lisp
4827 symbol) `iso-2022-8bit-ss2' by default.
4828
4829 o coding-category-big5
4830
4831 The category for a coding system which has the same code range
4832 as BIG5. Assigned the coding-system (Lisp symbol)
4833 `cn-big5' by default.
4834
4835 o coding-category-utf-8
4836
4837 The category for a coding system which has the same code range
4838 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4839 symbol) `utf-8' by default.
4840
4841 o coding-category-utf-16-be
4842
4843 The category for a coding system in which a text has an
4844 Unicode signature (cf. Unicode Standard) in the order of BIG
4845 endian at the head. Assigned the coding-system (Lisp symbol)
4846 `utf-16-be' by default.
4847
4848 o coding-category-utf-16-le
4849
4850 The category for a coding system in which a text has an
4851 Unicode signature (cf. Unicode Standard) in the order of
4852 LITTLE endian at the head. Assigned the coding-system (Lisp
4853 symbol) `utf-16-le' by default.
4854
4855 o coding-category-ccl
4856
4857 The category for a coding system of which encoder/decoder is
4858 written in CCL programs. The default value is nil, i.e., no
4859 coding system is assigned.
4860
4861 o coding-category-binary
4862
4863 The category for a coding system not categorized in any of the
4864 above. Assigned the coding-system (Lisp symbol)
4865 `no-conversion' by default.
4866
4867 Each of them is a Lisp symbol and the value is an actual
4868 `coding-system's (this is also a Lisp symbol) assigned by a user.
4869 What Emacs does actually is to detect a category of coding system.
4870 Then, it uses a `coding-system' assigned to it. If Emacs can't
4871 decide only one possible category, it selects a category of the
4872 highest priority. Priorities of categories are also specified by a
4873 user in a Lisp variable `coding-category-list'.
4874
4875*/
4876
4877#define EOL_SEEN_NONE 0
4878#define EOL_SEEN_LF 1
4879#define EOL_SEEN_CR 2
4880#define EOL_SEEN_CRLF 4
4881
4882/* Detect how end-of-line of a text of length CODING->src_bytes
4883 pointed by CODING->source is encoded. Return one of
4884 EOL_SEEN_XXX. */
4885
4886#define MAX_EOL_CHECK_COUNT 3
4887
4888static int
4889detect_eol (coding, source, src_bytes)
4890 struct coding_system *coding;
4891 unsigned char *source;
4892 EMACS_INT src_bytes;
4893{
4894 Lisp_Object attrs, coding_type;
4895 unsigned char *src = source, *src_end = src + src_bytes;
4896 unsigned char c;
4897 int total = 0;
4898 int eol_seen = EOL_SEEN_NONE;
4899
4900 attrs = CODING_ID_ATTRS (coding->id);
4901 coding_type = CODING_ATTR_TYPE (attrs);
4902
4903 if (EQ (coding_type, Qccl))
4904 {
4905 int msb, lsb;
4906
4907 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4908 lsb = 1 - msb;
4909
4910 while (src + 1 < src_end)
4911 {
4912 c = src[lsb];
4913 if (src[msb] == 0 && (c == '\n' || c == '\r'))
4914 {
4915 int this_eol;
4916
4917 if (c == '\n')
4918 this_eol = EOL_SEEN_LF;
4919 else if (src + 3 >= src_end
4920 || src[msb + 2] != 0
4921 || src[lsb + 2] != '\n')
4922 this_eol = EOL_SEEN_CR;
4923 else
4924 this_eol = EOL_SEEN_CRLF;
4925
4926 if (eol_seen == EOL_SEEN_NONE)
4927 /* This is the first end-of-line. */
4928 eol_seen = this_eol;
4929 else if (eol_seen != this_eol)
4930 {
4931 /* The found type is different from what found before. */
4932 eol_seen = EOL_SEEN_LF;
4933 break;
4934 }
4935 if (++total == MAX_EOL_CHECK_COUNT)
4936 break;
4937 }
4938 src += 2;
4939 }
4940 }
4941 else
4942 {
4943 while (src < src_end)
4944 {
4945 c = *src++;
4946 if (c == '\n' || c == '\r')
4947 {
4948 int this_eol;
4949
4950 if (c == '\n')
4951 this_eol = EOL_SEEN_LF;
4952 else if (src >= src_end || *src != '\n')
4953 this_eol = EOL_SEEN_CR;
4954 else
4955 this_eol = EOL_SEEN_CRLF, src++;
4956
4957 if (eol_seen == EOL_SEEN_NONE)
4958 /* This is the first end-of-line. */
4959 eol_seen = this_eol;
4960 else if (eol_seen != this_eol)
4961 {
4962 /* The found type is different from what found before. */
4963 eol_seen = EOL_SEEN_LF;
4964 break;
4965 }
4966 if (++total == MAX_EOL_CHECK_COUNT)
4967 break;
4968 }
4969 }
4970 }
4971 return eol_seen;
4972}
4973
4974
4975static void
4976adjust_coding_eol_type (coding, eol_seen)
4977 struct coding_system *coding;
4978 int eol_seen;
4979{
4980 Lisp_Object eol_type;
4981
4982 eol_type = CODING_ID_EOL_TYPE (coding->id);
4983 if (eol_seen & EOL_SEEN_LF)
4984 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4985 else if (eol_type & EOL_SEEN_CRLF)
4986 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4987 else if (eol_type & EOL_SEEN_CR)
4988 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
4989}
4990
4991/* Detect how a text specified in CODING is encoded. If a coding
4992 system is detected, update fields of CODING by the detected coding
4993 system. */
4994
4995void
4996detect_coding (coding)
4997 struct coding_system *coding;
4998{
4999 unsigned char *src, *src_end;
5000 Lisp_Object attrs, coding_type;
5001
5002 coding->consumed = coding->consumed_char = 0;
5003 coding->produced = coding->produced_char = 0;
5004 coding_set_source (coding);
5005
5006 src_end = coding->source + coding->src_bytes;
5007
5008 /* If we have not yet decided the text encoding type, detect it
5009 now. */
5010 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5011 {
5012 int mask = CATEGORY_MASK_ANY;
5013 int c, i;
5014
5015 for (src = coding->source; src < src_end; src++)
5016 {
5017 c = *src;
5018 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5019 || c == ISO_CODE_SI
5020 || c == ISO_CODE_SO)))
5021 break;
5022 }
5023 coding->head_ascii = src - (coding->source + coding->consumed);
5024
5025 if (coding->head_ascii < coding->src_bytes)
5026 {
5027 int detected = 0;
5028
5029 for (i = 0; i < coding_category_raw_text; i++)
5030 {
5031 enum coding_category category = coding_priorities[i];
5032 struct coding_system *this = coding_categories + category;
5033
5034 if (category >= coding_category_raw_text
5035 || detected & (1 << category))
5036 continue;
5037
5038 if (this->id < 0)
5039 {
5040 /* No coding system of this category is defined. */
5041 mask &= ~(1 << category);
5042 }
5043 else
5044 {
5045 detected |= detected_mask[category];
5046 if ((*(this->detector)) (coding, &mask))
5047 break;
5048 }
5049 }
5050 if (! mask)
5051 setup_coding_system (Qraw_text, coding);
5052 else if (mask != CATEGORY_MASK_ANY)
5053 for (i = 0; i < coding_category_raw_text; i++)
5054 {
5055 enum coding_category category = coding_priorities[i];
5056 struct coding_system *this = coding_categories + category;
5057
5058 if (mask & (1 << category))
5059 {
5060 setup_coding_system (CODING_ID_NAME (this->id), coding);
5061 break;
5062 }
5063 }
5064 }
5065 }
5066
5067 attrs = CODING_ID_ATTRS (coding->id);
5068 coding_type = CODING_ATTR_TYPE (attrs);
5069
5070 /* If we have not yet decided the EOL type, detect it now. But, the
5071 detection is impossible for a CCL based coding system, in which
5072 case, we detct the EOL type after decoding. */
5073 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5074 && ! EQ (coding_type, Qccl))
5075 {
5076 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5077
5078 if (eol_seen != EOL_SEEN_NONE)
5079 adjust_coding_eol_type (coding, eol_seen);
5080 }
5081}
5082
5083
5084static void
5085decode_eol (coding)
5086 struct coding_system *coding;
5087{
5088 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5089 {
5090 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5091 unsigned char *pend = p + coding->produced;
5092 int eol_seen = EOL_SEEN_NONE;
5093
5094 for (; p < pend; p++)
5095 {
5096 if (*p == '\n')
5097 eol_seen |= EOL_SEEN_LF;
5098 else if (*p == '\r')
5099 {
5100 if (p + 1 < pend && *(p + 1) == '\n')
5101 {
5102 eol_seen |= EOL_SEEN_CRLF;
5103 p++;
5104 }
5105 else
5106 eol_seen |= EOL_SEEN_CR;
5107 }
5108 }
5109 if (eol_seen != EOL_SEEN_NONE)
5110 adjust_coding_eol_type (coding, eol_seen);
5111 }
5112
5113 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5114 {
5115 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5116 unsigned char *pend = p + coding->produced;
5117
5118 for (; p < pend; p++)
5119 if (*p == '\r')
5120 *p = '\n';
5121 }
5122 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5123 {
5124 unsigned char *p, *pbeg, *pend;
5125 Lisp_Object undo_list;
5126
5127 move_gap_both (coding->dst_pos + coding->produced_char,
5128 coding->dst_pos_byte + coding->produced);
5129 undo_list = current_buffer->undo_list;
5130 current_buffer->undo_list = Qt;
5131 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5132 current_buffer->undo_list = undo_list;
5133 pbeg = GPT_ADDR;
5134 pend = pbeg + coding->produced;
5135
5136 for (p = pend - 1; p >= pbeg; p--)
5137 if (*p == '\r')
5138 {
5139 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5140 pend--;
5141 }
5142 coding->produced_char -= coding->produced - (pend - pbeg);
5143 coding->produced = pend - pbeg;
5144 insert_from_gap (coding->produced_char, coding->produced);
5145 }
5146}
5147
5148static void
5149translate_chars (coding, table)
5150 struct coding_system *coding;
5151 Lisp_Object table;
5152{
5153 int *charbuf = coding->charbuf;
5154 int *charbuf_end = charbuf + coding->charbuf_used;
5155 int c;
5156
5157 if (coding->chars_at_source)
5158 return;
5159
5160 while (charbuf < charbuf_end)
5161 {
5162 c = *charbuf;
5163 if (c < 0)
5164 charbuf += c;
5165 else
5166 *charbuf++ = translate_char (table, c);
5167 }
5168}
5169
5170static int
5171produce_chars (coding)
5172 struct coding_system *coding;
5173{
5174 unsigned char *dst = coding->destination + coding->produced;
5175 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5176 int produced;
5177 int produced_chars = 0;
5178
5179 if (! coding->chars_at_source)
5180 {
5181 /* Characters are in coding->charbuf. */
5182 int *buf = coding->charbuf;
5183 int *buf_end = buf + coding->charbuf_used;
5184 unsigned char *adjusted_dst_end;
5185
5186 if (BUFFERP (coding->src_object)
5187 && EQ (coding->src_object, coding->dst_object))
5188 dst_end = coding->source + coding->consumed;
5189 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5190
5191 while (buf < buf_end)
5192 {
5193 int c = *buf++;
5194
5195 if (dst >= adjusted_dst_end)
5196 {
5197 dst = alloc_destination (coding,
5198 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5199 dst);
5200 dst_end = coding->destination + coding->dst_bytes;
5201 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5202 }
5203 if (c >= 0)
5204 {
5205 if (coding->dst_multibyte
5206 || ! CHAR_BYTE8_P (c))
5207 CHAR_STRING_ADVANCE (c, dst);
5208 else
5209 *dst++ = CHAR_TO_BYTE8 (c);
5210 produced_chars++;
5211 }
5212 else
5213 /* This is an annotation data. */
5214 buf -= c + 1;
5215 }
5216 }
5217 else
5218 {
5219 unsigned char *src = coding->source;
5220 unsigned char *src_end = src + coding->src_bytes;
5221 Lisp_Object eol_type;
5222
5223 eol_type = CODING_ID_EOL_TYPE (coding->id);
5224
5225 if (coding->src_multibyte != coding->dst_multibyte)
5226 {
5227 if (coding->src_multibyte)
5228 {
5229 int multibytep = 1;
5230 int consumed_chars;
5231
5232 while (1)
5233 {
5234 unsigned char *src_base = src;
5235 int c;
5236
5237 ONE_MORE_BYTE (c);
5238 if (c == '\r')
5239 {
5240 if (EQ (eol_type, Qdos))
5241 {
5242 if (src < src_end
5243 && *src == '\n')
5244 c = *src++;
5245 }
5246 else if (EQ (eol_type, Qmac))
5247 c = '\n';
5248 }
5249 if (dst == dst_end)
5250 {
5251 coding->consumed = src - coding->source;
5252
5253 if (EQ (coding->src_object, coding->dst_object))
5254 dst_end = src;
5255 if (dst == dst_end)
5256 {
5257 dst = alloc_destination (coding, src_end - src + 1,
5258 dst);
5259 dst_end = coding->destination + coding->dst_bytes;
5260 coding_set_source (coding);
5261 src = coding->source + coding->consumed;
5262 src_end = coding->source + coding->src_bytes;
5263 }
5264 }
5265 *dst++ = c;
5266 produced_chars++;
5267 }
5268 no_more_source:
5269 ;
5270 }
5271 else
5272 while (src < src_end)
5273 {
5274 int multibytep = 1;
5275 int c = *src++;
5276
5277 if (c == '\r')
5278 {
5279 if (EQ (eol_type, Qdos))
5280 {
5281 if (src < src_end
5282 && *src == '\n')
5283 c = *src++;
5284 }
5285 else if (EQ (eol_type, Qmac))
5286 c = '\n';
5287 }
5288 if (dst >= dst_end - 1)
5289 {
5290 coding->consumed = src - coding->source;
5291
5292 if (EQ (coding->src_object, coding->dst_object))
5293 dst_end = src;
5294 if (dst >= dst_end - 1)
5295 {
5296 dst = alloc_destination (coding, src_end - src + 2,
5297 dst);
5298 dst_end = coding->destination + coding->dst_bytes;
5299 coding_set_source (coding);
5300 src = coding->source + coding->consumed;
5301 src_end = coding->source + coding->src_bytes;
5302 }
5303 }
5304 EMIT_ONE_BYTE (c);
5305 }
5306 }
5307 else
5308 {
5309 if (!EQ (coding->src_object, coding->dst_object))
5310 {
5311 int require = coding->src_bytes - coding->dst_bytes;
5312
5313 if (require > 0)
5314 {
5315 EMACS_INT offset = src - coding->source;
5316
5317 dst = alloc_destination (coding, require, dst);
5318 coding_set_source (coding);
5319 src = coding->source + offset;
5320 src_end = coding->source + coding->src_bytes;
5321 }
5322 }
5323 produced_chars = coding->src_chars;
5324 while (src < src_end)
5325 {
5326 int c = *src++;
5327
5328 if (c == '\r')
5329 {
5330 if (EQ (eol_type, Qdos))
5331 {
5332 if (src < src_end
5333 && *src == '\n')
5334 c = *src++;
5335 produced_chars--;
5336 }
5337 else if (EQ (eol_type, Qmac))
5338 c = '\n';
5339 }
5340 *dst++ = c;
5341 }
5342 }
5343 coding->consumed = coding->src_bytes;
5344 coding->consumed_char = coding->src_chars;
5345 }
5346
5347 produced = dst - (coding->destination + coding->produced);
5348 if (BUFFERP (coding->dst_object))
5349 insert_from_gap (produced_chars, produced);
5350 coding->produced += produced;
5351 coding->produced_char += produced_chars;
5352 return produced_chars;
5353}
5354
5355/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5356 or
5357 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5358 */
5359
5360static INLINE void
5361produce_composition (coding, charbuf)
5362 struct coding_system *coding;
5363 int *charbuf;
5364{
5365 Lisp_Object buffer;
5366 int len;
5367 EMACS_INT pos;
5368 enum composition_method method;
5369 int cmp_len;
5370 Lisp_Object components;
5371
5372 buffer = coding->dst_object;
5373 len = -charbuf[0];
5374 pos = coding->dst_pos + charbuf[1];
5375 method = (enum composition_method) (charbuf[3]);
5376 cmp_len = charbuf[4];
5377
5378 if (method == COMPOSITION_RELATIVE)
5379 components = Qnil;
5380 else
5381 {
5382 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5383 int i;
5384
5385 len -= 5;
5386 charbuf += 5;
5387 for (i = 0; i < len; i++)
5388 args[i] = make_number (charbuf[i]);
5389 components = (method == COMPOSITION_WITH_ALTCHARS
5390 ? Fstring (len, args) : Fvector (len, args));
5391 }
5392 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5393}
5394
5395static int *
5396save_composition_data (buf, buf_end, prop)
5397 int *buf, *buf_end;
5398 Lisp_Object prop;
5399{
5400 enum composition_method method = COMPOSITION_METHOD (prop);
5401 int cmp_len = COMPOSITION_LENGTH (prop);
5402
5403 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5404 return NULL;
5405
5406 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5407 buf[2] = method;
5408 buf[3] = cmp_len;
5409
5410 if (method == COMPOSITION_RELATIVE)
5411 buf[0] = 4;
5412 else
5413 {
5414 Lisp_Object components;
5415 int len, i;
5416
5417 components = COMPOSITION_COMPONENTS (prop);
5418 if (VECTORP (components))
5419 {
5420 len = XVECTOR (components)->size;
5421 for (i = 0; i < len; i++)
5422 buf[4 + i] = XINT (AREF (components, i));
5423 }
5424 else if (STRINGP (components))
5425 {
5426 int i_byte;
5427
5428 len = XSTRING (components)->size;
5429 i = i_byte = 0;
5430 while (i < len)
5431 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5432 }
5433 else if (INTEGERP (components))
5434 {
5435 len = 1;
5436 buf[4] = XINT (components);
5437 }
5438 else if (CONSP (components))
5439 {
5440 for (len = 0; CONSP (components);
5441 len++, components = XCDR (components))
5442 buf[4 + len] = XINT (XCAR (components));
5443 }
5444 else
5445 abort ();
5446 buf[0] = 4 + len;
5447 }
5448 return (buf + buf[0]);
5449}
5450
5451#define CHARBUF_SIZE 0x4000
5452
5453#define ALLOC_CONVERSION_WORK_AREA(coding) \
5454 do { \
5455 int size = CHARBUF_SIZE;; \
5456 \
5457 coding->charbuf = NULL; \
5458 while (size > 1024) \
5459 { \
5460 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5461 if (coding->charbuf) \
5462 break; \
5463 size >>= 1; \
5464 } \
5465 if (! coding->charbuf) \
5466 { \
5467 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5468 return coding->result; \
5469 } \
5470 coding->charbuf_size = size; \
5471 } while (0)
5472
5473
5474static void
5475produce_annotation (coding)
5476 struct coding_system *coding;
5477{
5478 int *charbuf = coding->charbuf;
5479 int *charbuf_end = charbuf + coding->charbuf_used;
5480
5481 while (charbuf < charbuf_end)
5482 {
5483 if (*charbuf >= 0)
5484 charbuf++;
5485 else
5486 {
5487 int len = -*charbuf;
5488 switch (charbuf[2])
5489 {
5490 case CODING_ANNOTATE_COMPOSITION_MASK:
5491 produce_composition (coding, charbuf);
5492 break;
5493 default:
5494 abort ();
5495 }
5496 charbuf += len;
5497 }
5498 }
5499}
5500
5501/* Decode the data at CODING->src_object into CODING->dst_object.
5502 CODING->src_object is a buffer, a string, or nil.
5503 CODING->dst_object is a buffer.
5504
5505 If CODING->src_object is a buffer, it must be the current buffer.
5506 In this case, if CODING->src_pos is positive, it is a position of
5507 the source text in the buffer, otherwise, the source text is in the
5508 gap area of the buffer, and CODING->src_pos specifies the offset of
5509 the text from GPT (which must be the same as PT). If this is the
5510 same buffer as CODING->dst_object, CODING->src_pos must be
5511 negative.
5512
5513 If CODING->src_object is a string, CODING->src_pos in an index to
5514 that string.
5515
5516 If CODING->src_object is nil, CODING->source must already point to
5517 the non-relocatable memory area. In this case, CODING->src_pos is
5518 an offset from CODING->source.
5519
5520 The decoded data is inserted at the current point of the buffer
5521 CODING->dst_object.
5522*/
5523
5524static int
5525decode_coding (coding)
5526 struct coding_system *coding;
5527{
5528 Lisp_Object attrs;
5529
5530 if (BUFFERP (coding->src_object)
5531 && coding->src_pos > 0
5532 && coding->src_pos < GPT
5533 && coding->src_pos + coding->src_chars > GPT)
5534 move_gap_both (coding->src_pos, coding->src_pos_byte);
5535
5536 if (BUFFERP (coding->dst_object))
5537 {
5538 if (current_buffer != XBUFFER (coding->dst_object))
5539 set_buffer_internal (XBUFFER (coding->dst_object));
5540 if (GPT != PT)
5541 move_gap_both (PT, PT_BYTE);
5542 }
5543
5544 coding->consumed = coding->consumed_char = 0;
5545 coding->produced = coding->produced_char = 0;
5546 coding->chars_at_source = 0;
5547 coding->result = CODING_RESULT_SUCCESS;
5548 coding->errors = 0;
5549
5550 ALLOC_CONVERSION_WORK_AREA (coding);
5551
5552 attrs = CODING_ID_ATTRS (coding->id);
5553
5554 do
5555 {
5556 coding_set_source (coding);
5557 coding->annotated = 0;
5558 (*(coding->decoder)) (coding);
5559 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5560 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5561 coding_set_destination (coding);
5562 produce_chars (coding);
5563 if (coding->annotated)
5564 produce_annotation (coding);
5565 }
5566 while (coding->consumed < coding->src_bytes
5567 && ! coding->result);
5568
5569 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5570 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5571 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5572 decode_eol (coding);
5573
5574 coding->carryover_bytes = 0;
5575 if (coding->consumed < coding->src_bytes)
5576 {
5577 int nbytes = coding->src_bytes - coding->consumed;
5578 unsigned char *src;
5579
5580 coding_set_source (coding);
5581 coding_set_destination (coding);
5582 src = coding->source + coding->consumed;
5583
5584 if (coding->mode & CODING_MODE_LAST_BLOCK)
5585 {
5586 /* Flush out unprocessed data as binary chars. We are sure
5587 that the number of data is less than the size of
5588 coding->charbuf. */
5589 int *charbuf = coding->charbuf;
5590
5591 while (nbytes-- > 0)
5592 {
5593 int c = *src++;
5594 *charbuf++ = (c & 0x80 ? - c : c);
5595 }
5596 produce_chars (coding);
5597 }
5598 else
5599 {
5600 /* Record unprocessed bytes in coding->carryover. We are
5601 sure that the number of data is less than the size of
5602 coding->carryover. */
5603 unsigned char *p = coding->carryover;
5604
5605 coding->carryover_bytes = nbytes;
5606 while (nbytes-- > 0)
5607 *p++ = *src++;
5608 }
5609 coding->consumed = coding->src_bytes;
5610 }
5611
5612 return coding->result;
5613}
5614
5615static void
5616consume_chars (coding)
5617 struct coding_system *coding;
5618{
5619 int *buf = coding->charbuf;
5620 /* -1 is to compensate for CRLF. */
5621 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5622 unsigned char *src = coding->source + coding->consumed;
5623 int pos = coding->src_pos + coding->consumed_char;
5624 int end_pos = coding->src_pos + coding->src_chars;
5625 int multibytep = coding->src_multibyte;
5626 Lisp_Object eol_type;
5627 int c;
5628 int start, end, stop;
5629 Lisp_Object object, prop;
5630
5631 eol_type = CODING_ID_EOL_TYPE (coding->id);
5632 if (VECTORP (eol_type))
5633 eol_type = Qunix;
5634
5635 object = coding->src_object;
5636
5637 /* Note: composition handling is not yet implemented. */
5638 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5639
5640 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5641 && find_composition (pos, end_pos, &start, &end, &prop, object)
5642 && end <= end_pos
5643 && (start >= pos
5644 || (find_composition (end, end_pos, &start, &end, &prop, object)
5645 && end <= end_pos)))
5646 stop = start;
5647 else
5648 stop = end_pos;
5649
5650 while (buf < buf_end)
5651 {
5652 if (pos == stop)
5653 {
5654 int *p;
5655
5656 if (pos == end_pos)
5657 break;
5658 p = save_composition_data (buf, buf_end, prop);
5659 if (p == NULL)
5660 break;
5661 buf = p;
5662 if (find_composition (end, end_pos, &start, &end, &prop, object)
5663 && end <= end_pos)
5664 stop = start;
5665 else
5666 stop = end_pos;
5667 }
5668
5669 if (! multibytep)
5670 c = *src++;
5671 else
5672 c = STRING_CHAR_ADVANCE (src);
5673 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5674 c = '\n';
5675 if (! EQ (eol_type, Qunix))
5676 {
5677 if (c == '\n')
5678 {
5679 if (EQ (eol_type, Qdos))
5680 *buf++ = '\r';
5681 else
5682 c = '\r';
5683 }
5684 }
5685 *buf++ = c;
5686 pos++;
5687 }
5688
5689 coding->consumed = src - coding->source;
5690 coding->consumed_char = pos - coding->src_pos;
5691 coding->charbuf_used = buf - coding->charbuf;
5692 coding->chars_at_source = 0;
5693}
5694
5695
5696/* Encode the text at CODING->src_object into CODING->dst_object.
5697 CODING->src_object is a buffer or a string.
5698 CODING->dst_object is a buffer or nil.
5699
5700 If CODING->src_object is a buffer, it must be the current buffer.
5701 In this case, if CODING->src_pos is positive, it is a position of
5702 the source text in the buffer, otherwise. the source text is in the
5703 gap area of the buffer, and coding->src_pos specifies the offset of
5704 the text from GPT (which must be the same as PT). If this is the
5705 same buffer as CODING->dst_object, CODING->src_pos must be
5706 negative and CODING should not have `pre-write-conversion'.
5707
5708 If CODING->src_object is a string, CODING should not have
5709 `pre-write-conversion'.
5710
5711 If CODING->dst_object is a buffer, the encoded data is inserted at
5712 the current point of that buffer.
5713
5714 If CODING->dst_object is nil, the encoded data is placed at the
5715 memory area specified by CODING->destination. */
5716
5717static int
5718encode_coding (coding)
5719 struct coding_system *coding;
5720{
5721 Lisp_Object attrs;
5722
5723 attrs = CODING_ID_ATTRS (coding->id);
5724
5725 if (BUFFERP (coding->dst_object))
5726 {
5727 set_buffer_internal (XBUFFER (coding->dst_object));
5728 coding->dst_multibyte
5729 = ! NILP (current_buffer->enable_multibyte_characters);
5730 }
5731
5732 coding->consumed = coding->consumed_char = 0;
5733 coding->produced = coding->produced_char = 0;
5734 coding->result = CODING_RESULT_SUCCESS;
5735 coding->errors = 0;
5736
5737 ALLOC_CONVERSION_WORK_AREA (coding);
5738
5739 do {
5740 coding_set_source (coding);
5741 consume_chars (coding);
5742
5743 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5744 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5745
5746 coding_set_destination (coding);
5747 (*(coding->encoder)) (coding);
5748 } while (coding->consumed_char < coding->src_chars);
5749
5750 if (BUFFERP (coding->dst_object))
5751 insert_from_gap (coding->produced_char, coding->produced);
5752
5753 return (coding->result);
5754}
5755
5756/* Work buffer */
5757
5758/* List of currently used working buffer. */
5759Lisp_Object Vcode_conversion_work_buf_list;
5760
5761/* A working buffer used by the top level conversion. */
5762Lisp_Object Vcode_conversion_reused_work_buf;
5763
5764
5765/* Return a working buffer that can be freely used by the following
5766 code conversion. MULTIBYTEP specifies the multibyteness of the
5767 buffer. */
5768
5769Lisp_Object
5770make_conversion_work_buffer (multibytep)
5771 int multibytep;
5772{
5773 struct buffer *current = current_buffer;
5774 Lisp_Object buf;
5775
5776 if (NILP (Vcode_conversion_work_buf_list))
5777 {
5778 if (NILP (Vcode_conversion_reused_work_buf))
5779 Vcode_conversion_reused_work_buf
5780 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5781 Vcode_conversion_work_buf_list
5782 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
5783 }
5784 else
5785 {
5786 int depth = Flength (Vcode_conversion_work_buf_list);
5787 char str[128];
5788
5789 sprintf (str, " *code-conversion-work*<%d>", depth);
5790 Vcode_conversion_work_buf_list
5791 = Fcons (Fget_buffer_create (build_string (str)),
5792 Vcode_conversion_work_buf_list);
5793 }
5794
5795 buf = XCAR (Vcode_conversion_work_buf_list);
5796 set_buffer_internal (XBUFFER (buf));
5797 current_buffer->undo_list = Qt;
5798 Ferase_buffer ();
5799 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5800 set_buffer_internal (current);
5801 return buf;
5802}
5803
5804static struct coding_system *saved_coding;
5805
5806Lisp_Object
5807code_conversion_restore (info)
5808 Lisp_Object info;
5809{
5810 int depth = Flength (Vcode_conversion_work_buf_list);
5811 Lisp_Object buf;
5812
5813 if (depth > 0)
5814 {
5815 buf = XCAR (Vcode_conversion_work_buf_list);
5816 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5817 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5818 Fkill_buffer (buf);
5819 }
5820
5821 if (saved_coding->dst_object == Qt
5822 && saved_coding->destination)
5823 xfree (saved_coding->destination);
5824
5825 return save_excursion_restore (info);
5826}
5827
5828
5829int
5830decode_coding_gap (coding, chars, bytes)
5831 struct coding_system *coding;
5832 EMACS_INT chars, bytes;
5833{
5834 int count = specpdl_ptr - specpdl;
5835
5836 saved_coding = coding;
5837 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5838
5839 coding->src_object = Fcurrent_buffer ();
5840 coding->src_chars = chars;
5841 coding->src_bytes = bytes;
5842 coding->src_pos = -chars;
5843 coding->src_pos_byte = -bytes;
5844 coding->src_multibyte = chars < bytes;
5845 coding->dst_object = coding->src_object;
5846 coding->dst_pos = PT;
5847 coding->dst_pos_byte = PT_BYTE;
5848 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
5849
5850 if (CODING_REQUIRE_DETECTION (coding))
5851 detect_coding (coding);
5852
5853 decode_coding (coding);
5854
5855 unbind_to (count, Qnil);
5856 return coding->result;
5857}
5858
5859int
5860encode_coding_gap (coding, chars, bytes)
5861 struct coding_system *coding;
5862 EMACS_INT chars, bytes;
5863{
5864 int count = specpdl_ptr - specpdl;
5865 Lisp_Object buffer;
5866
5867 saved_coding = coding;
5868 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5869
5870 buffer = Fcurrent_buffer ();
5871 coding->src_object = buffer;
5872 coding->src_chars = chars;
5873 coding->src_bytes = bytes;
5874 coding->src_pos = -chars;
5875 coding->src_pos_byte = -bytes;
5876 coding->src_multibyte = chars < bytes;
5877 coding->dst_object = coding->src_object;
5878 coding->dst_pos = PT;
5879 coding->dst_pos_byte = PT_BYTE;
5880
5881 encode_coding (coding);
5882
5883 unbind_to (count, Qnil);
5884 return coding->result;
5885}
5886
5887
5888/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5889 SRC_OBJECT into DST_OBJECT by coding context CODING.
5890
5891 SRC_OBJECT is a buffer, a string, or Qnil.
5892
5893 If it is a buffer, the text is at point of the buffer. FROM and TO
5894 are positions in the buffer.
5895
5896 If it is a string, the text is at the beginning of the string.
5897 FROM and TO are indices to the string.
5898
5899 If it is nil, the text is at coding->source. FROM and TO are
5900 indices to coding->source.
5901
5902 DST_OBJECT is a buffer, Qt, or Qnil.
5903
5904 If it is a buffer, the decoded text is inserted at point of the
5905 buffer. If the buffer is the same as SRC_OBJECT, the source text
5906 is deleted.
5907
5908 If it is Qt, a string is made from the decoded text, and
5909 set in CODING->dst_object.
5910
5911 If it is Qnil, the decoded text is stored at CODING->destination.
5912 The called must allocate CODING->dst_bytes bytes at
5913 CODING->destination by xmalloc. If the decoded text is longer than
5914 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5915 */
5916
5917void
5918decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5919 dst_object)
5920 struct coding_system *coding;
5921 Lisp_Object src_object;
5922 EMACS_INT from, from_byte, to, to_byte;
5923 Lisp_Object dst_object;
5924{
5925 int count = specpdl_ptr - specpdl;
5926 unsigned char *destination;
5927 EMACS_INT dst_bytes;
5928 EMACS_INT chars = to - from;
5929 EMACS_INT bytes = to_byte - from_byte;
5930 Lisp_Object attrs;
5931
5932 saved_coding = coding;
5933 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5934
5935 if (NILP (dst_object))
5936 {
5937 destination = coding->destination;
5938 dst_bytes = coding->dst_bytes;
5939 }
5940
5941 coding->src_object = src_object;
5942 coding->src_chars = chars;
5943 coding->src_bytes = bytes;
5944 coding->src_multibyte = chars < bytes;
5945
5946 if (STRINGP (src_object))
5947 {
5948 coding->src_pos = from;
5949 coding->src_pos_byte = from_byte;
5950 }
5951 else if (BUFFERP (src_object))
5952 {
5953 set_buffer_internal (XBUFFER (src_object));
5954 if (from != GPT)
5955 move_gap_both (from, from_byte);
5956 if (EQ (src_object, dst_object))
5957 {
5958 TEMP_SET_PT_BOTH (from, from_byte);
5959 del_range_both (from, from_byte, to, to_byte, 1);
5960 coding->src_pos = -chars;
5961 coding->src_pos_byte = -bytes;
5962 }
5963 else
5964 {
5965 coding->src_pos = from;
5966 coding->src_pos_byte = from_byte;
5967 }
5968 }
5969
5970 if (CODING_REQUIRE_DETECTION (coding))
5971 detect_coding (coding);
5972 attrs = CODING_ID_ATTRS (coding->id);
5973
5974 if (! NILP (CODING_ATTR_POST_READ (attrs))
5975 || EQ (dst_object, Qt))
5976 {
5977 coding->dst_object = make_conversion_work_buffer (1);
5978 coding->dst_pos = BEG;
5979 coding->dst_pos_byte = BEG_BYTE;
5980 coding->dst_multibyte = 1;
5981 }
5982 else if (BUFFERP (dst_object))
5983 {
5984 coding->dst_object = dst_object;
5985 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5986 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5987 coding->dst_multibyte
5988 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
5989 }
5990 else
5991 {
5992 coding->dst_object = Qnil;
5993 coding->dst_multibyte = 1;
5994 }
5995
5996 decode_coding (coding);
5997
5998 if (BUFFERP (coding->dst_object))
5999 set_buffer_internal (XBUFFER (coding->dst_object));
6000
6001 if (! NILP (CODING_ATTR_POST_READ (attrs)))
6002 {
6003 struct gcpro gcpro1, gcpro2;
6004 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6005 Lisp_Object val;
6006
6007 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6008 GCPRO2 (coding->src_object, coding->dst_object);
6009 val = call1 (CODING_ATTR_POST_READ (attrs),
6010 make_number (coding->produced_char));
6011 UNGCPRO;
6012 CHECK_NATNUM (val);
6013 coding->produced_char += Z - prev_Z;
6014 coding->produced += Z_BYTE - prev_Z_BYTE;
6015 }
6016
6017 if (EQ (dst_object, Qt))
6018 {
6019 coding->dst_object = Fbuffer_string ();
6020 }
6021 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6022 {
6023 set_buffer_internal (XBUFFER (coding->dst_object));
6024 if (dst_bytes < coding->produced)
6025 {
6026 destination
6027 = (unsigned char *) xrealloc (destination, coding->produced);
6028 if (! destination)
6029 {
6030 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6031 unbind_to (count, Qnil);
6032 return;
6033 }
6034 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6035 move_gap_both (BEGV, BEGV_BYTE);
6036 bcopy (BEGV_ADDR, destination, coding->produced);
6037 coding->destination = destination;
6038 }
6039 }
6040
6041 unbind_to (count, Qnil);
6042}
6043
6044
6045void
6046encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6047 dst_object)
6048 struct coding_system *coding;
6049 Lisp_Object src_object;
6050 EMACS_INT from, from_byte, to, to_byte;
6051 Lisp_Object dst_object;
6052{
6053 int count = specpdl_ptr - specpdl;
6054 EMACS_INT chars = to - from;
6055 EMACS_INT bytes = to_byte - from_byte;
6056 Lisp_Object attrs;
6057
6058 saved_coding = coding;
6059 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6060
6061 coding->src_object = src_object;
6062 coding->src_chars = chars;
6063 coding->src_bytes = bytes;
6064 coding->src_multibyte = chars < bytes;
6065
6066 attrs = CODING_ID_ATTRS (coding->id);
6067
6068 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6069 {
6070 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6071 set_buffer_internal (XBUFFER (coding->src_object));
6072 if (STRINGP (src_object))
6073 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6074 else if (BUFFERP (src_object))
6075 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6076 else
6077 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6078
6079 if (EQ (src_object, dst_object))
6080 {
6081 set_buffer_internal (XBUFFER (src_object));
6082 del_range_both (from, from_byte, to, to_byte, 1);
6083 set_buffer_internal (XBUFFER (coding->src_object));
6084 }
6085
6086 call2 (CODING_ATTR_PRE_WRITE (attrs),
6087 make_number (BEG), make_number (Z));
6088 coding->src_object = Fcurrent_buffer ();
6089 if (BEG != GPT)
6090 move_gap_both (BEG, BEG_BYTE);
6091 coding->src_chars = Z - BEG;
6092 coding->src_bytes = Z_BYTE - BEG_BYTE;
6093 coding->src_pos = BEG;
6094 coding->src_pos_byte = BEG_BYTE;
6095 coding->src_multibyte = Z < Z_BYTE;
6096 }
6097 else if (STRINGP (src_object))
6098 {
6099 coding->src_pos = from;
6100 coding->src_pos_byte = from_byte;
6101 }
6102 else if (BUFFERP (src_object))
6103 {
6104 set_buffer_internal (XBUFFER (src_object));
6105 if (from != GPT)
6106 move_gap_both (from, from_byte);
6107 if (EQ (src_object, dst_object))
6108 {
6109 del_range_both (from, from_byte, to, to_byte, 1);
6110 coding->src_pos = -chars;
6111 coding->src_pos_byte = -bytes;
6112 }
6113 else
6114 {
6115 coding->src_pos = from;
6116 coding->src_pos_byte = from_byte;
6117 }
6118 }
6119
6120 if (BUFFERP (dst_object))
6121 {
6122 coding->dst_object = dst_object;
6123 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6124 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6125 coding->dst_multibyte
6126 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6127 }
6128 else if (EQ (dst_object, Qt))
6129 {
6130 coding->dst_object = Qnil;
6131 coding->dst_bytes = coding->src_chars;
6132 if (coding->dst_bytes == 0)
6133 coding->dst_bytes = 1;
6134 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6135 coding->dst_multibyte = 0;
6136 }
6137 else
6138 {
6139 coding->dst_object = Qnil;
6140 coding->dst_multibyte = 0;
6141 }
6142
6143 encode_coding (coding);
6144
6145 if (EQ (dst_object, Qt))
6146 {
6147 if (BUFFERP (coding->dst_object))
6148 coding->dst_object = Fbuffer_string ();
6149 else
6150 {
6151 coding->dst_object
6152 = make_unibyte_string ((char *) coding->destination,
6153 coding->produced);
6154 xfree (coding->destination);
6155 }
6156 }
6157
6158 unbind_to (count, Qnil);
6159}
6160
6161
6162Lisp_Object
6163preferred_coding_system ()
6164{
6165 int id = coding_categories[coding_priorities[0]].id;
6166
6167 return CODING_ID_NAME (id);
6168}
6169
6170\f
6171#ifdef emacs
6172/*** 8. Emacs Lisp library functions ***/
6173
6174DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6175 doc: /* Return t if OBJECT is nil or a coding-system.
6176See the documentation of `define-coding-system' for information
6177about coding-system objects. */)
6178 (obj)
6179 Lisp_Object obj;
6180{
6181 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6182}
6183
6184DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6185 Sread_non_nil_coding_system, 1, 1, 0,
6186 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6187 (prompt)
6188 Lisp_Object prompt;
6189{
6190 Lisp_Object val;
6191 do
6192 {
6193 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6194 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6195 }
6196 while (XSTRING (val)->size == 0);
6197 return (Fintern (val, Qnil));
6198}
6199
6200DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6201 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6202If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6203 (prompt, default_coding_system)
6204 Lisp_Object prompt, default_coding_system;
6205{
6206 Lisp_Object val;
6207 if (SYMBOLP (default_coding_system))
6208 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6209 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6210 Qt, Qnil, Qcoding_system_history,
6211 default_coding_system, Qnil);
6212 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6213}
6214
6215DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6216 1, 1, 0,
6217 doc: /* Check validity of CODING-SYSTEM.
6218If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6219It is valid if it is a symbol with a non-nil `coding-system' property.
6220The value of property should be a vector of length 5. */)
6221 (coding_system)
6222 Lisp_Object coding_system;
6223{
6224 CHECK_SYMBOL (coding_system);
6225 if (!NILP (Fcoding_system_p (coding_system)))
6226 return coding_system;
6227 while (1)
6228 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6229}
6230
6231\f
6232Lisp_Object
6233detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6234 unsigned char *src;
6235 int src_bytes, highest;
6236 int multibytep;
6237 Lisp_Object coding_system;
6238{
6239 unsigned char *src_end = src + src_bytes;
6240 int mask = CATEGORY_MASK_ANY;
6241 int detected = 0;
6242 int c, i;
6243 Lisp_Object attrs, eol_type;
6244 Lisp_Object val;
6245 struct coding_system coding;
6246
6247 if (NILP (coding_system))
6248 coding_system = Qundecided;
6249 setup_coding_system (coding_system, &coding);
6250 attrs = CODING_ID_ATTRS (coding.id);
6251 eol_type = CODING_ID_EOL_TYPE (coding.id);
6252
6253 coding.source = src;
6254 coding.src_bytes = src_bytes;
6255 coding.src_multibyte = multibytep;
6256 coding.consumed = 0;
6257
6258 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
6259 {
6260 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6261 }
6262 else
6263 {
6264 coding_system = Qnil;
6265 for (; src < src_end; src++)
6266 {
6267 c = *src;
6268 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6269 || c == ISO_CODE_SI
6270 || c == ISO_CODE_SO)))
6271 break;
6272 }
6273 coding.head_ascii = src - coding.source;
6274
6275 if (src < src_end)
6276 for (i = 0; i < coding_category_raw_text; i++)
6277 {
6278 enum coding_category category = coding_priorities[i];
6279 struct coding_system *this = coding_categories + category;
6280
6281 if (category >= coding_category_raw_text
6282 || detected & (1 << category))
6283 continue;
6284
6285 if (this->id < 0)
6286 {
6287 /* No coding system of this category is defined. */
6288 mask &= ~(1 << category);
6289 }
6290 else
6291 {
6292 detected |= detected_mask[category];
6293 if ((*(coding_categories[category].detector)) (&coding, &mask)
6294 && highest)
6295 {
6296 mask &= detected_mask[category];
6297 break;
6298 }
6299 }
6300 }
6301 }
6302
6303 if (!mask)
6304 val = Fcons (make_number (coding_category_raw_text), Qnil);
6305 else if (mask == CATEGORY_MASK_ANY)
6306 val = Fcons (make_number (coding_category_undecided), Qnil);
6307 else if (highest)
6308 {
6309 for (i = 0; i < coding_category_raw_text; i++)
6310 if (mask & (1 << coding_priorities[i]))
6311 {
6312 val = Fcons (make_number (coding_priorities[i]), Qnil);
6313 break;
6314 }
6315 }
6316 else
6317 {
6318 val = Qnil;
6319 for (i = coding_category_raw_text - 1; i >= 0; i--)
6320 if (mask & (1 << coding_priorities[i]))
6321 val = Fcons (make_number (coding_priorities[i]), val);
6322 }
6323
6324 {
6325 int one_byte_eol = -1, two_byte_eol = -1;
6326 Lisp_Object tail;
6327
6328 for (tail = val; CONSP (tail); tail = XCDR (tail))
6329 {
6330 struct coding_system *this
6331 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6332 : &coding);
6333 int this_eol;
6334
6335 attrs = CODING_ID_ATTRS (this->id);
6336 eol_type = CODING_ID_EOL_TYPE (this->id);
6337 XSETCAR (tail, CODING_ID_NAME (this->id));
6338 if (VECTORP (eol_type))
6339 {
6340 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6341 {
6342 if (two_byte_eol < 0)
6343 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6344 this_eol = two_byte_eol;
6345 }
6346 else
6347 {
6348 if (one_byte_eol < 0)
6349 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6350 this_eol = one_byte_eol;
6351 }
6352 if (this_eol == EOL_SEEN_LF)
6353 XSETCAR (tail, AREF (eol_type, 0));
6354 else if (this_eol == EOL_SEEN_CRLF)
6355 XSETCAR (tail, AREF (eol_type, 1));
6356 else if (this_eol == EOL_SEEN_CR)
6357 XSETCAR (tail, AREF (eol_type, 2));
6358 }
6359 }
6360 }
6361
6362 return (highest ? XCAR (val) : val);
6363}
6364
6365
6366DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6367 2, 3, 0,
6368 doc: /* Detect coding system of the text in the region between START and END.
6369Return a list of possible coding systems ordered by priority.
6370
6371If only ASCII characters are found, it returns a list of single element
6372`undecided' or its subsidiary coding system according to a detected
6373end-of-line format.
6374
6375If optional argument HIGHEST is non-nil, return the coding system of
6376highest priority. */)
6377 (start, end, highest)
6378 Lisp_Object start, end, highest;
6379{
6380 int from, to;
6381 int from_byte, to_byte;
6382
6383 CHECK_NUMBER_COERCE_MARKER (start);
6384 CHECK_NUMBER_COERCE_MARKER (end);
6385
6386 validate_region (&start, &end);
6387 from = XINT (start), to = XINT (end);
6388 from_byte = CHAR_TO_BYTE (from);
6389 to_byte = CHAR_TO_BYTE (to);
6390
6391 if (from < GPT && to >= GPT)
6392 move_gap_both (to, to_byte);
6393
6394 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6395 to_byte - from_byte,
6396 !NILP (highest),
6397 !NILP (current_buffer
6398 ->enable_multibyte_characters),
6399 Qnil);
6400}
6401
6402DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6403 1, 2, 0,
6404 doc: /* Detect coding system of the text in STRING.
6405Return a list of possible coding systems ordered by priority.
6406
6407If only ASCII characters are found, it returns a list of single element
6408`undecided' or its subsidiary coding system according to a detected
6409end-of-line format.
6410
6411If optional argument HIGHEST is non-nil, return the coding system of
6412highest priority. */)
6413 (string, highest)
6414 Lisp_Object string, highest;
6415{
6416 CHECK_STRING (string);
6417
6418 return detect_coding_system (XSTRING (string)->data,
6419 STRING_BYTES (XSTRING (string)),
6420 !NILP (highest),
6421 STRING_MULTIBYTE (string),
6422 Qnil);
6423}
6424
6425
6426static INLINE int
6427char_encodable_p (c, attrs)
6428 int c;
6429 Lisp_Object attrs;
6430{
6431 Lisp_Object tail;
6432 struct charset *charset;
6433
6434 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6435 CONSP (tail); tail = XCDR (tail))
6436 {
6437 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6438 if (CHAR_CHARSET_P (c, charset))
6439 break;
6440 }
6441 return (! NILP (tail));
6442}
6443
6444
6445/* Return a list of coding systems that safely encode the text between
6446 START and END. If EXCLUDE is non-nil, it is a list of coding
6447 systems not to check. The returned list doesn't contain any such
6448 coding systems. In any case, If the text contains only ASCII or is
6449 unibyte, return t. */
6450
6451DEFUN ("find-coding-systems-region-internal",
6452 Ffind_coding_systems_region_internal,
6453 Sfind_coding_systems_region_internal, 2, 3, 0,
6454 doc: /* Internal use only. */)
6455 (start, end, exclude)
6456 Lisp_Object start, end, exclude;
6457{
6458 Lisp_Object coding_attrs_list, safe_codings;
6459 EMACS_INT start_byte, end_byte;
6460 unsigned char *p, *pbeg, *pend;
6461 int c;
6462 Lisp_Object tail, elt;
6463
6464 if (STRINGP (start))
6465 {
6466 if (!STRING_MULTIBYTE (start)
6467 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6468 return Qt;
6469 start_byte = 0;
6470 end_byte = STRING_BYTES (XSTRING (start));
6471 }
6472 else
6473 {
6474 CHECK_NUMBER_COERCE_MARKER (start);
6475 CHECK_NUMBER_COERCE_MARKER (end);
6476 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6477 args_out_of_range (start, end);
6478 if (NILP (current_buffer->enable_multibyte_characters))
6479 return Qt;
6480 start_byte = CHAR_TO_BYTE (XINT (start));
6481 end_byte = CHAR_TO_BYTE (XINT (end));
6482 if (XINT (end) - XINT (start) == end_byte - start_byte)
6483 return Qt;
6484
6485 if (start < GPT && end > GPT)
6486 {
6487 if ((GPT - start) < (end - GPT))
6488 move_gap_both (start, start_byte);
6489 else
6490 move_gap_both (end, end_byte);
6491 }
6492 }
6493
6494 coding_attrs_list = Qnil;
6495 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6496 if (NILP (exclude)
6497 || NILP (Fmemq (XCAR (tail), exclude)))
6498 {
6499 Lisp_Object attrs;
6500
6501 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6502 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6503 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6504 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6505 }
6506
6507 if (STRINGP (start))
6508 p = pbeg = XSTRING (start)->data;
6509 else
6510 p = pbeg = BYTE_POS_ADDR (start_byte);
6511 pend = p + (end_byte - start_byte);
6512
6513 while (p < pend && ASCII_BYTE_P (*p)) p++;
6514 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6515
6516 while (p < pend)
6517 {
6518 if (ASCII_BYTE_P (*p))
6519 p++;
6520 else
6521 {
6522 c = STRING_CHAR_ADVANCE (p);
6523
6524 charset_map_loaded = 0;
6525 for (tail = coding_attrs_list; CONSP (tail);)
6526 {
6527 elt = XCAR (tail);
6528 if (NILP (elt))
6529 tail = XCDR (tail);
6530 else if (char_encodable_p (c, elt))
6531 tail = XCDR (tail);
6532 else if (CONSP (XCDR (tail)))
6533 {
6534 XSETCAR (tail, XCAR (XCDR (tail)));
6535 XSETCDR (tail, XCDR (XCDR (tail)));
6536 }
6537 else
6538 {
6539 XSETCAR (tail, Qnil);
6540 tail = XCDR (tail);
6541 }
6542 }
6543 if (charset_map_loaded)
6544 {
6545 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6546
6547 if (STRINGP (start))
6548 pbeg = XSTRING (start)->data;
6549 else
6550 pbeg = BYTE_POS_ADDR (start_byte);
6551 p = pbeg + p_offset;
6552 pend = pbeg + pend_offset;
6553 }
6554 }
6555 }
6556
6557 safe_codings = Qnil;
6558 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6559 if (! NILP (XCAR (tail)))
6560 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6561
6562 return safe_codings;
6563}
6564
6565
6566DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6567 Scheck_coding_systems_region, 3, 3, 0,
6568 doc: /* Check if the region is encodable by coding systems.
6569
6570START and END are buffer positions specifying the region.
6571CODING-SYSTEM-LIST is a list of coding systems to check.
6572
6573The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6574CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6575whole region, POS0, POS1, ... are buffer positions where non-encodable
6576characters are found.
6577
6578If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6579value is nil.
6580
6581START may be a string. In that case, check if the string is
6582encodable, and the value contains indices to the string instead of
6583buffer positions. END is ignored. */)
6584 (start, end, coding_system_list)
6585 Lisp_Object start, end, coding_system_list;
6586{
6587 Lisp_Object list;
6588 EMACS_INT start_byte, end_byte;
6589 int pos;
6590 unsigned char *p, *pbeg, *pend;
6591 int c;
6592 Lisp_Object tail, elt;
6593
6594 if (STRINGP (start))
6595 {
6596 if (!STRING_MULTIBYTE (start)
6597 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6598 return Qnil;
6599 start_byte = 0;
6600 end_byte = STRING_BYTES (XSTRING (start));
6601 pos = 0;
6602 }
6603 else
6604 {
6605 CHECK_NUMBER_COERCE_MARKER (start);
6606 CHECK_NUMBER_COERCE_MARKER (end);
6607 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6608 args_out_of_range (start, end);
6609 if (NILP (current_buffer->enable_multibyte_characters))
6610 return Qnil;
6611 start_byte = CHAR_TO_BYTE (XINT (start));
6612 end_byte = CHAR_TO_BYTE (XINT (end));
6613 if (XINT (end) - XINT (start) == end_byte - start_byte)
6614 return Qt;
6615
6616 if (start < GPT && end > GPT)
6617 {
6618 if ((GPT - start) < (end - GPT))
6619 move_gap_both (start, start_byte);
6620 else
6621 move_gap_both (end, end_byte);
6622 }
6623 pos = start;
6624 }
6625
6626 list = Qnil;
6627 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6628 {
6629 elt = XCAR (tail);
6630 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6631 Qnil)),
6632 list);
6633 }
6634
6635 if (STRINGP (start))
6636 p = pbeg = XSTRING (start)->data;
6637 else
6638 p = pbeg = BYTE_POS_ADDR (start_byte);
6639 pend = p + (end_byte - start_byte);
6640
6641 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6642 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6643
6644 while (p < pend)
6645 {
6646 if (ASCII_BYTE_P (*p))
6647 p++;
6648 else
6649 {
6650 c = STRING_CHAR_ADVANCE (p);
6651
6652 charset_map_loaded = 0;
6653 for (tail = list; CONSP (tail); tail = XCDR (tail))
6654 {
6655 elt = XCDR (XCAR (tail));
6656 if (! char_encodable_p (c, XCAR (elt)))
6657 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6658 }
6659 if (charset_map_loaded)
6660 {
6661 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6662
6663 if (STRINGP (start))
6664 pbeg = XSTRING (start)->data;
6665 else
6666 pbeg = BYTE_POS_ADDR (start_byte);
6667 p = pbeg + p_offset;
6668 pend = pbeg + pend_offset;
6669 }
6670 }
6671 pos++;
6672 }
6673
6674 tail = list;
6675 list = Qnil;
6676 for (; CONSP (tail); tail = XCDR (tail))
6677 {
6678 elt = XCAR (tail);
6679 if (CONSP (XCDR (XCDR (elt))))
6680 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6681 list);
6682 }
6683
6684 return list;
6685}
6686
6687
6688
6689Lisp_Object
6690code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6691 Lisp_Object start, end, coding_system, dst_object;
6692 int encodep, norecord;
6693{
6694 struct coding_system coding;
6695 EMACS_INT from, from_byte, to, to_byte;
6696 Lisp_Object src_object;
6697
6698 CHECK_NUMBER_COERCE_MARKER (start);
6699 CHECK_NUMBER_COERCE_MARKER (end);
6700 if (NILP (coding_system))
6701 coding_system = Qno_conversion;
6702 else
6703 CHECK_CODING_SYSTEM (coding_system);
6704 src_object = Fcurrent_buffer ();
6705 if (NILP (dst_object))
6706 dst_object = src_object;
6707 else if (! EQ (dst_object, Qt))
6708 CHECK_BUFFER (dst_object);
6709
6710 validate_region (&start, &end);
6711 from = XFASTINT (start);
6712 from_byte = CHAR_TO_BYTE (from);
6713 to = XFASTINT (end);
6714 to_byte = CHAR_TO_BYTE (to);
6715
6716 setup_coding_system (coding_system, &coding);
6717 coding.mode |= CODING_MODE_LAST_BLOCK;
6718
6719 if (encodep)
6720 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6721 dst_object);
6722 else
6723 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6724 dst_object);
6725 if (! norecord)
6726 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6727
6728 if (coding.result != CODING_RESULT_SUCCESS)
6729 error ("Code conversion error: %d", coding.result);
6730
6731 return (BUFFERP (dst_object)
6732 ? make_number (coding.produced_char)
6733 : coding.dst_object);
6734}
6735
6736
6737DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6738 3, 4, "r\nzCoding system: ",
6739 doc: /* Decode the current region from the specified coding system.
6740When called from a program, takes four arguments:
6741 START, END, CODING-SYSTEM, and DESTINATION.
6742START and END are buffer positions.
6743
6744Optional 4th arguments DESTINATION specifies where the decoded text goes.
6745If nil, the region between START and END is replace by the decoded text.
6746If buffer, the decoded text is inserted in the buffer.
6747If t, the decoded text is returned.
6748
6749This function sets `last-coding-system-used' to the precise coding system
6750used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6751not fully specified.)
6752It returns the length of the decoded text. */)
6753 (start, end, coding_system, destination)
6754 Lisp_Object start, end, coding_system, destination;
6755{
6756 return code_convert_region (start, end, coding_system, destination, 0, 0);
6757}
6758
6759DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6760 3, 4, "r\nzCoding system: ",
6761 doc: /* Encode the current region by specified coding system.
6762When called from a program, takes three arguments:
6763START, END, and CODING-SYSTEM. START and END are buffer positions.
6764
6765Optional 4th arguments DESTINATION specifies where the encoded text goes.
6766If nil, the region between START and END is replace by the encoded text.
6767If buffer, the encoded text is inserted in the buffer.
6768If t, the encoded text is returned.
6769
6770This function sets `last-coding-system-used' to the precise coding system
6771used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6772not fully specified.)
6773It returns the length of the encoded text. */)
6774 (start, end, coding_system, destination)
6775 Lisp_Object start, end, coding_system, destination;
6776{
6777 return code_convert_region (start, end, coding_system, destination, 1, 0);
6778}
6779
6780Lisp_Object
6781code_convert_string (string, coding_system, dst_object,
6782 encodep, nocopy, norecord)
6783 Lisp_Object string, coding_system, dst_object;
6784 int encodep, nocopy, norecord;
6785{
6786 struct coding_system coding;
6787 EMACS_INT chars, bytes;
6788
6789 CHECK_STRING (string);
6790 if (NILP (coding_system))
6791 {
6792 if (! norecord)
6793 Vlast_coding_system_used = Qno_conversion;
6794 if (NILP (dst_object))
6795 return (nocopy ? Fcopy_sequence (string) : string);
6796 }
6797
6798 if (NILP (coding_system))
6799 coding_system = Qno_conversion;
6800 else
6801 CHECK_CODING_SYSTEM (coding_system);
6802 if (NILP (dst_object))
6803 dst_object = Qt;
6804 else if (! EQ (dst_object, Qt))
6805 CHECK_BUFFER (dst_object);
6806
6807 setup_coding_system (coding_system, &coding);
6808 coding.mode |= CODING_MODE_LAST_BLOCK;
6809 chars = XSTRING (string)->size;
6810 bytes = STRING_BYTES (XSTRING (string));
6811 if (encodep)
6812 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6813 else
6814 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6815 if (! norecord)
6816 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6817
6818 if (coding.result != CODING_RESULT_SUCCESS)
6819 error ("Code conversion error: %d", coding.result);
6820
6821 return (BUFFERP (dst_object)
6822 ? make_number (coding.produced_char)
6823 : coding.dst_object);
6824}
6825
6826
6827/* Encode or decode STRING according to CODING_SYSTEM.
6828 Do not set Vlast_coding_system_used.
6829
6830 This function is called only from macros DECODE_FILE and
6831 ENCODE_FILE, thus we ignore character composition. */
6832
6833Lisp_Object
6834code_convert_string_norecord (string, coding_system, encodep)
6835 Lisp_Object string, coding_system;
6836 int encodep;
6837{
6838 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
6839}
6840
6841
6842DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6843 2, 4, 0,
6844 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6845
6846Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6847if the decoding operation is trivial.
6848
6849Optional fourth arg BUFFER non-nil meant that the decoded text is
6850inserted in BUFFER instead of returned as a astring. In this case,
6851the return value is BUFFER.
6852
6853This function sets `last-coding-system-used' to the precise coding system
6854used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6855not fully specified. */)
6856 (string, coding_system, nocopy, buffer)
6857 Lisp_Object string, coding_system, nocopy, buffer;
6858{
6859 return code_convert_string (string, coding_system, buffer,
6860 0, ! NILP (nocopy), 0);
6861}
6862
6863DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6864 2, 4, 0,
6865 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6866
6867Optional third arg NOCOPY non-nil means it is OK to return STRING
6868itself if the encoding operation is trivial.
6869
6870Optional fourth arg BUFFER non-nil meant that the encoded text is
6871inserted in BUFFER instead of returned as a astring. In this case,
6872the return value is BUFFER.
6873
6874This function sets `last-coding-system-used' to the precise coding system
6875used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6876not fully specified.) */)
6877 (string, coding_system, nocopy, buffer)
6878 Lisp_Object string, coding_system, nocopy, buffer;
6879{
6880 return code_convert_string (string, coding_system, buffer,
6881 nocopy, ! NILP (nocopy), 1);
6882}
6883
6884\f
6885DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6886 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6887Return the corresponding character. */)
6888 (code)
6889 Lisp_Object code;
6890{
6891 Lisp_Object spec, attrs, val;
6892 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6893 int c;
6894
6895 CHECK_NATNUM (code);
6896 c = XFASTINT (code);
6897 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6898 attrs = AREF (spec, 0);
6899
6900 if (ASCII_BYTE_P (c)
6901 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6902 return code;
6903
6904 val = CODING_ATTR_CHARSET_LIST (attrs);
6905 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6906 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6907 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
6908
6909 if (c <= 0x7F)
6910 charset = charset_roman;
6911 else if (c >= 0xA0 && c < 0xDF)
6912 {
6913 charset = charset_kana;
6914 c -= 0x80;
6915 }
6916 else
6917 {
6918 int s1 = c >> 8, s2 = c & 0xFF;
6919
6920 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6921 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6922 error ("Invalid code: %d", code);
6923 SJIS_TO_JIS (c);
6924 charset = charset_kanji;
6925 }
6926 c = DECODE_CHAR (charset, c);
6927 if (c < 0)
6928 error ("Invalid code: %d", code);
6929 return make_number (c);
6930}
6931
6932
6933DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6934 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6935Return the corresponding code in SJIS. */)
6936 (ch)
6937 Lisp_Object ch;
6938{
6939 Lisp_Object spec, attrs, charset_list;
6940 int c;
6941 struct charset *charset;
6942 unsigned code;
6943
6944 CHECK_CHARACTER (ch);
6945 c = XFASTINT (ch);
6946 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6947 attrs = AREF (spec, 0);
6948
6949 if (ASCII_CHAR_P (c)
6950 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6951 return ch;
6952
6953 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6954 charset = char_charset (c, charset_list, &code);
6955 if (code == CHARSET_INVALID_CODE (charset))
6956 error ("Can't encode by shift_jis encoding: %d", c);
6957 JIS_TO_SJIS (code);
6958
6959 return make_number (code);
6960}
6961
6962DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6963 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6964Return the corresponding character. */)
6965 (code)
6966 Lisp_Object code;
6967{
6968 Lisp_Object spec, attrs, val;
6969 struct charset *charset_roman, *charset_big5, *charset;
6970 int c;
6971
6972 CHECK_NATNUM (code);
6973 c = XFASTINT (code);
6974 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6975 attrs = AREF (spec, 0);
6976
6977 if (ASCII_BYTE_P (c)
6978 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6979 return code;
6980
6981 val = CODING_ATTR_CHARSET_LIST (attrs);
6982 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6983 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6984
6985 if (c <= 0x7F)
6986 charset = charset_roman;
6987 else
6988 {
6989 int b1 = c >> 8, b2 = c & 0x7F;
6990 if (b1 < 0xA1 || b1 > 0xFE
6991 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6992 error ("Invalid code: %d", code);
6993 charset = charset_big5;
6994 }
6995 c = DECODE_CHAR (charset, (unsigned )c);
6996 if (c < 0)
6997 error ("Invalid code: %d", code);
6998 return make_number (c);
6999}
7000
7001DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7002 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7003Return the corresponding character code in Big5. */)
7004 (ch)
7005 Lisp_Object ch;
7006{
7007 Lisp_Object spec, attrs, charset_list;
7008 struct charset *charset;
7009 int c;
7010 unsigned code;
7011
7012 CHECK_CHARACTER (ch);
7013 c = XFASTINT (ch);
7014 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7015 attrs = AREF (spec, 0);
7016 if (ASCII_CHAR_P (c)
7017 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7018 return ch;
7019
7020 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7021 charset = char_charset (c, charset_list, &code);
7022 if (code == CHARSET_INVALID_CODE (charset))
7023 error ("Can't encode by Big5 encoding: %d", c);
7024
7025 return make_number (code);
7026}
7027
7028\f
7029DEFUN ("set-terminal-coding-system-internal",
7030 Fset_terminal_coding_system_internal,
7031 Sset_terminal_coding_system_internal, 1, 1, 0,
7032 doc: /* Internal use only. */)
7033 (coding_system)
7034 Lisp_Object coding_system;
7035{
7036 CHECK_SYMBOL (coding_system);
7037 setup_coding_system (Fcheck_coding_system (coding_system),
7038 &terminal_coding);
7039
7040 /* We had better not send unsafe characters to terminal. */
7041 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7042 /* Characer composition should be disabled. */
7043 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7044 terminal_coding.src_multibyte = 1;
7045 terminal_coding.dst_multibyte = 0;
7046 return Qnil;
7047}
7048
7049DEFUN ("set-safe-terminal-coding-system-internal",
7050 Fset_safe_terminal_coding_system_internal,
7051 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7052 doc: /* Internal use only. */)
7053 (coding_system)
7054 Lisp_Object coding_system;
7055{
7056 CHECK_SYMBOL (coding_system);
7057 setup_coding_system (Fcheck_coding_system (coding_system),
7058 &safe_terminal_coding);
7059 /* Characer composition should be disabled. */
7060 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7061 safe_terminal_coding.src_multibyte = 1;
7062 safe_terminal_coding.dst_multibyte = 0;
7063 return Qnil;
7064}
7065
7066DEFUN ("terminal-coding-system",
7067 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7068 doc: /* Return coding system specified for terminal output. */)
7069 ()
7070{
7071 return CODING_ID_NAME (terminal_coding.id);
7072}
7073
7074DEFUN ("set-keyboard-coding-system-internal",
7075 Fset_keyboard_coding_system_internal,
7076 Sset_keyboard_coding_system_internal, 1, 1, 0,
7077 doc: /* Internal use only. */)
7078 (coding_system)
7079 Lisp_Object coding_system;
7080{
7081 CHECK_SYMBOL (coding_system);
7082 setup_coding_system (Fcheck_coding_system (coding_system),
7083 &keyboard_coding);
7084 /* Characer composition should be disabled. */
7085 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7086 return Qnil;
7087}
7088
7089DEFUN ("keyboard-coding-system",
7090 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7091 doc: /* Return coding system specified for decoding keyboard input. */)
7092 ()
7093{
7094 return CODING_ID_NAME (keyboard_coding.id);
7095}
7096
7097\f
7098DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7099 Sfind_operation_coding_system, 1, MANY, 0,
7100 doc: /* Choose a coding system for an operation based on the target name.
7101The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7102DECODING-SYSTEM is the coding system to use for decoding
7103\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7104for encoding (in case OPERATION does encoding).
7105
7106The first argument OPERATION specifies an I/O primitive:
7107 For file I/O, `insert-file-contents' or `write-region'.
7108 For process I/O, `call-process', `call-process-region', or `start-process'.
7109 For network I/O, `open-network-stream'.
7110
7111The remaining arguments should be the same arguments that were passed
7112to the primitive. Depending on which primitive, one of those arguments
7113is selected as the TARGET. For example, if OPERATION does file I/O,
7114whichever argument specifies the file name is TARGET.
7115
7116TARGET has a meaning which depends on OPERATION:
7117 For file I/O, TARGET is a file name.
7118 For process I/O, TARGET is a process name.
7119 For network I/O, TARGET is a service name or a port number
7120
7121This function looks up what specified for TARGET in,
7122`file-coding-system-alist', `process-coding-system-alist',
7123or `network-coding-system-alist' depending on OPERATION.
7124They may specify a coding system, a cons of coding systems,
7125or a function symbol to call.
7126In the last case, we call the function with one argument,
7127which is a list of all the arguments given to this function.
7128
7129usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7130 (nargs, args)
7131 int nargs;
7132 Lisp_Object *args;
7133{
7134 Lisp_Object operation, target_idx, target, val;
7135 register Lisp_Object chain;
7136
7137 if (nargs < 2)
7138 error ("Too few arguments");
7139 operation = args[0];
7140 if (!SYMBOLP (operation)
7141 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7142 error ("Invalid first arguement");
7143 if (nargs < 1 + XINT (target_idx))
7144 error ("Too few arguments for operation: %s",
7145 XSYMBOL (operation)->name->data);
7146 target = args[XINT (target_idx) + 1];
7147 if (!(STRINGP (target)
7148 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7149 error ("Invalid %dth argument", XINT (target_idx) + 1);
7150
7151 chain = ((EQ (operation, Qinsert_file_contents)
7152 || EQ (operation, Qwrite_region))
7153 ? Vfile_coding_system_alist
7154 : (EQ (operation, Qopen_network_stream)
7155 ? Vnetwork_coding_system_alist
7156 : Vprocess_coding_system_alist));
7157 if (NILP (chain))
7158 return Qnil;
7159
7160 for (; CONSP (chain); chain = XCDR (chain))
7161 {
7162 Lisp_Object elt;
7163
7164 elt = XCAR (chain);
7165 if (CONSP (elt)
7166 && ((STRINGP (target)
7167 && STRINGP (XCAR (elt))
7168 && fast_string_match (XCAR (elt), target) >= 0)
7169 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7170 {
7171 val = XCDR (elt);
7172 /* Here, if VAL is both a valid coding system and a valid
7173 function symbol, we return VAL as a coding system. */
7174 if (CONSP (val))
7175 return val;
7176 if (! SYMBOLP (val))
7177 return Qnil;
7178 if (! NILP (Fcoding_system_p (val)))
7179 return Fcons (val, val);
7180 if (! NILP (Ffboundp (val)))
7181 {
7182 val = call1 (val, Flist (nargs, args));
7183 if (CONSP (val))
7184 return val;
7185 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7186 return Fcons (val, val);
7187 }
7188 return Qnil;
7189 }
7190 }
7191 return Qnil;
7192}
7193
7194DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7195 Sset_coding_system_priority, 1, MANY, 0,
7196 doc: /* Assign higher priority to the coding systems given as arguments.
7197usage: (set-coding-system-priority CODING-SYSTEM ...) */)
7198 (nargs, args)
7199 int nargs;
7200 Lisp_Object *args;
7201{
7202 int i, j;
7203 int changed[coding_category_max];
7204 enum coding_category priorities[coding_category_max];
7205
7206 bzero (changed, sizeof changed);
7207
7208 for (i = j = 0; i < nargs; i++)
7209 {
7210 enum coding_category category;
7211 Lisp_Object spec, attrs;
7212
7213 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7214 attrs = AREF (spec, 0);
7215 category = XINT (CODING_ATTR_CATEGORY (attrs));
7216 if (changed[category])
7217 /* Ignore this coding system because a coding system of the
7218 same category already had a higher priority. */
7219 continue;
7220 changed[category] = 1;
7221 priorities[j++] = category;
7222 if (coding_categories[category].id >= 0
7223 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7224 setup_coding_system (args[i], &coding_categories[category]);
7225 }
7226
7227 /* Now we have decided top J priorities. Reflect the order of the
7228 original priorities to the remaining priorities. */
7229
7230 for (i = j, j = 0; i < coding_category_max; i++, j++)
7231 {
7232 while (j < coding_category_max
7233 && changed[coding_priorities[j]])
7234 j++;
7235 if (j == coding_category_max)
7236 abort ();
7237 priorities[i] = coding_priorities[j];
7238 }
7239
7240 bcopy (priorities, coding_priorities, sizeof priorities);
7241 return Qnil;
7242}
7243
7244DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7245 Scoding_system_priority_list, 0, 1, 0,
7246 doc: /* Return a list of coding systems ordered by their priorities.
7247HIGHESTP non-nil means just return the highest priority one. */)
7248 (highestp)
7249 Lisp_Object highestp;
7250{
7251 int i;
7252 Lisp_Object val;
7253
7254 for (i = 0, val = Qnil; i < coding_category_max; i++)
7255 {
7256 enum coding_category category = coding_priorities[i];
7257 int id = coding_categories[category].id;
7258 Lisp_Object attrs;
7259
7260 if (id < 0)
7261 continue;
7262 attrs = CODING_ID_ATTRS (id);
7263 if (! NILP (highestp))
7264 return CODING_ATTR_BASE_NAME (attrs);
7265 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7266 }
7267 return Fnreverse (val);
7268}
7269
7270static char *suffixes[] = { "-unix", "-dos", "-mac" };
7271
7272static Lisp_Object
7273make_subsidiaries (base)
7274 Lisp_Object base;
7275{
7276 Lisp_Object subsidiaries;
7277 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7278 char *buf = (char *) alloca (base_name_len + 6);
7279 int i;
7280
7281 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7282 subsidiaries = Fmake_vector (make_number (3), Qnil);
7283 for (i = 0; i < 3; i++)
7284 {
7285 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7286 ASET (subsidiaries, i, intern (buf));
7287 }
7288 return subsidiaries;
7289}
7290
7291
7292DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7293 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7294 doc: /* For internal use only.
7295usage: (define-coding-system-internal ...) */)
7296 (nargs, args)
7297 int nargs;
7298 Lisp_Object *args;
7299{
7300 Lisp_Object name;
7301 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7302 Lisp_Object attrs; /* Vector of attributes. */
7303 Lisp_Object eol_type;
7304 Lisp_Object aliases;
7305 Lisp_Object coding_type, charset_list, safe_charsets;
7306 enum coding_category category;
7307 Lisp_Object tail, val;
7308 int max_charset_id = 0;
7309 int i;
7310
7311 if (nargs < coding_arg_max)
7312 goto short_args;
7313
7314 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7315
7316 name = args[coding_arg_name];
7317 CHECK_SYMBOL (name);
7318 CODING_ATTR_BASE_NAME (attrs) = name;
7319
7320 val = args[coding_arg_mnemonic];
7321 if (! STRINGP (val))
7322 CHECK_CHARACTER (val);
7323 CODING_ATTR_MNEMONIC (attrs) = val;
7324
7325 coding_type = args[coding_arg_coding_type];
7326 CHECK_SYMBOL (coding_type);
7327 CODING_ATTR_TYPE (attrs) = coding_type;
7328
7329 charset_list = args[coding_arg_charset_list];
7330 if (SYMBOLP (charset_list))
7331 {
7332 if (EQ (charset_list, Qiso_2022))
7333 {
7334 if (! EQ (coding_type, Qiso_2022))
7335 error ("Invalid charset-list");
7336 charset_list = Viso_2022_charset_list;
7337 }
7338 else if (EQ (charset_list, Qemacs_mule))
7339 {
7340 if (! EQ (coding_type, Qemacs_mule))
7341 error ("Invalid charset-list");
7342 charset_list = Vemacs_mule_charset_list;
7343 }
7344 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7345 if (max_charset_id < XFASTINT (XCAR (tail)))
7346 max_charset_id = XFASTINT (XCAR (tail));
7347 }
7348 else
7349 {
7350 charset_list = Fcopy_sequence (charset_list);
7351 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7352 {
7353 struct charset *charset;
7354
7355 val = Fcar (tail);
7356 CHECK_CHARSET_GET_CHARSET (val, charset);
7357 if (EQ (coding_type, Qiso_2022)
7358 ? CHARSET_ISO_FINAL (charset) < 0
7359 : EQ (coding_type, Qemacs_mule)
7360 ? CHARSET_EMACS_MULE_ID (charset) < 0
7361 : 0)
7362 error ("Can't handle charset `%s'",
7363 XSYMBOL (CHARSET_NAME (charset))->name->data);
7364
7365 XCAR (tail) = make_number (charset->id);
7366 if (max_charset_id < charset->id)
7367 max_charset_id = charset->id;
7368 }
7369 }
7370 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7371
7372 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7373 make_number (255));
7374 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7375 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7376 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7377
7378 val = args[coding_arg_decode_translation_table];
7379 if (! NILP (val))
7380 CHECK_CHAR_TABLE (val);
7381 CODING_ATTR_DECODE_TBL (attrs) = val;
7382
7383 val = args[coding_arg_encode_translation_table];
7384 if (! NILP (val))
7385 CHECK_CHAR_TABLE (val);
7386 CODING_ATTR_ENCODE_TBL (attrs) = val;
7387
7388 val = args[coding_arg_post_read_conversion];
7389 CHECK_SYMBOL (val);
7390 CODING_ATTR_POST_READ (attrs) = val;
7391
7392 val = args[coding_arg_pre_write_conversion];
7393 CHECK_SYMBOL (val);
7394 CODING_ATTR_PRE_WRITE (attrs) = val;
7395
7396 val = args[coding_arg_default_char];
7397 if (NILP (val))
7398 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7399 else
7400 {
7401 CHECK_CHARACTER (val);
7402 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7403 }
7404
7405 val = args[coding_arg_plist];
7406 CHECK_LIST (val);
7407 CODING_ATTR_PLIST (attrs) = val;
7408
7409 if (EQ (coding_type, Qcharset))
7410 {
7411 /* Generate a lisp vector of 256 elements. Each element is nil,
7412 integer, or a list of charset IDs.
7413
7414 If Nth element is nil, the byte code N is invalid in this
7415 coding system.
7416
7417 If Nth element is a number NUM, N is the first byte of a
7418 charset whose ID is NUM.
7419
7420 If Nth element is a list of charset IDs, N is the first byte
7421 of one of them. The list is sorted by dimensions of the
7422 charsets. A charset of smaller dimension comes firtst.
7423 */
7424 val = Fmake_vector (make_number (256), Qnil);
7425
7426 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7427 {
7428 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7429 int dim = CHARSET_DIMENSION (charset);
7430 int idx = (dim - 1) * 4;
7431
7432 for (i = charset->code_space[idx];
7433 i <= charset->code_space[idx + 1]; i++)
7434 {
7435 Lisp_Object tmp, tmp2;
7436 int dim2;
7437
7438 tmp = AREF (val, i);
7439 if (NILP (tmp))
7440 tmp = XCAR (tail);
7441 else if (NUMBERP (tmp))
7442 {
7443 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7444 if (dim < dim2)
7445 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
7446 else
7447 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
7448 }
7449 else
7450 {
7451 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7452 {
7453 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7454 if (dim < dim2)
7455 break;
7456 }
7457 if (NILP (tmp2))
7458 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7459 else
7460 {
7461 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7462 XSETCAR (tmp2, XCAR (tail));
7463 }
7464 }
7465 ASET (val, i, tmp);
7466 }
7467 }
7468 ASET (attrs, coding_attr_charset_valids, val);
7469 category = coding_category_charset;
7470 }
7471 else if (EQ (coding_type, Qccl))
7472 {
7473 Lisp_Object valids;
7474
7475 if (nargs < coding_arg_ccl_max)
7476 goto short_args;
7477
7478 val = args[coding_arg_ccl_decoder];
7479 CHECK_CCL_PROGRAM (val);
7480 if (VECTORP (val))
7481 val = Fcopy_sequence (val);
7482 ASET (attrs, coding_attr_ccl_decoder, val);
7483
7484 val = args[coding_arg_ccl_encoder];
7485 CHECK_CCL_PROGRAM (val);
7486 if (VECTORP (val))
7487 val = Fcopy_sequence (val);
7488 ASET (attrs, coding_attr_ccl_encoder, val);
7489
7490 val = args[coding_arg_ccl_valids];
7491 valids = Fmake_string (make_number (256), make_number (0));
7492 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7493 {
7494 val = Fcar (tail);
7495 if (INTEGERP (val))
7496 ASET (valids, XINT (val), 1);
7497 else
7498 {
7499 int from, to;
7500
7501 CHECK_CONS (val);
7502 CHECK_NUMBER (XCAR (val));
7503 CHECK_NUMBER (XCDR (val));
7504 from = XINT (XCAR (val));
7505 to = XINT (XCDR (val));
7506 for (i = from; i <= to; i++)
7507 ASET (valids, i, 1);
7508 }
7509 }
7510 ASET (attrs, coding_attr_ccl_valids, valids);
7511
7512 category = coding_category_ccl;
7513 }
7514 else if (EQ (coding_type, Qutf_16))
7515 {
7516 Lisp_Object bom, endian;
7517
7518 if (nargs < coding_arg_utf16_max)
7519 goto short_args;
7520
7521 bom = args[coding_arg_utf16_bom];
7522 if (! NILP (bom) && ! EQ (bom, Qt))
7523 {
7524 CHECK_CONS (bom);
7525 CHECK_CODING_SYSTEM (XCAR (bom));
7526 CHECK_CODING_SYSTEM (XCDR (bom));
7527 }
7528 ASET (attrs, coding_attr_utf_16_bom, bom);
7529
7530 endian = args[coding_arg_utf16_endian];
7531 ASET (attrs, coding_attr_utf_16_endian, endian);
7532
7533 category = (CONSP (bom)
7534 ? coding_category_utf_16_auto
7535 : NILP (bom)
7536 ? (NILP (endian)
7537 ? coding_category_utf_16_be_nosig
7538 : coding_category_utf_16_le_nosig)
7539 : (NILP (endian)
7540 ? coding_category_utf_16_be
7541 : coding_category_utf_16_le));
7542 }
7543 else if (EQ (coding_type, Qiso_2022))
7544 {
7545 Lisp_Object initial, reg_usage, request, flags;
7546 int i, id;
7547
7548 if (nargs < coding_arg_iso2022_max)
7549 goto short_args;
7550
7551 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7552 CHECK_VECTOR (initial);
7553 for (i = 0; i < 4; i++)
7554 {
7555 val = Faref (initial, make_number (i));
7556 if (! NILP (val))
7557 {
7558 CHECK_CHARSET_GET_ID (val, id);
7559 ASET (initial, i, make_number (id));
7560 }
7561 else
7562 ASET (initial, i, make_number (-1));
7563 }
7564
7565 reg_usage = args[coding_arg_iso2022_reg_usage];
7566 CHECK_CONS (reg_usage);
7567 CHECK_NATNUM (XCAR (reg_usage));
7568 CHECK_NATNUM (XCDR (reg_usage));
7569
7570 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7571 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
7572 {
7573 int id;
7574
7575 val = Fcar (tail);
7576 CHECK_CONS (val);
7577 CHECK_CHARSET_GET_ID (XCAR (val), id);
7578 CHECK_NATNUM (XCDR (val));
7579 if (XINT (XCDR (val)) >= 4)
7580 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7581 XCAR (val) = make_number (id);
7582 }
7583
7584 flags = args[coding_arg_iso2022_flags];
7585 CHECK_NATNUM (flags);
7586 i = XINT (flags);
7587 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7588 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7589
7590 ASET (attrs, coding_attr_iso_initial, initial);
7591 ASET (attrs, coding_attr_iso_usage, reg_usage);
7592 ASET (attrs, coding_attr_iso_request, request);
7593 ASET (attrs, coding_attr_iso_flags, flags);
7594 setup_iso_safe_charsets (attrs);
7595
7596 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7597 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7598 | CODING_ISO_FLAG_SINGLE_SHIFT))
7599 ? coding_category_iso_7_else
7600 : EQ (args[coding_arg_charset_list], Qiso_2022)
7601 ? coding_category_iso_7
7602 : coding_category_iso_7_tight);
7603 else
7604 {
7605 int id = XINT (AREF (initial, 1));
7606
7607 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7608 | CODING_ISO_FLAG_SINGLE_SHIFT))
7609 || EQ (args[coding_arg_charset_list], Qiso_2022)
7610 || id < 0)
7611 ? coding_category_iso_8_else
7612 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7613 ? coding_category_iso_8_1
7614 : coding_category_iso_8_2);
7615 }
7616 }
7617 else if (EQ (coding_type, Qemacs_mule))
7618 {
7619 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7620 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7621
7622 category = coding_category_emacs_mule;
7623 }
7624 else if (EQ (coding_type, Qshift_jis))
7625 {
7626
7627 struct charset *charset;
7628
7629 if (XINT (Flength (charset_list)) != 3)
7630 error ("There should be just three charsets");
7631
7632 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7633 if (CHARSET_DIMENSION (charset) != 1)
7634 error ("Dimension of charset %s is not one",
7635 XSYMBOL (CHARSET_NAME (charset))->name->data);
7636
7637 charset_list = XCDR (charset_list);
7638 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7639 if (CHARSET_DIMENSION (charset) != 1)
7640 error ("Dimension of charset %s is not one",
7641 XSYMBOL (CHARSET_NAME (charset))->name->data);
7642
7643 charset_list = XCDR (charset_list);
7644 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7645 if (CHARSET_DIMENSION (charset) != 2)
7646 error ("Dimension of charset %s is not two",
7647 XSYMBOL (CHARSET_NAME (charset))->name->data);
7648
7649 category = coding_category_sjis;
7650 Vsjis_coding_system = name;
7651 }
7652 else if (EQ (coding_type, Qbig5))
7653 {
7654 struct charset *charset;
7655
7656 if (XINT (Flength (charset_list)) != 2)
7657 error ("There should be just two charsets");
7658
7659 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7660 if (CHARSET_DIMENSION (charset) != 1)
7661 error ("Dimension of charset %s is not one",
7662 XSYMBOL (CHARSET_NAME (charset))->name->data);
7663
7664 charset_list = XCDR (charset_list);
7665 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7666 if (CHARSET_DIMENSION (charset) != 2)
7667 error ("Dimension of charset %s is not two",
7668 XSYMBOL (CHARSET_NAME (charset))->name->data);
7669
7670 category = coding_category_big5;
7671 Vbig5_coding_system = name;
7672 }
7673 else if (EQ (coding_type, Qraw_text))
7674 category = coding_category_raw_text;
7675 else if (EQ (coding_type, Qutf_8))
7676 category = coding_category_utf_8;
7677 else if (EQ (coding_type, Qundecided))
7678 category = coding_category_undecided;
7679 else
7680 error ("Invalid coding system type: %s",
7681 XSYMBOL (coding_type)->name->data);
7682
7683 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7684
7685 eol_type = args[coding_arg_eol_type];
7686 if (! NILP (eol_type)
7687 && ! EQ (eol_type, Qunix)
7688 && ! EQ (eol_type, Qdos)
7689 && ! EQ (eol_type, Qmac))
7690 error ("Invalid eol-type");
7691
7692 aliases = Fcons (name, Qnil);
7693
7694 if (NILP (eol_type))
7695 {
7696 eol_type = make_subsidiaries (name);
7697 for (i = 0; i < 3; i++)
7698 {
7699 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7700
7701 this_name = AREF (eol_type, i);
7702 this_aliases = Fcons (this_name, Qnil);
7703 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7704 this_spec = Fmake_vector (make_number (3), attrs);
7705 ASET (this_spec, 1, this_aliases);
7706 ASET (this_spec, 2, this_eol_type);
7707 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7708 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7709 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7710 Vcoding_system_alist);
7711 }
7712 }
7713
7714 spec_vec = Fmake_vector (make_number (3), attrs);
7715 ASET (spec_vec, 1, aliases);
7716 ASET (spec_vec, 2, eol_type);
7717
7718 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7719 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7720 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7721 Vcoding_system_alist);
7722
7723 {
7724 int id = coding_categories[category].id;
7725
7726 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7727 setup_coding_system (name, &coding_categories[category]);
7728 }
7729
7730 return Qnil;
7731
7732 short_args:
7733 return Fsignal (Qwrong_number_of_arguments,
7734 Fcons (intern ("define-coding-system-internal"),
7735 make_number (nargs)));
7736}
7737
7738/* Fixme: should this record the alias relationships for
7739 diagnostics? */
7740DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7741 Sdefine_coding_system_alias, 2, 2, 0,
7742 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7743 (alias, coding_system)
7744 Lisp_Object alias, coding_system;
7745{
7746 Lisp_Object spec, aliases, eol_type;
7747
7748 CHECK_SYMBOL (alias);
7749 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7750 aliases = AREF (spec, 1);
7751 while (!NILP (XCDR (aliases)))
7752 aliases = XCDR (aliases);
7753 XCDR (aliases) = Fcons (alias, Qnil);
7754
7755 eol_type = AREF (spec, 2);
7756 if (VECTORP (eol_type))
7757 {
7758 Lisp_Object subsidiaries;
7759 int i;
7760
7761 subsidiaries = make_subsidiaries (alias);
7762 for (i = 0; i < 3; i++)
7763 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7764 AREF (eol_type, i));
7765
7766 ASET (spec, 2, subsidiaries);
7767 }
7768
7769 Fputhash (alias, spec, Vcoding_system_hash_table);
7770 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7771 Vcoding_system_alist);
7772
7773 return Qnil;
7774}
7775
7776DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7777 1, 1, 0,
7778 doc: /* Return the base of CODING-SYSTEM.
7779Any alias or subsidiary coding system is not a base coding system. */)
7780 (coding_system)
7781 Lisp_Object coding_system;
7782{
7783 Lisp_Object spec, attrs;
7784
7785 if (NILP (coding_system))
7786 return (Qno_conversion);
7787 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7788 attrs = AREF (spec, 0);
7789 return CODING_ATTR_BASE_NAME (attrs);
7790}
7791
7792DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7793 1, 1, 0,
7794 doc: "Return the property list of CODING-SYSTEM.")
7795 (coding_system)
7796 Lisp_Object coding_system;
7797{
7798 Lisp_Object spec, attrs;
7799
7800 if (NILP (coding_system))
7801 coding_system = Qno_conversion;
7802 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7803 attrs = AREF (spec, 0);
7804 return CODING_ATTR_PLIST (attrs);
7805}
7806
7807
7808DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7809 1, 1, 0,
7810 doc: /* Return the list of aliases of CODING-SYSTEM. */)
7811 (coding_system)
7812 Lisp_Object coding_system;
7813{
7814 Lisp_Object spec;
7815
7816 if (NILP (coding_system))
7817 coding_system = Qno_conversion;
7818 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7819 return AREF (spec, 1);
7820}
7821
7822DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7823 Scoding_system_eol_type, 1, 1, 0,
7824 doc: /* Return eol-type of CODING-SYSTEM.
7825An eol-type is integer 0, 1, 2, or a vector of coding systems.
7826
7827Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7828and CR respectively.
7829
7830A vector value indicates that a format of end-of-line should be
7831detected automatically. Nth element of the vector is the subsidiary
7832coding system whose eol-type is N. */)
7833 (coding_system)
7834 Lisp_Object coding_system;
7835{
7836 Lisp_Object spec, eol_type;
7837 int n;
7838
7839 if (NILP (coding_system))
7840 coding_system = Qno_conversion;
7841 if (! CODING_SYSTEM_P (coding_system))
7842 return Qnil;
7843 spec = CODING_SYSTEM_SPEC (coding_system);
7844 eol_type = AREF (spec, 2);
7845 if (VECTORP (eol_type))
7846 return Fcopy_sequence (eol_type);
7847 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7848 return make_number (n);
7849}
7850
7851#endif /* emacs */
7852
7853\f
7854/*** 9. Post-amble ***/
7855
7856void
7857init_coding_once ()
7858{
7859 int i;
7860
7861 for (i = 0; i < coding_category_max; i++)
7862 {
7863 coding_categories[i].id = -1;
7864 coding_priorities[i] = i;
7865 }
7866
7867 /* ISO2022 specific initialize routine. */
7868 for (i = 0; i < 0x20; i++)
7869 iso_code_class[i] = ISO_control_0;
7870 for (i = 0x21; i < 0x7F; i++)
7871 iso_code_class[i] = ISO_graphic_plane_0;
7872 for (i = 0x80; i < 0xA0; i++)
7873 iso_code_class[i] = ISO_control_1;
7874 for (i = 0xA1; i < 0xFF; i++)
7875 iso_code_class[i] = ISO_graphic_plane_1;
7876 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7877 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7878 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7879 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7880 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7881 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7882 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7883 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7884 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7885 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7886
7887 inhibit_pre_post_conversion = 0;
7888
7889 for (i = 0; i < 256; i++)
7890 {
7891 emacs_mule_bytes[i] = 1;
7892 }
7893 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
7894 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
7895 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
7896 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
7897}
7898
7899#ifdef emacs
7900
7901void
7902syms_of_coding ()
7903{
7904 staticpro (&Vcoding_system_hash_table);
7905 Vcoding_system_hash_table = Fmakehash (Qeq);
7906
7907 staticpro (&Vsjis_coding_system);
7908 Vsjis_coding_system = Qnil;
7909
7910 staticpro (&Vbig5_coding_system);
7911 Vbig5_coding_system = Qnil;
7912
7913 staticpro (&Vcode_conversion_work_buf_list);
7914 Vcode_conversion_work_buf_list = Qnil;
7915
7916 staticpro (&Vcode_conversion_reused_work_buf);
7917 Vcode_conversion_reused_work_buf = Qnil;
7918
7919 DEFSYM (Qcharset, "charset");
7920 DEFSYM (Qtarget_idx, "target-idx");
7921 DEFSYM (Qcoding_system_history, "coding-system-history");
7922 Fset (Qcoding_system_history, Qnil);
7923
7924 /* Target FILENAME is the first argument. */
7925 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7926 /* Target FILENAME is the third argument. */
7927 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7928
7929 DEFSYM (Qcall_process, "call-process");
7930 /* Target PROGRAM is the first argument. */
7931 Fput (Qcall_process, Qtarget_idx, make_number (0));
7932
7933 DEFSYM (Qcall_process_region, "call-process-region");
7934 /* Target PROGRAM is the third argument. */
7935 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7936
7937 DEFSYM (Qstart_process, "start-process");
7938 /* Target PROGRAM is the third argument. */
7939 Fput (Qstart_process, Qtarget_idx, make_number (2));
7940
7941 DEFSYM (Qopen_network_stream, "open-network-stream");
7942 /* Target SERVICE is the fourth argument. */
7943 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7944
7945 DEFSYM (Qcoding_system, "coding-system");
7946 DEFSYM (Qcoding_aliases, "coding-aliases");
7947
7948 DEFSYM (Qeol_type, "eol-type");
7949 DEFSYM (Qunix, "unix");
7950 DEFSYM (Qdos, "dos");
7951
7952 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7953 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7954 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7955 DEFSYM (Qdefault_char, "default-char");
7956 DEFSYM (Qundecided, "undecided");
7957 DEFSYM (Qno_conversion, "no-conversion");
7958 DEFSYM (Qraw_text, "raw-text");
7959
7960 DEFSYM (Qiso_2022, "iso-2022");
7961
7962 DEFSYM (Qutf_8, "utf-8");
7963
7964 DEFSYM (Qutf_16, "utf-16");
7965 DEFSYM (Qutf_16_be, "utf-16-be");
7966 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7967 DEFSYM (Qutf_16_le, "utf-16-l3");
7968 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7969 DEFSYM (Qsignature, "signature");
7970 DEFSYM (Qendian, "endian");
7971 DEFSYM (Qbig, "big");
7972 DEFSYM (Qlittle, "little");
7973
7974 DEFSYM (Qshift_jis, "shift-jis");
7975 DEFSYM (Qbig5, "big5");
7976
7977 DEFSYM (Qcoding_system_p, "coding-system-p");
7978
7979 DEFSYM (Qcoding_system_error, "coding-system-error");
7980 Fput (Qcoding_system_error, Qerror_conditions,
7981 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7982 Fput (Qcoding_system_error, Qerror_message,
7983 build_string ("Invalid coding system"));
7984
7985 /* Intern this now in case it isn't already done.
7986 Setting this variable twice is harmless.
7987 But don't staticpro it here--that is done in alloc.c. */
7988 Qchar_table_extra_slots = intern ("char-table-extra-slots");
7989
7990 DEFSYM (Qtranslation_table, "translation-table");
7991 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7992 DEFSYM (Qtranslation_table_id, "translation-table-id");
7993 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7994 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
7995
7996 DEFSYM (Qvalid_codes, "valid-codes");
7997
7998 DEFSYM (Qemacs_mule, "emacs-mule");
7999
8000 Vcoding_category_table
8001 = Fmake_vector (make_number (coding_category_max), Qnil);
8002 staticpro (&Vcoding_category_table);
8003 /* Followings are target of code detection. */
8004 ASET (Vcoding_category_table, coding_category_iso_7,
8005 intern ("coding-category-iso-7"));
8006 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8007 intern ("coding-category-iso-7-tight"));
8008 ASET (Vcoding_category_table, coding_category_iso_8_1,
8009 intern ("coding-category-iso-8-1"));
8010 ASET (Vcoding_category_table, coding_category_iso_8_2,
8011 intern ("coding-category-iso-8-2"));
8012 ASET (Vcoding_category_table, coding_category_iso_7_else,
8013 intern ("coding-category-iso-7-else"));
8014 ASET (Vcoding_category_table, coding_category_iso_8_else,
8015 intern ("coding-category-iso-8-else"));
8016 ASET (Vcoding_category_table, coding_category_utf_8,
8017 intern ("coding-category-utf-8"));
8018 ASET (Vcoding_category_table, coding_category_utf_16_be,
8019 intern ("coding-category-utf-16-be"));
8020 ASET (Vcoding_category_table, coding_category_utf_16_le,
8021 intern ("coding-category-utf-16-le"));
8022 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8023 intern ("coding-category-utf-16-be-nosig"));
8024 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8025 intern ("coding-category-utf-16-le-nosig"));
8026 ASET (Vcoding_category_table, coding_category_charset,
8027 intern ("coding-category-charset"));
8028 ASET (Vcoding_category_table, coding_category_sjis,
8029 intern ("coding-category-sjis"));
8030 ASET (Vcoding_category_table, coding_category_big5,
8031 intern ("coding-category-big5"));
8032 ASET (Vcoding_category_table, coding_category_ccl,
8033 intern ("coding-category-ccl"));
8034 ASET (Vcoding_category_table, coding_category_emacs_mule,
8035 intern ("coding-category-emacs-mule"));
8036 /* Followings are NOT target of code detection. */
8037 ASET (Vcoding_category_table, coding_category_raw_text,
8038 intern ("coding-category-raw-text"));
8039 ASET (Vcoding_category_table, coding_category_undecided,
8040 intern ("coding-category-undecided"));
8041
8042 defsubr (&Scoding_system_p);
8043 defsubr (&Sread_coding_system);
8044 defsubr (&Sread_non_nil_coding_system);
8045 defsubr (&Scheck_coding_system);
8046 defsubr (&Sdetect_coding_region);
8047 defsubr (&Sdetect_coding_string);
8048 defsubr (&Sfind_coding_systems_region_internal);
8049 defsubr (&Scheck_coding_systems_region);
8050 defsubr (&Sdecode_coding_region);
8051 defsubr (&Sencode_coding_region);
8052 defsubr (&Sdecode_coding_string);
8053 defsubr (&Sencode_coding_string);
8054 defsubr (&Sdecode_sjis_char);
8055 defsubr (&Sencode_sjis_char);
8056 defsubr (&Sdecode_big5_char);
8057 defsubr (&Sencode_big5_char);
8058 defsubr (&Sset_terminal_coding_system_internal);
8059 defsubr (&Sset_safe_terminal_coding_system_internal);
8060 defsubr (&Sterminal_coding_system);
8061 defsubr (&Sset_keyboard_coding_system_internal);
8062 defsubr (&Skeyboard_coding_system);
8063 defsubr (&Sfind_operation_coding_system);
8064 defsubr (&Sset_coding_system_priority);
8065 defsubr (&Sdefine_coding_system_internal);
8066 defsubr (&Sdefine_coding_system_alias);
8067 defsubr (&Scoding_system_base);
8068 defsubr (&Scoding_system_plist);
8069 defsubr (&Scoding_system_aliases);
8070 defsubr (&Scoding_system_eol_type);
8071 defsubr (&Scoding_system_priority_list);
8072
8073 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8074 doc: /* List of coding systems.
8075
8076Do not alter the value of this variable manually. This variable should be
8077updated by the functions `define-coding-system' and
8078`define-coding-system-alias'. */);
8079 Vcoding_system_list = Qnil;
8080
8081 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8082 doc: /* Alist of coding system names.
8083Each element is one element list of coding system name.
8084This variable is given to `completing-read' as TABLE argument.
8085
8086Do not alter the value of this variable manually. This variable should be
8087updated by the functions `make-coding-system' and
8088`define-coding-system-alias'. */);
8089 Vcoding_system_alist = Qnil;
8090
8091 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8092 doc: /* List of coding-categories (symbols) ordered by priority.
8093
8094On detecting a coding system, Emacs tries code detection algorithms
8095associated with each coding-category one by one in this order. When
8096one algorithm agrees with a byte sequence of source text, the coding
8097system bound to the corresponding coding-category is selected. */);
8098 {
8099 int i;
8100
8101 Vcoding_category_list = Qnil;
8102 for (i = coding_category_max - 1; i >= 0; i--)
8103 Vcoding_category_list
8104 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8105 Vcoding_category_list);
8106 }
8107
8108 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8109 doc: /* Specify the coding system for read operations.
8110It is useful to bind this variable with `let', but do not set it globally.
8111If the value is a coding system, it is used for decoding on read operation.
8112If not, an appropriate element is used from one of the coding system alists:
8113There are three such tables, `file-coding-system-alist',
8114`process-coding-system-alist', and `network-coding-system-alist'. */);
8115 Vcoding_system_for_read = Qnil;
8116
8117 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8118 doc: /* Specify the coding system for write operations.
8119Programs bind this variable with `let', but you should not set it globally.
8120If the value is a coding system, it is used for encoding of output,
8121when writing it to a file and when sending it to a file or subprocess.
8122
8123If this does not specify a coding system, an appropriate element
8124is used from one of the coding system alists:
8125There are three such tables, `file-coding-system-alist',
8126`process-coding-system-alist', and `network-coding-system-alist'.
8127For output to files, if the above procedure does not specify a coding system,
8128the value of `buffer-file-coding-system' is used. */);
8129 Vcoding_system_for_write = Qnil;
8130
8131 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8132 doc: /*
8133Coding system used in the latest file or process I/O. */);
8134 Vlast_coding_system_used = Qnil;
8135
8136 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8137 doc: /*
8138*Non-nil means always inhibit code conversion of end-of-line format.
8139See info node `Coding Systems' and info node `Text and Binary' concerning
8140such conversion. */);
8141 inhibit_eol_conversion = 0;
8142
8143 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8144 doc: /*
8145Non-nil means process buffer inherits coding system of process output.
8146Bind it to t if the process output is to be treated as if it were a file
8147read from some filesystem. */);
8148 inherit_process_coding_system = 0;
8149
8150 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8151 doc: /*
8152Alist to decide a coding system to use for a file I/O operation.
8153The format is ((PATTERN . VAL) ...),
8154where PATTERN is a regular expression matching a file name,
8155VAL is a coding system, a cons of coding systems, or a function symbol.
8156If VAL is a coding system, it is used for both decoding and encoding
8157the file contents.
8158If VAL is a cons of coding systems, the car part is used for decoding,
8159and the cdr part is used for encoding.
8160If VAL is a function symbol, the function must return a coding system
8161or a cons of coding systems which are used as above. The function gets
8162the arguments with which `find-operation-coding-systems' was called.
8163
8164See also the function `find-operation-coding-system'
8165and the variable `auto-coding-alist'. */);
8166 Vfile_coding_system_alist = Qnil;
8167
8168 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8169 doc: /*
8170Alist to decide a coding system to use for a process I/O operation.
8171The format is ((PATTERN . VAL) ...),
8172where PATTERN is a regular expression matching a program name,
8173VAL is a coding system, a cons of coding systems, or a function symbol.
8174If VAL is a coding system, it is used for both decoding what received
8175from the program and encoding what sent to the program.
8176If VAL is a cons of coding systems, the car part is used for decoding,
8177and the cdr part is used for encoding.
8178If VAL is a function symbol, the function must return a coding system
8179or a cons of coding systems which are used as above.
8180
8181See also the function `find-operation-coding-system'. */);
8182 Vprocess_coding_system_alist = Qnil;
8183
8184 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8185 doc: /*
8186Alist to decide a coding system to use for a network I/O operation.
8187The format is ((PATTERN . VAL) ...),
8188where PATTERN is a regular expression matching a network service name
8189or is a port number to connect to,
8190VAL is a coding system, a cons of coding systems, or a function symbol.
8191If VAL is a coding system, it is used for both decoding what received
8192from the network stream and encoding what sent to the network stream.
8193If VAL is a cons of coding systems, the car part is used for decoding,
8194and the cdr part is used for encoding.
8195If VAL is a function symbol, the function must return a coding system
8196or a cons of coding systems which are used as above.
8197
8198See also the function `find-operation-coding-system'. */);
8199 Vnetwork_coding_system_alist = Qnil;
8200
8201 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8202 doc: /* Coding system to use with system messages.
8203Also used for decoding keyboard input on X Window system. */);
8204 Vlocale_coding_system = Qnil;
8205
8206 /* The eol mnemonics are reset in startup.el system-dependently. */
8207 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8208 doc: /*
8209*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8210 eol_mnemonic_unix = build_string (":");
8211
8212 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8213 doc: /*
8214*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8215 eol_mnemonic_dos = build_string ("\\");
8216
8217 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8218 doc: /*
8219*String displayed in mode line for MAC-like (CR) end-of-line format. */);
8220 eol_mnemonic_mac = build_string ("/");
8221
8222 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8223 doc: /*
8224*String displayed in mode line when end-of-line format is not yet determined. */);
8225 eol_mnemonic_undecided = build_string (":");
8226
8227 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8228 doc: /*
8229*Non-nil enables character translation while encoding and decoding. */);
8230 Venable_character_translation = Qt;
8231
8232 DEFVAR_LISP ("standard-translation-table-for-decode",
8233 &Vstandard_translation_table_for_decode,
8234 doc: /* Table for translating characters while decoding. */);
8235 Vstandard_translation_table_for_decode = Qnil;
8236
8237 DEFVAR_LISP ("standard-translation-table-for-encode",
8238 &Vstandard_translation_table_for_encode,
8239 doc: /* Table for translating characters while encoding. */);
8240 Vstandard_translation_table_for_encode = Qnil;
8241
8242 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8243 doc: /* Alist of charsets vs revision numbers.
8244While encoding, if a charset (car part of an element) is found,
8245designate it with the escape sequence identifying revision (cdr part
8246of the element). */);
8247 Vcharset_revision_table = Qnil;
8248
8249 DEFVAR_LISP ("default-process-coding-system",
8250 &Vdefault_process_coding_system,
8251 doc: /* Cons of coding systems used for process I/O by default.
8252The car part is used for decoding a process output,
8253the cdr part is used for encoding a text to be sent to a process. */);
8254 Vdefault_process_coding_system = Qnil;
8255
8256 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8257 doc: /*
8258Table of extra Latin codes in the range 128..159 (inclusive).
8259This is a vector of length 256.
8260If Nth element is non-nil, the existence of code N in a file
8261\(or output of subprocess) doesn't prevent it to be detected as
8262a coding system of ISO 2022 variant which has a flag
8263`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8264or reading output of a subprocess.
8265Only 128th through 159th elements has a meaning. */);
8266 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8267
8268 DEFVAR_LISP ("select-safe-coding-system-function",
8269 &Vselect_safe_coding_system_function,
8270 doc: /*
8271Function to call to select safe coding system for encoding a text.
8272
8273If set, this function is called to force a user to select a proper
8274coding system which can encode the text in the case that a default
8275coding system used in each operation can't encode the text.
8276
8277The default value is `select-safe-coding-system' (which see). */);
8278 Vselect_safe_coding_system_function = Qnil;
8279
8280 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8281 &inhibit_iso_escape_detection,
8282 doc: /*
8283If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8284
8285By default, on reading a file, Emacs tries to detect how the text is
8286encoded. This code detection is sensitive to escape sequences. If
8287the sequence is valid as ISO2022, the code is determined as one of
8288the ISO2022 encodings, and the file is decoded by the corresponding
8289coding system (e.g. `iso-2022-7bit').
8290
8291However, there may be a case that you want to read escape sequences in
8292a file as is. In such a case, you can set this variable to non-nil.
8293Then, as the code detection ignores any escape sequences, no file is
8294detected as encoded in some ISO2022 encoding. The result is that all
8295escape sequences become visible in a buffer.
8296
8297The default value is nil, and it is strongly recommended not to change
8298it. That is because many Emacs Lisp source files that contain
8299non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8300in Emacs's distribution, and they won't be decoded correctly on
8301reading if you suppress escape sequence detection.
8302
8303The other way to read escape sequences in a file without decoding is
8304to explicitly specify some coding system that doesn't use ISO2022's
8305escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8306 inhibit_iso_escape_detection = 0;
8307
8308 {
8309 Lisp_Object args[coding_arg_max];
8310 Lisp_Object plist[14];
8311 int i;
8312
8313 for (i = 0; i < coding_arg_max; i++)
8314 args[i] = Qnil;
8315
8316 plist[0] = intern (":name");
8317 plist[1] = args[coding_arg_name] = Qno_conversion;
8318 plist[2] = intern (":mnemonic");
8319 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8320 plist[4] = intern (":coding-type");
8321 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8322 plist[6] = intern (":ascii-compatible-p");
8323 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8324 plist[8] = intern (":default-char");
8325 plist[9] = args[coding_arg_default_char] = make_number (0);
8326 plist[10] = intern (":docstring");
8327 plist[11] = build_string ("Do no conversion.\n\
8328\n\
8329When you visit a file with this coding, the file is read into a\n\
8330unibyte buffer as is, thus each byte of a file is treated as a\n\
8331character.");
8332 plist[12] = intern (":eol-type");
8333 plist[13] = args[coding_arg_eol_type] = Qunix;
8334 args[coding_arg_plist] = Flist (14, plist);
8335 Fdefine_coding_system_internal (coding_arg_max, args);
8336 }
8337
8338 setup_coding_system (Qno_conversion, &keyboard_coding);
8339 setup_coding_system (Qno_conversion, &terminal_coding);
8340 setup_coding_system (Qno_conversion, &safe_terminal_coding);
8341}
8342
8343char *
8344emacs_strerror (error_number)
8345 int error_number;
8346{
8347 char *str;
8348
8349 synchronize_system_messages_locale ();
8350 str = strerror (error_number);
8351
8352 if (! NILP (Vlocale_coding_system))
8353 {
8354 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8355 Vlocale_coding_system,
8356 0);
8357 str = (char *) XSTRING (dec)->data;
8358 }
8359
8360 return str;
8361}
8362
8363#endif /* emacs */