1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. At
59 the C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion from
61 a coding system symbol to its attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
152 Below is the template of these functions. */
/* Template for detectors: scan the source bytes of CODING and record
   in DETECT_INFO->found / DETECT_INFO->rejected whether they conform
   to XXX.  FOUND is set only when a byte strongly suggests XXX.  */
156 detect_coding_XXX (coding
, detect_info
)
157 struct coding_system
*coding
;
158 struct coding_detection_info
*detect_info
;
160 unsigned char *src
= coding
->source
;
161 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
162 int multibytep
= coding
->src_multibyte
;
163 int consumed_chars
= 0;
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
173 if (! __C_conforms_to_XXX___ (c
))
175 if (__C_strongly_suggests_XXX__ (c
))
176 found
= CATEGORY_MASK_XXX
;
178 /* The byte sequence is invalid for XXX. */
179 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
183 /* The source was exhausted successfully. */
184 detect_info
->found
|= found
;
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
201 Below is the template of these functions. */
/* Template for decoders: consume bytes from CODING->source starting at
   CODING->consumed and store decoded characters into CODING->charbuf,
   then record the consumed and produced counts in CODING.  */
205 decode_coding_XXXX (coding
)
206 struct coding_system
*coding
;
208 unsigned char *src
= coding
->source
+ coding
->consumed
;
209 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there are not enough source bytes, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base
;
214 /* A buffer to produce decoded characters. */
215 int *charbuf
= coding
->charbuf
;
216 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
217 int multibytep
= coding
->src_multibyte
;
/* The buffer is full once CHARBUF reaches CHARBUF_END (same test as
   in decode_coding_utf_8).  */
222 if (charbuf
>= charbuf_end
)
223 /* No more room to produce a decoded character. */
230 if (src_base
< src_end
231 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base
< src_end
&& charbuf
< charbuf_end
)
235 *charbuf
++ = *src_base
++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
239 /* Remember how many characters we produced. */
240 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of not-yet-encoded source text.
260 Below is a template of these functions. */
/* Template for encoders: encode the characters held in
   CODING->charbuf (CODING->charbuf_used of them) into
   CODING->destination, then update CODING->produced_char and
   CODING->produced.  */
263 encode_coding_XXX (coding
)
264 struct coding_system
*coding
;
266 int multibytep
= coding
->dst_multibyte
;
267 int *charbuf
= coding
->charbuf
;
/* End of the characters to encode.  CHARBUF is a plain `int *', so the
   base must come from CODING, not from CHARBUF itself.  */
268 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
269 unsigned char *dst
= coding
->destination
+ coding
->produced
;
270 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
/* Stop early enough that one loop iteration cannot run past DST_END.  */
271 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
272 int produced_chars
= 0;
274 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
277 /* Encode C into DST, and increment DST. */
279 label_no_more_destination
:
280 /* How many chars and bytes we produced. */
281 coding
->produced_char
+= produced_chars
;
282 coding
->produced
= dst
- coding
->destination
;
287 /*** 1. Preamble ***/
294 #include "character.h"
297 #include "composite.h"
301 Lisp_Object Vcoding_system_hash_table
;
303 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
304 Lisp_Object Qunix
, Qdos
;
305 extern Lisp_Object Qmac
; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system
;
307 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
308 Lisp_Object Qdefault_char
;
309 Lisp_Object Qno_conversion
, Qundecided
;
310 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
311 Lisp_Object Qutf_16_be_nosig
, Qutf_16_be
, Qutf_16_le_nosig
, Qutf_16_le
;
312 Lisp_Object Qsignature
, Qendian
, Qbig
, Qlittle
;
313 Lisp_Object Qcoding_system_history
;
314 Lisp_Object Qvalid_codes
;
316 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
317 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
318 Lisp_Object Qstart_process
, Qopen_network_stream
;
319 Lisp_Object Qtarget_idx
;
321 Lisp_Object Vselect_safe_coding_system_function
;
323 /* Mnemonic string for each format of end-of-line. */
324 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
325 /* Mnemonic string to indicate format of end-of-line is not yet
327 Lisp_Object eol_mnemonic_undecided
;
331 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
333 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
335 /* Coding system emacs-mule and raw-text are for converting only
336 end-of-line format. */
337 Lisp_Object Qemacs_mule
, Qraw_text
;
339 /* Coding-systems are handed between Emacs Lisp programs and C internal
340 routines by the following three variables. */
341 /* Coding-system for reading files and receiving data from process. */
342 Lisp_Object Vcoding_system_for_read
;
343 /* Coding-system for writing files and sending data to process. */
344 Lisp_Object Vcoding_system_for_write
;
345 /* Coding-system actually used in the latest I/O. */
346 Lisp_Object Vlast_coding_system_used
;
348 /* A vector of length 256 which contains information about special
349 Latin codes (especially for dealing with Microsoft codes). */
350 Lisp_Object Vlatin_extra_code_table
;
352 /* Flag to inhibit code conversion of end-of-line format. */
353 int inhibit_eol_conversion
;
355 /* Flag to inhibit ISO2022 escape sequence detection. */
356 int inhibit_iso_escape_detection
;
358 /* Flag to make buffer-file-coding-system inherit from process-coding. */
359 int inherit_process_coding_system
;
361 /* Coding system to be used to encode text for terminal display. */
362 struct coding_system terminal_coding
;
364 /* Coding system to be used to encode text for terminal display when
365 terminal coding system is nil. */
366 struct coding_system safe_terminal_coding
;
368 /* Coding system of what is sent from terminal keyboard. */
369 struct coding_system keyboard_coding
;
371 Lisp_Object Vfile_coding_system_alist
;
372 Lisp_Object Vprocess_coding_system_alist
;
373 Lisp_Object Vnetwork_coding_system_alist
;
375 Lisp_Object Vlocale_coding_system
;
379 /* Flag to tell if we look up translation table on character code
381 Lisp_Object Venable_character_translation
;
382 /* Standard translation table to look up on decoding (reading). */
383 Lisp_Object Vstandard_translation_table_for_decode
;
384 /* Standard translation table to look up on encoding (writing). */
385 Lisp_Object Vstandard_translation_table_for_encode
;
387 Lisp_Object Qtranslation_table
;
388 Lisp_Object Qtranslation_table_id
;
389 Lisp_Object Qtranslation_table_for_decode
;
390 Lisp_Object Qtranslation_table_for_encode
;
392 /* Alist of charsets vs revision number. */
393 static Lisp_Object Vcharset_revision_table
;
395 /* Default coding systems used for process I/O. */
396 Lisp_Object Vdefault_process_coding_system
;
398 /* Global flag to tell that we can't call post-read-conversion and
399 pre-write-conversion functions. Usually the value is zero, but it
400 is set to 1 temporarily while such functions are running. This is
401 to avoid infinite recursive call. */
402 static int inhibit_pre_post_conversion
;
404 /* Two special coding systems. */
405 Lisp_Object Vsjis_coding_system
;
406 Lisp_Object Vbig5_coding_system
;
409 static int detect_coding_utf_8
P_ ((struct coding_system
*,
410 struct coding_detection_info
*info
));
411 static void decode_coding_utf_8
P_ ((struct coding_system
*));
412 static int encode_coding_utf_8
P_ ((struct coding_system
*));
414 static int detect_coding_utf_16
P_ ((struct coding_system
*,
415 struct coding_detection_info
*info
));
416 static void decode_coding_utf_16
P_ ((struct coding_system
*));
417 static int encode_coding_utf_16
P_ ((struct coding_system
*));
419 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
420 struct coding_detection_info
*info
));
421 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
422 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
424 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
425 struct coding_detection_info
*info
));
426 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
427 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
429 static int detect_coding_sjis
P_ ((struct coding_system
*,
430 struct coding_detection_info
*info
));
431 static void decode_coding_sjis
P_ ((struct coding_system
*));
432 static int encode_coding_sjis
P_ ((struct coding_system
*));
434 static int detect_coding_big5
P_ ((struct coding_system
*,
435 struct coding_detection_info
*info
));
436 static void decode_coding_big5
P_ ((struct coding_system
*));
437 static int encode_coding_big5
P_ ((struct coding_system
*));
439 static int detect_coding_ccl
P_ ((struct coding_system
*,
440 struct coding_detection_info
*info
));
441 static void decode_coding_ccl
P_ ((struct coding_system
*));
442 static int encode_coding_ccl
P_ ((struct coding_system
*));
444 static void decode_coding_raw_text
P_ ((struct coding_system
*));
445 static int encode_coding_raw_text
P_ ((struct coding_system
*));
448 /* ISO2022 section */
450 #define CODING_ISO_INITIAL(coding, reg) \
451 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
452 coding_attr_iso_initial), \
456 #define CODING_ISO_REQUEST(coding, charset_id) \
457 ((charset_id <= (coding)->max_charset_id \
458 ? (coding)->safe_charsets[charset_id] \
462 #define CODING_ISO_FLAGS(coding) \
463 ((coding)->spec.iso_2022.flags)
464 #define CODING_ISO_DESIGNATION(coding, reg) \
465 ((coding)->spec.iso_2022.current_designation[reg])
466 #define CODING_ISO_INVOCATION(coding, plane) \
467 ((coding)->spec.iso_2022.current_invocation[plane])
468 #define CODING_ISO_SINGLE_SHIFTING(coding) \
469 ((coding)->spec.iso_2022.single_shifting)
470 #define CODING_ISO_BOL(coding) \
471 ((coding)->spec.iso_2022.bol)
472 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
473 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
475 /* Control characters of ISO2022. */
476 /* code */ /* function */
477 #define ISO_CODE_LF 0x0A /* line-feed */
478 #define ISO_CODE_CR 0x0D /* carriage-return */
479 #define ISO_CODE_SO 0x0E /* shift-out */
480 #define ISO_CODE_SI 0x0F /* shift-in */
481 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
482 #define ISO_CODE_ESC 0x1B /* escape */
483 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
484 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
485 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
487 /* All code (1-byte) of ISO2022 is classified into one of the
489 enum iso_code_class_type
491 ISO_control_0
, /* Control codes in the range
492 0x00..0x1F and 0x7F, except for the
493 following 5 codes. */
494 ISO_carriage_return
, /* ISO_CODE_CR (0x0D) */
495 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
496 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
497 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
498 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
499 ISO_control_1
, /* Control codes in the range
500 0x80..0x9F, except for the
501 following 3 codes. */
502 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
503 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
504 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
505 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
506 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
507 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
508 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
511 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
512 `iso-flags' attribute of an iso2022 coding system. */
514 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
515 instead of the correct short-form sequence (e.g. ESC $ A). */
516 #define CODING_ISO_FLAG_LONG_FORM 0x0001
518 /* If set, reset graphic planes and registers at end-of-line to the
520 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
522 /* If set, reset graphic planes and registers before any control
523 characters to the initial state. */
524 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
526 /* If set, encode by 7-bit environment. */
527 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
529 /* If set, use locking-shift function. */
530 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
532 /* If set, use single-shift function. Overwrite
533 CODING_ISO_FLAG_LOCKING_SHIFT. */
534 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
536 /* If set, use designation escape sequence. */
537 #define CODING_ISO_FLAG_DESIGNATION 0x0040
539 /* If set, produce revision number sequence. */
540 #define CODING_ISO_FLAG_REVISION 0x0080
542 /* If set, produce ISO6429's direction specifying sequence. */
543 #define CODING_ISO_FLAG_DIRECTION 0x0100
545 /* If set, assume designation states are reset at beginning of line on
547 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
549 /* If set, designation sequence should be placed at beginning of line
551 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
553 /* If set, do not encode unsafe characters on output. */
554 #define CODING_ISO_FLAG_SAFE 0x0800
556 /* If set, extra latin codes (128..159) are accepted as a valid code
558 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
560 #define CODING_ISO_FLAG_COMPOSITION 0x2000
562 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
564 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
566 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
568 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
570 /* A character to be produced on output if encoding of the original
571 character is prohibited by CODING_ISO_FLAG_SAFE. */
572 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
576 #define CODING_UTF_16_BOM(coding) \
577 ((coding)->spec.utf_16.bom)
579 #define CODING_UTF_16_ENDIAN(coding) \
580 ((coding)->spec.utf_16.endian)
582 #define CODING_UTF_16_SURROGATE(coding) \
583 ((coding)->spec.utf_16.surrogate)
587 #define CODING_CCL_DECODER(coding) \
588 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
589 #define CODING_CCL_ENCODER(coding) \
590 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
591 #define CODING_CCL_VALIDS(coding) \
592 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
595 /* Index for each coding category in `coding_categories' */
599 coding_category_iso_7
,
600 coding_category_iso_7_tight
,
601 coding_category_iso_8_1
,
602 coding_category_iso_8_2
,
603 coding_category_iso_7_else
,
604 coding_category_iso_8_else
,
605 coding_category_utf_8
,
606 coding_category_utf_16_auto
,
607 coding_category_utf_16_be
,
608 coding_category_utf_16_le
,
609 coding_category_utf_16_be_nosig
,
610 coding_category_utf_16_le_nosig
,
611 coding_category_charset
,
612 coding_category_sjis
,
613 coding_category_big5
,
615 coding_category_emacs_mule
,
616 /* All above are targets of code detection. */
617 coding_category_raw_text
,
618 coding_category_undecided
,
622 /* Definitions of flag bits used in detect_coding_XXXX. */
623 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
624 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
625 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
626 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
627 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
628 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
629 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
630 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
631 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
632 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
633 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
634 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
635 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
636 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
637 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
638 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
639 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
641 /* This value is returned if detect_coding_mask () finds nothing other
642 than ASCII characters. */
/* Built from the grouped ISO and UTF-16 masks (defined further down in
   this file) plus the remaining single-category masks; the resulting
   bit set is the same as listing every category individually.  */
#define CATEGORY_MASK_ANY		\
  (CATEGORY_MASK_ISO			\
   | CATEGORY_MASK_UTF_8		\
   | CATEGORY_MASK_UTF_16		\
   | CATEGORY_MASK_CHARSET		\
   | CATEGORY_MASK_SJIS			\
   | CATEGORY_MASK_BIG5			\
   | CATEGORY_MASK_CCL			\
   | CATEGORY_MASK_EMACS_MULE)
662 #define CATEGORY_MASK_ISO_7BIT \
663 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
665 #define CATEGORY_MASK_ISO_8BIT \
666 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
668 #define CATEGORY_MASK_ISO_ELSE \
669 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
671 #define CATEGORY_MASK_ISO_ESCAPE \
672 (CATEGORY_MASK_ISO_7 \
673 | CATEGORY_MASK_ISO_7_TIGHT \
674 | CATEGORY_MASK_ISO_7_ELSE \
675 | CATEGORY_MASK_ISO_8_ELSE)
677 #define CATEGORY_MASK_ISO \
678 ( CATEGORY_MASK_ISO_7BIT \
679 | CATEGORY_MASK_ISO_8BIT \
680 | CATEGORY_MASK_ISO_ELSE)
682 #define CATEGORY_MASK_UTF_16 \
683 (CATEGORY_MASK_UTF_16_BE \
684 | CATEGORY_MASK_UTF_16_LE \
685 | CATEGORY_MASK_UTF_16_BE_NOSIG \
686 | CATEGORY_MASK_UTF_16_LE_NOSIG)
689 /* List of symbols `coding-category-xxx' ordered by priority. This
690 variable is exposed to Emacs Lisp. */
691 static Lisp_Object Vcoding_category_list
;
693 /* Table of coding categories (Lisp symbols). This variable is for
695 static Lisp_Object Vcoding_category_table
;
697 /* Table of coding-categories ordered by priority. */
698 static enum coding_category coding_priorities
[coding_category_max
];
700 /* Nth element is a coding context for the coding system bound to the
701 Nth coding category. */
702 static struct coding_system coding_categories
[coding_category_max
];
704 static int detected_mask
[coding_category_raw_text
] =
712 CATEGORY_MASK_UTF_16
,
713 CATEGORY_MASK_UTF_16
,
714 CATEGORY_MASK_UTF_16
,
715 CATEGORY_MASK_UTF_16
,
716 CATEGORY_MASK_UTF_16
,
717 CATEGORY_MASK_CHARSET
,
721 CATEGORY_MASK_EMACS_MULE
724 /*** Commonly used macros and functions ***/
/* Smaller of A and B.  An argument may be evaluated twice, so avoid
   side effects in the arguments.  */
#define min(a, b) ((b) > (a) ? (a) : (b))
/* Larger of A and B.  An argument may be evaluated twice, so avoid
   side effects in the arguments.  */
#define max(a, b) ((b) < (a) ? (a) : (b))
733 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
735 attrs = CODING_ID_ATTRS (coding->id); \
736 eol_type = CODING_ID_EOL_TYPE (coding->id); \
737 if (VECTORP (eol_type)) \
739 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
743 /* Safely get one byte from the source text pointed by SRC which ends
744 at SRC_END, and set C to that byte. If there are not enough bytes
745 in the source, it jumps to `no_more_source'. The caller
746 should declare and set these variables appropriately in advance:
747 src, src_end, multibytep
750 #define ONE_MORE_BYTE(c) \
752 if (src == src_end) \
754 if (src_base < src) \
755 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
756 goto no_more_source; \
759 if (multibytep && (c & 0x80)) \
761 if ((c & 0xFE) != 0xC0) \
762 error ("Undecodable char found"); \
763 c = ((c & 1) << 6) | *src++; \
769 #define ONE_MORE_BYTE_NO_CHECK(c) \
772 if (multibytep && (c & 0x80)) \
774 if ((c & 0xFE) != 0xC0) \
775 error ("Undecodable char found"); \
776 c = ((c & 1) << 6) | *src++; \
782 /* Store a byte C in the place pointed by DST and increment DST to the
783 next free point, and increment PRODUCED_CHARS. The caller should
784 assure that C is 0..127, and declare and set the variable `dst'
785 appropriately in advance.
789 #define EMIT_ONE_ASCII_BYTE(c) \
796 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
798 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
800 produced_chars += 2; \
801 *dst++ = (c1), *dst++ = (c2); \
805 /* Store a byte C in the place pointed by DST and increment DST to the
806 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
807 nonzero, store in an appropriate multibyte form. The caller should
808 declare and set the variables `dst' and `multibytep' appropriately
811 #define EMIT_ONE_BYTE(c) \
818 ch = BYTE8_TO_CHAR (ch); \
819 CHAR_STRING_ADVANCE (ch, dst); \
826 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
828 #define EMIT_TWO_BYTES(c1, c2) \
830 produced_chars += 2; \
837 ch = BYTE8_TO_CHAR (ch); \
838 CHAR_STRING_ADVANCE (ch, dst); \
841 ch = BYTE8_TO_CHAR (ch); \
842 CHAR_STRING_ADVANCE (ch, dst); \
852 #define EMIT_THREE_BYTES(c1, c2, c3) \
854 EMIT_ONE_BYTE (c1); \
855 EMIT_TWO_BYTES (c2, c3); \
859 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
861 EMIT_TWO_BYTES (c1, c2); \
862 EMIT_TWO_BYTES (c3, c4); \
866 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
868 charset_map_loaded = 0; \
869 c = DECODE_CHAR (charset, code); \
870 if (charset_map_loaded) \
872 unsigned char *orig = coding->source; \
875 coding_set_source (coding); \
876 offset = coding->source - orig; \
878 src_base += offset; \
884 #define ASSURE_DESTINATION(bytes) \
886 if (dst + (bytes) >= dst_end) \
888 int more_bytes = charbuf_end - charbuf + (bytes); \
890 dst = alloc_destination (coding, more_bytes, dst); \
891 dst_end = coding->destination + coding->dst_bytes; \
/* Recompute CODING->source, the address of the source bytes, from
   CODING->src_object and CODING->src_pos_byte.  Buffer and Lisp string
   sources are handled here; any other source is left untouched.  */
898 coding_set_source (coding
)
899 struct coding_system
*coding
;
901 if (BUFFERP (coding
->src_object
))
/* A negative src_pos makes the address relative to GAP_END_ADDR.  */
903 if (coding
->src_pos
< 0)
904 coding
->source
= GAP_END_ADDR
+ coding
->src_pos_byte
;
907 struct buffer
*buf
= XBUFFER (coding
->src_object
);
908 EMACS_INT gpt_byte
= BUF_GPT_BYTE (buf
);
909 unsigned char *beg_addr
= BUF_BEG_ADDR (buf
);
/* Address relative to the buffer's start (the - 1 presumably reflects
   1-based byte positions -- confirm); a position at or beyond the gap
   position must additionally skip the gap.  */
911 coding
->source
= beg_addr
+ coding
->src_pos_byte
- 1;
912 if (coding
->src_pos_byte
>= gpt_byte
)
913 coding
->source
+= BUF_GAP_SIZE (buf
);
/* Lisp string source: index into the string's data.  */
916 else if (STRINGP (coding
->src_object
))
918 coding
->source
= (XSTRING (coding
->src_object
)->data
919 + coding
->src_pos_byte
);
922 /* Otherwise, the source is C string and is never relocated
923 automatically. Thus we don't have to update anything. */
/* Recompute CODING->destination and CODING->dst_bytes when the
   destination object is a buffer; any other destination is left
   untouched.  */
928 coding_set_destination (coding
)
929 struct coding_system
*coding
;
931 if (BUFFERP (coding
->dst_object
))
/* NOTE(review): the test is on src_pos, not on a destination field.
   In this branch the room available runs from the destination up to
   GAP_END_ADDR minus the (src_bytes - consumed) unconsumed source
   bytes -- presumably the source text sits at the end of the gap of
   the same buffer; confirm against the callers.  */
933 if (coding
->src_pos
< 0)
935 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
936 coding
->dst_bytes
= (GAP_END_ADDR
937 - (coding
->src_bytes
- coding
->consumed
)
938 - coding
->destination
);
942 /* We are sure that coding->dst_pos_byte is before the gap
944 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
945 + coding
->dst_pos_byte
- 1);
946 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
947 - coding
->destination
);
951 /* Otherwise, the destination is C string and is never relocated
952 automatically. Thus we don't have to update anything. */
/* Grow the destination area CODING->destination by BYTES bytes with
   xrealloc, and enlarge CODING->dst_bytes by the same amount.  The
   destination pointer may change, so callers must not keep pointers
   into the old area.  */
958 coding_alloc_by_realloc (coding
, bytes
)
959 struct coding_system
*coding
;
962 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
963 coding
->dst_bytes
+ bytes
);
964 coding
->dst_bytes
+= bytes
;
/* Make room for BYTES more bytes of output when the destination is a
   buffer.  NOTE(review): the statement that actually enlarges the gap
   is not visible in this excerpt; only the bookkeeping around it is.  */
968 coding_alloc_by_making_gap (coding
, bytes
)
969 struct coding_system
*coding
;
/* Source and destination are the same buffer.  */
972 if (BUFFERP (coding
->dst_object
)
973 && EQ (coding
->src_object
, coding
->dst_object
))
/* ADD is the number of source bytes not yet consumed.  */
975 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
/* Temporarily count those ADD bytes as buffer text instead of gap ...  */
977 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
/* ... and undo exactly that adjustment afterwards.  */
979 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
983 Lisp_Object this_buffer
;
/* Remember the current buffer, switch to the destination buffer, and
   switch back when done.  */
985 this_buffer
= Fcurrent_buffer ();
986 set_buffer_internal (XBUFFER (coding
->dst_object
));
988 set_buffer_internal (XBUFFER (this_buffer
));
/* Enlarge the destination of CODING by NBYTES bytes and recompute DST
   so that it keeps the same offset inside the (possibly relocated)
   destination area.  A buffer destination goes through
   coding_alloc_by_making_gap; otherwise coding_alloc_by_realloc is
   used.  CODING->result is reset to CODING_RESULT_SUCCESS.
   Presumably the recomputed DST is the return value -- the return
   statement is not visible in this excerpt.  */
993 static unsigned char *
994 alloc_destination (coding
, nbytes
, dst
)
995 struct coding_system
*coding
;
/* Remember where DST points relative to the start of the destination,
   because the start address may change below.  */
999 EMACS_INT offset
= dst
- coding
->destination
;
1001 if (BUFFERP (coding
->dst_object
))
1002 coding_alloc_by_making_gap (coding
, nbytes
);
1004 coding_alloc_by_realloc (coding
, nbytes
);
1005 coding
->result
= CODING_RESULT_SUCCESS
;
/* Refresh CODING->destination, then rebase DST on it.  */
1006 coding_set_destination (coding
);
1007 dst
= coding
->destination
+ offset
;
1011 /** Macros for annotations. */
1013 /* Maximum length of annotation data (sum of annotations for
1014 composition and charset). */
1015 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1017 /* An annotation data is stored in the array coding->charbuf in this
1019 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1020 LENGTH is the number of elements in the annotation.
1021 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1022 FROM and TO specify the range of text annotated. They are relative
1023 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1025 The format of the following elements depend on ANNOTATION_MASK.
1027 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1029 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1030 METHOD is one of enum composition_method.
1031 Optional COMPOSITION-COMPONENTS are characters and composition
1034 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1037 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
1039 *(buf)++ = -(len); \
1040 *(buf)++ = (mask); \
1041 *(buf)++ = (from); \
1043 coding->annotated = 1; \
1046 #define ADD_COMPOSITION_DATA(buf, from, to, method) \
1048 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1053 #define ADD_CHARSET_DATA(buf, from, to, id) \
1055 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1060 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1067 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1068 Check if a text is encoded in UTF-8. If it is, return 1, else
return 0. */
/* Byte-class predicates for UTF-8 style sequences.  Each macro tests
   only the high-order bits of C and evaluates C exactly once; the bit
   pattern being matched is shown at the right.  */
#define UTF_8_1_OCTET_P(c)		(0x80 > (c))			/* below 0x80 */
#define UTF_8_EXTRA_OCTET_P(c)		(0x80 == (0xC0 & (c)))		/* 10xxxxxx */
#define UTF_8_2_OCTET_LEADING_P(c)	(0xC0 == (0xE0 & (c)))		/* 110xxxxx */
#define UTF_8_3_OCTET_LEADING_P(c)	(0xE0 == (0xF0 & (c)))		/* 1110xxxx */
#define UTF_8_4_OCTET_LEADING_P(c)	(0xF0 == (0xF8 & (c)))		/* 11110xxx */
#define UTF_8_5_OCTET_LEADING_P(c)	(0xF8 == (0xFC & (c)))		/* 111110xx */
/* Detector for UTF-8: scan the source bytes of CODING and update
   DETECT_INFO.  CATEGORY_MASK_UTF_8 is always added to
   DETECT_INFO->checked; depending on the scan it is also added to
   DETECT_INFO->rejected, or FOUND is merged into DETECT_INFO->found.
   NOTE(review): the loop and branch statements between the visible
   tests are missing from this excerpt.  */
1079 detect_coding_utf_8 (coding
, detect_info
)
1080 struct coding_system
*coding
;
1081 struct coding_detection_info
*detect_info
;
1083 unsigned char *src
= coding
->source
, *src_base
= src
;
1084 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1085 int multibytep
= coding
->src_multibyte
;
1086 int consumed_chars
= 0;
1090 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1091 /* A coding system of this category is always ASCII compatible. */
/* So the leading run of ASCII bytes (head_ascii of them) is skipped.  */
1092 src
+= coding
->head_ascii
;
/* C is the first byte of a sequence; C1..C4 are the bytes after it.  */
1096 int c
, c1
, c2
, c3
, c4
;
/* Single-octet byte (below 0x80).  */
1100 if (UTF_8_1_OCTET_P (c
))
/* Each following byte is tested for being a continuation octet; after
   each one, the matching lead-byte class of C sets FOUND.  */
1104 if (! UTF_8_EXTRA_OCTET_P (c1
))
1106 if (UTF_8_2_OCTET_LEADING_P (c
))
1108 found
= CATEGORY_MASK_UTF_8
;
1112 if (! UTF_8_EXTRA_OCTET_P (c2
))
1114 if (UTF_8_3_OCTET_LEADING_P (c
))
1116 found
= CATEGORY_MASK_UTF_8
;
1120 if (! UTF_8_EXTRA_OCTET_P (c3
))
1122 if (UTF_8_4_OCTET_LEADING_P (c
))
1124 found
= CATEGORY_MASK_UTF_8
;
1128 if (! UTF_8_EXTRA_OCTET_P (c4
))
1130 if (UTF_8_5_OCTET_LEADING_P (c
))
1132 found
= CATEGORY_MASK_UTF_8
;
1137 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
/* An incomplete sequence counts against UTF-8 only when this is the
   last block of input.  */
1141 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1143 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1146 detect_info
->found
|= found
;
/* Decode UTF-8 bytes of CODING into code points in coding->charbuf.

   Reading starts at coding->source + coding->consumed and stops at
   coding->source + coding->src_bytes; writing stops once CHARBUF
   reaches coding->charbuf + coding->charbuf_size.

   One-octet bytes go through end-of-line handling selected by
   EOL_TYPE (the Qdos and Qmac branches).  Multi-octet sequences are
   assembled from the payload bits of each octet.  A 3-octet result in
   the surrogate range 0xD800..0xDFFF is rejected, and so is a 5-octet
   result above MAX_CHAR or below 0x200000.

   On exit coding->consumed_char, coding->consumed and
   coding->charbuf_used are updated from CONSUMED_CHARS_BASE, SRC_BASE
   and CHARBUF respectively.  */
1152 decode_coding_utf_8 (coding
)
1153 struct coding_system
*coding
;
1155 unsigned char *src
= coding
->source
+ coding
->consumed
;
1156 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1157 unsigned char *src_base
;
1158 int *charbuf
= coding
->charbuf
;
1159 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1160 int consumed_chars
= 0, consumed_chars_base
;
1161 int multibytep
= coding
->src_multibyte
;
1162 Lisp_Object attr
, eol_type
, charset_list
;
1164 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1168 int c
, c1
, c2
, c3
, c4
, c5
;
/* Remember the count at the start of this character so that the
   totals reported on exit only cover fully processed characters.  */
1171 consumed_chars_base
= consumed_chars
;
1173 if (charbuf
>= charbuf_end
)
1177 if (UTF_8_1_OCTET_P(c1
))
1182 if (EQ (eol_type
, Qdos
))
1186 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
1187 goto no_more_source
;
1192 else if (EQ (eol_type
, Qmac
))
1199 if (! UTF_8_EXTRA_OCTET_P (c2
))
1201 if (UTF_8_2_OCTET_LEADING_P (c1
))
1203 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1204 /* Reject overlong sequences here and below. Encoders
1205 producing them are incorrect, they can be misleading,
1206 and they mess up read/write invariance. */
1213 if (! UTF_8_EXTRA_OCTET_P (c3
))
1215 if (UTF_8_3_OCTET_LEADING_P (c1
))
1217 c
= (((c1
& 0xF) << 12)
1218 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1220 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1226 if (! UTF_8_EXTRA_OCTET_P (c4
))
1228 if (UTF_8_4_OCTET_LEADING_P (c1
))
1230 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1231 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1238 if (! UTF_8_EXTRA_OCTET_P (c5
))
1240 if (UTF_8_5_OCTET_LEADING_P (c1
))
1242 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1243 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1245 if ((c
> MAX_CHAR
) || (c
< 0x200000))
/* Fallback path: the character count is rewound and a single value is
   stored, unchanged if it is an ASCII byte and mapped with
   BYTE8_TO_CHAR otherwise.
   NOTE(review): presumably reached for invalid sequences after SRC is
   reset to SRC_BASE; the jump target is not visible here.  */
1260 consumed_chars
= consumed_chars_base
;
1262 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1267 coding
->consumed_char
+= consumed_chars_base
;
1268 coding
->consumed
= src_base
- coding
->source
;
1269 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the code points in coding->charbuf[0 .. charbuf_used) and
   append the bytes at coding->destination + coding->produced.

   Two loops are visible.  The first reserves
   MAX_MULTIBYTE_LENGTH * 2 bytes per character, maps raw-byte
   characters back with CHAR_TO_BYTE8, and otherwise builds the
   sequence in the scratch buffer STR before walking it byte by byte.
   The second reserves MAX_MULTIBYTE_LENGTH bytes and writes straight
   into DST with CHAR_STRING.
   NOTE(review): presumably the first loop is taken when MULTIBYTEP
   (coding->dst_multibyte) is set; the selecting condition is not
   visible in this excerpt.

   On exit coding->result is CODING_RESULT_SUCCESS, and
   coding->produced_char and coding->produced are updated.  */
1274 encode_coding_utf_8 (coding
)
1275 struct coding_system
*coding
;
1277 int multibytep
= coding
->dst_multibyte
;
1278 int *charbuf
= coding
->charbuf
;
1279 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1280 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1281 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1282 int produced_chars
= 0;
1287 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1289 while (charbuf
< charbuf_end
)
1291 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1293 ASSURE_DESTINATION (safe_room
);
1295 if (CHAR_BYTE8_P (c
))
1297 c
= CHAR_TO_BYTE8 (c
);
/* PEND starts at STR and is advanced past the bytes written, so
   STR..PEND delimits the encoded form of C.  */
1302 CHAR_STRING_ADVANCE (c
, pend
);
1303 for (p
= str
; p
< pend
; p
++)
1310 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1312 while (charbuf
< charbuf_end
)
1314 ASSURE_DESTINATION (safe_room
);
1316 dst
+= CHAR_STRING (c
, dst
);
1320 coding
->result
= CODING_RESULT_SUCCESS
;
1321 coding
->produced_char
+= produced_chars
;
1322 coding
->produced
= dst
- coding
->destination
;
1327 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1328 Check if a text is encoded in one of UTF-16 based coding systems.
1329 If it is, return 1, else return 0. */
/* Predicates on a 16-bit UTF-16 code unit VAL.
     UTF_16_HIGH_SURROGATE_P  true for 0xD800..0xDBFF
     UTF_16_LOW_SURROGATE_P   true for 0xDC00..0xDFFF
     UTF_16_INVALID_P         true for 0xFFFE, 0xFFFF, or a low
                              surrogate
   Only bits 10..15 are compared by the surrogate tests.
   UTF_16_INVALID_P evaluates VAL up to three times, so VAL must not
   have side effects.  */
1331 #define UTF_16_HIGH_SURROGATE_P(val) \
1332 (((val) & 0xFC00) == 0xD800)
1334 #define UTF_16_LOW_SURROGATE_P(val) \
1335 (((val) & 0xFC00) == 0xDC00)
1337 #define UTF_16_INVALID_P(val) \
1338 (((val) == 0xFFFE) \
1339 || ((val) == 0xFFFF) \
1340 || UTF_16_LOW_SURROGATE_P (val))
/* Detector for the UTF-16 categories.

   CATEGORY_MASK_UTF_16 is always added to DETECT_INFO->checked.  If
   this is the last block (CODING_MODE_LAST_BLOCK) and the source has
   an odd number of bytes, the whole UTF-16 category is rejected.
   Otherwise the first two bytes C1, C2 are compared with the byte
   order mark:
     FF FE  -> little endian found, big endian rejected;
     FE FF  -> big endian found, little endian rejected.  */
1344 detect_coding_utf_16 (coding
, detect_info
)
1345 struct coding_system
*coding
;
1346 struct coding_detection_info
*detect_info
;
1348 unsigned char *src
= coding
->source
, *src_base
= src
;
1349 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1350 int multibytep
= coding
->src_multibyte
;
1351 int consumed_chars
= 0;
1354 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
1356 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1357 && (coding
->src_bytes
& 1))
1359 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
1365 if ((c1
== 0xFF) && (c2
== 0xFE))
1367 detect_info
->found
|= CATEGORY_MASK_UTF_16_LE
;
1368 detect_info
->rejected
|= CATEGORY_MASK_UTF_16_BE
;
1370 else if ((c1
== 0xFE) && (c2
== 0xFF))
1372 detect_info
->found
|= CATEGORY_MASK_UTF_16_BE
;
1373 detect_info
->rejected
|= CATEGORY_MASK_UTF_16_LE
;
/* Decode UTF-16 bytes of CODING into code points in coding->charbuf.

   BOM, ENDIAN and SURROGATE are loaded from the coding system with
   CODING_UTF_16_BOM, CODING_UTF_16_ENDIAN and CODING_UTF_16_SURROGATE.
   ENDIAN and SURROGATE are written back whenever they change, so a
   pending high surrogate and a detected byte order are kept in CODING
   itself.

   Unless BOM is utf_16_without_bom, a leading code unit is examined
   first.  With utf_16_with_bom it is compared against the expected
   mark; otherwise it selects the byte order, defaulting to big
   endian.  Afterwards the BOM state is set to utf_16_with_bom.

   The main loop stops when fewer than three free slots remain in
   CHARBUF.  It assembles each unit from C1 and C2 according to ENDIAN
   and combines a stored high surrogate with the following low
   surrogate.

   NOTE(review): the BOM tests use 0xFFFE for big endian and 0xFEFF
   for little endian.  Whether that is right depends on how C is
   built from the two bytes, which is not visible here.
   detect_coding_utf_16 treats the byte pair FE FF as big endian;
   confirm that both agree.
   NOTE(review): no addition of 0x10000 is visible where the
   surrogate pair is combined; confirm it happens when the value is
   stored.  */
1380 decode_coding_utf_16 (coding
)
1381 struct coding_system
*coding
;
1383 unsigned char *src
= coding
->source
+ coding
->consumed
;
1384 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1385 unsigned char *src_base
;
1386 int *charbuf
= coding
->charbuf
;
1387 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1388 int consumed_chars
= 0, consumed_chars_base
;
1389 int multibytep
= coding
->src_multibyte
;
1390 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1391 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1392 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1393 Lisp_Object attr
, eol_type
, charset_list
;
1395 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1397 if (bom
!= utf_16_without_bom
)
1405 if (bom
== utf_16_with_bom
)
1407 if (endian
== utf_16_big_endian
1408 ? c
!= 0xFFFE : c
!= 0xFEFF)
1410 /* We are sure that there's enough room at CHARBUF. */
1419 CODING_UTF_16_ENDIAN (coding
)
1420 = endian
= utf_16_big_endian
;
1421 else if (c
== 0xFEFF)
1422 CODING_UTF_16_ENDIAN (coding
)
1423 = endian
= utf_16_little_endian
;
1426 CODING_UTF_16_ENDIAN (coding
)
1427 = endian
= utf_16_big_endian
;
1431 CODING_UTF_16_BOM (coding
) = utf_16_with_bom
;
1439 consumed_chars_base
= consumed_chars
;
1441 if (charbuf
+ 2 >= charbuf_end
)
1446 c
= (endian
== utf_16_big_endian
1447 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
/* A high surrogate is pending but C is not a low surrogate: split the
   pending unit back into its two bytes in source order.  */
1450 if (! UTF_16_LOW_SURROGATE_P (c
))
1452 if (endian
== utf_16_big_endian
)
1453 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1455 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1459 if (UTF_16_HIGH_SURROGATE_P (c
))
1460 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1466 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1467 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1473 if (UTF_16_HIGH_SURROGATE_P (c
))
1474 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1481 coding
->consumed_char
+= consumed_chars_base
;
1482 coding
->consumed
= src_base
- coding
->source
;
1483 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the code points in coding->charbuf[0 .. charbuf_used) as
   UTF-16 and append the bytes at coding->destination +
   coding->produced.

   If the coding system is utf_16_with_bom, a two-byte mark is
   emitted first and the BOM state is switched to utf_16_without_bom,
   so the mark is not emitted again by a later call on the same
   CODING.

   In the main loop a character >= MAX_UNICODE_CHAR is replaced by
   coding->default_char.  A character is then written either as one
   code unit (EMIT_TWO_BYTES) or as a surrogate pair C1, C2
   (EMIT_FOUR_BYTES), with the byte order chosen by BIG_ENDIAN.

   NOTE(review): the conditions choosing between the two BOM byte
   orders and between the one-unit and two-unit forms are not visible
   here.  Confirm that FE FF is emitted for big endian, matching
   detect_coding_utf_16, and that 0x10000 is subtracted from C before
   the surrogates are computed.

   On exit coding->result is CODING_RESULT_SUCCESS, and
   coding->produced and coding->produced_char are updated.  */
1487 encode_coding_utf_16 (coding
)
1488 struct coding_system
*coding
;
1490 int multibytep
= coding
->dst_multibyte
;
1491 int *charbuf
= coding
->charbuf
;
1492 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1493 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1494 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1496 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1497 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1498 int produced_chars
= 0;
1499 Lisp_Object attrs
, eol_type
, charset_list
;
1502 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1504 if (bom
== utf_16_with_bom
)
1506 ASSURE_DESTINATION (safe_room
);
1508 EMIT_TWO_BYTES (0xFF, 0xFE);
1510 EMIT_TWO_BYTES (0xFE, 0xFF);
1511 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1514 while (charbuf
< charbuf_end
)
1516 ASSURE_DESTINATION (safe_room
);
1518 if (c
>= MAX_UNICODE_CHAR
)
1519 c
= coding
->default_char
;
1524 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1526 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
/* C1 is the high surrogate and C2 the low surrogate of the pair.  */
1533 c1
= (c
>> 10) + 0xD800;
1534 c2
= (c
& 0x3FF) + 0xDC00;
1536 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1538 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1541 coding
->result
= CODING_RESULT_SUCCESS
;
1542 coding
->produced
= dst
- coding
->destination
;
1543 coding
->produced_char
+= produced_chars
;
1548 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1550 /* Emacs' internal format for representation of multiple character
1551 sets is a kind of multi-byte encoding, i.e. characters are
1552 represented by variable-length sequences of one-byte codes.
1554 ASCII characters and control characters (e.g. `tab', `newline') are
1555 represented by one-byte sequences which are their ASCII codes, in
1556 the range 0x00 through 0x7F.
1558 8-bit characters of the range 0x80..0x9F are represented by
1559 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit code + 0x20).
1562 8-bit characters of the range 0xA0..0xFF are represented by
1563 one-byte sequences which are their 8-bit code.
1565 The other characters are represented by a sequence of `base
1566 leading-code', optional `extended leading-code', and one or two
1567 `position-code's. The length of the sequence is determined by the
1568 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1569 whereas extended leading-code and position-code take the range 0xA0
1570 through 0xFF. See `charset.h' for more details about leading-code
1573 --- CODE RANGE of Emacs' internal format ---
1577 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1578 eight-bit-graphic 0xA0..0xFF
1579 ELSE 0x81..0x9D + [0xA0..0xFF]+
1580 ---------------------------------------------
1582 As this is the internal character representation, the format is
1583 usually not used externally (i.e. in a file or in a data sent to a
1584 process). But, it is possible to have a text externally in this
1585 format (i.e. by encoding by the coding system `emacs-mule').
1587 In that case, a sequence of one-byte codes has a slightly different format.
1590 At first, all characters in eight-bit-control are represented by
1591 one-byte sequences which are their 8-bit code.
1593 Next, character composition data are represented by the byte
1594 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1596 METHOD is 0xF2 plus one of composition method (enum
1597 composition_method),
1599 BYTES is 0xA0 plus a byte length of this composition data,
1601 CHARS is 0xA0 plus the number of characters composed by this data,
1604 COMPONENTs are characters of multibyte form or composition
1605 rules encoded by two-byte of ASCII codes.
1607 In addition, for backward compatibility, the following formats are
1608 also recognized as composition data on decoding.
1611 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1614 MSEQ is a multibyte form but in this special format:
1615 ASCII: 0xA0 ASCII_CODE+0x80,
1616 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1617 RULE is a one byte code of the range 0xA0..0xF0 that
1618 represents a composition rule. */
/* Table indexed by a byte value (256 entries).  emacs_mule_char
   switches on the entry for a leading byte, and
   detect_coding_emacs_mule compares the entry with the number of
   bytes it counted for a sequence, so each entry acts as the byte
   length of a sequence starting with that byte.  The table is filled
   in elsewhere.  */
1621 char emacs_mule_bytes
[256];
/* Decode one character of emacs-mule format starting at SRC.

   CODING gives the end of the source (coding->source +
   coding->src_bytes) and the multibyte flag.  The function switches
   on emacs_mule_bytes[C] for the leading byte C and looks the charset
   up in emacs_mule_charset[].  The private leading codes
   EMACS_MULE_LEADING_CODE_PRIVATE_11 and _12 get a separate check.
   For the longer forms the code point starts as (C & 0x7F) << 8.
   One case picks charset_ascii or charset_eight_bit depending on
   ASCII_BYTE_P (CODE).  The result is DECODE_CHAR (CHARSET, CODE).

   *NBYTES receives the number of bytes read (SRC - SRC_BASE) and
   *NCHARS the value of CONSUMED_CHARS.
   NOTE(review): the store through ID is not visible in this excerpt.
   One caller passes NULL for it, so it is presumably guarded --
   confirm.  */
1624 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1625 struct coding_system
*coding
;
1627 int *nbytes
, *nchars
, *id
;
1629 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1630 int multibytep
= coding
->src_multibyte
;
1631 unsigned char *src_base
= src
;
1632 struct charset
*charset
;
1635 int consumed_chars
= 0;
1638 switch (emacs_mule_bytes
[c
])
1641 if (! (charset
= emacs_mule_charset
[c
]))
1648 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1649 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1652 if (! (charset
= emacs_mule_charset
[c
]))
1659 if (! (charset
= emacs_mule_charset
[c
]))
1662 code
= (c
& 0x7F) << 8;
1670 if (! (charset
= emacs_mule_charset
[c
]))
1673 code
= (c
& 0x7F) << 8;
1680 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1681 ? charset_ascii
: charset_eight_bit
);
1687 c
= DECODE_CHAR (charset
, code
);
1690 *nbytes
= src
- src_base
;
1691 *nchars
= consumed_chars
;
1704 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1705 Check if a text is encoded in `emacs-mule'. If it is, return 1, else return 0. */
/* Detector for the emacs-mule category.

   CATEGORY_MASK_EMACS_MULE is always added to DETECT_INFO->checked,
   and the leading coding->head_ascii bytes are skipped.  Two kinds of
   sequence set the local FOUND.  One is a run taken to be a composite
   character that spans more than 4 bytes.  The other is a multibyte
   sequence whose counted length equals emacs_mule_bytes[] of its
   first byte.  A mismatch in either test is a failure.  The bytes
   ISO_CODE_ESC, ISO_CODE_SI and ISO_CODE_SO are also tested for.

   The category is added to DETECT_INFO->rejected on failure, and also
   when INCOMPLETE is set in the last block
   (CODING_MODE_LAST_BLOCK).  FOUND is merged into DETECT_INFO->found
   at the end.  */
1709 detect_coding_emacs_mule (coding
, detect_info
)
1710 struct coding_system
*coding
;
1711 struct coding_detection_info
*detect_info
;
1713 unsigned char *src
= coding
->source
, *src_base
= src
;
1714 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1715 int multibytep
= coding
->src_multibyte
;
1716 int consumed_chars
= 0;
1721 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1722 /* A coding system of this category is always ASCII compatible. */
1723 src
+= coding
->head_ascii
;
1733 /* Perhaps the start of composite character. We simply skip
1734 it because analyzing it is too heavy for detecting. But,
1735 at least, we check that the composite character
1736 consists of more than 4 bytes. */
1737 unsigned char *src_base
;
1747 if (src
- src_base
<= 4)
1749 found
= CATEGORY_MASK_EMACS_MULE
;
1757 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
/* SRC has already moved past the leading byte, so SRC_BASE is set to
   point back at it.  */
1762 unsigned char *src_base
= src
- 1;
1769 if (src
- src_base
!= emacs_mule_bytes
[*src_base
])
1771 found
= CATEGORY_MASK_EMACS_MULE
;
1774 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1778 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1780 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1783 detect_info
->found
|= found
;
1788 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1790 /* Decode a character represented as a component of composition
1791 sequence of Emacs 20/21 style at SRC. Set C to that character and
1792 update SRC to the head of next character (or an encoded composition
1793 rule). If SRC doesn't point to a composition component, set C to -1.
1794 If SRC points to an invalid byte sequence, jump to the label
invalid_code. */
/* Helper for the composition decoding macros.  It decodes one
   component character at SRC with emacs_mule_char, passing NULL for
   the charset ID argument, and adds the decoded character count to
   CONSUMED_CHARS.  It jumps to the label `invalid_code' on failure.
   The expansion site must provide `coding', `src', `src_end', `c',
   `consumed_chars' and that label.
   NOTE(review): the lines that store the character into BUF and
   advance SRC are not visible in this excerpt.  */
1797 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1801 int nbytes, nchars; \
1803 if (src == src_end) \
1805 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1810 goto invalid_code; \
1814 consumed_chars += nchars; \
1819 /* Decode a composition rule represented as a component of composition
1820 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1821 and increment BUF. If SRC points to an invalid byte sequence, jump
to the label invalid_code. */
/* Read a one-byte Emacs 20 style composition rule from SRC and store
   it in *BUF, advancing BUF.  The rule value C must lie in 0..80.
   It is split into GREF = C / 9 and NREF = C % 9, which are packed
   with COMPOSITION_ENCODE_RULE.  The macro jumps to `invalid_code'
   when no byte is left (SRC >= SRC_END) or the value is out of range.
   NOTE(review): any adjustment applied to C between reading the byte
   and the range check is not visible in this excerpt.  */
1824 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1826 int c, gref, nref; \
1828 if (src >= src_end) \
1829 goto invalid_code; \
1830 ONE_MORE_BYTE_NO_CHECK (c); \
1832 if (c < 0 || c >= 81) \
1833 goto invalid_code; \
1835 gref = c / 9, nref = c % 9; \
1836 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1840 /* Decode a composition rule represented as a component of composition
1841 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1842 and increment BUF. If SRC points to an invalid byte sequence, jump
to the label invalid_code. */
/* Read a two-byte Emacs 21 style composition rule from SRC and store
   it in *BUF, advancing BUF.  The first byte gives GREF and the
   second NREF.  Both must lie in 0..80, and they are packed with
   COMPOSITION_ENCODE_RULE.  The macro jumps to `invalid_code' when
   fewer than two bytes remain (SRC + 1 >= SRC_END) or either value is
   out of range.
   NOTE(review): any adjustment applied to GREF and NREF after they
   are read is not visible in this excerpt.  */
1845 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1849 if (src + 1>= src_end) \
1850 goto invalid_code; \
1851 ONE_MORE_BYTE_NO_CHECK (gref); \
1853 ONE_MORE_BYTE_NO_CHECK (nref); \
1855 if (gref < 0 || gref >= 81 \
1856 || nref < 0 || nref >= 81) \
1857 goto invalid_code; \
1858 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
/* Decode an Emacs 21 style composition header whose method byte is C.
   The method is C - 0xF2.  The next two bytes give NBYTES and NCHARS,
   each biased by 0xA0.  A composition annotation is added to CHARBUF.
   Unless the method is COMPOSITION_RELATIVE, the components are then
   decoded until CONSUMED_CHARS reaches CONSUMED_CHARS_BASE + NBYTES.
   At odd positions a rule is decoded with
   DECODE_EMACS_MULE_COMPOSITION_RULE_21, except when the method is
   COMPOSITION_WITH_ALTCHARS.  Finally the annotation's length slot
   CHARBUF_BASE[0] is lowered by I, the loop counter, to cover the
   components.  Malformed input jumps to `invalid_code'.
   NOTE(review): FROM is computed from coding->produced here, while
   both Emacs 20 style macros use coding->produced_char.  This looks
   like a byte count used where a character count is meant --
   confirm.  */
1862 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1864 /* Emacs 21 style format. The first three bytes at SRC are \
1865 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1866 the byte length of this composition information, CHARS is the \
1867 number of characters composed by this composition. */ \
1868 enum composition_method method = c - 0xF2; \
1869 int *charbuf_base = charbuf; \
1871 int consumed_chars_limit; \
1872 int nbytes, nchars; \
1874 ONE_MORE_BYTE (c); \
1875 nbytes = c - 0xA0; \
1877 goto invalid_code; \
1878 ONE_MORE_BYTE (c); \
1879 nchars = c - 0xA0; \
1880 from = coding->produced + char_offset; \
1881 to = from + nchars; \
1882 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1883 consumed_chars_limit = consumed_chars_base + nbytes; \
1884 if (method != COMPOSITION_RELATIVE) \
1887 while (consumed_chars < consumed_chars_limit) \
1889 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1890 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1892 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1895 if (consumed_chars < consumed_chars_limit) \
1896 goto invalid_code; \
1897 charbuf_base[0] -= i; \
/* Decode an Emacs 20 style relative composition.  After skipping the
   0x80 byte, up to MAX_COMPOSITION_COMPONENTS characters are decoded
   into the local array COMPONENTS.  A composition annotation with
   method COMPOSITION_RELATIVE is then added to CHARBUF, followed by
   the I decoded components.  Malformed input jumps to
   `invalid_code'.
   NOTE(review): the loop's exit test and the computation of TO are
   not visible in this excerpt.  */
1902 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1904 /* Emacs 20 style format for relative composition. */ \
1905 /* Store multibyte form of characters to be composed. */ \
1906 enum composition_method method = COMPOSITION_RELATIVE; \
1907 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1908 int *buf = components; \
1913 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1914 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1915 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1917 goto invalid_code; \
1918 from = coding->produced_char + char_offset; \
1920 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1921 for (j = 0; j < i; j++) \
1922 *charbuf++ = components[j]; \
/* Decode an Emacs 20 style rule-base composition.  A first character
   and then alternating rule/character pairs are decoded into the
   local array COMPONENTS.  The input is rejected if I < 1 or if an
   even number of entries was stored.  A composition annotation with
   method COMPOSITION_WITH_RULE is then added.  All components are
   copied to CHARBUF, followed by every second entry, which is the
   characters without the rules.
   NOTE(review): the room check jumps to `no_more_source' when
   CHARBUF + I + I / 2 + 1 is BELOW CHARBUF_END, i.e. when there is
   room.  The comparison looks inverted -- confirm.
   NOTE(review): ADD_COMPOSITION_DATA is given BUF (the local
   COMPONENTS cursor) where the other two composition macros pass
   CHARBUF.  This looks like the annotation header is written to the
   wrong buffer -- confirm.  */
1926 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1928 /* Emacs 20 style format for rule-base composition. */ \
1929 /* Store multibyte form of characters to be composed. */ \
1930 enum composition_method method = COMPOSITION_WITH_RULE; \
1931 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1932 int *buf = components; \
1936 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1937 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1939 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1940 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1942 if (i < 1 || (buf - components) % 2 == 0) \
1943 goto invalid_code; \
1944 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1945 goto no_more_source; \
1946 from = coding->produced_char + char_offset; \
1948 ADD_COMPOSITION_DATA (buf, from, to, method); \
1949 for (j = 0; j < i; j++) \
1950 *charbuf++ = components[j]; \
1951 for (j = 0; j < i; j += 2) \
1952 *charbuf++ = components[j]; \
/* Decode emacs-mule bytes of CODING into code points and annotations
   in coding->charbuf.

   CHARBUF_END is set MAX_ANNOTATION_LENGTH slots before the real end
   of the buffer, so an annotation can always be appended after the
   loop's own room check.  CHAR_OFFSET starts at
   coding->produced_char.  LAST_ID and LAST_OFFSET track the charset
   of the current run of characters.  When LAST_ID is not
   charset_ascii, a charset annotation for LAST_OFFSET..CHAR_OFFSET is
   added, both inside the loop and once more after it.

   Inside the loop:
     - end-of-line handling is selected by EOL_TYPE (Qdos and Qmac
       branches);
     - a composition header is dispatched to one of the three
       DECODE_EMACS_MULE_*_COMPOSITION macros.  The Emacs 21 form is
       chosen when C - 0xF2 is a valid composition method;
     - a leading byte below 0xA0 with emacs_mule_bytes[C] > 1 is
       decoded with emacs_mule_char;
     - the fallback stores the byte itself if it is ASCII, else
       BYTE8_TO_CHAR of it.

   On exit coding->consumed_char, coding->consumed and
   coding->charbuf_used are updated.  */
1957 decode_coding_emacs_mule (coding
)
1958 struct coding_system
*coding
;
1960 unsigned char *src
= coding
->source
+ coding
->consumed
;
1961 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1962 unsigned char *src_base
;
1963 int *charbuf
= coding
->charbuf
;
1964 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
1965 int consumed_chars
= 0, consumed_chars_base
;
1966 int multibytep
= coding
->src_multibyte
;
1967 Lisp_Object attrs
, eol_type
, charset_list
;
1968 int char_offset
= coding
->produced_char
;
1969 int last_offset
= char_offset
;
1970 int last_id
= charset_ascii
;
1972 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1979 consumed_chars_base
= consumed_chars
;
1981 if (charbuf
>= charbuf_end
)
1990 if (EQ (eol_type
, Qdos
))
1994 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
1995 goto no_more_source
;
2000 else if (EQ (eol_type
, Qmac
))
2009 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2010 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2011 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2013 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2015 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2019 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2025 consumed_chars
= consumed_chars_base
;
2026 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2035 if (last_id
!= charset_ascii
)
2036 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2038 last_offset
= char_offset
;
2042 consumed_chars
+= nchars
;
2049 consumed_chars
= consumed_chars_base
;
2051 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
/* Flush the annotation for a charset run still open at the end.  */
2057 if (last_id
!= charset_ascii
)
2058 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2059 coding
->consumed_char
+= consumed_chars_base
;
2060 coding
->consumed
= src_base
- coding
->source
;
2061 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Store in CODES[0] and CODES[1] the leading code(s) for the
   emacs-mule charset ID.  In the first visible branch ID itself is
   the only leading code and CODES[1] is 0.  Otherwise CODES[1] is ID
   and CODES[0] is chosen by range:
     ID < 0xE0  -> 0x9A
     ID < 0xF0  -> 0x9B
     ID < 0xF5  -> 0x9C
     otherwise  -> 0x9D
   ID is evaluated several times, so it must not have side effects.
   NOTE(review): the condition guarding the first branch is not
   visible in this excerpt.  */
2065 #define EMACS_MULE_LEADING_CODES(id, codes) \
2068 codes[0] = id, codes[1] = 0; \
2069 else if (id < 0xE0) \
2070 codes[0] = 0x9A, codes[1] = id; \
2071 else if (id < 0xF0) \
2072 codes[0] = 0x9B, codes[1] = id; \
2073 else if (id < 0xF5) \
2074 codes[0] = 0x9C, codes[1] = id; \
2076 codes[0] = 0x9D, codes[1] = id; \
/* Encode the contents of coding->charbuf[0 .. charbuf_used) in
   emacs-mule format and append the bytes at coding->destination +
   coding->produced.

   Annotations in the buffer are handled first.  A composition
   annotation is not yet implemented.  A charset annotation sets
   PREFERRED_CHARSET_ID from its fourth element, and the ID is reset
   to -1 when the Fmemq membership test returns nil.

   For characters, ASCII is emitted as one byte and a raw-byte
   character is mapped back with CHAR_TO_BYTE8.  Any other character
   needs a charset.  The preferred charset is used when it contains
   the character; otherwise char_charset searches CHARSET_LIST.  A
   fallback path substitutes coding->default_char.  The output is the
   leading code(s) from EMACS_MULE_LEADING_CODES, followed by CODE as
   one byte, or as two bytes with the high byte first.

   NOTE(review): on the preferred-charset path char_charset is called
   with NULL where the other call sites pass &code.  Confirm that
   CODE is assigned on that path before it is emitted.

   On exit coding->result is CODING_RESULT_SUCCESS, and
   coding->produced_char and coding->produced are updated.  */
2081 encode_coding_emacs_mule (coding
)
2082 struct coding_system
*coding
;
2084 int multibytep
= coding
->dst_multibyte
;
2085 int *charbuf
= coding
->charbuf
;
2086 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2087 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2088 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2090 int produced_chars
= 0;
2091 Lisp_Object attrs
, eol_type
, charset_list
;
2093 int preferred_charset_id
= -1;
2095 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2097 while (charbuf
< charbuf_end
)
2099 ASSURE_DESTINATION (safe_room
);
2104 /* Handle an annotation. */
2107 case CODING_ANNOTATE_COMPOSITION_MASK
:
2108 /* Not yet implemented. */
2110 case CODING_ANNOTATE_CHARSET_MASK
:
2111 preferred_charset_id
= charbuf
[3];
2112 if (preferred_charset_id
>= 0
2113 && NILP (Fmemq (make_number (preferred_charset_id
),
2115 preferred_charset_id
= -1;
2124 if (ASCII_CHAR_P (c
))
2125 EMIT_ONE_ASCII_BYTE (c
);
2126 else if (CHAR_BYTE8_P (c
))
2128 c
= CHAR_TO_BYTE8 (c
);
2133 struct charset
*charset
;
2137 unsigned char leading_codes
[2];
2139 if (preferred_charset_id
>= 0)
2141 charset
= CHARSET_FROM_ID (preferred_charset_id
);
2142 if (! CHAR_CHARSET_P (c
, charset
))
2143 charset
= char_charset (c
, charset_list
, NULL
);
2146 charset
= char_charset (c
, charset_list
, &code
);
2149 c
= coding
->default_char
;
2150 if (ASCII_CHAR_P (c
))
2152 EMIT_ONE_ASCII_BYTE (c
);
2155 charset
= char_charset (c
, charset_list
, &code
);
2157 dimension
= CHARSET_DIMENSION (charset
);
2158 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2159 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2160 EMIT_ONE_BYTE (leading_codes
[0]);
/* A second leading code is present only when it is nonzero.  */
2161 if (leading_codes
[1])
2162 EMIT_ONE_BYTE (leading_codes
[1]);
2164 EMIT_ONE_BYTE (code
);
2167 EMIT_ONE_BYTE (code
>> 8);
2168 EMIT_ONE_BYTE (code
& 0xFF);
2172 coding
->result
= CODING_RESULT_SUCCESS
;
2173 coding
->produced_char
+= produced_chars
;
2174 coding
->produced
= dst
- coding
->destination
;
2179 /*** 7. ISO2022 handlers ***/
2181 /* The following note describes the coding system ISO2022 briefly.
2182 Since the intention of this note is to help understand the
2183 functions in this file, some parts are NOT ACCURATE or are OVERLY
2184 SIMPLIFIED. For thorough understanding, please refer to the
2185 original document of ISO2022. This is equivalent to the standard
2186 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2188 ISO2022 provides many mechanisms to encode several character sets
2189 in 7-bit and 8-bit environments. For 7-bit environments, all text
2190 is encoded using bytes less than 128. This may make the encoded
2191 text a little bit longer, but the text passes more easily through
2192 several types of gateway, some of which strip off the MSB (Most Significant Bit).
2195 There are two kinds of character sets: control character sets and
2196 graphic character sets. The former contain control characters such
2197 as `newline' and `escape' to provide control functions (control
2198 functions are also provided by escape sequences). The latter
2199 contain graphic characters such as 'A' and '-'. Emacs recognizes
2200 two control character sets and many graphic character sets.
2202 Graphic character sets are classified into one of the following
2203 four classes, according to the number of bytes (DIMENSION) and
2204 number of characters in one dimension (CHARS) of the set:
2205 - DIMENSION1_CHARS94
2206 - DIMENSION1_CHARS96
2207 - DIMENSION2_CHARS94
2208 - DIMENSION2_CHARS96
2210 In addition, each character set is assigned an identification tag,
2211 unique for each set, called the "final character" (denoted as <F>
2212 hereafter). The <F> of each character set is decided by ECMA(*)
2213 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2214 (0x30..0x3F are for private use only).
2216 Note (*): ECMA = European Computer Manufacturers Association
2218 Here are examples of graphic character sets [NAME(<F>)]:
2219 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2220 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2221 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2222 o DIMENSION2_CHARS96 -- none for the moment
2224 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2225 C0 [0x00..0x1F] -- control character plane 0
2226 GL [0x20..0x7F] -- graphic character plane 0
2227 C1 [0x80..0x9F] -- control character plane 1
2228 GR [0xA0..0xFF] -- graphic character plane 1
2230 A control character set is directly designated and invoked to C0 or
2231 C1 by an escape sequence. The most common case is that:
2232 - ISO646's control character set is designated/invoked to C0, and
2233 - ISO6429's control character set is designated/invoked to C1,
2234 and usually these designations/invocations are omitted in encoded
2235 text. In a 7-bit environment, only C0 can be used, and a control
2236 character for C1 is encoded by an appropriate escape sequence to
2237 fit into the environment. All control characters for C1 are
2238 defined to have corresponding escape sequences.
2240 A graphic character set is at first designated to one of four
2241 graphic registers (G0 through G3), then these graphic registers are
2242 invoked to GL or GR. These designations and invocations can be
2243 done independently. The most common case is that G0 is invoked to
2244 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2245 these invocations and designations are omitted in encoded text.
2246 In a 7-bit environment, only GL can be used.
2248 When a graphic character set of CHARS94 is invoked to GL, codes
2249 0x20 and 0x7F of the GL area work as control characters SPACE and
2250 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not be used.
2253 There are two ways of invocation: locking-shift and single-shift.
2254 With locking-shift, the invocation lasts until the next different
2255 invocation, whereas with single-shift, the invocation affects the
2256 following character only and doesn't affect the locking-shift
2257 state. Invocations are done by the following control characters or escape sequences:
2260 ----------------------------------------------------------------------
2261 abbrev function cntrl escape seq description
2262 ----------------------------------------------------------------------
2263 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2264 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2265 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2266 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2267 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2268 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2269 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2270 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2271 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2272 ----------------------------------------------------------------------
2273 (*) These are not used by any known coding system.
2275 Control characters for these functions are defined by macros
2276 ISO_CODE_XXX in `coding.h'.
2278 Designations are done by the following escape sequences:
2279 ----------------------------------------------------------------------
2280 escape sequence description
2281 ----------------------------------------------------------------------
2282 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2283 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2284 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2285 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2286 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2287 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2288 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2289 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2290 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2291 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2292 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2293 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2294 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2295 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2296 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2297 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2298 ----------------------------------------------------------------------
2300 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2301 of dimension 1, chars 94, and final character <F>, etc...
2303 Note (*): Although these designations are not allowed in ISO2022,
2304 Emacs accepts them on decoding, and produces them on encoding
2305 CHARS96 character sets in a coding system which is characterized as
2306 7-bit environment, non-locking-shift, and non-single-shift.
2308 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2309 '(' must be omitted. We refer to this as "short-form" hereafter.
2311 Now you may notice that there are a lot of ways of encoding the
2312 same multilingual text in ISO2022. Actually, there exist many
2313 coding systems such as Compound Text (used in X11's inter client
2314 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2315 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2316 localized platforms), and all of these are variants of ISO2022.
2318 In addition to the above, Emacs handles two more kinds of escape
2319 sequences: ISO6429's direction specification and Emacs' private
2320 sequence for specifying character composition.
2322 ISO6429's direction specification takes the following form:
2323 o CSI ']' -- end of the current direction
2324 o CSI '0' ']' -- end of the current direction
2325 o CSI '1' ']' -- start of left-to-right text
2326 o CSI '2' ']' -- start of right-to-left text
2327 The control character CSI (0x9B: control sequence introducer) is
2328 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2330 Character composition specification takes the following form:
2331 o ESC '0' -- start relative composition
2332 o ESC '1' -- end composition
2333 o ESC '2' -- start rule-base composition (*)
2334 o ESC '3' -- start relative composition with alternate chars (**)
2335 o ESC '4' -- start rule-base composition with alternate chars (**)
2336 Since these are not standard escape sequences of any ISO standard,
2337 the use of them with these meanings is restricted to Emacs only.
2339 (*) This form is used only in Emacs 20.7 and older versions,
2340 but newer versions can safely decode it.
2341 (**) This form is used only in Emacs 21.1 and newer versions,
2342 and older versions can't decode it.
2344 Here's a list of example usages of these composition escape
2345 sequences (categorized by `enum composition_method').
2347 COMPOSITION_RELATIVE:
2348 ESC 0 CHAR [ CHAR ] ESC 1
2349 COMPOSITION_WITH_RULE:
2350 ESC 2 CHAR [ RULE CHAR ] ESC 1
2351 COMPOSITION_WITH_ALTCHARS:
2352 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2353 COMPOSITION_WITH_RULE_ALTCHARS:
2354 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table with one `enum iso_code_class_type' entry for each of the 256
   byte values.  It is filled in elsewhere.
   NOTE(review): presumably it maps a byte to its ISO 2022 code class
   (control, graphic, shift, escape, ...) -- confirm against the enum
   definition.  */
2356 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if charset ID is within the table of CODING
   (ID <= CODING->max_charset_id) and its entry in
   CODING->safe_charsets is non-negative.  ID is evaluated twice and
   is used unparenthesized as the array index, so pass a simple
   expression without side effects.
   NOTE(review): if `safe_charsets' has an unsigned element type, the
   `>= 0' test is always true -- confirm the element type and the
   value used to mark an unsafe charset.  */
2358 #define SAFE_CHARSET_P(coding, id) \
2359 ((id) <= (coding)->max_charset_id \
2360 && (coding)->safe_charsets[id] >= 0)
/* Nonzero if the coding system registered for CATEGORY in
   coding_categories[] has a non-negative CODING_ISO_INITIAL value for
   register 1.
   NOTE(review): presumably this means a charset is initially
   designated to G1, so a shift-out is meaningful -- confirm.  */
2363 #define SHIFT_OUT_OK(category) \
2364 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
/* Fill in the `coding_attr_safe_charsets' slot of ATTRS, the
   attribute vector of an ISO-2022 coding system.

   When CODING_ISO_FLAG_FULL_SUPPORT is set in the ISO flags and the
   charset list of ATTRS is not already Viso_2022_charset_list, the
   charset list is replaced by Viso_2022_charset_list and the
   safe-charsets slot is reset to Qnil.

   Otherwise-missing table: a string of MAX_CHARSET_ID + 1 bytes is
   made, where MAX_CHARSET_ID is the largest charset ID found in the
   charset list, and for each listed charset the byte at index ID is
   set to a graphic register number: the value associated with ID in
   `coding_attr_iso_request', or else REG96 / REG94 (the cdr / car of
   `coding_attr_iso_usage') depending on CHARSET->iso_chars_96.  The
   string is then stored back into ATTRS.

   NOTE(review): several lines of this function are not visible here
   (the statement guarded by the STRINGP test -- presumably an early
   return -- the fill value passed to Fmake_string, and the conditions
   guarding the three stores); confirm against the full source.  */
2367 setup_iso_safe_charsets (attrs
)
2370 Lisp_Object charset_list
, safe_charsets
;
2371 Lisp_Object request
;
2372 Lisp_Object reg_usage
;
2375 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
/* Switch to the full ISO-2022 charset list when full support is
   requested, and invalidate any previously built table.  */
2378 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2379 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2380 && ! EQ (charset_list
, Viso_2022_charset_list
))
2382 CODING_ATTR_CHARSET_LIST (attrs
)
2383 = charset_list
= Viso_2022_charset_list
;
2384 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
/* A string in the slot means a table has already been built.  */
2387 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
/* Find the largest charset ID so the table can be indexed by ID.  */
2391 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2393 int id
= XINT (XCAR (tail
));
2394 if (max_charset_id
< id
)
2395 max_charset_id
= id
;
2398 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2400 request
= AREF (attrs
, coding_attr_iso_request
);
2401 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2402 reg94
= XINT (XCAR (reg_usage
));
2403 reg96
= XINT (XCDR (reg_usage
));
/* Record a graphic register for every charset in the list.  */
2405 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2409 struct charset
*charset
;
2412 charset
= CHARSET_FROM_ID (XINT (id
));
2413 reg
= Fcdr (Fassq (id
, request
));
2415 XSTRING (safe_charsets
)->data
[XINT (id
)] = XINT (reg
);
2416 else if (charset
->iso_chars_96
)
2419 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg96
;
2424 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg94
;
/* Publish the finished table in ATTRS.  */
2427 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2432 Check if a text is encoded in one of ISO-2022 based coding systems.
2433 If it is, return 1, else return 0. */
2436 detect_coding_iso_2022 (coding
, detect_info
)
2437 struct coding_system
*coding
;
2438 struct coding_detection_info
*detect_info
;
2440 unsigned char *src
= coding
->source
, *src_base
= src
;
2441 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2442 int multibytep
= coding
->src_multibyte
;
2443 int single_shifting
= 0;
2446 int consumed_chars
= 0;
2451 detect_info
->checked
|= CATEGORY_MASK_ISO
;
2453 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2455 struct coding_system
*this = &(coding_categories
[i
]);
2456 Lisp_Object attrs
, val
;
2458 attrs
= CODING_ID_ATTRS (this->id
);
2459 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2460 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2461 setup_iso_safe_charsets (attrs
);
2462 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2463 this->max_charset_id
= XSTRING (val
)->size
- 1;
2464 this->safe_charsets
= (char *) XSTRING (val
)->data
;
2467 /* A coding system of this category is always ASCII compatible. */
2468 src
+= coding
->head_ascii
;
2470 while (rejected
!= CATEGORY_MASK_ISO
)
2476 if (inhibit_iso_escape_detection
)
2478 single_shifting
= 0;
2480 if (c
>= '(' && c
<= '/')
2482 /* Designation sequence for a charset of dimension 1. */
2484 if (c1
< ' ' || c1
>= 0x80
2485 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2486 /* Invalid designation sequence. Just ignore. */
2491 /* Designation sequence for a charset of dimension 2. */
2493 if (c
>= '@' && c
<= 'B')
2494 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2495 id
= iso_charset_table
[1][0][c
];
2496 else if (c
>= '(' && c
<= '/')
2499 if (c1
< ' ' || c1
>= 0x80
2500 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2501 /* Invalid designation sequence. Just ignore. */
2505 /* Invalid designation sequence. Just ignore it. */
2508 else if (c
== 'N' || c
== 'O')
2510 /* ESC <Fe> for SS2 or SS3. */
2511 single_shifting
= 1;
2512 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2515 else if (c
>= '0' && c
<= '4')
2517 /* ESC <Fp> for start/end composition. */
2518 found
|= CATEGORY_MASK_ISO
;
2523 /* Invalid escape sequence. Just ignore it. */
2527 /* We found a valid designation sequence for CHARSET. */
2528 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2529 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2531 found
|= CATEGORY_MASK_ISO_7
;
2533 rejected
|= CATEGORY_MASK_ISO_7
;
2534 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2536 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2538 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2539 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2541 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2543 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2544 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2546 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2548 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2553 /* Locking shift out/in. */
2554 if (inhibit_iso_escape_detection
)
2556 single_shifting
= 0;
2557 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2558 found
|= CATEGORY_MASK_ISO_ELSE
;
2562 /* Control sequence introducer. */
2563 single_shifting
= 0;
2564 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2565 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2566 goto check_extra_latin
;
2572 if (inhibit_iso_escape_detection
)
2574 single_shifting
= 1;
2575 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2576 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2577 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2578 found
|= CATEGORY_MASK_ISO_8_1
;
2579 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2580 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2581 found
|= CATEGORY_MASK_ISO_8_2
;
2582 goto check_extra_latin
;
2587 single_shifting
= 0;
2592 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2593 found
|= CATEGORY_MASK_ISO_8_1
;
2594 /* Check the length of succeeding codes of the range
2595 0xA0..0xFF. If the byte length is even, we include
2596 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2597 only when we are not single shifting. */
2598 if (! single_shifting
2599 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2602 while (src
< src_end
)
2610 if (i
& 1 && src
< src_end
)
2611 rejected
|= CATEGORY_MASK_ISO_8_2
;
2613 found
|= CATEGORY_MASK_ISO_8_2
;
2618 single_shifting
= 0;
2619 if (! VECTORP (Vlatin_extra_code_table
)
2620 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2622 rejected
= CATEGORY_MASK_ISO
;
2625 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2626 & CODING_ISO_FLAG_LATIN_EXTRA
)
2627 found
|= CATEGORY_MASK_ISO_8_1
;
2629 rejected
|= CATEGORY_MASK_ISO_8_1
;
2630 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2631 & CODING_ISO_FLAG_LATIN_EXTRA
)
2632 found
|= CATEGORY_MASK_ISO_8_2
;
2634 rejected
|= CATEGORY_MASK_ISO_8_2
;
2637 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
2641 detect_info
->rejected
|= rejected
;
2642 detect_info
->found
|= (found
& ~rejected
);
/* Set designation state into CODING: designate to graphic register
   REG the charset that ISO_CHARSET_TABLE gives for dimension DIM,
   96-chars flag CHARS_96 and final byte FINAL.

   If FINAL is outside '0'..127, the table has no charset for it, or
   the charset is not safe for CODING (SAFE_CHARSET_P), the register
   is marked with -2 and control jumps to `invalid_code'.

   JISX0201-Roman is replaced by ASCII when CODING_ISO_FLAG_USE_ROMAN
   is set, and JISX0208-1978 by JISX0208 when
   CODING_ISO_FLAG_USE_OLDJIS is set.

   Uses `coding' and the label `invalid_code' of the expansion site.
   Arguments are parenthesized so that expressions such as
   `c1 - 0x28' or a conditional expression can be passed safely;
   FINAL and REG are still evaluated more than once.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)			\
  do {									\
    int id, prev;							\
									\
    if ((final) < '0' || (final) >= 128					\
	|| ((id = ISO_CHARSET_TABLE ((dim), (chars_96), (final))) < 0)	\
	|| !SAFE_CHARSET_P (coding, id))				\
      {									\
	CODING_ISO_DESIGNATION (coding, (reg)) = -2;			\
	goto invalid_code;						\
      }									\
    prev = CODING_ISO_DESIGNATION (coding, (reg));			\
    if (id == charset_jisx0201_roman)					\
      {									\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)	\
	  id = charset_ascii;						\
      }									\
    else if (id == charset_jisx0208_1978)				\
      {									\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)	\
	  id = charset_jisx0208;					\
      }									\
    CODING_ISO_DESIGNATION (coding, (reg)) = id;			\
    /* If there was an invalid designation to REG previously, and this	\
       designation is ASCII to REG, we should keep this designation	\
       sequence.  */							\
    if (prev == -2 && id == charset_ascii)				\
      goto invalid_code;						\
  } while (0)
/* Abandon a composition that is still open and flush the elements
   collected so far in `components' to CHARBUF as ordinary
   characters.  Does nothing when COMPOSITION_STATE is already
   COMPOSING_NO.

   Uses these variables of the expansion site: composition_state,
   method, components, component_idx, charbuf, charbuf_end and
   char_offset.  Jumps to the label `no_more_source' when CHARBUF
   cannot take COMPONENT_IDX more elements.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every
   element is emitted.  For the other methods only the even-indexed
   elements are emitted (the odd-indexed ones hold rules).
   CHAR_OFFSET is advanced by exactly the number of elements emitted,
   i.e. (component_idx + 1) / 2 in the latter case; the former
   `component_idx / 2 + 1' over-counted by one whenever
   COMPONENT_IDX was even (including zero).  */
#define MAYBE_FINISH_COMPOSITION()				\
  do {								\
    int i;							\
    if (composition_state == COMPOSING_NO)			\
      break;							\
    /* It is assured that we have enough room for producing	\
       characters stored in the table `components'.  */		\
    if (charbuf + component_idx > charbuf_end)			\
      goto no_more_source;					\
    composition_state = COMPOSING_NO;				\
    if (method == COMPOSITION_RELATIVE				\
	|| method == COMPOSITION_WITH_ALTCHARS)			\
      {								\
	for (i = 0; i < component_idx; i++)			\
	  *charbuf++ = components[i];				\
	char_offset += component_idx;				\
      }								\
    else							\
      {								\
	for (i = 0; i < component_idx; i += 2)			\
	  *charbuf++ = components[i];				\
	char_offset += (component_idx + 1) / 2;			\
      }								\
  } while (0)
2705 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2706 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2707 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2708 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2709 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1 */
/* DECODE_COMPOSITION_START (C1): C1 is the byte following ESC
   ('0', '2', '3' or '4').

   In the branch taken while COMPOSITION_STATE is
   COMPOSING_COMPONENT_RULE (the first half of that condition is on a
   line not visible here), the number of components read so far is
   saved in COMPONENT_LEN and the state becomes COMPOSING_CHAR.

   Otherwise any open composition is finished, room for
   MAX_COMPOSITION_COMPONENTS elements in CHARBUF is required (else
   `goto no_more_source'), and the source is scanned ahead for the
   terminating ESC '1'.  If the scan reaches SRC_END - 1, control
   jumps to `invalid_code' when this is the last block and to
   `no_more_source' otherwise.  Then METHOD is selected from C1,
   COMPOSITION_STATE is set to COMPOSING_CHAR for '0' and '2' or
   COMPOSING_COMPONENT_CHAR for '3' and '4', and COMPONENT_IDX and
   COMPONENT_LEN are reset to 0.  */
2712 #define DECODE_COMPOSITION_START(c1) \
2715 && composition_state == COMPOSING_COMPONENT_RULE) \
2717 component_len = component_idx; \
2718 composition_state = COMPOSING_CHAR; \
2724 MAYBE_FINISH_COMPOSITION (); \
2725 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2726 goto no_more_source; \
2727 for (p = src; p < src_end - 1; p++) \
2728 if (*p == ISO_CODE_ESC && p[1] == '1') \
2730 if (p == src_end - 1) \
2732 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2733 goto invalid_code; \
2734 goto no_more_source; \
2737 /* This is surely the start of a composition. */ \
2738 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2739 : c1 == '2' ? COMPOSITION_WITH_RULE \
2740 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2741 : COMPOSITION_WITH_RULE_ALTCHARS); \
2742 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2743 : COMPOSING_COMPONENT_CHAR); \
2744 component_idx = component_len = 0; \
2749 /* Handle composition end sequence ESC 1.

   NCHARS, the number of composed characters, is
   COMPONENT_IDX - COMPONENT_LEN when COMPONENT_LEN is positive,
   COMPONENT_IDX for COMPOSITION_RELATIVE, and
   (COMPONENT_IDX + 1) / 2 otherwise.  A composition annotation
   covering FROM..TO is added with ADD_COMPOSITION_DATA; for methods
   other than COMPOSITION_RELATIVE the components are appended to it
   and the element at SAVED_CHARBUF is overwritten with the negative
   distance SAVED_CHARBUF - CHARBUF.  The composed characters
   themselves are then copied to CHARBUF (every second element of
   `components' for COMPOSITION_WITH_RULE, the elements from
   COMPONENT_LEN on otherwise), advancing CHAR_OFFSET once per
   character.  Finally CODING->annotated is set and
   COMPOSITION_STATE returns to COMPOSING_NO.  */
2751 #define DECODE_COMPOSITION_END() \
2753 int nchars = (component_len > 0 ? component_idx - component_len \
2754 : method == COMPOSITION_RELATIVE ? component_idx \
2755 : (component_idx + 1) / 2); \
2757 int *saved_charbuf = charbuf; \
2758 int from = coding->produced_char + char_offset; \
2759 int to = from + nchars; \
2761 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
2762 if (method != COMPOSITION_RELATIVE) \
2764 if (component_len == 0) \
2765 for (i = 0; i < component_idx; i++) \
2766 *charbuf++ = components[i]; \
2768 for (i = 0; i < component_len; i++) \
2769 *charbuf++ = components[i]; \
2770 *saved_charbuf = saved_charbuf - charbuf; \
2772 if (method == COMPOSITION_WITH_RULE) \
2773 for (i = 0; i < component_idx; i += 2, char_offset++) \
2774 *charbuf++ = components[i]; \
2776 for (i = component_len; i < component_idx; i++, char_offset++) \
2777 *charbuf++ = components[i]; \
2778 coding->annotated = 1; \
2779 composition_state = COMPOSING_NO; \
2783 /* Decode a composition rule from the byte C1 (and maybe one more byte
   from SRC) and replace C1 with the rule encoded by
   COMPOSITION_ENCODE_RULE.  C1 must therefore be an lvalue.

   Old format (C1 < 81): GREF = C1 / 9 and NREF = C1 % 9, with the
   value 4 mapped to 10 in either of them.
   New format (81 <= C1 < 93): one more byte C2 is read with
   ONE_MORE_BYTE and the rule is (C1 - 81, C2 - 32).
   The handling of C1 >= 93 is on lines not visible here.  */
2787 #define DECODE_COMPOSITION_RULE(c1) \
2790 if (c1 < 81) /* old format (before ver.21) */ \
2792 int gref = (c1) / 9; \
2793 int nref = (c1) % 9; \
2794 if (gref == 4) gref = 10; \
2795 if (nref == 4) nref = 10; \
2796 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2798 else if (c1 < 93) /* new format (after ver.21) */ \
2800 ONE_MORE_BYTE (c2); \
2801 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2808 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2811 decode_coding_iso_2022 (coding
)
2812 struct coding_system
*coding
;
2814 unsigned char *src
= coding
->source
+ coding
->consumed
;
2815 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2816 unsigned char *src_base
;
2817 int *charbuf
= coding
->charbuf
;
2819 = charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2820 int consumed_chars
= 0, consumed_chars_base
;
2821 int multibytep
= coding
->src_multibyte
;
2822 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2823 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2824 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2825 struct charset
*charset
;
2827 /* For handling composition sequence. */
2828 #define COMPOSING_NO 0
2829 #define COMPOSING_CHAR 1
2830 #define COMPOSING_RULE 2
2831 #define COMPOSING_COMPONENT_CHAR 3
2832 #define COMPOSING_COMPONENT_RULE 4
2834 int composition_state
= COMPOSING_NO
;
2835 enum composition_method method
;
2836 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2839 Lisp_Object attrs
, eol_type
, charset_list
;
2840 int char_offset
= coding
->produced_char
;
2841 int last_offset
= char_offset
;
2842 int last_id
= charset_ascii
;
2844 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2845 setup_iso_safe_charsets (attrs
);
2852 consumed_chars_base
= consumed_chars
;
2854 if (charbuf
>= charbuf_end
)
2859 /* We produce at most one character. */
2860 switch (iso_code_class
[c1
])
2862 case ISO_0x20_or_0x7F
:
2863 if (composition_state
!= COMPOSING_NO
)
2865 if (composition_state
== COMPOSING_RULE
2866 || composition_state
== COMPOSING_COMPONENT_RULE
)
2868 DECODE_COMPOSITION_RULE (c1
);
2869 components
[component_idx
++] = c1
;
2870 composition_state
--;
2874 if (charset_id_0
< 0
2875 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2876 /* This is SPACE or DEL. */
2877 charset
= CHARSET_FROM_ID (charset_ascii
);
2879 charset
= CHARSET_FROM_ID (charset_id_0
);
2882 case ISO_graphic_plane_0
:
2883 if (composition_state
!= COMPOSING_NO
)
2885 if (composition_state
== COMPOSING_RULE
2886 || composition_state
== COMPOSING_COMPONENT_RULE
)
2888 DECODE_COMPOSITION_RULE (c1
);
2889 components
[component_idx
++] = c1
;
2890 composition_state
--;
2894 charset
= CHARSET_FROM_ID (charset_id_0
);
2897 case ISO_0xA0_or_0xFF
:
2898 if (charset_id_1
< 0
2899 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2900 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2902 /* This is a graphic character, we fall down ... */
2904 case ISO_graphic_plane_1
:
2905 if (charset_id_1
< 0)
2907 charset
= CHARSET_FROM_ID (charset_id_1
);
2910 case ISO_carriage_return
:
2913 if (EQ (eol_type
, Qdos
))
2917 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
2918 goto no_more_source
;
2923 else if (EQ (eol_type
, Qmac
))
2929 MAYBE_FINISH_COMPOSITION ();
2930 charset
= CHARSET_FROM_ID (charset_ascii
);
2934 MAYBE_FINISH_COMPOSITION ();
2938 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2939 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2941 CODING_ISO_INVOCATION (coding
, 0) = 1;
2942 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2946 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2948 CODING_ISO_INVOCATION (coding
, 0) = 0;
2949 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2952 case ISO_single_shift_2_7
:
2953 case ISO_single_shift_2
:
2954 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2956 /* SS2 is handled as an escape sequence of ESC 'N' */
2958 goto label_escape_sequence
;
2960 case ISO_single_shift_3
:
2961 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2963 /* SS3 is handled as an escape sequence of ESC 'O' */
2965 goto label_escape_sequence
;
2967 case ISO_control_sequence_introducer
:
2968 /* CSI is handled as an escape sequence of ESC '[' ... */
2970 goto label_escape_sequence
;
2974 label_escape_sequence
:
2975 /* Escape sequences handled here are invocation,
2976 designation, direction specification, and character
2977 composition specification. */
2980 case '&': /* revision of following character set */
2982 if (!(c1
>= '@' && c1
<= '~'))
2985 if (c1
!= ISO_CODE_ESC
)
2988 goto label_escape_sequence
;
2990 case '$': /* designation of 2-byte character set */
2991 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2994 if (c1
>= '@' && c1
<= 'B')
2995 { /* designation of JISX0208.1978, GB2312.1980,
2997 DECODE_DESIGNATION (0, 2, 0, c1
);
2999 else if (c1
>= 0x28 && c1
<= 0x2B)
3000 { /* designation of DIMENSION2_CHARS94 character set */
3002 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
3004 else if (c1
>= 0x2C && c1
<= 0x2F)
3005 { /* designation of DIMENSION2_CHARS96 character set */
3007 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
3011 /* We must update these variables now. */
3012 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3013 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3016 case 'n': /* invocation of locking-shift-2 */
3017 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3018 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3020 CODING_ISO_INVOCATION (coding
, 0) = 2;
3021 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3024 case 'o': /* invocation of locking-shift-3 */
3025 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3026 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3028 CODING_ISO_INVOCATION (coding
, 0) = 3;
3029 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3032 case 'N': /* invocation of single-shift-2 */
3033 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3034 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3036 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
3038 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3042 case 'O': /* invocation of single-shift-3 */
3043 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3044 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3046 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
3048 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3052 case '0': case '2': case '3': case '4': /* start composition */
3053 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3055 DECODE_COMPOSITION_START (c1
);
3058 case '1': /* end composition */
3059 if (composition_state
== COMPOSING_NO
)
3061 DECODE_COMPOSITION_END ();
3064 case '[': /* specification of direction */
3065 if (! CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
)
3067 /* For the moment, nested direction is not supported.
3068 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3069 left-to-right, and nonzero means right-to-left. */
3073 case ']': /* end of the current direction */
3074 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3076 case '0': /* end of the current direction */
3077 case '1': /* start of left-to-right direction */
3080 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3085 case '2': /* start of right-to-left direction */
3088 coding
->mode
|= CODING_MODE_DIRECTION
;
3099 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3101 if (c1
>= 0x28 && c1
<= 0x2B)
3102 { /* designation of DIMENSION1_CHARS94 character set */
3104 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
3106 else if (c1
>= 0x2C && c1
<= 0x2F)
3107 { /* designation of DIMENSION1_CHARS96 character set */
3109 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
3113 /* We must update these variables now. */
3114 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3115 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3120 if (charset
->id
!= charset_ascii
3121 && last_id
!= charset
->id
)
3123 if (last_id
!= charset_ascii
)
3124 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3125 last_id
= charset
->id
;
3126 last_offset
= char_offset
;
3129 /* Now we know CHARSET and 1st position code C1 of a character.
3130 Produce a decoded character while getting 2nd position code C2 if necessary. */
3133 if (CHARSET_DIMENSION (charset
) > 1)
3136 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3137 /* C2 is not in a valid range. */
3139 c1
= (c1
<< 8) | (c2
& 0x7F);
3140 if (CHARSET_DIMENSION (charset
) > 2)
3143 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3144 /* C2 is not in a valid range. */
3146 c1
= (c1
<< 8) | (c2
& 0x7F);
3150 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3153 MAYBE_FINISH_COMPOSITION ();
3154 for (; src_base
< src
; src_base
++, char_offset
++)
3156 if (ASCII_BYTE_P (*src_base
))
3157 *charbuf
++ = *src_base
;
3159 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3163 else if (composition_state
== COMPOSING_NO
)
3170 components
[component_idx
++] = c
;
3171 if (method
== COMPOSITION_WITH_RULE
3172 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3173 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3174 composition_state
++;
3179 MAYBE_FINISH_COMPOSITION ();
3181 consumed_chars
= consumed_chars_base
;
3183 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3189 if (last_id
!= charset_ascii
)
3190 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3191 coding
->consumed_char
+= consumed_chars_base
;
3192 coding
->consumed
= src_base
- coding
->source
;
3193 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3197 /* ISO2022 encoding stuff. */
3200 /* It is not enough to say just "ISO2022" on encoding, we have to
3201 specify more details. In Emacs, each coding system of ISO2022
3202 variant has the following specifications:
3203 1. Initial designation to G0 thru G3.
3204 2. Allows short-form designation?
3205 3. ASCII should be designated to G0 before control characters?
3206 4. ASCII should be designated to G0 at end of line?
3207 5. 7-bit environment or 8-bit environment?
3208 6. Use locking-shift?
3209 7. Use single-shift?
3210 And the following two are only for Japanese:
3211 8. Use ASCII in place of JISX0201-1976-Roman?
3212 9. Use JISX0208-1983 in place of JISX0208-1978?
3213 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3214 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more details. */
3218 /* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG at DST, and increment DST.  If <final-char> of CHARSET is
   '@', 'A', or 'B' and the coding system CODING allows, produce
   designation sequence of short-form.

   Details visible in the expansion:
   - When CODING_ISO_FLAG_REVISION is set and the charset's ISO
     revision is not negative, ESC '&' and the byte '@' + revision
     are emitted first.
   - The intermediate byte is taken from "()*+" (94-char sets) or
     ",-./" (96-char sets), indexed by REG; charsets whose dimension
     is not 1 get '$' before it.
   - Afterwards CODING_ISO_DESIGNATION (CODING, REG) is set to the ID
     of CHARSET.
   Bytes are written through the EMIT_* macros of the expansion
   site.  */
3223 #define ENCODE_DESIGNATION(charset, reg, coding) \
3225 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3226 char *intermediate_char_94 = "()*+"; \
3227 char *intermediate_char_96 = ",-./"; \
3228 int revision = -1; \
3231 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3232 revision = CHARSET_ISO_REVISION (charset); \
3234 if (revision >= 0) \
3236 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3237 EMIT_ONE_BYTE ('@' + revision); \
3239 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3240 if (CHARSET_DIMENSION (charset) == 1) \
3242 if (! CHARSET_ISO_CHARS_96 (charset)) \
3243 c = intermediate_char_94[reg]; \
3245 c = intermediate_char_96[reg]; \
3246 EMIT_ONE_ASCII_BYTE (c); \
3250 EMIT_ONE_ASCII_BYTE ('$'); \
3251 if (! CHARSET_ISO_CHARS_96 (charset)) \
3253 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3255 || final_char < '@' || final_char > 'B') \
3256 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3259 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3261 EMIT_ONE_ASCII_BYTE (final_char); \
3263 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3267 /* The following two macros produce codes (control character or escape
3268 sequence) for ISO2022 single-shift functions (single-shift-2 and single-shift-3). */
/* Emit single-shift-2: ESC 'N' when CODING_ISO_FLAG_SEVEN_BITS is set
   in the flags of `coding', otherwise the byte ISO_CODE_SS2.  Then
   mark `coding' as single-shifting.  */
3271 #define ENCODE_SINGLE_SHIFT_2 \
3273 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3274 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3276 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3277 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
/* Emit single-shift-3: ESC 'O' when CODING_ISO_FLAG_SEVEN_BITS is set
   in the flags of `coding', otherwise the byte ISO_CODE_SS3.  Then
   mark `coding' as single-shifting.  */
3281 #define ENCODE_SINGLE_SHIFT_3 \
3283 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3284 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3286 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3287 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3291 /* The following four macros produce codes (control character or
3292 escape sequence) for ISO2022 locking-shift functions (shift-in,
3293 shift-out, locking-shift-2, and locking-shift-3). */
/* Emit the byte ISO_CODE_SI and record that graphic register 0 is now
   invoked to graphic plane 0 of `coding'.  */
3295 #define ENCODE_SHIFT_IN \
3297 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3298 CODING_ISO_INVOCATION (coding, 0) = 0; \
/* Emit the byte ISO_CODE_SO and record that graphic register 1 is now
   invoked to graphic plane 0 of `coding'.  */
3302 #define ENCODE_SHIFT_OUT \
3304 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3305 CODING_ISO_INVOCATION (coding, 0) = 1; \
/* Emit ESC 'n' (locking-shift-2) and record that graphic register 2
   is now invoked to graphic plane 0 of `coding'.  */
3309 #define ENCODE_LOCKING_SHIFT_2 \
3311 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3312 CODING_ISO_INVOCATION (coding, 0) = 2; \
/* Emit ESC 'o' (locking-shift-3) and record that graphic register 3
   is now invoked to graphic plane 0 of `coding'.

   The final byte must be 'o': ESC 'n' is locking-shift-2, and the
   decoder in this file accordingly treats ESC 'n' as an invocation of
   register 2 and ESC 'o' as an invocation of register 3.  Emitting
   'n' here made the produced bytes disagree with the recorded
   invocation state.  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
3323 /* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_ROMAN is set
   and CHARSET is ASCII, it is reassigned to the JISX0201-Roman
   charset.  The byte is emitted as C1 & 0x7F when the charset is
   invoked to graphic plane 0 (or while single-shifting in a 7-bit
   environment) and as C1 | 0x80 when it is invoked to graphic plane 1
   (or while single-shifting otherwise); single-shifting is cleared
   after use.  If the charset is invoked to neither plane,
   encode_invocation_designation is called to update DST first.
   Uses `coding' and `dst' of the expansion site.  */
3327 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3329 int id = CHARSET_ID (charset); \
3331 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3332 && id == charset_ascii) \
3334 id = charset_jisx0201_roman; \
3335 charset = CHARSET_FROM_ID (id); \
3338 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3340 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3341 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3343 EMIT_ONE_BYTE (c1 | 0x80); \
3344 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3347 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3349 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3352 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3354 EMIT_ONE_BYTE (c1 | 0x80); \
3358 /* Since CHARSET is not yet invoked to any graphic planes, we \
3359 must invoke it, or, at first, designate it to some graphic \
3360 register. Then repeat the loop to actually produce the \
3362 dst = encode_invocation_designation (charset, coding, dst, \
3367 /* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_OLDJIS is set
   and CHARSET is JISX0208, it is reassigned to the JISX0208-1978
   charset.  The two bytes are emitted with the high bit cleared when
   the charset is invoked to graphic plane 0 (or while single-shifting
   in a 7-bit environment) and with the high bit set when it is
   invoked to graphic plane 1 (or while single-shifting otherwise);
   single-shifting is cleared after use.  If the charset is invoked
   to neither plane, encode_invocation_designation is called to
   update DST first.  Uses `coding' and `dst' of the expansion
   site.  */
3371 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3373 int id = CHARSET_ID (charset); \
3375 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3376 && id == charset_jisx0208) \
3378 id = charset_jisx0208_1978; \
3379 charset = CHARSET_FROM_ID (id); \
3382 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3384 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3385 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3387 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3388 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3391 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3393 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3396 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3398 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3402 /* Since CHARSET is not yet invoked to any graphic planes, we \
3403 must invoke it, or, at first, designate it to some graphic \
3404 register. Then repeat the loop to actually produce the \
3406 dst = encode_invocation_designation (charset, coding, dst, \
/* Encode character C of CHARSET: obtain its code with ENCODE_CHAR,
   then hand it to ENCODE_ISO_CHARACTER_DIMENSION1 when the charset
   has dimension 1, or otherwise to ENCODE_ISO_CHARACTER_DIMENSION2
   with the code split into CODE >> 8 and CODE & 0xFF.  */
3411 #define ENCODE_ISO_CHARACTER(charset, c) \
3413 int code = ENCODE_CHAR ((charset),(c)); \
3415 if (CHARSET_DIMENSION (charset) == 1) \
3416 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3418 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3422 /* Produce designation and invocation codes at a place pointed by DST
3423 to use CHARSET. The element `spec.iso_2022' of *CODING is updated. Return new DST. */
/* Arguments: CHARSET is the charset to make usable, CODING the coding
   system whose designation and invocation state is consulted and
   updated, DST the output position, and *P_NCHARS the count of
   produced characters (copied to PRODUCED_CHARS on entry and written
   back at the end).

   The function first searches graphic registers 0..3 for one to which
   CHARSET is already designated.  If there is none, the register is
   taken from CODING_ISO_REQUEST (CODING, ID) and a designation is
   emitted with ENCODE_DESIGNATION.  If the chosen register is invoked
   to neither graphic plane 0 nor graphic plane 1, a shift function
   selected by register number is emitted; for registers 2 and 3 this
   is a single shift when CODING_ISO_FLAG_SINGLE_SHIFT is set and a
   locking shift otherwise.

   NOTE(review): several lines (return type, declaration of DST and
   P_NCHARS, loop exits, the switch head, the bodies of cases 0 and 1,
   and the return statement) are not visible here.  */
3427 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3428 struct charset
*charset
;
3429 struct coding_system
*coding
;
3433 int multibytep
= coding
->dst_multibyte
;
3434 int produced_chars
= *p_nchars
;
3435 int reg
; /* graphic register number */
3436 int id
= CHARSET_ID (charset
);
3438 /* At first, check designations. */
3439 for (reg
= 0; reg
< 4; reg
++)
3440 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3445 /* CHARSET is not yet designated to any graphic registers. */
3446 /* At first check the requested designation. */
3447 reg
= CODING_ISO_REQUEST (coding
, id
);
3449 /* Since CHARSET requests no special designation, designate it
3450 to graphic register 0. */
3453 ENCODE_DESIGNATION (charset
, reg
, coding
);
3456 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3457 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3459 /* Since the graphic register REG is not invoked to any graphic
3460 planes, invoke it to graphic plane 0. */
3463 case 0: /* graphic register 0 */
3467 case 1: /* graphic register 1 */
3471 case 2: /* graphic register 2 */
3472 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3473 ENCODE_SINGLE_SHIFT_2
;
3475 ENCODE_LOCKING_SHIFT_2
;
3478 case 3: /* graphic register 3 */
3479 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3480 ENCODE_SINGLE_SHIFT_3
;
3482 ENCODE_LOCKING_SHIFT_3
;
/* Report the updated count of produced characters to the caller.  */
3487 *p_nchars
= produced_chars
;
3491 /* The following three macros produce codes for indicating direction of text. */
/* Emit a control sequence introducer: the two bytes ESC '[' when
   CODING_ISO_FLAG_SEVEN_BITS is set in the flags of `coding',
   otherwise the single byte ISO_CODE_CSI.

   The flag word is a bit mask and has to be tested with `&'.  The
   former equality test chose the 7-bit form only when SEVEN_BITS was
   the sole flag set, so a 7-bit coding system with any other flag
   would get the 8-bit CSI byte.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER			\
  do {								\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');			\
    else							\
      EMIT_ONE_BYTE (ISO_CODE_CSI);				\
  } while (0)
/* Emit the direction specification for right-to-left text: a control
   sequence introducer followed by '2' ']'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is invoked without an argument list; writing `(dst)' after it left
   a stray parenthesized expression behind the expanded statement.  */
#define ENCODE_DIRECTION_R2L()				\
  do {							\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;			\
    EMIT_TWO_ASCII_BYTES ('2', ']');			\
  } while (0)
/* Emit the direction specification that ends the current direction
   (returning to left-to-right text): a control sequence introducer
   followed by '0' ']'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is invoked without an argument list; writing `(dst)' after it left
   a stray parenthesized expression behind the expanded statement.  */
#define ENCODE_DIRECTION_L2R()				\
  do {							\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;			\
    EMIT_TWO_ASCII_BYTES ('0', ']');			\
  } while (0)
3516 /* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state.

   The invocation of graphic plane 0 is checked against register 0
   (the statement guarded by that test is not visible here).  Then,
   for each graphic register 0..3 whose initial charset
   (CODING_ISO_INITIAL) is not negative and differs from its current
   designation, that initial charset is designated again with
   ENCODE_DESIGNATION.  Uses `coding' of the expansion site.  */
3518 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3521 struct charset *charset; \
3523 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3525 for (reg = 0; reg < 4; reg++) \
3526 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3527 && (CODING_ISO_DESIGNATION (coding, reg) \
3528 != CODING_ISO_INITIAL (coding, reg))) \
3530 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3531 ENCODE_DESIGNATION (charset, reg, coding); \
3536 /* Produce designation sequences of charsets in the line held in
   CHARBUF (up to CHARBUF_END) to a place pointed by DST, and return
   updated DST.

   If the current block ends before any end-of-line, we may fail to
   find all the necessary designations.

   The charset list is taken from the attributes of CODING; the value
   Qiso_2022 stands for Viso_2022_charset_list.  For a character C
   the charset is found with char_charset, and the graphic register
   requested for it (CODING_ISO_REQUEST) is considered when that
   register is not negative and its entry in the table R is still
   negative.  Finally, for registers 0..3, ENCODE_DESIGNATION is
   emitted where the table entry differs from the current designation
   (an additional condition sits on a line not visible here).

   NOTE(review): the declarations of DST, R, ATTRS and ID, the loop
   over the characters, and the return statement are not visible
   here.  */
3542 static unsigned char *
3543 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3544 struct coding_system
*coding
;
3545 int *charbuf
, *charbuf_end
;
3548 struct charset
*charset
;
3549 /* Table of charsets to be designated to each graphic register. */
3551 int c
, found
= 0, reg
;
3552 int produced_chars
= 0;
3553 int multibytep
= coding
->dst_multibyte
;
3555 Lisp_Object charset_list
;
3557 attrs
= CODING_ID_ATTRS (coding
->id
);
3558 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3559 if (EQ (charset_list
, Qiso_2022
))
3560 charset_list
= Viso_2022_charset_list
;
3562 for (reg
= 0; reg
< 4; reg
++)
3572 charset
= char_charset (c
, charset_list
, NULL
);
3573 id
= CHARSET_ID (charset
);
3574 reg
= CODING_ISO_REQUEST (coding
, id
);
3575 if (reg
>= 0 && r
[reg
] < 0)
3584 for (reg
= 0; reg
< 4; reg
++)
3586 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3587 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3593 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
/* Encode the characters held in CODING->charbuf (CODING->charbuf_used
   entries) as ISO-2022 bytes written from CODING->destination +
   CODING->produced onward.  Control characters, ASCII, raw eight-bit
   characters and charset characters are handled by separate branches;
   a charset annotation in the buffer can set a preferred charset.
   On exit the visible code stores CODING_RESULT_SUCCESS in
   CODING->result, saves the beginning-of-line designation state with
   CODING_ISO_BOL, and updates CODING->produced_char and
   CODING->produced.
   NOTE(review): this listing is an incomplete extract (several source
   lines are missing), so confirm the control flow between the visible
   statements against the full source.  */
3596 encode_coding_iso_2022 (coding
)
3597 struct coding_system
*coding
;
3599 int multibytep
= coding
->dst_multibyte
;
3600 int *charbuf
= coding
->charbuf
;
3601 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3602 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3603 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3606 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3607 && CODING_ISO_BOL (coding
));
3608 int produced_chars
= 0;
3609 Lisp_Object attrs
, eol_type
, charset_list
;
3610 int ascii_compatible
;
3612 int preferred_charset_id
= -1;
3614 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3615 setup_iso_safe_charsets (attrs
);
3616 /* Charset list may have been changed. */
/* NOTE(review): the statement below ends with a backslash, which only
   splices it with the following line; it looks like a leftover from a
   macro definition -- confirm and remove in the full source.  */
3617 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
); \
3618 coding
->safe_charsets
3619 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs
))->data
;
3621 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3623 while (charbuf
< charbuf_end
)
3625 ASSURE_DESTINATION (safe_room
);
3627 if (bol_designation
)
3629 unsigned char *dst_prev
= dst
;
3631 /* We have to produce designation sequences if any now. */
3632 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3633 bol_designation
= 0;
3634 /* We are sure that designation sequences are all ASCII bytes. */
3635 produced_chars
+= dst
- dst_prev
;
3642 /* Handle an annotation. */
3645 case CODING_ANNOTATE_COMPOSITION_MASK
:
3646 /* Not yet implemented. */
3648 case CODING_ANNOTATE_CHARSET_MASK
:
3649 preferred_charset_id
= charbuf
[3];
3650 if (preferred_charset_id
>= 0
3651 && NILP (Fmemq (make_number (preferred_charset_id
),
3653 preferred_charset_id
= -1;
3662 /* Now encode the character C. */
3663 if (c
< 0x20 || c
== 0x7F)
3666 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3668 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3669 ENCODE_RESET_PLANE_AND_REGISTER ();
3670 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3674 for (i
= 0; i
< 4; i
++)
3675 CODING_ISO_DESIGNATION (coding
, i
)
3676 = CODING_ISO_INITIAL (coding
, i
);
3679 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3681 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3682 ENCODE_RESET_PLANE_AND_REGISTER ();
3683 EMIT_ONE_ASCII_BYTE (c
);
3685 else if (ASCII_CHAR_P (c
))
3687 if (ascii_compatible
)
3688 EMIT_ONE_ASCII_BYTE (c
);
3691 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3692 ENCODE_ISO_CHARACTER (charset
, c
);
3695 else if (CHAR_BYTE8_P (c
))
3697 c
= CHAR_TO_BYTE8 (c
);
3702 struct charset
*charset
;
3704 if (preferred_charset_id
>= 0)
3706 charset
= CHARSET_FROM_ID (preferred_charset_id
);
3707 if (! CHAR_CHARSET_P (c
, charset
))
3708 charset
= char_charset (c
, charset_list
, NULL
);
3711 charset
= char_charset (c
, charset_list
, NULL
);
3714 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3716 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3717 charset
= CHARSET_FROM_ID (charset_ascii
);
3721 c
= coding
->default_char
;
3722 charset
= char_charset (c
, charset_list
, NULL
);
3725 ENCODE_ISO_CHARACTER (charset
, c
);
3729 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3730 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3732 ASSURE_DESTINATION (safe_room
);
3733 ENCODE_RESET_PLANE_AND_REGISTER ();
3735 coding
->result
= CODING_RESULT_SUCCESS
;
3736 CODING_ISO_BOL (coding
) = bol_designation
;
3737 coding
->produced_char
+= produced_chars
;
3738 coding
->produced
= dst
- coding
->destination
;
3743 /*** 8,9. SJIS and BIG5 handlers ***/
3745 /* Although SJIS and BIG5 are not ISO coding systems, they are used
3746 quite widely. So, for the moment, Emacs supports them directly in
3747 C code. But, in the future, they may be supported only by CCL. */
3749 /* SJIS is a coding system encoding three character sets: ASCII, the
3750 right half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3751 as is. A character of charset katakana-jisx0201 is encoded as
3752 "position-code + 0x80". A character of charset japanese-jisx0208
3753 is encoded in two bytes, with its two position-codes divided and shifted
3754 so that they fit in the ranges below.
3756 --- CODE RANGE of SJIS ---
3757 (character set) (range)
3759 KATAKANA-JISX0201 0xA0 .. 0xDF
3760 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3761 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3762 -------------------------------
3766 /* BIG5 is a coding system encoding two character sets: ASCII and
3767 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3768 character set and is encoded in two bytes.
3770 --- CODE RANGE of BIG5 ---
3771 (character set) (range)
3773 Big5 (1st byte) 0xA1 .. 0xFE
3774 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3775 --------------------------
3779 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3780 Check if a text is encoded in SJIS. If it is, return
3781 CATEGORY_MASK_SJIS, else return 0. */
/* Scan the bytes of CODING->source for the SJIS byte ranges.
   CATEGORY_MASK_SJIS is first recorded in DETECT_INFO->checked and the
   CODING->head_ascii leading bytes are skipped.  A lead byte in
   0x81..0x9F or 0xE0..0xEF is followed by a check of the next byte
   against 0x40..0xFC (excluding 0x7F); a byte in 0xA0..0xDF counts as
   a single-byte match.  The outcome is ORed into DETECT_INFO->rejected
   or DETECT_INFO->found.
   NOTE(review): this listing is an incomplete extract; the loop and
   return statements are not shown.  */
3784 detect_coding_sjis (coding
, detect_info
)
3785 struct coding_system
*coding
;
3786 struct coding_detection_info
*detect_info
;
3788 unsigned char *src
= coding
->source
, *src_base
= src
;
3789 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3790 int multibytep
= coding
->src_multibyte
;
3791 int consumed_chars
= 0;
3796 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3797 /* A coding system of this category is always ASCII compatible. */
3798 src
+= coding
->head_ascii
;
3807 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3810 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3812 found
= CATEGORY_MASK_SJIS
;
3814 else if (c
>= 0xA0 && c
< 0xE0)
3815 found
= CATEGORY_MASK_SJIS
;
3819 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3823 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3825 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3828 detect_info
->found
|= found
;
3832 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3833 Check if a text is encoded in BIG5. If it is, return
3834 CATEGORY_MASK_BIG5, else return 0. */
/* Scan the bytes of CODING->source for the BIG5 byte ranges.
   CATEGORY_MASK_BIG5 is first recorded in DETECT_INFO->checked and the
   CODING->head_ascii leading bytes are skipped.  The visible range
   test rejects a byte below 0x40 or in 0x7F..0xA0 (presumably the
   second byte of a pair -- confirm against the full source).  The
   outcome is ORed into DETECT_INFO->rejected or DETECT_INFO->found.
   NOTE(review): this listing is an incomplete extract; the loop and
   return statements are not shown.  */
3837 detect_coding_big5 (coding
, detect_info
)
3838 struct coding_system
*coding
;
3839 struct coding_detection_info
*detect_info
;
3841 unsigned char *src
= coding
->source
, *src_base
= src
;
3842 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3843 int multibytep
= coding
->src_multibyte
;
3844 int consumed_chars
= 0;
3849 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
3850 /* A coding system of this category is always ASCII compatible. */
3851 src
+= coding
->head_ascii
;
3863 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3865 found
= CATEGORY_MASK_BIG5
;
3870 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3874 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3876 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3879 detect_info
->found
|= found
;
3883 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3884 Decode SJIS text (BIG5 text is handled by decode_coding_big5). */
/* Decode SJIS bytes starting at CODING->source + CODING->consumed into
   the integer buffer CODING->charbuf, keeping MAX_ANNOTATION_LENGTH
   entries in reserve at its end.  Three charsets (roman, kana, kanji,
   in that order) are taken from successive elements of the list VAL.
   Each time the charset of the decoded text changes to a non-ASCII one,
   the previous run is recorded with ADD_CHARSET_DATA.  A byte sequence
   that cannot be decoded is stored as the byte itself when ASCII, or
   as BYTE8_TO_CHAR of it otherwise.  On exit CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated.
   NOTE(review): this listing is an incomplete extract; labels, braces
   and byte-fetching statements are not shown.  */
3887 decode_coding_sjis (coding
)
3888 struct coding_system
*coding
;
3890 unsigned char *src
= coding
->source
+ coding
->consumed
;
3891 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3892 unsigned char *src_base
;
3893 int *charbuf
= coding
->charbuf
;
3894 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
3895 int consumed_chars
= 0, consumed_chars_base
;
3896 int multibytep
= coding
->src_multibyte
;
3897 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3898 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3899 int char_offset
= coding
->produced_char
;
3900 int last_offset
= char_offset
;
3901 int last_id
= charset_ascii
;
3903 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3906 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3907 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3908 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3915 consumed_chars_base
= consumed_chars
;
3917 if (charbuf
>= charbuf_end
)
3924 if (EQ (eol_type
, Qdos
))
3928 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
3929 goto no_more_source
;
3934 else if (EQ (eol_type
, Qmac
))
3939 struct charset
*charset
;
3942 charset
= charset_roman
;
3947 if (c
< 0xA0 || c
>= 0xE0)
3949 /* SJIS -> JISX0208 */
3951 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
3955 charset
= charset_kanji
;
3959 /* SJIS -> JISX0201-Kana */
3961 charset
= charset_kana
;
3964 if (charset
->id
!= charset_ascii
3965 && last_id
!= charset
->id
)
3967 if (last_id
!= charset_ascii
)
3968 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3969 last_id
= charset
->id
;
3970 last_offset
= char_offset
;
3972 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
3980 consumed_chars
= consumed_chars_base
;
3982 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3988 if (last_id
!= charset_ascii
)
3989 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3990 coding
->consumed_char
+= consumed_chars_base
;
3991 coding
->consumed
= src_base
- coding
->source
;
3992 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Decode BIG5 bytes starting at CODING->source + CODING->consumed into
   the integer buffer CODING->charbuf, keeping MAX_ANNOTATION_LENGTH
   entries in reserve at its end.  Two charsets (roman, then big5) are
   taken from successive elements of the list VAL.  A lead byte must be
   in 0xA1..0xFE and the following byte in 0x40..0x7E or 0xA1..0xFE.
   Each time the charset of the decoded text changes to a non-ASCII one,
   the previous run is recorded with ADD_CHARSET_DATA.  A byte that
   cannot be decoded is stored as itself when ASCII, or as BYTE8_TO_CHAR
   of it otherwise.  On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.
   NOTE(review): this listing is an incomplete extract; labels, braces
   and byte-fetching statements are not shown.  */
3996 decode_coding_big5 (coding
)
3997 struct coding_system
*coding
;
3999 unsigned char *src
= coding
->source
+ coding
->consumed
;
4000 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4001 unsigned char *src_base
;
4002 int *charbuf
= coding
->charbuf
;
4003 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4004 int consumed_chars
= 0, consumed_chars_base
;
4005 int multibytep
= coding
->src_multibyte
;
4006 struct charset
*charset_roman
, *charset_big5
;
4007 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4008 int char_offset
= coding
->produced_char
;
4009 int last_offset
= char_offset
;
4010 int last_id
= charset_ascii
;
4012 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4014 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4015 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4022 consumed_chars_base
= consumed_chars
;
4024 if (charbuf
>= charbuf_end
)
4031 if (EQ (eol_type
, Qdos
))
4035 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4036 goto no_more_source
;
4041 else if (EQ (eol_type
, Qmac
))
4046 struct charset
*charset
;
4048 charset
= charset_roman
;
4052 if (c
< 0xA1 || c
> 0xFE)
4055 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4058 charset
= charset_big5
;
4060 if (charset
->id
!= charset_ascii
4061 && last_id
!= charset
->id
)
4063 if (last_id
!= charset_ascii
)
4064 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4065 last_id
= charset
->id
;
4066 last_offset
= char_offset
;
4068 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4077 consumed_chars
= consumed_chars_base
;
4079 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4085 if (last_id
!= charset_ascii
)
4086 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4087 coding
->consumed_char
+= consumed_chars_base
;
4088 coding
->consumed
= src_base
- coding
->source
;
4089 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4092 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4093 This function can encode charsets `ascii', `katakana-jisx0201',
4094 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4095 are sure that all these charsets are registered as official charsets
4096 (i.e. do not have extended leading-codes). Characters of other
4097 charsets are produced without any encoding. This function encodes
4098 SJIS text; BIG5 text is encoded by encode_coding_big5. */
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   entries) as SJIS bytes written from CODING->destination +
   CODING->produced onward.  The roman, kana and kanji charsets are
   taken, in that order, from successive elements of the list VAL.
   ASCII characters are emitted directly when the coding system is
   ASCII compatible; raw eight-bit characters are converted with
   CHAR_TO_BYTE8; other characters are looked up with char_charset.
   A kanji code is emitted as two bytes, a kana code as CODE | 0x80,
   and anything else as CODE & 0x7F.  The fallback that substitutes
   CODING_INHIBIT_CHARACTER_SUBSTITUTION or CODING->default_char is
   presumably taken when no charset can encode the character --
   confirm, the guarding condition is not shown in this extract.
   On exit CODING->result is CODING_RESULT_SUCCESS and
   CODING->produced_char and CODING->produced are updated.  */
4101 encode_coding_sjis (coding
)
4102 struct coding_system
*coding
;
4104 int multibytep
= coding
->dst_multibyte
;
4105 int *charbuf
= coding
->charbuf
;
4106 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4107 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4108 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4110 int produced_chars
= 0;
4111 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4112 int ascii_compatible
;
4113 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4116 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4118 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4119 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4120 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4122 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4124 while (charbuf
< charbuf_end
)
4126 ASSURE_DESTINATION (safe_room
);
4128 /* Now encode the character C. */
4129 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4130 EMIT_ONE_ASCII_BYTE (c
);
4131 else if (CHAR_BYTE8_P (c
))
4133 c
= CHAR_TO_BYTE8 (c
);
4139 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4143 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4145 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4146 charset
= CHARSET_FROM_ID (charset_ascii
);
4150 c
= coding
->default_char
;
4151 charset
= char_charset (c
, charset_list
, &code
);
4154 if (code
== CHARSET_INVALID_CODE (charset
))
4156 if (charset
== charset_kanji
)
4160 c1
= code
>> 8, c2
= code
& 0xFF;
4161 EMIT_TWO_BYTES (c1
, c2
);
4163 else if (charset
== charset_kana
)
4164 EMIT_ONE_BYTE (code
| 0x80);
4166 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4169 coding
->result
= CODING_RESULT_SUCCESS
;
4170 coding
->produced_char
+= produced_chars
;
4171 coding
->produced
= dst
- coding
->destination
;
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   entries) as BIG5 bytes written from CODING->destination +
   CODING->produced onward.  The roman and big5 charsets are taken, in
   that order, from successive elements of the list VAL.  ASCII
   characters are emitted directly when the coding system is ASCII
   compatible; raw eight-bit characters are converted with
   CHAR_TO_BYTE8; other characters are looked up with char_charset.
   A big5 code is emitted as two bytes (high byte, then low byte) and
   anything else as CODE & 0x7F.  The fallback that substitutes
   CODING_INHIBIT_CHARACTER_SUBSTITUTION or CODING->default_char is
   presumably taken when no charset can encode the character --
   confirm, the guarding condition is not shown in this extract.
   On exit CODING->result is CODING_RESULT_SUCCESS and
   CODING->produced_char and CODING->produced are updated.  */
4176 encode_coding_big5 (coding
)
4177 struct coding_system
*coding
;
4179 int multibytep
= coding
->dst_multibyte
;
4180 int *charbuf
= coding
->charbuf
;
4181 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4182 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4183 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4185 int produced_chars
= 0;
4186 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4187 int ascii_compatible
;
4188 struct charset
*charset_roman
, *charset_big5
;
4191 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4193 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4194 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4195 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4197 while (charbuf
< charbuf_end
)
4199 ASSURE_DESTINATION (safe_room
);
4201 /* Now encode the character C. */
4202 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4203 EMIT_ONE_ASCII_BYTE (c
);
4204 else if (CHAR_BYTE8_P (c
))
4206 c
= CHAR_TO_BYTE8 (c
);
4212 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4216 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4218 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4219 charset
= CHARSET_FROM_ID (charset_ascii
);
4223 c
= coding
->default_char
;
4224 charset
= char_charset (c
, charset_list
, &code
);
4227 if (code
== CHARSET_INVALID_CODE (charset
))
4229 if (charset
== charset_big5
)
4233 c1
= code
>> 8, c2
= code
& 0xFF;
4234 EMIT_TWO_BYTES (c1
, c2
);
4237 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4240 coding
->result
= CODING_RESULT_SUCCESS
;
4241 coding
->produced_char
+= produced_chars
;
4242 coding
->produced
= dst
- coding
->destination
;
4247 /*** 10. CCL handlers ***/
4249 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4250 Check if a text is encoded in a coding system whose
4251 encoder/decoder are written as CCL programs. If it is, return
4252 CATEGORY_MASK_CCL, else return 0. */
/* Check the bytes of CODING->source against the table obtained from
   CODING_CCL_VALIDS.  CATEGORY_MASK_CCL is recorded in
   DETECT_INFO->checked; a byte whose table entry is greater than 1
   sets the `found' mask, and the outcome is ORed into
   DETECT_INFO->rejected or DETECT_INFO->found.
   Note that VALIDS and HEAD_ASCII are read from the CODING passed in,
   before CODING is redirected to the coding_category_ccl entry of
   coding_categories; ATTRS comes from that redirected entry.
   NOTE(review): this listing is an incomplete extract; the loop and
   return statements are not shown.  */
4255 detect_coding_ccl (coding
, detect_info
)
4256 struct coding_system
*coding
;
4257 struct coding_detection_info
*detect_info
;
4259 unsigned char *src
= coding
->source
, *src_base
= src
;
4260 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4261 int multibytep
= coding
->src_multibyte
;
4262 int consumed_chars
= 0;
4264 unsigned char *valids
= CODING_CCL_VALIDS (coding
);
4265 int head_ascii
= coding
->head_ascii
;
4268 detect_info
->checked
|= CATEGORY_MASK_CCL
;
4270 coding
= &coding_categories
[coding_category_ccl
];
4271 attrs
= CODING_ID_ATTRS (coding
->id
);
4272 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4281 if ((valids
[c
] > 1))
4282 found
= CATEGORY_MASK_CCL
;
4284 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4288 detect_info
->found
|= found
;
/* Decode the bytes at CODING->source + CODING->consumed by running the
   CCL program set up from CODING_CCL_DECODER.  Source text is staged,
   at most 1024 elements at a time, in SOURCE_CHARBUF: either as
   characters read with STRING_CHAR_ADVANCE (recording each byte offset
   in SOURCE_BYTEIDX) or as single bytes.  ccl_driver is then called
   repeatedly, writing into CODING->charbuf, while it reports
   CCL_STAT_SUSPEND_BY_DST.  The final CCL status is mapped onto
   CODING->result, and CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.
   NOTE(review): this listing is an incomplete extract; the condition
   choosing between the two staging loops is not shown.  */
4293 decode_coding_ccl (coding
)
4294 struct coding_system
*coding
;
4296 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4297 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4298 int *charbuf
= coding
->charbuf
;
4299 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4300 int consumed_chars
= 0;
4301 int multibytep
= coding
->src_multibyte
;
4302 struct ccl_program ccl
;
4303 int source_charbuf
[1024];
4304 int source_byteidx
[1024];
4305 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4307 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4308 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4310 while (src
< src_end
)
4312 const unsigned char *p
= src
;
4313 int *source
, *source_end
;
4317 while (i
< 1024 && p
< src_end
)
4319 source_byteidx
[i
] = p
- src
;
4320 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4323 while (i
< 1024 && p
< src_end
)
4324 source_charbuf
[i
++] = *p
++;
4326 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4329 source
= source_charbuf
;
4330 source_end
= source
+ i
;
4331 while (source
< source_end
)
4333 ccl_driver (&ccl
, source
, charbuf
,
4334 source_end
- source
, charbuf_end
- charbuf
,
4336 source
+= ccl
.consumed
;
4337 charbuf
+= ccl
.produced
;
4338 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
4341 if (source
< source_end
)
4342 src
+= source_byteidx
[source
- source_charbuf
];
4345 consumed_chars
+= source
- source_charbuf
;
/* NOTE(review): the second comparison below tests ccl.status, a
   CCL_STAT_* value, against CODING_RESULT_INSUFFICIENT_SRC, a coding
   result code; this mixes two enumerations and looks unintended --
   confirm against the full source.  */
4347 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4348 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4354 case CCL_STAT_SUSPEND_BY_SRC
:
4355 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4357 case CCL_STAT_SUSPEND_BY_DST
:
4360 case CCL_STAT_INVALID_CMD
:
4361 coding
->result
= CODING_RESULT_INTERRUPT
;
4364 coding
->result
= CODING_RESULT_SUCCESS
;
4367 coding
->consumed_char
+= consumed_chars
;
4368 coding
->consumed
= src
- coding
->source
;
4369 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf by running the CCL program
   set up from CODING_CCL_ENCODER.  While input remains and at least
   two bytes of room are left before DST_END, ccl_driver writes into
   the 1024-entry staging array DESTINATION_CHARBUF, whose entries are
   then copied to the destination one byte each (masked with 0xFF),
   either through EMIT_ONE_BYTE or by direct stores.  The final CCL
   status is mapped onto CODING->result, and CODING->produced_char and
   CODING->produced are updated.
   NOTE(review): this listing is an incomplete extract; the statement
   clamping DST_BYTES after the `> 1024' test and the condition
   choosing between the two copy loops are not shown.  */
4373 encode_coding_ccl (coding
)
4374 struct coding_system
*coding
;
4376 struct ccl_program ccl
;
4377 int multibytep
= coding
->dst_multibyte
;
4378 int *charbuf
= coding
->charbuf
;
4379 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4380 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4381 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4382 unsigned char *adjusted_dst_end
= dst_end
- 1;
4383 int destination_charbuf
[1024];
4384 int i
, produced_chars
= 0;
4385 Lisp_Object attrs
, eol_type
, charset_list
;
4387 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4388 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4390 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4391 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4393 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
4395 int dst_bytes
= dst_end
- dst
;
4396 if (dst_bytes
> 1024)
4399 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4400 charbuf_end
- charbuf
, dst_bytes
, charset_list
);
4401 charbuf
+= ccl
.consumed
;
4403 for (i
= 0; i
< ccl
.produced
; i
++)
4404 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4407 for (i
= 0; i
< ccl
.produced
; i
++)
4408 *dst
++ = destination_charbuf
[i
] & 0xFF;
4409 produced_chars
+= ccl
.produced
;
4415 case CCL_STAT_SUSPEND_BY_SRC
:
4416 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4418 case CCL_STAT_SUSPEND_BY_DST
:
4419 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
4422 case CCL_STAT_INVALID_CMD
:
4423 coding
->result
= CODING_RESULT_INTERRUPT
;
4426 coding
->result
= CODING_RESULT_SUCCESS
;
4430 coding
->produced_char
+= produced_chars
;
4431 coding
->produced
= dst
- coding
->destination
;
4437 /*** 10, 11. no-conversion handlers ***/
4439 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Raw-text "decoder": performs no conversion.  It sets
   CODING->chars_at_source to 1, resets CODING->consumed_char and
   CODING->consumed to 0, and reports CODING_RESULT_SUCCESS.  */
4442 decode_coding_raw_text (coding
)
4443 struct coding_system
*coding
;
4445 coding
->chars_at_source
= 1;
4446 coding
->consumed_char
= 0;
4447 coding
->consumed
= 0;
4448 coding
->result
= CODING_RESULT_SUCCESS
;
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   entries) as raw text written from CODING->destination +
   CODING->produced onward.  Four cases are visible, selected by
   whether the source (CODING->src_multibyte) and, presumably, the
   destination are multibyte: characters are emitted through the
   EMIT_* macros, or stored directly with `*dst++', with raw eight-bit
   characters converted by CHAR_TO_BYTE8 and other non-ASCII characters
   expanded by CHAR_STRING_ADVANCE.  On exit CODING->result is
   CODING_RESULT_SUCCESS and CODING->produced_char and CODING->produced
   are updated.
   NOTE(review): this listing is an incomplete extract; the outer
   condition and the statements fetching C are not shown.  */
4452 encode_coding_raw_text (coding
)
4453 struct coding_system
*coding
;
4455 int multibytep
= coding
->dst_multibyte
;
4456 int *charbuf
= coding
->charbuf
;
4457 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4458 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4459 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4460 int produced_chars
= 0;
4465 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4467 if (coding
->src_multibyte
)
4468 while (charbuf
< charbuf_end
)
4470 ASSURE_DESTINATION (safe_room
);
4472 if (ASCII_CHAR_P (c
))
4473 EMIT_ONE_ASCII_BYTE (c
);
4474 else if (CHAR_BYTE8_P (c
))
4476 c
= CHAR_TO_BYTE8 (c
);
4481 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4483 CHAR_STRING_ADVANCE (c
, p1
);
4486 EMIT_ONE_BYTE (*p0
);
4492 while (charbuf
< charbuf_end
)
4494 ASSURE_DESTINATION (safe_room
);
4501 if (coding
->src_multibyte
)
4503 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4505 while (charbuf
< charbuf_end
)
4507 ASSURE_DESTINATION (safe_room
);
4509 if (ASCII_CHAR_P (c
))
4511 else if (CHAR_BYTE8_P (c
))
4512 *dst
++ = CHAR_TO_BYTE8 (c
);
4514 CHAR_STRING_ADVANCE (c
, dst
);
4520 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4521 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4522 *dst
++ = *charbuf
++;
/* Count the bytes stored by this call: DST started at
   CODING->destination + CODING->produced.  Measuring from
   CODING->destination + CODING->dst_bytes (i.e. DST_END) instead
   would give a count that is never positive.  */
4523 produced_chars
= dst
- (coding
->destination
+ coding
->produced
);
4526 coding
->result
= CODING_RESULT_SUCCESS
;
4527 coding
->produced_char
+= produced_chars
;
4528 coding
->produced
= dst
- coding
->destination
;
4532 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4533 Check if a text is encoded in a charset-based coding system. If it
4534 is, return 1, else return 0. */
/* Check the bytes of CODING->source against the vector stored in the
   coding_attr_charset_valids attribute.  CATEGORY_MASK_CHARSET is
   recorded in DETECT_INFO->checked, CODING is redirected to the
   coding_category_charset entry of coding_categories, and when that
   coding system is ASCII compatible the CODING->head_ascii leading
   bytes are skipped.  A byte whose entry in VALIDS is nil is tested
   specially; the outcome is ORed into DETECT_INFO->rejected or
   DETECT_INFO->found.
   NOTE(review): this listing is an incomplete extract; the loop and
   return statements are not shown.  */
4537 detect_coding_charset (coding
, detect_info
)
4538 struct coding_system
*coding
;
4539 struct coding_detection_info
*detect_info
;
4541 unsigned char *src
= coding
->source
, *src_base
= src
;
4542 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4543 int multibytep
= coding
->src_multibyte
;
4544 int consumed_chars
= 0;
4545 Lisp_Object attrs
, valids
;
4548 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
4550 coding
= &coding_categories
[coding_category_charset
];
4551 attrs
= CODING_ID_ATTRS (coding
->id
);
4552 valids
= AREF (attrs
, coding_attr_charset_valids
);
4554 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4555 src
+= coding
->head_ascii
;
4562 if (NILP (AREF (valids
, c
)))
4565 found
= CATEGORY_MASK_CHARSET
;
4567 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4571 detect_info
->found
|= found
;
/* Decode the bytes at CODING->source + CODING->consumed into the
   integer buffer CODING->charbuf, keeping MAX_ANNOTATION_LENGTH
   entries in reserve at its end.  Each lead byte C is looked up in the
   vector VALIDS (the coding_attr_charset_valids attribute); the entry
   is either a single charset ID or a list of charset IDs.  Following
   bytes are accumulated into CODE as (CODE << 8) | C, using the
   charset dimension obtained from CHARSET_DIMENSION, and the result is
   converted with CODING_DECODE_CHAR.  Each time the charset of the
   decoded text changes to a non-ASCII one, the previous run is
   recorded with ADD_CHARSET_DATA.  A byte that cannot be decoded is
   stored as itself when ASCII, or as BYTE8_TO_CHAR of it otherwise.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.
   NOTE(review): this listing is an incomplete extract; labels, braces
   and byte-fetching statements are not shown.  */
4576 decode_coding_charset (coding
)
4577 struct coding_system
*coding
;
4579 unsigned char *src
= coding
->source
+ coding
->consumed
;
4580 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4581 unsigned char *src_base
;
4582 int *charbuf
= coding
->charbuf
;
4583 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4584 int consumed_chars
= 0, consumed_chars_base
;
4585 int multibytep
= coding
->src_multibyte
;
4586 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4587 int char_offset
= coding
->produced_char
;
4588 int last_offset
= char_offset
;
4589 int last_id
= charset_ascii
;
4591 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4592 valids
= AREF (attrs
, coding_attr_charset_valids
);
4599 consumed_chars_base
= consumed_chars
;
4601 if (charbuf
>= charbuf_end
)
4607 /* Here we assume that no charset maps '\r' to something
4609 if (EQ (eol_type
, Qdos
))
4613 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4614 goto no_more_source
;
4619 else if (EQ (eol_type
, Qmac
))
4625 struct charset
*charset
;
4630 val
= AREF (valids
, c
);
4635 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4636 dim
= CHARSET_DIMENSION (charset
);
4640 code
= (code
<< 8) | c
;
4643 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4648 /* VAL is a list of charset IDs. It is assured that the
4649 list is sorted by charset dimensions (smaller one
4653 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4654 dim
= CHARSET_DIMENSION (charset
);
4658 code
= (code
<< 8) | c
;
4661 CODING_DECODE_CHAR (coding
, src
, src_base
,
4662 src_end
, charset
, code
, c
);
4670 if (charset
->id
!= charset_ascii
4671 && last_id
!= charset
->id
)
4673 if (last_id
!= charset_ascii
)
4674 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4675 last_id
= charset
->id
;
4676 last_offset
= char_offset
;
4685 consumed_chars
= consumed_chars_base
;
4687 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4693 if (last_id
!= charset_ascii
)
4694 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4695 coding
->consumed_char
+= consumed_chars_base
;
4696 coding
->consumed
= src_base
- coding
->source
;
4697 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   entries) with a charset-based coding system, writing from
   CODING->destination + CODING->produced onward.  ASCII characters are
   emitted directly when the coding system is ASCII compatible; raw
   eight-bit characters are converted with CHAR_TO_BYTE8; other
   characters are looked up with char_charset, and the resulting CODE
   is emitted most significant byte first as one to four bytes
   according to CHARSET_DIMENSION.  The visible fallback replaces C
   with CODING_INHIBIT_CHARACTER_SUBSTITUTION in safe-encoding mode,
   or with CODING->default_char otherwise (presumably when no charset
   can encode C -- confirm, the guarding condition is not shown).
   On exit CODING->result is CODING_RESULT_SUCCESS and
   CODING->produced_char and CODING->produced are updated.  */
4701 encode_coding_charset (coding
)
4702 struct coding_system
*coding
;
4704 int multibytep
= coding
->dst_multibyte
;
4705 int *charbuf
= coding
->charbuf
;
4706 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4707 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4708 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4709 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4710 int produced_chars
= 0;
4711 Lisp_Object attrs
, eol_type
, charset_list
;
4712 int ascii_compatible
;
4715 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4716 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4718 while (charbuf
< charbuf_end
)
4720 struct charset
*charset
;
4723 ASSURE_DESTINATION (safe_room
);
4725 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4726 EMIT_ONE_ASCII_BYTE (c
);
4727 else if (CHAR_BYTE8_P (c
))
4729 c
= CHAR_TO_BYTE8 (c
);
4734 charset
= char_charset (c
, charset_list
, &code
);
4737 if (CHARSET_DIMENSION (charset
) == 1)
4738 EMIT_ONE_BYTE (code
);
4739 else if (CHARSET_DIMENSION (charset
) == 2)
4740 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4741 else if (CHARSET_DIMENSION (charset
) == 3)
4742 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4744 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4745 (code
>> 8) & 0xFF, code
& 0xFF);
4749 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4750 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4752 c
= coding
->default_char
;
4758 coding
->result
= CODING_RESULT_SUCCESS
;
4759 coding
->produced_char
+= produced_chars
;
4760 coding
->produced
= dst
- coding
->destination
;
4765 /*** 7. C library functions ***/
4767 /* Setup coding context CODING from information about CODING_SYSTEM.
4768 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4769 CODING_SYSTEM is invalid, signal an error. */
/* Initialize the coding context CODING from CODING_SYSTEM (nil is
   replaced by Qno_conversion; CHECK_CODING_SYSTEM_GET_ID stores the ID
   in CODING->id).  The function resets CODING->head_ascii to -1, sets
   CODING_REQUIRE_DETECTION_MASK in CODING->common_flags when the
   end-of-line type is a vector, and copies the safe-charsets string
   and the default character from the attributes.  It then dispatches
   on the coding type (undecided, iso-2022, charset, utf-8, utf-16,
   ccl, emacs-mule, shift-jis, big5, otherwise raw-text), installing
   the matching detector, decoder and encoder function pointers and the
   type-specific state and flags.
   NOTE(review): this listing is an incomplete extract; braces and some
   declarations are not shown.  */
4772 setup_coding_system (coding_system
, coding
)
4773 Lisp_Object coding_system
;
4774 struct coding_system
*coding
;
4777 Lisp_Object eol_type
;
4778 Lisp_Object coding_type
;
4781 if (NILP (coding_system
))
4782 coding_system
= Qno_conversion
;
4784 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4786 attrs
= CODING_ID_ATTRS (coding
->id
);
4787 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4790 coding
->head_ascii
= -1;
4791 coding
->common_flags
4792 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4794 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4795 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4796 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4797 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4799 coding_type
= CODING_ATTR_TYPE (attrs
);
4800 if (EQ (coding_type
, Qundecided
))
4802 coding
->detector
= NULL
;
4803 coding
->decoder
= decode_coding_raw_text
;
4804 coding
->encoder
= encode_coding_raw_text
;
4805 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4807 else if (EQ (coding_type
, Qiso_2022
))
4810 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4811 enum coding_category category
= XINT (CODING_ATTR_CATEGORY (attrs
));
4813 /* Invoke graphic register 0 to plane 0. */
4814 CODING_ISO_INVOCATION (coding
, 0) = 0;
4815 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4816 CODING_ISO_INVOCATION (coding
, 1)
4817 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4818 /* Setup the initial status of designation. */
4819 for (i
= 0; i
< 4; i
++)
4820 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4821 /* Not single shifting initially. */
4822 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4823 /* Beginning of buffer should also be regarded as bol. */
4824 CODING_ISO_BOL (coding
) = 1;
4825 coding
->detector
= detect_coding_iso_2022
;
4826 coding
->decoder
= decode_coding_iso_2022
;
4827 coding
->encoder
= encode_coding_iso_2022
;
4828 if (flags
& CODING_ISO_FLAG_SAFE
)
4829 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4830 coding
->common_flags
4831 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4832 | CODING_REQUIRE_FLUSHING_MASK
);
4833 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4834 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4835 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
4836 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
4837 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4839 setup_iso_safe_charsets (attrs
);
4840 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4841 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4842 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4844 CODING_ISO_FLAGS (coding
) = flags
;
4846 else if (EQ (coding_type
, Qcharset
))
4848 coding
->detector
= detect_coding_charset
;
4849 coding
->decoder
= decode_coding_charset
;
4850 coding
->encoder
= encode_coding_charset
;
4851 coding
->common_flags
4852 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4854 else if (EQ (coding_type
, Qutf_8
))
4856 coding
->detector
= detect_coding_utf_8
;
4857 coding
->decoder
= decode_coding_utf_8
;
4858 coding
->encoder
= encode_coding_utf_8
;
4859 coding
->common_flags
4860 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4862 else if (EQ (coding_type
, Qutf_16
))
4864 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4865 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4866 : EQ (val
, Qt
) ? utf_16_with_bom
4867 : utf_16_without_bom
);
4868 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4869 CODING_UTF_16_ENDIAN (coding
) = (NILP (val
) ? utf_16_big_endian
4870 : utf_16_little_endian
);
4871 CODING_UTF_16_SURROGATE (coding
) = 0;
4872 coding
->detector
= detect_coding_utf_16
;
4873 coding
->decoder
= decode_coding_utf_16
;
4874 coding
->encoder
= encode_coding_utf_16
;
4875 coding
->common_flags
4876 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4878 else if (EQ (coding_type
, Qccl
))
4880 coding
->detector
= detect_coding_ccl
;
4881 coding
->decoder
= decode_coding_ccl
;
4882 coding
->encoder
= encode_coding_ccl
;
4883 coding
->common_flags
4884 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4885 | CODING_REQUIRE_FLUSHING_MASK
);
4887 else if (EQ (coding_type
, Qemacs_mule
))
4889 coding
->detector
= detect_coding_emacs_mule
;
4890 coding
->decoder
= decode_coding_emacs_mule
;
4891 coding
->encoder
= encode_coding_emacs_mule
;
4892 coding
->common_flags
4893 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4894 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4895 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4897 Lisp_Object tail
, safe_charsets
;
4898 int max_charset_id
= 0;
4900 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4902 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4903 max_charset_id
= XFASTINT (XCAR (tail
));
4904 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
4906 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4908 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
4909 coding
->max_charset_id
= max_charset_id
;
4910 coding
->safe_charsets
= (char *) XSTRING (safe_charsets
)->data
;
4913 else if (EQ (coding_type
, Qshift_jis
))
4915 coding
->detector
= detect_coding_sjis
;
4916 coding
->decoder
= decode_coding_sjis
;
4917 coding
->encoder
= encode_coding_sjis
;
4918 coding
->common_flags
4919 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4921 else if (EQ (coding_type
, Qbig5
))
4923 coding
->detector
= detect_coding_big5
;
4924 coding
->decoder
= decode_coding_big5
;
4925 coding
->encoder
= encode_coding_big5
;
4926 coding
->common_flags
4927 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4929 else /* EQ (coding_type, Qraw_text) */
4931 coding
->detector
= NULL
;
4932 coding
->decoder
= decode_coding_raw_text
;
4933 coding
->encoder
= encode_coding_raw_text
;
4934 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4940 /* Return raw-text or one of its subsidiaries that has the same
4941 eol_type as CODING-SYSTEM. */
/* Map CODING_SYSTEM to a raw-text coding system.  If the type
   attribute of CODING_SYSTEM is already Qraw_text, CODING_SYSTEM is
   returned unchanged.  Otherwise element 2 of its spec (the
   end-of-line type) selects a subsidiary from the end-of-line vector
   of Qraw_text: index 0 for Qunix, 1 for Qdos, 2 for anything else.
   NOTE(review): the statement executed when the end-of-line type is
   itself a vector is not shown in this extract.  */
4944 raw_text_coding_system (coding_system
)
4945 Lisp_Object coding_system
;
4947 Lisp_Object spec
, attrs
;
4948 Lisp_Object eol_type
, raw_text_eol_type
;
4950 spec
= CODING_SYSTEM_SPEC (coding_system
);
4951 attrs
= AREF (spec
, 0);
4953 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
4954 return coding_system
;
4956 eol_type
= AREF (spec
, 2);
4957 if (VECTORP (eol_type
))
4959 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
4960 raw_text_eol_type
= AREF (spec
, 2);
4961 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
4962 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
4963 : AREF (raw_text_eol_type
, 2));
4967 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4968 does, return one of the subsidiaries that has the same eol-spec as
4969 PARENT. Otherwise, return CODING_SYSTEM. */
4972 coding_inherit_eol_type (coding_system
, parent
)
4973 Lisp_Object coding_system
, parent
;
4975 Lisp_Object spec
, attrs
, eol_type
;
4977 spec
= CODING_SYSTEM_SPEC (coding_system
);
4978 attrs
= AREF (spec
, 0);
4979 eol_type
= AREF (spec
, 2);
4980 if (VECTORP (eol_type
))
4982 Lisp_Object parent_spec
;
4983 Lisp_Object parent_eol_type
;
4986 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
4987 parent_eol_type
= AREF (parent_spec
, 2);
4988 if (EQ (parent_eol_type
, Qunix
))
4989 coding_system
= AREF (eol_type
, 0);
4990 else if (EQ (parent_eol_type
, Qdos
))
4991 coding_system
= AREF (eol_type
, 1);
4992 else if (EQ (parent_eol_type
, Qmac
))
4993 coding_system
= AREF (eol_type
, 2);
4995 return coding_system
;
4998 /* Emacs has a mechanism to automatically detect a coding system if it
4999 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5000 it's impossible to distinguish some coding systems accurately
5001 because they use the same range of codes. So, at first, coding
5002 systems are categorized into the following categories:
5004 o coding-category-emacs-mule
5006 The category for a coding system which has the same code range
5007 as Emacs' internal format. Assigned the coding-system (Lisp
5008 symbol) `emacs-mule' by default.
5010 o coding-category-sjis
5012 The category for a coding system which has the same code range
5013 as SJIS. Assigned the coding-system (Lisp
5014 symbol) `japanese-shift-jis' by default.
5016 o coding-category-iso-7
5018 The category for a coding system which has the same code range
5019 as ISO2022 of 7-bit environment. This doesn't use any locking
5020 shift and single shift functions. This can encode/decode all
5021 charsets. Assigned the coding-system (Lisp symbol)
5022 `iso-2022-7bit' by default.
5024 o coding-category-iso-7-tight
5026 Same as coding-category-iso-7 except that this can
5027 encode/decode only the specified charsets.
5029 o coding-category-iso-8-1
5031 The category for a coding system which has the same code range
5032 as ISO2022 of 8-bit environment and graphic plane 1 used only
5033 for DIMENSION1 charset. This doesn't use any locking shift
5034 and single shift functions. Assigned the coding-system (Lisp
5035 symbol) `iso-latin-1' by default.
5037 o coding-category-iso-8-2
5039 The category for a coding system which has the same code range
5040 as ISO2022 of 8-bit environment and graphic plane 1 used only
5041 for DIMENSION2 charset. This doesn't use any locking shift
5042 and single shift functions. Assigned the coding-system (Lisp
5043 symbol) `japanese-iso-8bit' by default.
5045 o coding-category-iso-7-else
5047 The category for a coding system which has the same code range
5048 as ISO2022 of 7-bit environment but uses locking shift or
5049 single shift functions. Assigned the coding-system (Lisp
5050 symbol) `iso-2022-7bit-lock' by default.
5052 o coding-category-iso-8-else
5054 The category for a coding system which has the same code range
5055 as ISO2022 of 8-bit environment but uses locking shift or
5056 single shift functions. Assigned the coding-system (Lisp
5057 symbol) `iso-2022-8bit-ss2' by default.
5059 o coding-category-big5
5061 The category for a coding system which has the same code range
5062 as BIG5. Assigned the coding-system (Lisp symbol)
5063 `cn-big5' by default.
5065 o coding-category-utf-8
5067 The category for a coding system which has the same code range
5068 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5069 symbol) `utf-8' by default.
5071 o coding-category-utf-16-be
5073 The category for a coding system in which a text has an
5074 Unicode signature (cf. Unicode Standard) in the order of BIG
5075 endian at the head. Assigned the coding-system (Lisp symbol)
5076 `utf-16-be' by default.
5078 o coding-category-utf-16-le
5080 The category for a coding system in which a text has an
5081 Unicode signature (cf. Unicode Standard) in the order of
5082 LITTLE endian at the head. Assigned the coding-system (Lisp
5083 symbol) `utf-16-le' by default.
5085 o coding-category-ccl
5087 The category for a coding system of which encoder/decoder is
5088 written in CCL programs. The default value is nil, i.e., no
5089 coding system is assigned.
5091 o coding-category-binary
5093 The category for a coding system not categorized in any of the
5094 above. Assigned the coding-system (Lisp symbol)
5095 `no-conversion' by default.
5097 Each of them is a Lisp symbol and the value is an actual
5098 `coding-system's (this is also a Lisp symbol) assigned by a user.
5099 What Emacs does actually is to detect a category of coding system.
5100 Then, it uses a `coding-system' assigned to it. If Emacs can't
5101 decide only one possible category, it selects a category of the
5102 highest priority. Priorities of categories are also specified by a
5103 user in a Lisp variable `coding-category-list'.
5107 #define EOL_SEEN_NONE 0
5108 #define EOL_SEEN_LF 1
5109 #define EOL_SEEN_CR 2
5110 #define EOL_SEEN_CRLF 4
5112 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5113 SOURCE is encoded. If CATEGORY is one of
5114 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5115 two-byte, else they are encoded by one-byte.
5117 Return one of EOL_SEEN_XXX. */
5119 #define MAX_EOL_CHECK_COUNT 3
5122 detect_eol (source
, src_bytes
, category
)
5123 unsigned char *source
;
5124 EMACS_INT src_bytes
;
5125 enum coding_category category
;
5127 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5130 int eol_seen
= EOL_SEEN_NONE
;
5132 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
5136 msb
= category
== (coding_category_utf_16_le
5137 | coding_category_utf_16_le_nosig
);
5140 while (src
+ 1 < src_end
)
5143 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5148 this_eol
= EOL_SEEN_LF
;
5149 else if (src
+ 3 >= src_end
5150 || src
[msb
+ 2] != 0
5151 || src
[lsb
+ 2] != '\n')
5152 this_eol
= EOL_SEEN_CR
;
5154 this_eol
= EOL_SEEN_CRLF
;
5156 if (eol_seen
== EOL_SEEN_NONE
)
5157 /* This is the first end-of-line. */
5158 eol_seen
= this_eol
;
5159 else if (eol_seen
!= this_eol
)
5161 /* The found type is different from what found before. */
5162 eol_seen
= EOL_SEEN_LF
;
5165 if (++total
== MAX_EOL_CHECK_COUNT
)
5173 while (src
< src_end
)
5176 if (c
== '\n' || c
== '\r')
5181 this_eol
= EOL_SEEN_LF
;
5182 else if (src
>= src_end
|| *src
!= '\n')
5183 this_eol
= EOL_SEEN_CR
;
5185 this_eol
= EOL_SEEN_CRLF
, src
++;
5187 if (eol_seen
== EOL_SEEN_NONE
)
5188 /* This is the first end-of-line. */
5189 eol_seen
= this_eol
;
5190 else if (eol_seen
!= this_eol
)
5192 /* The found type is different from what found before. */
5193 eol_seen
= EOL_SEEN_LF
;
5196 if (++total
== MAX_EOL_CHECK_COUNT
)
5206 adjust_coding_eol_type (coding
, eol_seen
)
5207 struct coding_system
*coding
;
5210 Lisp_Object eol_type
;
5212 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5213 if (eol_seen
& EOL_SEEN_LF
)
5214 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
5215 else if (eol_seen
& EOL_SEEN_CRLF
)
5216 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
5217 else if (eol_seen
& EOL_SEEN_CR
)
5218 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5221 /* Detect how a text specified in CODING is encoded. If a coding
5222 system is detected, update fields of CODING by the detected coding
5226 detect_coding (coding
)
5227 struct coding_system
*coding
;
5229 unsigned char *src
, *src_end
;
5230 Lisp_Object attrs
, coding_type
;
5232 coding
->consumed
= coding
->consumed_char
= 0;
5233 coding
->produced
= coding
->produced_char
= 0;
5234 coding_set_source (coding
);
5236 src_end
= coding
->source
+ coding
->src_bytes
;
5238 /* If we have not yet decided the text encoding type, detect it
5240 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
5244 for (src
= coding
->source
; src
< src_end
; src
++)
5247 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
5249 || c
== ISO_CODE_SO
)))
5252 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5254 if (coding
->head_ascii
< coding
->src_bytes
)
5256 struct coding_detection_info detect_info
;
5257 enum coding_category category
;
5258 struct coding_system
*this;
5260 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
5261 for (i
= 0; i
< coding_category_raw_text
; i
++)
5263 category
= coding_priorities
[i
];
5264 this = coding_categories
+ category
;
5267 /* No coding system of this category is defined. */
5268 detect_info
.rejected
|= (1 << category
);
5270 else if (category
>= coding_category_raw_text
)
5272 else if (detect_info
.checked
& (1 << category
))
5274 if (detect_info
.found
& (1 << category
))
5277 else if ((*(this->detector
)) (coding
, &detect_info
)
5278 && detect_info
.found
& (1 << category
))
5281 if (i
< coding_category_raw_text
)
5282 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5283 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5284 setup_coding_system (Qraw_text
, coding
);
5285 else if (detect_info
.rejected
)
5286 for (i
= 0; i
< coding_category_raw_text
; i
++)
5287 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5289 this = coding_categories
+ coding_priorities
[i
];
5290 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5296 attrs
= CODING_ID_ATTRS (coding
->id
);
5297 coding_type
= CODING_ATTR_TYPE (attrs
);
5299 /* If we have not yet decided the EOL type, detect it now. But, the
5300 detection is impossible for a CCL based coding system, in which
5301 case, we detect the EOL type after decoding. */
5302 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
))
5303 && ! EQ (coding_type
, Qccl
))
5305 int eol_seen
= detect_eol (coding
->source
, coding
->src_bytes
,
5306 XINT (CODING_ATTR_CATEGORY (attrs
)));
5308 if (eol_seen
!= EOL_SEEN_NONE
)
5309 adjust_coding_eol_type (coding
, eol_seen
);
5316 struct coding_system
*coding
;
5318 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
)))
5320 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5321 unsigned char *pend
= p
+ coding
->produced
;
5322 int eol_seen
= EOL_SEEN_NONE
;
5324 for (; p
< pend
; p
++)
5327 eol_seen
|= EOL_SEEN_LF
;
5328 else if (*p
== '\r')
5330 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5332 eol_seen
|= EOL_SEEN_CRLF
;
5336 eol_seen
|= EOL_SEEN_CR
;
5339 if (eol_seen
!= EOL_SEEN_NONE
)
5340 adjust_coding_eol_type (coding
, eol_seen
);
5343 if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qmac
))
5345 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5346 unsigned char *pend
= p
+ coding
->produced
;
5348 for (; p
< pend
; p
++)
5352 else if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qdos
))
5354 unsigned char *p
, *pbeg
, *pend
;
5355 Lisp_Object undo_list
;
5357 move_gap_both (coding
->dst_pos
+ coding
->produced_char
,
5358 coding
->dst_pos_byte
+ coding
->produced
);
5359 undo_list
= current_buffer
->undo_list
;
5360 current_buffer
->undo_list
= Qt
;
5361 del_range_2 (coding
->dst_pos
, coding
->dst_pos_byte
, GPT
, GPT_BYTE
, 0);
5362 current_buffer
->undo_list
= undo_list
;
5364 pend
= pbeg
+ coding
->produced
;
5366 for (p
= pend
- 1; p
>= pbeg
; p
--)
5369 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
- p
- 1);
5372 coding
->produced_char
-= coding
->produced
- (pend
- pbeg
);
5373 coding
->produced
= pend
- pbeg
;
5374 insert_from_gap (coding
->produced_char
, coding
->produced
);
5379 translate_chars (coding
, table
)
5380 struct coding_system
*coding
;
5383 int *charbuf
= coding
->charbuf
;
5384 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5387 if (coding
->chars_at_source
)
5390 while (charbuf
< charbuf_end
)
5396 *charbuf
++ = translate_char (table
, c
);
5401 produce_chars (coding
)
5402 struct coding_system
*coding
;
5404 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5405 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5407 int produced_chars
= 0;
5409 if (! coding
->chars_at_source
)
5411 /* Characters are in coding->charbuf. */
5412 int *buf
= coding
->charbuf
;
5413 int *buf_end
= buf
+ coding
->charbuf_used
;
5414 unsigned char *adjusted_dst_end
;
5416 if (BUFFERP (coding
->src_object
)
5417 && EQ (coding
->src_object
, coding
->dst_object
))
5418 dst_end
= coding
->source
+ coding
->consumed
;
5419 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5421 while (buf
< buf_end
)
5425 if (dst
>= adjusted_dst_end
)
5427 dst
= alloc_destination (coding
,
5428 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5430 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5431 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5435 if (coding
->dst_multibyte
5436 || ! CHAR_BYTE8_P (c
))
5437 CHAR_STRING_ADVANCE (c
, dst
);
5439 *dst
++ = CHAR_TO_BYTE8 (c
);
5443 /* This is an annotation datum. */
5449 unsigned char *src
= coding
->source
;
5450 unsigned char *src_end
= src
+ coding
->src_bytes
;
5451 Lisp_Object eol_type
;
5453 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5455 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5457 if (coding
->src_multibyte
)
5464 unsigned char *src_base
= src
;
5470 if (EQ (eol_type
, Qdos
))
5474 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
5475 goto no_more_source
;
5480 else if (EQ (eol_type
, Qmac
))
5485 coding
->consumed
= src
- coding
->source
;
5487 if (EQ (coding
->src_object
, coding
->dst_object
))
5491 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5493 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5494 coding_set_source (coding
);
5495 src
= coding
->source
+ coding
->consumed
;
5496 src_end
= coding
->source
+ coding
->src_bytes
;
5506 while (src
< src_end
)
5513 if (EQ (eol_type
, Qdos
))
5519 else if (EQ (eol_type
, Qmac
))
5522 if (dst
>= dst_end
- 1)
5524 coding
->consumed
= src
- coding
->source
;
5526 if (EQ (coding
->src_object
, coding
->dst_object
))
5528 if (dst
>= dst_end
- 1)
5530 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5532 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5533 coding_set_source (coding
);
5534 src
= coding
->source
+ coding
->consumed
;
5535 src_end
= coding
->source
+ coding
->src_bytes
;
5543 if (!EQ (coding
->src_object
, coding
->dst_object
))
5545 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5549 EMACS_INT offset
= src
- coding
->source
;
5551 dst
= alloc_destination (coding
, require
, dst
);
5552 coding_set_source (coding
);
5553 src
= coding
->source
+ offset
;
5554 src_end
= coding
->source
+ coding
->src_bytes
;
5557 produced_chars
= coding
->src_chars
;
5558 while (src
< src_end
)
5564 if (EQ (eol_type
, Qdos
))
5571 else if (EQ (eol_type
, Qmac
))
5577 coding
->consumed
= coding
->src_bytes
;
5578 coding
->consumed_char
= coding
->src_chars
;
5581 produced
= dst
- (coding
->destination
+ coding
->produced
);
5582 if (BUFFERP (coding
->dst_object
))
5583 insert_from_gap (produced_chars
, produced
);
5584 coding
->produced
+= produced
;
5585 coding
->produced_char
+= produced_chars
;
5586 return produced_chars
;
5589 /* Compose text in CODING->object according to the annotation data at
5590 CHARBUF. CHARBUF is an array:
5591 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5595 produce_composition (coding
, charbuf
)
5596 struct coding_system
*coding
;
5601 enum composition_method method
;
5602 Lisp_Object components
;
5605 from
= coding
->dst_pos
+ charbuf
[2];
5606 to
= coding
->dst_pos
+ charbuf
[3];
5607 method
= (enum composition_method
) (charbuf
[4]);
5609 if (method
== COMPOSITION_RELATIVE
)
5613 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5618 for (i
= 0; i
< len
; i
++)
5619 args
[i
] = make_number (charbuf
[i
]);
5620 components
= (method
== COMPOSITION_WITH_ALTCHARS
5621 ? Fstring (len
, args
) : Fvector (len
, args
));
5623 compose_text (from
, to
, components
, Qnil
, coding
->dst_object
);
5627 /* Put `charset' property on text in CODING->object according to
5628 the annotation data at CHARBUF. CHARBUF is an array:
5629 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5633 produce_charset (coding
, charbuf
)
5634 struct coding_system
*coding
;
5637 EMACS_INT from
= coding
->dst_pos
+ charbuf
[2];
5638 EMACS_INT to
= coding
->dst_pos
+ charbuf
[3];
5639 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[4]);
5641 Fput_text_property (make_number (from
), make_number (to
),
5642 Qcharset
, CHARSET_NAME (charset
),
5643 coding
->dst_object
);
5647 #define CHARBUF_SIZE 0x4000
5649 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5651 int size = CHARBUF_SIZE;; \
5653 coding->charbuf = NULL; \
5654 while (size > 1024) \
5656 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5657 if (coding->charbuf) \
5661 if (! coding->charbuf) \
5663 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5664 return coding->result; \
5666 coding->charbuf_size = size; \
5671 produce_annotation (coding
)
5672 struct coding_system
*coding
;
5674 int *charbuf
= coding
->charbuf
;
5675 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5677 if (NILP (coding
->dst_object
))
5680 while (charbuf
< charbuf_end
)
5686 int len
= -*charbuf
;
5689 case CODING_ANNOTATE_COMPOSITION_MASK
:
5690 produce_composition (coding
, charbuf
);
5692 case CODING_ANNOTATE_CHARSET_MASK
:
5693 produce_charset (coding
, charbuf
);
5703 /* Decode the data at CODING->src_object into CODING->dst_object.
5704 CODING->src_object is a buffer, a string, or nil.
5705 CODING->dst_object is a buffer.
5707 If CODING->src_object is a buffer, it must be the current buffer.
5708 In this case, if CODING->src_pos is positive, it is a position of
5709 the source text in the buffer, otherwise, the source text is in the
5710 gap area of the buffer, and CODING->src_pos specifies the offset of
5711 the text from GPT (which must be the same as PT). If this is the
5712 same buffer as CODING->dst_object, CODING->src_pos must be
5715 If CODING->src_object is a string, CODING->src_pos in an index to
5718 If CODING->src_object is nil, CODING->source must already point to
5719 the non-relocatable memory area. In this case, CODING->src_pos is
5720 an offset from CODING->source.
5722 The decoded data is inserted at the current point of the buffer
5727 decode_coding (coding
)
5728 struct coding_system
*coding
;
5732 if (BUFFERP (coding
->src_object
)
5733 && coding
->src_pos
> 0
5734 && coding
->src_pos
< GPT
5735 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5736 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
5738 if (BUFFERP (coding
->dst_object
))
5740 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5741 set_buffer_internal (XBUFFER (coding
->dst_object
));
5743 move_gap_both (PT
, PT_BYTE
);
5746 coding
->consumed
= coding
->consumed_char
= 0;
5747 coding
->produced
= coding
->produced_char
= 0;
5748 coding
->chars_at_source
= 0;
5749 coding
->result
= CODING_RESULT_SUCCESS
;
5752 ALLOC_CONVERSION_WORK_AREA (coding
);
5754 attrs
= CODING_ID_ATTRS (coding
->id
);
5758 coding_set_source (coding
);
5759 coding
->annotated
= 0;
5760 (*(coding
->decoder
)) (coding
);
5761 if (!NILP (CODING_ATTR_DECODE_TBL (attrs
)))
5762 translate_chars (coding
, CODING_ATTR_DECODE_TBL (attrs
));
5763 else if (!NILP (Vstandard_translation_table_for_decode
))
5764 translate_chars (coding
, Vstandard_translation_table_for_decode
);
5765 coding_set_destination (coding
);
5766 produce_chars (coding
);
5767 if (coding
->annotated
)
5768 produce_annotation (coding
);
5770 while (coding
->consumed
< coding
->src_bytes
5771 && ! coding
->result
);
5773 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qccl
)
5774 && SYMBOLP (CODING_ID_EOL_TYPE (coding
->id
))
5775 && ! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5776 decode_eol (coding
);
5778 coding
->carryover_bytes
= 0;
5779 if (coding
->consumed
< coding
->src_bytes
)
5781 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5784 coding_set_source (coding
);
5785 coding_set_destination (coding
);
5786 src
= coding
->source
+ coding
->consumed
;
5788 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5790 /* Flush out unprocessed data as binary chars. We are sure
5791 that the number of data is less than the size of
5793 while (nbytes
-- > 0)
5797 coding
->charbuf
[coding
->charbuf_used
++] = (c
& 0x80 ? - c
: c
);
5799 produce_chars (coding
);
5803 /* Record unprocessed bytes in coding->carryover. We are
5804 sure that the number of data is less than the size of
5805 coding->carryover. */
5806 unsigned char *p
= coding
->carryover
;
5808 coding
->carryover_bytes
= nbytes
;
5809 while (nbytes
-- > 0)
5812 coding
->consumed
= coding
->src_bytes
;
5815 return coding
->result
;
5819 /* Extract an annotation datum from a composition starting at POS and
5820 ending before LIMIT of CODING->src_object (buffer or string), store
5821 the data in BUF, set *STOP to a starting position of the next
5822 composition (if any) or to LIMIT, and return the address of the
5823 next element of BUF.
5825 If such an annotation is not found, set *STOP to a starting
5826 position of a composition after POS (if any) or to LIMIT, and
5830 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
5831 EMACS_INT pos
, limit
;
5832 struct coding_system
*coding
;
5836 EMACS_INT start
, end
;
5839 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
5842 else if (start
> pos
)
5848 /* We found a composition. Store the corresponding
5849 annotation data in BUF. */
5851 enum composition_method method
= COMPOSITION_METHOD (prop
);
5852 int nchars
= COMPOSITION_LENGTH (prop
);
5854 ADD_COMPOSITION_DATA (buf
, 0, nchars
, method
);
5855 if (method
!= COMPOSITION_RELATIVE
)
5857 Lisp_Object components
;
5860 components
= COMPOSITION_COMPONENTS (prop
);
5861 if (VECTORP (components
))
5863 len
= XVECTOR (components
)->size
;
5864 for (i
= 0; i
< len
; i
++)
5865 *buf
++ = XINT (AREF (components
, i
));
5867 else if (STRINGP (components
))
5869 len
= XSTRING (components
)->size
;
5873 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
5877 else if (INTEGERP (components
))
5880 *buf
++ = XINT (components
);
5882 else if (CONSP (components
))
5884 for (len
= 0; CONSP (components
);
5885 len
++, components
= XCDR (components
))
5886 *buf
++ = XINT (XCAR (components
));
5894 if (find_composition (end
, limit
, &start
, &end
, &prop
,
5905 /* Extract an annotation datum from a text property `charset' at POS of
5906 CODING->src_object (buffer or string), store the data in BUF, set
5907 *STOP to the position where the value of `charset' property changes
5908 (limiting by LIMIT), and return the address of the next element of
5911 If the property value is nil, set *STOP to the position where the
5912 property value is non-nil (limiting by LIMIT), and return BUF. */
5915 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
5916 EMACS_INT pos
, limit
;
5917 struct coding_system
*coding
;
5921 Lisp_Object val
, next
;
5924 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
5925 if (! NILP (val
) && CHARSETP (val
))
5926 id
= XINT (CHARSET_SYMBOL_ID (val
));
5929 ADD_CHARSET_DATA (buf
, 0, 0, id
);
5930 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
5932 make_number (limit
));
5933 *stop
= XINT (next
);
5939 consume_chars (coding
)
5940 struct coding_system
*coding
;
5942 int *buf
= coding
->charbuf
;
5943 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
5944 const unsigned char *src
= coding
->source
+ coding
->consumed
;
5945 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
5946 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
5947 int multibytep
= coding
->src_multibyte
;
5948 Lisp_Object eol_type
;
5950 EMACS_INT stop
, stop_composition
, stop_charset
;
5953 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5954 if (VECTORP (eol_type
))
5957 /* Note: composition handling is not yet implemented. */
5958 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
5960 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
5961 stop
= stop_composition
= pos
;
5963 stop
= stop_composition
= end_pos
;
5964 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
5965 stop
= stop_charset
= pos
;
5967 stop_charset
= end_pos
;
5969 /* Compensate for CRLF and annotation. */
5970 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
5971 while (buf
< buf_end
)
5979 if (pos
== stop_composition
)
5980 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
5981 buf
, &stop_composition
);
5982 if (pos
== stop_charset
)
5983 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
5984 buf
, &stop_charset
);
5985 stop
= (stop_composition
< stop_charset
5986 ? stop_composition
: stop_charset
);
5992 c
= STRING_CHAR_ADVANCE (src
);
5993 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
5995 if (! EQ (eol_type
, Qunix
))
5999 if (EQ (eol_type
, Qdos
))
6009 coding
->consumed
= src
- coding
->source
;
6010 coding
->consumed_char
= pos
- coding
->src_pos
;
6011 coding
->charbuf_used
= buf
- coding
->charbuf
;
6012 coding
->chars_at_source
= 0;
6016 /* Encode the text at CODING->src_object into CODING->dst_object.
6017 CODING->src_object is a buffer or a string.
6018 CODING->dst_object is a buffer or nil.
6020 If CODING->src_object is a buffer, it must be the current buffer.
6021 In this case, if CODING->src_pos is positive, it is a position of
6022 the source text in the buffer, otherwise. the source text is in the
6023 gap area of the buffer, and coding->src_pos specifies the offset of
6024 the text from GPT (which must be the same as PT). If this is the
6025 same buffer as CODING->dst_object, CODING->src_pos must be
6026 negative and CODING should not have `pre-write-conversion'.
6028 If CODING->src_object is a string, CODING should not have
6029 `pre-write-conversion'.
6031 If CODING->dst_object is a buffer, the encoded data is inserted at
6032 the current point of that buffer.
6034 If CODING->dst_object is nil, the encoded data is placed at the
6035 memory area specified by CODING->destination. */
6038 encode_coding (coding
)
6039 struct coding_system
*coding
;
6043 attrs
= CODING_ID_ATTRS (coding
->id
);
6045 if (BUFFERP (coding
->dst_object
))
6047 set_buffer_internal (XBUFFER (coding
->dst_object
));
6048 coding
->dst_multibyte
6049 = ! NILP (current_buffer
->enable_multibyte_characters
);
6052 coding
->consumed
= coding
->consumed_char
= 0;
6053 coding
->produced
= coding
->produced_char
= 0;
6054 coding
->result
= CODING_RESULT_SUCCESS
;
6057 ALLOC_CONVERSION_WORK_AREA (coding
);
6060 coding_set_source (coding
);
6061 consume_chars (coding
);
6063 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs
)))
6064 translate_chars (coding
, CODING_ATTR_ENCODE_TBL (attrs
));
6065 else if (!NILP (Vstandard_translation_table_for_encode
))
6066 translate_chars (coding
, Vstandard_translation_table_for_encode
);
6068 coding_set_destination (coding
);
6069 (*(coding
->encoder
)) (coding
);
6070 } while (coding
->consumed_char
< coding
->src_chars
);
6072 if (BUFFERP (coding
->dst_object
))
6073 insert_from_gap (coding
->produced_char
, coding
->produced
);
6075 return (coding
->result
);
6080 /* List of currently used working buffer. */
6081 Lisp_Object Vcode_conversion_work_buf_list
;
6083 /* A working buffer used by the top level conversion. */
6084 Lisp_Object Vcode_conversion_reused_work_buf
;
6087 /* Return a working buffer that can be freely used by the following
6088 code conversion. MULTIBYTEP specifies the multibyteness of the
6092 make_conversion_work_buffer (multibytep
)
6095 struct buffer
*current
= current_buffer
;
6098 if (NILP (Vcode_conversion_work_buf_list
))
6100 if (NILP (Vcode_conversion_reused_work_buf
))
6101 Vcode_conversion_reused_work_buf
6102 = Fget_buffer_create (build_string (" *code-conversion-work*"));
6103 Vcode_conversion_work_buf_list
6104 = Fcons (Vcode_conversion_reused_work_buf
, Qnil
);
6108 int depth
= XINT (Flength (Vcode_conversion_work_buf_list
));
6111 sprintf (str
, " *code-conversion-work*<%d>", depth
);
6112 Vcode_conversion_work_buf_list
6113 = Fcons (Fget_buffer_create (build_string (str
)),
6114 Vcode_conversion_work_buf_list
);
6117 buf
= XCAR (Vcode_conversion_work_buf_list
);
6118 set_buffer_internal (XBUFFER (buf
));
6119 current_buffer
->undo_list
= Qt
;
6121 Fset_buffer_multibyte (multibytep
? Qt
: Qnil
, Qnil
);
6122 set_buffer_internal (current
);
6126 static struct coding_system
*saved_coding
;
6129 code_conversion_restore (info
)
6132 int depth
= XINT (Flength (Vcode_conversion_work_buf_list
));
6137 buf
= XCAR (Vcode_conversion_work_buf_list
);
6138 Vcode_conversion_work_buf_list
= XCDR (Vcode_conversion_work_buf_list
);
6139 if (depth
> 1 && !NILP (Fbuffer_live_p (buf
)))
6143 if (EQ (saved_coding
->dst_object
, Qt
)
6144 && saved_coding
->destination
)
6145 xfree (saved_coding
->destination
);
6147 return save_excursion_restore (info
);
6152 decode_coding_gap (coding
, chars
, bytes
)
6153 struct coding_system
*coding
;
6154 EMACS_INT chars
, bytes
;
6156 int count
= specpdl_ptr
- specpdl
;
6158 saved_coding
= coding
;
6159 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6161 coding
->src_object
= Fcurrent_buffer ();
6162 coding
->src_chars
= chars
;
6163 coding
->src_bytes
= bytes
;
6164 coding
->src_pos
= -chars
;
6165 coding
->src_pos_byte
= -bytes
;
6166 coding
->src_multibyte
= chars
< bytes
;
6167 coding
->dst_object
= coding
->src_object
;
6168 coding
->dst_pos
= PT
;
6169 coding
->dst_pos_byte
= PT_BYTE
;
6170 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
6171 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6173 if (CODING_REQUIRE_DETECTION (coding
))
6174 detect_coding (coding
);
6176 decode_coding (coding
);
6178 unbind_to (count
, Qnil
);
6179 return coding
->result
;
6183 encode_coding_gap (coding
, chars
, bytes
)
6184 struct coding_system
*coding
;
6185 EMACS_INT chars
, bytes
;
6187 int count
= specpdl_ptr
- specpdl
;
6190 saved_coding
= coding
;
6191 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6193 buffer
= Fcurrent_buffer ();
6194 coding
->src_object
= buffer
;
6195 coding
->src_chars
= chars
;
6196 coding
->src_bytes
= bytes
;
6197 coding
->src_pos
= -chars
;
6198 coding
->src_pos_byte
= -bytes
;
6199 coding
->src_multibyte
= chars
< bytes
;
6200 coding
->dst_object
= coding
->src_object
;
6201 coding
->dst_pos
= PT
;
6202 coding
->dst_pos_byte
= PT_BYTE
;
6204 encode_coding (coding
);
6206 unbind_to (count
, Qnil
);
6207 return coding
->result
;
6211 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6212 SRC_OBJECT into DST_OBJECT by coding context CODING.
6214 SRC_OBJECT is a buffer, a string, or Qnil.
6216 If it is a buffer, the text is at point of the buffer. FROM and TO
6217 are positions in the buffer.
6219 If it is a string, the text is at the beginning of the string.
6220 FROM and TO are indices to the string.
6222 If it is nil, the text is at coding->source. FROM and TO are
6223 indices to coding->source.
6225 DST_OBJECT is a buffer, Qt, or Qnil.
6227 If it is a buffer, the decoded text is inserted at point of the
6228 buffer. If the buffer is the same as SRC_OBJECT, the source text
6231 If it is Qt, a string is made from the decoded text, and
6232 set in CODING->dst_object.
6234 If it is Qnil, the decoded text is stored at CODING->destination.
6235 The called must allocate CODING->dst_bytes bytes at
6236 CODING->destination by xmalloc. If the decoded text is longer than
6237 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6241 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6243 struct coding_system
*coding
;
6244 Lisp_Object src_object
;
6245 EMACS_INT from
, from_byte
, to
, to_byte
;
6246 Lisp_Object dst_object
;
6248 int count
= specpdl_ptr
- specpdl
;
6249 unsigned char *destination
;
6250 EMACS_INT dst_bytes
;
6251 EMACS_INT chars
= to
- from
;
6252 EMACS_INT bytes
= to_byte
- from_byte
;
6255 saved_coding
= coding
;
6256 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6258 if (NILP (dst_object
))
6260 destination
= coding
->destination
;
6261 dst_bytes
= coding
->dst_bytes
;
6264 coding
->src_object
= src_object
;
6265 coding
->src_chars
= chars
;
6266 coding
->src_bytes
= bytes
;
6267 coding
->src_multibyte
= chars
< bytes
;
6269 if (STRINGP (src_object
))
6271 coding
->src_pos
= from
;
6272 coding
->src_pos_byte
= from_byte
;
6274 else if (BUFFERP (src_object
))
6276 set_buffer_internal (XBUFFER (src_object
));
6278 move_gap_both (from
, from_byte
);
6279 if (EQ (src_object
, dst_object
))
6281 TEMP_SET_PT_BOTH (from
, from_byte
);
6282 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6283 coding
->src_pos
= -chars
;
6284 coding
->src_pos_byte
= -bytes
;
6288 coding
->src_pos
= from
;
6289 coding
->src_pos_byte
= from_byte
;
6293 if (CODING_REQUIRE_DETECTION (coding
))
6294 detect_coding (coding
);
6295 attrs
= CODING_ID_ATTRS (coding
->id
);
6297 if (! NILP (CODING_ATTR_POST_READ (attrs
))
6298 || EQ (dst_object
, Qt
))
6300 coding
->dst_object
= make_conversion_work_buffer (1);
6301 coding
->dst_pos
= BEG
;
6302 coding
->dst_pos_byte
= BEG_BYTE
;
6303 coding
->dst_multibyte
= 1;
6305 else if (BUFFERP (dst_object
))
6307 coding
->dst_object
= dst_object
;
6308 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6309 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6310 coding
->dst_multibyte
6311 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6315 coding
->dst_object
= Qnil
;
6316 coding
->dst_multibyte
= 1;
6319 decode_coding (coding
);
6321 if (BUFFERP (coding
->dst_object
))
6322 set_buffer_internal (XBUFFER (coding
->dst_object
));
6324 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6326 struct gcpro gcpro1
, gcpro2
;
6327 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6330 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6331 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6332 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6333 make_number (coding
->produced_char
));
6336 coding
->produced_char
+= Z
- prev_Z
;
6337 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6340 if (EQ (dst_object
, Qt
))
6342 coding
->dst_object
= Fbuffer_string ();
6344 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6346 set_buffer_internal (XBUFFER (coding
->dst_object
));
6347 if (dst_bytes
< coding
->produced
)
6350 = (unsigned char *) xrealloc (destination
, coding
->produced
);
6353 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
6354 unbind_to (count
, Qnil
);
6357 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6358 move_gap_both (BEGV
, BEGV_BYTE
);
6359 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6360 coding
->destination
= destination
;
6364 unbind_to (count
, Qnil
);
6369 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6371 struct coding_system
*coding
;
6372 Lisp_Object src_object
;
6373 EMACS_INT from
, from_byte
, to
, to_byte
;
6374 Lisp_Object dst_object
;
6376 int count
= specpdl_ptr
- specpdl
;
6377 EMACS_INT chars
= to
- from
;
6378 EMACS_INT bytes
= to_byte
- from_byte
;
6381 saved_coding
= coding
;
6382 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6384 coding
->src_object
= src_object
;
6385 coding
->src_chars
= chars
;
6386 coding
->src_bytes
= bytes
;
6387 coding
->src_multibyte
= chars
< bytes
;
6389 attrs
= CODING_ID_ATTRS (coding
->id
);
6391 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6393 coding
->src_object
= make_conversion_work_buffer (coding
->src_multibyte
);
6394 set_buffer_internal (XBUFFER (coding
->src_object
));
6395 if (STRINGP (src_object
))
6396 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6397 else if (BUFFERP (src_object
))
6398 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6400 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
6402 if (EQ (src_object
, dst_object
))
6404 set_buffer_internal (XBUFFER (src_object
));
6405 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6406 set_buffer_internal (XBUFFER (coding
->src_object
));
6409 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6410 make_number (BEG
), make_number (Z
));
6411 coding
->src_object
= Fcurrent_buffer ();
6413 move_gap_both (BEG
, BEG_BYTE
);
6414 coding
->src_chars
= Z
- BEG
;
6415 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6416 coding
->src_pos
= BEG
;
6417 coding
->src_pos_byte
= BEG_BYTE
;
6418 coding
->src_multibyte
= Z
< Z_BYTE
;
6420 else if (STRINGP (src_object
))
6422 coding
->src_pos
= from
;
6423 coding
->src_pos_byte
= from_byte
;
6425 else if (BUFFERP (src_object
))
6427 set_buffer_internal (XBUFFER (src_object
));
6428 if (EQ (src_object
, dst_object
))
6430 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
6431 coding
->src_pos
= 0;
6432 coding
->src_pos_byte
= 0;
6436 if (from
< GPT
&& to
>= GPT
)
6437 move_gap_both (from
, from_byte
);
6438 coding
->src_pos
= from
;
6439 coding
->src_pos_byte
= from_byte
;
6443 if (BUFFERP (dst_object
))
6445 coding
->dst_object
= dst_object
;
6446 if (EQ (src_object
, dst_object
))
6448 coding
->dst_pos
= from
;
6449 coding
->dst_pos_byte
= from_byte
;
6453 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6454 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6456 coding
->dst_multibyte
6457 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6459 else if (EQ (dst_object
, Qt
))
6461 coding
->dst_object
= Qnil
;
6462 coding
->dst_bytes
= coding
->src_chars
;
6463 if (coding
->dst_bytes
== 0)
6464 coding
->dst_bytes
= 1;
6465 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6466 coding
->dst_multibyte
= 0;
6470 coding
->dst_object
= Qnil
;
6471 coding
->dst_multibyte
= 0;
6474 encode_coding (coding
);
6476 if (EQ (dst_object
, Qt
))
6478 if (BUFFERP (coding
->dst_object
))
6479 coding
->dst_object
= Fbuffer_string ();
6483 = make_unibyte_string ((char *) coding
->destination
,
6485 xfree (coding
->destination
);
6489 unbind_to (count
, Qnil
);
6494 preferred_coding_system ()
6496 int id
= coding_categories
[coding_priorities
[0]].id
;
6498 return CODING_ID_NAME (id
);
6503 /*** 8. Emacs Lisp library functions ***/
6505 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6506 doc
: /* Return t if OBJECT is nil or a coding-system.
6507 See the documentation of `define-coding-system' for information
6508 about coding-system objects. */)
6512 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
6515 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6516 Sread_non_nil_coding_system
, 1, 1, 0,
6517 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6524 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6525 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6527 while (XSTRING (val
)->size
== 0);
6528 return (Fintern (val
, Qnil
));
6531 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6532 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6533 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6534 (prompt
, default_coding_system
)
6535 Lisp_Object prompt
, default_coding_system
;
6538 if (SYMBOLP (default_coding_system
))
6539 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
6540 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6541 Qt
, Qnil
, Qcoding_system_history
,
6542 default_coding_system
, Qnil
);
6543 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
6546 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6548 doc
: /* Check validity of CODING-SYSTEM.
6549 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6551 Lisp_Object coding_system
;
6553 CHECK_SYMBOL (coding_system
);
6554 if (!NILP (Fcoding_system_p (coding_system
)))
6555 return coding_system
;
6557 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6561 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6562 HIGHEST is nonzero, return the coding system of the highest
6563 priority among the detected coding systems. Otherwize return a
6564 list of detected coding systems sorted by their priorities. If
6565 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6566 multibyte form but contains only ASCII and eight-bit chars.
6567 Otherwise, the bytes are raw bytes.
6569 CODING-SYSTEM controls the detection as below:
6571 If it is nil, detect both text-format and eol-format. If the
6572 text-format part of CODING-SYSTEM is already specified
6573 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6574 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6575 detect only text-format. */
6578 detect_coding_system (src
, src_bytes
, highest
, multibytep
, coding_system
)
6580 int src_bytes
, highest
;
6582 Lisp_Object coding_system
;
6584 unsigned char *src_end
= src
+ src_bytes
;
6585 int mask
= CATEGORY_MASK_ANY
;
6586 Lisp_Object attrs
, eol_type
;
6588 struct coding_system coding
;
6590 struct coding_detection_info detect_info
;
6592 if (NILP (coding_system
))
6593 coding_system
= Qundecided
;
6594 setup_coding_system (coding_system
, &coding
);
6595 attrs
= CODING_ID_ATTRS (coding
.id
);
6596 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6597 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
6599 coding
.source
= src
;
6600 coding
.src_bytes
= src_bytes
;
6601 coding
.src_multibyte
= multibytep
;
6602 coding
.consumed
= 0;
6603 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6605 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
6607 /* At first, detect text-format if necessary. */
6608 if (XINT (CODING_ATTR_CATEGORY (attrs
)) == coding_category_undecided
)
6610 enum coding_category category
;
6611 struct coding_system
*this;
6614 for (; src
< src_end
; src
++)
6618 || (c
< 0x20 && (c
== ISO_CODE_ESC
6620 || c
== ISO_CODE_SO
)))
6623 coding
.head_ascii
= src
- coding
.source
;
6626 for (i
= 0; i
< coding_category_raw_text
; i
++)
6628 category
= coding_priorities
[i
];
6629 this = coding_categories
+ category
;
6633 /* No coding system of this category is defined. */
6634 detect_info
.rejected
|= (1 << category
);
6636 else if (category
>= coding_category_raw_text
)
6638 else if (detect_info
.checked
& (1 << category
))
6641 && (detect_info
.found
& (1 << category
)))
6646 if ((*(this->detector
)) (&coding
, &detect_info
)
6648 && (detect_info
.found
& (1 << category
)))
6654 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
6656 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
6657 id
= coding_categories
[coding_category_raw_text
].id
;
6658 val
= Fcons (make_number (id
), Qnil
);
6660 else if (! detect_info
.rejected
&& ! detect_info
.found
)
6662 detect_info
.found
= CATEGORY_MASK_ANY
;
6663 id
= coding_categories
[coding_category_undecided
].id
;
6664 val
= Fcons (make_number (id
), Qnil
);
6668 if (detect_info
.found
)
6670 detect_info
.found
= 1 << category
;
6671 val
= Fcons (make_number (this->id
), Qnil
);
6674 for (i
= 0; i
< coding_category_raw_text
; i
++)
6675 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
6677 detect_info
.found
= 1 << coding_priorities
[i
];
6678 id
= coding_categories
[coding_priorities
[i
]].id
;
6679 val
= Fcons (make_number (id
), Qnil
);
6685 int mask
= detect_info
.rejected
| detect_info
.found
;
6689 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6691 category
= coding_priorities
[i
];
6692 if (! (mask
& (1 << category
)))
6694 found
|= 1 << category
;
6695 id
= coding_categories
[category
].id
;
6696 val
= Fcons (make_number (id
), val
);
6699 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6701 category
= coding_priorities
[i
];
6702 if (detect_info
.found
& (1 << category
))
6704 id
= coding_categories
[category
].id
;
6705 val
= Fcons (make_number (id
), val
);
6708 detect_info
.found
|= found
;
6713 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
6714 val
= Fcons (make_number (coding
.id
), Qnil
);
6717 /* Then, detect eol-format if necessary. */
6719 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
;
6722 if (VECTORP (eol_type
))
6724 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
6725 normal_eol
= detect_eol (coding
.source
, src_bytes
,
6726 coding_category_raw_text
);
6727 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
6728 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
6729 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
6730 coding_category_utf_16_be
);
6731 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
6732 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
6733 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
6734 coding_category_utf_16_le
);
6738 if (EQ (eol_type
, Qunix
))
6739 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
6740 else if (EQ (eol_type
, Qdos
))
6741 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
6743 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
6746 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
6748 enum coding_category category
;
6751 id
= XINT (XCAR (tail
));
6752 attrs
= CODING_ID_ATTRS (id
);
6753 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
6754 eol_type
= CODING_ID_EOL_TYPE (id
);
6755 if (VECTORP (eol_type
))
6757 if (category
== coding_category_utf_16_be
6758 || category
== coding_category_utf_16_be_nosig
)
6759 this_eol
= utf_16_be_eol
;
6760 else if (category
== coding_category_utf_16_le
6761 || category
== coding_category_utf_16_le_nosig
)
6762 this_eol
= utf_16_le_eol
;
6764 this_eol
= normal_eol
;
6766 if (this_eol
== EOL_SEEN_LF
)
6767 XSETCAR (tail
, AREF (eol_type
, 0));
6768 else if (this_eol
== EOL_SEEN_CRLF
)
6769 XSETCAR (tail
, AREF (eol_type
, 1));
6770 else if (this_eol
== EOL_SEEN_CR
)
6771 XSETCAR (tail
, AREF (eol_type
, 2));
6773 XSETCAR (tail
, CODING_ID_NAME (id
));
6776 XSETCAR (tail
, CODING_ID_NAME (id
));
6780 return (highest
? XCAR (val
) : val
);
6784 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
6786 doc
: /* Detect coding system of the text in the region between START and END.
6787 Return a list of possible coding systems ordered by priority.
6789 If only ASCII characters are found, it returns a list of single element
6790 `undecided' or its subsidiary coding system according to a detected
6793 If optional argument HIGHEST is non-nil, return the coding system of
6794 highest priority. */)
6795 (start
, end
, highest
)
6796 Lisp_Object start
, end
, highest
;
6799 int from_byte
, to_byte
;
6801 CHECK_NUMBER_COERCE_MARKER (start
);
6802 CHECK_NUMBER_COERCE_MARKER (end
);
6804 validate_region (&start
, &end
);
6805 from
= XINT (start
), to
= XINT (end
);
6806 from_byte
= CHAR_TO_BYTE (from
);
6807 to_byte
= CHAR_TO_BYTE (to
);
6809 if (from
< GPT
&& to
>= GPT
)
6810 move_gap_both (to
, to_byte
);
6812 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
6813 to_byte
- from_byte
,
6815 !NILP (current_buffer
6816 ->enable_multibyte_characters
),
6820 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
6822 doc
: /* Detect coding system of the text in STRING.
6823 Return a list of possible coding systems ordered by priority.
6825 If only ASCII characters are found, it returns a list of single element
6826 `undecided' or its subsidiary coding system according to a detected
6829 If optional argument HIGHEST is non-nil, return the coding system of
6830 highest priority. */)
6832 Lisp_Object string
, highest
;
6834 CHECK_STRING (string
);
6836 return detect_coding_system (XSTRING (string
)->data
,
6837 STRING_BYTES (XSTRING (string
)),
6839 STRING_MULTIBYTE (string
),
6845 char_encodable_p (c
, attrs
)
6850 struct charset
*charset
;
6852 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
6853 CONSP (tail
); tail
= XCDR (tail
))
6855 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
6856 if (CHAR_CHARSET_P (c
, charset
))
6859 return (! NILP (tail
));
6863 /* Return a list of coding systems that safely encode the text between
6864 START and END. If EXCLUDE is non-nil, it is a list of coding
6865 systems not to check. The returned list doesn't contain any such
6866 coding systems. In any case, if the text contains only ASCII or is
6867 unibyte, return t. */
6869 DEFUN ("find-coding-systems-region-internal",
6870 Ffind_coding_systems_region_internal
,
6871 Sfind_coding_systems_region_internal
, 2, 3, 0,
6872 doc
: /* Internal use only. */)
6873 (start
, end
, exclude
)
6874 Lisp_Object start
, end
, exclude
;
6876 Lisp_Object coding_attrs_list
, safe_codings
;
6877 EMACS_INT start_byte
, end_byte
;
6878 const unsigned char *p
, *pbeg
, *pend
;
6880 Lisp_Object tail
, elt
;
6882 if (STRINGP (start
))
6884 if (!STRING_MULTIBYTE (start
)
6885 || XSTRING (start
)->size
== STRING_BYTES (XSTRING (start
)))
6888 end_byte
= STRING_BYTES (XSTRING (start
));
6892 CHECK_NUMBER_COERCE_MARKER (start
);
6893 CHECK_NUMBER_COERCE_MARKER (end
);
6894 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6895 args_out_of_range (start
, end
);
6896 if (NILP (current_buffer
->enable_multibyte_characters
))
6898 start_byte
= CHAR_TO_BYTE (XINT (start
));
6899 end_byte
= CHAR_TO_BYTE (XINT (end
));
6900 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
6903 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
6905 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
6906 move_gap_both (XINT (start
), start_byte
);
6908 move_gap_both (XINT (end
), end_byte
);
6912 coding_attrs_list
= Qnil
;
6913 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6915 || NILP (Fmemq (XCAR (tail
), exclude
)))
6919 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
6920 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
6921 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
6922 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
6925 if (STRINGP (start
))
6926 p
= pbeg
= XSTRING (start
)->data
;
6928 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6929 pend
= p
+ (end_byte
- start_byte
);
6931 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
6932 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6936 if (ASCII_BYTE_P (*p
))
6940 c
= STRING_CHAR_ADVANCE (p
);
6942 charset_map_loaded
= 0;
6943 for (tail
= coding_attrs_list
; CONSP (tail
);)
6948 else if (char_encodable_p (c
, elt
))
6950 else if (CONSP (XCDR (tail
)))
6952 XSETCAR (tail
, XCAR (XCDR (tail
)));
6953 XSETCDR (tail
, XCDR (XCDR (tail
)));
6957 XSETCAR (tail
, Qnil
);
6961 if (charset_map_loaded
)
6963 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6965 if (STRINGP (start
))
6966 pbeg
= XSTRING (start
)->data
;
6968 pbeg
= BYTE_POS_ADDR (start_byte
);
6969 p
= pbeg
+ p_offset
;
6970 pend
= pbeg
+ pend_offset
;
6975 safe_codings
= Qnil
;
6976 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
6977 if (! NILP (XCAR (tail
)))
6978 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
6980 return safe_codings
;
6984 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
6985 Scheck_coding_systems_region
, 3, 3, 0,
6986 doc
: /* Check if the region is encodable by coding systems.
6988 START and END are buffer positions specifying the region.
6989 CODING-SYSTEM-LIST is a list of coding systems to check.
6991 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6992 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6993 whole region, POS0, POS1, ... are buffer positions where non-encodable
6994 characters are found.
6996 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6999 START may be a string. In that case, check if the string is
7000 encodable, and the value contains indices to the string instead of
7001 buffer positions. END is ignored. */)
7002 (start
, end
, coding_system_list
)
7003 Lisp_Object start
, end
, coding_system_list
;
7006 EMACS_INT start_byte
, end_byte
;
7008 const unsigned char *p
, *pbeg
, *pend
;
7010 Lisp_Object tail
, elt
;
7012 if (STRINGP (start
))
7014 if (!STRING_MULTIBYTE (start
)
7015 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
7018 end_byte
= STRING_BYTES (XSTRING (start
));
7023 CHECK_NUMBER_COERCE_MARKER (start
);
7024 CHECK_NUMBER_COERCE_MARKER (end
);
7025 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7026 args_out_of_range (start
, end
);
7027 if (NILP (current_buffer
->enable_multibyte_characters
))
7029 start_byte
= CHAR_TO_BYTE (XINT (start
));
7030 end_byte
= CHAR_TO_BYTE (XINT (end
));
7031 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
7034 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7036 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7037 move_gap_both (XINT (start
), start_byte
);
7039 move_gap_both (XINT (end
), end_byte
);
7045 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7048 list
= Fcons (Fcons (elt
, Fcons (AREF (CODING_SYSTEM_SPEC (elt
), 0),
7053 if (STRINGP (start
))
7054 p
= pbeg
= XSTRING (start
)->data
;
7056 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7057 pend
= p
+ (end_byte
- start_byte
);
7059 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7060 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7064 if (ASCII_BYTE_P (*p
))
7068 c
= STRING_CHAR_ADVANCE (p
);
7070 charset_map_loaded
= 0;
7071 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
7073 elt
= XCDR (XCAR (tail
));
7074 if (! char_encodable_p (c
, XCAR (elt
)))
7075 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
7077 if (charset_map_loaded
)
7079 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7081 if (STRINGP (start
))
7082 pbeg
= XSTRING (start
)->data
;
7084 pbeg
= BYTE_POS_ADDR (start_byte
);
7085 p
= pbeg
+ p_offset
;
7086 pend
= pbeg
+ pend_offset
;
7094 for (; CONSP (tail
); tail
= XCDR (tail
))
7097 if (CONSP (XCDR (XCDR (elt
))))
7098 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
7108 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7109 Lisp_Object start
, end
, coding_system
, dst_object
;
7110 int encodep
, norecord
;
7112 struct coding_system coding
;
7113 EMACS_INT from
, from_byte
, to
, to_byte
;
7114 Lisp_Object src_object
;
7116 CHECK_NUMBER_COERCE_MARKER (start
);
7117 CHECK_NUMBER_COERCE_MARKER (end
);
7118 if (NILP (coding_system
))
7119 coding_system
= Qno_conversion
;
7121 CHECK_CODING_SYSTEM (coding_system
);
7122 src_object
= Fcurrent_buffer ();
7123 if (NILP (dst_object
))
7124 dst_object
= src_object
;
7125 else if (! EQ (dst_object
, Qt
))
7126 CHECK_BUFFER (dst_object
);
7128 validate_region (&start
, &end
);
7129 from
= XFASTINT (start
);
7130 from_byte
= CHAR_TO_BYTE (from
);
7131 to
= XFASTINT (end
);
7132 to_byte
= CHAR_TO_BYTE (to
);
7134 setup_coding_system (coding_system
, &coding
);
7135 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7138 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7141 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7144 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7146 if (coding
.result
!= CODING_RESULT_SUCCESS
)
7147 error ("Code conversion error: %d", coding
.result
);
7149 return (BUFFERP (dst_object
)
7150 ? make_number (coding
.produced_char
)
7151 : coding
.dst_object
);
7155 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7156 3, 4, "r\nzCoding system: ",
7157 doc
: /* Decode the current region from the specified coding system.
7158 When called from a program, takes four arguments:
7159 START, END, CODING-SYSTEM, and DESTINATION.
7160 START and END are buffer positions.
7162 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7163 If nil, the region between START and END is replace by the decoded text.
7164 If buffer, the decoded text is inserted in the buffer.
7165 If t, the decoded text is returned.
7167 This function sets `last-coding-system-used' to the precise coding system
7168 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7169 not fully specified.)
7170 It returns the length of the decoded text. */)
7171 (start
, end
, coding_system
, destination
)
7172 Lisp_Object start
, end
, coding_system
, destination
;
7174 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
7177 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7178 3, 4, "r\nzCoding system: ",
7179 doc
: /* Encode the current region by specified coding system.
7180 When called from a program, takes three arguments:
7181 START, END, and CODING-SYSTEM. START and END are buffer positions.
7183 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7184 If nil, the region between START and END is replace by the encoded text.
7185 If buffer, the encoded text is inserted in the buffer.
7186 If t, the encoded text is returned.
7188 This function sets `last-coding-system-used' to the precise coding system
7189 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7190 not fully specified.)
7191 It returns the length of the encoded text. */)
7192 (start
, end
, coding_system
, destination
)
7193 Lisp_Object start
, end
, coding_system
, destination
;
7195 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
7199 code_convert_string (string
, coding_system
, dst_object
,
7200 encodep
, nocopy
, norecord
)
7201 Lisp_Object string
, coding_system
, dst_object
;
7202 int encodep
, nocopy
, norecord
;
7204 struct coding_system coding
;
7205 EMACS_INT chars
, bytes
;
7207 CHECK_STRING (string
);
7208 if (NILP (coding_system
))
7211 Vlast_coding_system_used
= Qno_conversion
;
7212 if (NILP (dst_object
))
7213 return (nocopy
? Fcopy_sequence (string
) : string
);
7216 if (NILP (coding_system
))
7217 coding_system
= Qno_conversion
;
7219 CHECK_CODING_SYSTEM (coding_system
);
7220 if (NILP (dst_object
))
7222 else if (! EQ (dst_object
, Qt
))
7223 CHECK_BUFFER (dst_object
);
7225 setup_coding_system (coding_system
, &coding
);
7226 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7227 chars
= XSTRING (string
)->size
;
7228 bytes
= STRING_BYTES (XSTRING (string
));
7230 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7232 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7234 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7236 if (coding
.result
!= CODING_RESULT_SUCCESS
)
7237 error ("Code conversion error: %d", coding
.result
);
7239 return (BUFFERP (dst_object
)
7240 ? make_number (coding
.produced_char
)
7241 : coding
.dst_object
);
7245 /* Encode or decode STRING according to CODING_SYSTEM.
7246 Do not set Vlast_coding_system_used.
7248 This function is called only from macros DECODE_FILE and
7249 ENCODE_FILE, thus we ignore character composition. */
7252 code_convert_string_norecord (string
, coding_system
, encodep
)
7253 Lisp_Object string
, coding_system
;
7256 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
7260 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
7262 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7264 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7265 if the decoding operation is trivial.
7267 Optional fourth arg BUFFER non-nil meant that the decoded text is
7268 inserted in BUFFER instead of returned as a string. In this case,
7269 the return value is BUFFER.
7271 This function sets `last-coding-system-used' to the precise coding system
7272 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7273 not fully specified. */)
7274 (string
, coding_system
, nocopy
, buffer
)
7275 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7277 return code_convert_string (string
, coding_system
, buffer
,
7278 0, ! NILP (nocopy
), 0);
7281 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
7283 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
7285 Optional third arg NOCOPY non-nil means it is OK to return STRING
7286 itself if the encoding operation is trivial.
7288 Optional fourth arg BUFFER non-nil meant that the encoded text is
7289 inserted in BUFFER instead of returned as a string. In this case,
7290 the return value is BUFFER.
7292 This function sets `last-coding-system-used' to the precise coding system
7293 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7294 not fully specified.) */)
7295 (string
, coding_system
, nocopy
, buffer
)
7296 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7298 return code_convert_string (string
, coding_system
, buffer
,
7299 1, ! NILP (nocopy
), 1);
7303 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
7304 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
7305 Return the corresponding character. */)
7309 Lisp_Object spec
, attrs
, val
;
7310 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
7313 CHECK_NATNUM (code
);
7314 c
= XFASTINT (code
);
7315 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7316 attrs
= AREF (spec
, 0);
7318 if (ASCII_BYTE_P (c
)
7319 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7322 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7323 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7324 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7325 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7328 charset
= charset_roman
;
7329 else if (c
>= 0xA0 && c
< 0xDF)
7331 charset
= charset_kana
;
7336 int s1
= c
>> 8, s2
= c
& 0xFF;
7338 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
7339 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
7340 error ("Invalid code: %d", code
);
7342 charset
= charset_kanji
;
7344 c
= DECODE_CHAR (charset
, c
);
7346 error ("Invalid code: %d", code
);
7347 return make_number (c
);
7351 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
7352 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
7353 Return the corresponding code in SJIS. */)
7357 Lisp_Object spec
, attrs
, charset_list
;
7359 struct charset
*charset
;
7362 CHECK_CHARACTER (ch
);
7364 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7365 attrs
= AREF (spec
, 0);
7367 if (ASCII_CHAR_P (c
)
7368 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7371 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7372 charset
= char_charset (c
, charset_list
, &code
);
7373 if (code
== CHARSET_INVALID_CODE (charset
))
7374 error ("Can't encode by shift_jis encoding: %d", c
);
7377 return make_number (code
);
7380 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
7381 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
7382 Return the corresponding character. */)
7386 Lisp_Object spec
, attrs
, val
;
7387 struct charset
*charset_roman
, *charset_big5
, *charset
;
7390 CHECK_NATNUM (code
);
7391 c
= XFASTINT (code
);
7392 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7393 attrs
= AREF (spec
, 0);
7395 if (ASCII_BYTE_P (c
)
7396 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7399 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7400 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7401 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7404 charset
= charset_roman
;
7407 int b1
= c
>> 8, b2
= c
& 0x7F;
7408 if (b1
< 0xA1 || b1
> 0xFE
7409 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
7410 error ("Invalid code: %d", code
);
7411 charset
= charset_big5
;
7413 c
= DECODE_CHAR (charset
, (unsigned )c
);
7415 error ("Invalid code: %d", code
);
7416 return make_number (c
);
7419 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
7420 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
7421 Return the corresponding character code in Big5. */)
7425 Lisp_Object spec
, attrs
, charset_list
;
7426 struct charset
*charset
;
7430 CHECK_CHARACTER (ch
);
7432 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7433 attrs
= AREF (spec
, 0);
7434 if (ASCII_CHAR_P (c
)
7435 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7438 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7439 charset
= char_charset (c
, charset_list
, &code
);
7440 if (code
== CHARSET_INVALID_CODE (charset
))
7441 error ("Can't encode by Big5 encoding: %d", c
);
7443 return make_number (code
);
7447 DEFUN ("set-terminal-coding-system-internal",
7448 Fset_terminal_coding_system_internal
,
7449 Sset_terminal_coding_system_internal
, 1, 1, 0,
7450 doc
: /* Internal use only. */)
7452 Lisp_Object coding_system
;
7454 CHECK_SYMBOL (coding_system
);
7455 setup_coding_system (Fcheck_coding_system (coding_system
),
7458 /* We had better not send unsafe characters to terminal. */
7459 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
7460 /* Characer composition should be disabled. */
7461 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7462 terminal_coding
.src_multibyte
= 1;
7463 terminal_coding
.dst_multibyte
= 0;
7467 DEFUN ("set-safe-terminal-coding-system-internal",
7468 Fset_safe_terminal_coding_system_internal
,
7469 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
7470 doc
: /* Internal use only. */)
7472 Lisp_Object coding_system
;
7474 CHECK_SYMBOL (coding_system
);
7475 setup_coding_system (Fcheck_coding_system (coding_system
),
7476 &safe_terminal_coding
);
7477 /* Characer composition should be disabled. */
7478 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7479 safe_terminal_coding
.src_multibyte
= 1;
7480 safe_terminal_coding
.dst_multibyte
= 0;
7484 DEFUN ("terminal-coding-system",
7485 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
7486 doc
: /* Return coding system specified for terminal output. */)
7489 return CODING_ID_NAME (terminal_coding
.id
);
7492 DEFUN ("set-keyboard-coding-system-internal",
7493 Fset_keyboard_coding_system_internal
,
7494 Sset_keyboard_coding_system_internal
, 1, 1, 0,
7495 doc
: /* Internal use only. */)
7497 Lisp_Object coding_system
;
7499 CHECK_SYMBOL (coding_system
);
7500 setup_coding_system (Fcheck_coding_system (coding_system
),
7502 /* Characer composition should be disabled. */
7503 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7507 DEFUN ("keyboard-coding-system",
7508 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
7509 doc
: /* Return coding system specified for decoding keyboard input. */)
7512 return CODING_ID_NAME (keyboard_coding
.id
);
7516 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7517 Sfind_operation_coding_system
, 1, MANY
, 0,
7518 doc
: /* Choose a coding system for an operation based on the target name.
7519 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7520 DECODING-SYSTEM is the coding system to use for decoding
7521 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7522 for encoding (in case OPERATION does encoding).
7524 The first argument OPERATION specifies an I/O primitive:
7525 For file I/O, `insert-file-contents' or `write-region'.
7526 For process I/O, `call-process', `call-process-region', or `start-process'.
7527 For network I/O, `open-network-stream'.
7529 The remaining arguments should be the same arguments that were passed
7530 to the primitive. Depending on which primitive, one of those arguments
7531 is selected as the TARGET. For example, if OPERATION does file I/O,
7532 whichever argument specifies the file name is TARGET.
7534 TARGET has a meaning which depends on OPERATION:
7535 For file I/O, TARGET is a file name.
7536 For process I/O, TARGET is a process name.
7537 For network I/O, TARGET is a service name or a port number
7539 This function looks up what specified for TARGET in,
7540 `file-coding-system-alist', `process-coding-system-alist',
7541 or `network-coding-system-alist' depending on OPERATION.
7542 They may specify a coding system, a cons of coding systems,
7543 or a function symbol to call.
7544 In the last case, we call the function with one argument,
7545 which is a list of all the arguments given to this function.
7547 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7552 Lisp_Object operation
, target_idx
, target
, val
;
7553 register Lisp_Object chain
;
7556 error ("Too few arguments");
7557 operation
= args
[0];
7558 if (!SYMBOLP (operation
)
7559 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7560 error ("Invalid first arguement");
7561 if (nargs
< 1 + XINT (target_idx
))
7562 error ("Too few arguments for operation: %s",
7563 XSYMBOL (operation
)->name
->data
);
7564 target
= args
[XINT (target_idx
) + 1];
7565 if (!(STRINGP (target
)
7566 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7567 error ("Invalid %dth argument", XINT (target_idx
) + 1);
7569 chain
= ((EQ (operation
, Qinsert_file_contents
)
7570 || EQ (operation
, Qwrite_region
))
7571 ? Vfile_coding_system_alist
7572 : (EQ (operation
, Qopen_network_stream
)
7573 ? Vnetwork_coding_system_alist
7574 : Vprocess_coding_system_alist
));
7578 for (; CONSP (chain
); chain
= XCDR (chain
))
7584 && ((STRINGP (target
)
7585 && STRINGP (XCAR (elt
))
7586 && fast_string_match (XCAR (elt
), target
) >= 0)
7587 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7590 /* Here, if VAL is both a valid coding system and a valid
7591 function symbol, we return VAL as a coding system. */
7594 if (! SYMBOLP (val
))
7596 if (! NILP (Fcoding_system_p (val
)))
7597 return Fcons (val
, val
);
7598 if (! NILP (Ffboundp (val
)))
7600 val
= call1 (val
, Flist (nargs
, args
));
7603 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
7604 return Fcons (val
, val
);
7612 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
7613 Sset_coding_system_priority
, 0, MANY
, 0,
7614 doc
: /* Assign higher priority to the coding systems given as arguments.
7615 usage: (set-coding-system-priority CODING-SYSTEM ...) */)
7621 int changed
[coding_category_max
];
7622 enum coding_category priorities
[coding_category_max
];
7624 bzero (changed
, sizeof changed
);
7626 for (i
= j
= 0; i
< nargs
; i
++)
7628 enum coding_category category
;
7629 Lisp_Object spec
, attrs
;
7631 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
7632 attrs
= AREF (spec
, 0);
7633 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7634 if (changed
[category
])
7635 /* Ignore this coding system because a coding system of the
7636 same category already had a higher priority. */
7638 changed
[category
] = 1;
7639 priorities
[j
++] = category
;
7640 if (coding_categories
[category
].id
>= 0
7641 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
7642 setup_coding_system (args
[i
], &coding_categories
[category
]);
7645 /* Now we have decided top J priorities. Reflect the order of the
7646 original priorities to the remaining priorities. */
7648 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
7650 while (j
< coding_category_max
7651 && changed
[coding_priorities
[j
]])
7653 if (j
== coding_category_max
)
7655 priorities
[i
] = coding_priorities
[j
];
7658 bcopy (priorities
, coding_priorities
, sizeof priorities
);
7662 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
7663 Scoding_system_priority_list
, 0, 1, 0,
7664 doc
: /* Return a list of coding systems ordered by their priorities.
7665 HIGHESTP non-nil means just return the highest priority one. */)
7667 Lisp_Object highestp
;
7672 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
7674 enum coding_category category
= coding_priorities
[i
];
7675 int id
= coding_categories
[category
].id
;
7680 attrs
= CODING_ID_ATTRS (id
);
7681 if (! NILP (highestp
))
7682 return CODING_ATTR_BASE_NAME (attrs
);
7683 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
7685 return Fnreverse (val
);
7688 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
7691 make_subsidiaries (base
)
7694 Lisp_Object subsidiaries
;
7695 int base_name_len
= STRING_BYTES (XSYMBOL (base
)->name
);
7696 char *buf
= (char *) alloca (base_name_len
+ 6);
7699 bcopy (XSYMBOL (base
)->name
->data
, buf
, base_name_len
);
7700 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
7701 for (i
= 0; i
< 3; i
++)
7703 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
7704 ASET (subsidiaries
, i
, intern (buf
));
7706 return subsidiaries
;
7710 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
7711 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
7712 doc
: /* For internal use only.
7713 usage: (define-coding-system-internal ...) */)
7719 Lisp_Object spec_vec
; /* [ ATTRS ALIASES EOL_TYPE ] */
7720 Lisp_Object attrs
; /* Vector of attributes. */
7721 Lisp_Object eol_type
;
7722 Lisp_Object aliases
;
7723 Lisp_Object coding_type
, charset_list
, safe_charsets
;
7724 enum coding_category category
;
7725 Lisp_Object tail
, val
;
7726 int max_charset_id
= 0;
7729 if (nargs
< coding_arg_max
)
7732 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
7734 name
= args
[coding_arg_name
];
7735 CHECK_SYMBOL (name
);
7736 CODING_ATTR_BASE_NAME (attrs
) = name
;
7738 val
= args
[coding_arg_mnemonic
];
7739 if (! STRINGP (val
))
7740 CHECK_CHARACTER (val
);
7741 CODING_ATTR_MNEMONIC (attrs
) = val
;
7743 coding_type
= args
[coding_arg_coding_type
];
7744 CHECK_SYMBOL (coding_type
);
7745 CODING_ATTR_TYPE (attrs
) = coding_type
;
7747 charset_list
= args
[coding_arg_charset_list
];
7748 if (SYMBOLP (charset_list
))
7750 if (EQ (charset_list
, Qiso_2022
))
7752 if (! EQ (coding_type
, Qiso_2022
))
7753 error ("Invalid charset-list");
7754 charset_list
= Viso_2022_charset_list
;
7756 else if (EQ (charset_list
, Qemacs_mule
))
7758 if (! EQ (coding_type
, Qemacs_mule
))
7759 error ("Invalid charset-list");
7760 charset_list
= Vemacs_mule_charset_list
;
7762 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7763 if (max_charset_id
< XFASTINT (XCAR (tail
)))
7764 max_charset_id
= XFASTINT (XCAR (tail
));
7768 charset_list
= Fcopy_sequence (charset_list
);
7769 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
7771 struct charset
*charset
;
7774 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7775 if (EQ (coding_type
, Qiso_2022
)
7776 ? CHARSET_ISO_FINAL (charset
) < 0
7777 : EQ (coding_type
, Qemacs_mule
)
7778 ? CHARSET_EMACS_MULE_ID (charset
) < 0
7780 error ("Can't handle charset `%s'",
7781 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7783 XCAR (tail
) = make_number (charset
->id
);
7784 if (max_charset_id
< charset
->id
)
7785 max_charset_id
= charset
->id
;
7788 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
7790 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
7792 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7793 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
7794 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
7796 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
7798 val
= args
[coding_arg_decode_translation_table
];
7800 CHECK_CHAR_TABLE (val
);
7801 CODING_ATTR_DECODE_TBL (attrs
) = val
;
7803 val
= args
[coding_arg_encode_translation_table
];
7805 CHECK_CHAR_TABLE (val
);
7806 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
7808 val
= args
[coding_arg_post_read_conversion
];
7810 CODING_ATTR_POST_READ (attrs
) = val
;
7812 val
= args
[coding_arg_pre_write_conversion
];
7814 CODING_ATTR_PRE_WRITE (attrs
) = val
;
7816 val
= args
[coding_arg_default_char
];
7818 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
7821 CHECK_CHARACTER (val
);
7822 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
7825 val
= args
[coding_arg_plist
];
7827 CODING_ATTR_PLIST (attrs
) = val
;
7829 if (EQ (coding_type
, Qcharset
))
7831 /* Generate a lisp vector of 256 elements. Each element is nil,
7832 integer, or a list of charset IDs.
7834 If Nth element is nil, the byte code N is invalid in this
7837 If Nth element is a number NUM, N is the first byte of a
7838 charset whose ID is NUM.
7840 If Nth element is a list of charset IDs, N is the first byte
7841 of one of them. The list is sorted by dimensions of the
7842 charsets. A charset of smaller dimension comes first.
7844 val
= Fmake_vector (make_number (256), Qnil
);
7846 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7848 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
7849 int dim
= CHARSET_DIMENSION (charset
);
7850 int idx
= (dim
- 1) * 4;
7852 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
7853 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
7855 for (i
= charset
->code_space
[idx
];
7856 i
<= charset
->code_space
[idx
+ 1]; i
++)
7858 Lisp_Object tmp
, tmp2
;
7861 tmp
= AREF (val
, i
);
7864 else if (NUMBERP (tmp
))
7866 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
7868 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
7870 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
7874 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
7876 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
7881 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
7884 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
7885 XSETCAR (tmp2
, XCAR (tail
));
7891 ASET (attrs
, coding_attr_charset_valids
, val
);
7892 category
= coding_category_charset
;
7894 else if (EQ (coding_type
, Qccl
))
7898 if (nargs
< coding_arg_ccl_max
)
7901 val
= args
[coding_arg_ccl_decoder
];
7902 CHECK_CCL_PROGRAM (val
);
7904 val
= Fcopy_sequence (val
);
7905 ASET (attrs
, coding_attr_ccl_decoder
, val
);
7907 val
= args
[coding_arg_ccl_encoder
];
7908 CHECK_CCL_PROGRAM (val
);
7910 val
= Fcopy_sequence (val
);
7911 ASET (attrs
, coding_attr_ccl_encoder
, val
);
7913 val
= args
[coding_arg_ccl_valids
];
7914 valids
= Fmake_string (make_number (256), make_number (0));
7915 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
7922 from
= to
= XINT (val
);
7923 if (from
< 0 || from
> 255)
7924 args_out_of_range_3 (val
, make_number (0), make_number (255));
7929 CHECK_NUMBER (XCAR (val
));
7930 CHECK_NUMBER (XCDR (val
));
7931 from
= XINT (XCAR (val
));
7932 if (from
< 0 || from
> 255)
7933 args_out_of_range_3 (XCAR (val
),
7934 make_number (0), make_number (255));
7935 to
= XINT (XCDR (val
));
7936 if (to
< from
|| to
> 255)
7937 args_out_of_range_3 (XCDR (val
),
7938 XCAR (val
), make_number (255));
7940 for (i
= from
; i
<= to
; i
++)
7941 XSTRING (valids
)->data
[i
] = 1;
7943 ASET (attrs
, coding_attr_ccl_valids
, valids
);
7945 category
= coding_category_ccl
;
7947 else if (EQ (coding_type
, Qutf_16
))
7949 Lisp_Object bom
, endian
;
7951 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
7953 if (nargs
< coding_arg_utf16_max
)
7956 bom
= args
[coding_arg_utf16_bom
];
7957 if (! NILP (bom
) && ! EQ (bom
, Qt
))
7960 CHECK_CODING_SYSTEM (XCAR (bom
));
7961 CHECK_CODING_SYSTEM (XCDR (bom
));
7963 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
7965 endian
= args
[coding_arg_utf16_endian
];
7966 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
7968 category
= (CONSP (bom
)
7969 ? coding_category_utf_16_auto
7972 ? coding_category_utf_16_be_nosig
7973 : coding_category_utf_16_le_nosig
)
7975 ? coding_category_utf_16_be
7976 : coding_category_utf_16_le
));
7978 else if (EQ (coding_type
, Qiso_2022
))
7980 Lisp_Object initial
, reg_usage
, request
, flags
;
7983 if (nargs
< coding_arg_iso2022_max
)
7986 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
7987 CHECK_VECTOR (initial
);
7988 for (i
= 0; i
< 4; i
++)
7990 val
= Faref (initial
, make_number (i
));
7993 struct charset
*charset
;
7995 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7996 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
7997 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
7998 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8001 ASET (initial
, i
, make_number (-1));
8004 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
8005 CHECK_CONS (reg_usage
);
8006 CHECK_NATNUM (XCAR (reg_usage
));
8007 CHECK_NATNUM (XCDR (reg_usage
));
8009 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8010 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8016 CHECK_CHARSET_GET_ID (XCAR (val
), id
);
8017 CHECK_NATNUM (XCDR (val
));
8018 if (XINT (XCDR (val
)) >= 4)
8019 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8020 XCAR (val
) = make_number (id
);
8023 flags
= args
[coding_arg_iso2022_flags
];
8024 CHECK_NATNUM (flags
);
8026 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8027 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8029 ASET (attrs
, coding_attr_iso_initial
, initial
);
8030 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8031 ASET (attrs
, coding_attr_iso_request
, request
);
8032 ASET (attrs
, coding_attr_iso_flags
, flags
);
8033 setup_iso_safe_charsets (attrs
);
8035 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8036 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8037 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8038 ? coding_category_iso_7_else
8039 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8040 ? coding_category_iso_7
8041 : coding_category_iso_7_tight
);
8044 int id
= XINT (AREF (initial
, 1));
8046 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8047 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8049 ? coding_category_iso_8_else
8050 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8051 ? coding_category_iso_8_1
8052 : coding_category_iso_8_2
);
8054 if (category
!= coding_category_iso_8_1
8055 && category
!= coding_category_iso_8_2
)
8056 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8058 else if (EQ (coding_type
, Qemacs_mule
))
8060 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8061 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8062 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8063 category
= coding_category_emacs_mule
;
8065 else if (EQ (coding_type
, Qshift_jis
))
8068 struct charset
*charset
;
8070 if (XINT (Flength (charset_list
)) != 3)
8071 error ("There should be just three charsets");
8073 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8074 if (CHARSET_DIMENSION (charset
) != 1)
8075 error ("Dimension of charset %s is not one",
8076 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8077 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8078 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8080 charset_list
= XCDR (charset_list
);
8081 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8082 if (CHARSET_DIMENSION (charset
) != 1)
8083 error ("Dimension of charset %s is not one",
8084 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8086 charset_list
= XCDR (charset_list
);
8087 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8088 if (CHARSET_DIMENSION (charset
) != 2)
8089 error ("Dimension of charset %s is not two",
8090 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8092 category
= coding_category_sjis
;
8093 Vsjis_coding_system
= name
;
8095 else if (EQ (coding_type
, Qbig5
))
8097 struct charset
*charset
;
8099 if (XINT (Flength (charset_list
)) != 2)
8100 error ("There should be just two charsets");
8102 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8103 if (CHARSET_DIMENSION (charset
) != 1)
8104 error ("Dimension of charset %s is not one",
8105 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8106 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8107 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8109 charset_list
= XCDR (charset_list
);
8110 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8111 if (CHARSET_DIMENSION (charset
) != 2)
8112 error ("Dimension of charset %s is not two",
8113 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8115 category
= coding_category_big5
;
8116 Vbig5_coding_system
= name
;
8118 else if (EQ (coding_type
, Qraw_text
))
8120 category
= coding_category_raw_text
;
8121 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8123 else if (EQ (coding_type
, Qutf_8
))
8125 category
= coding_category_utf_8
;
8126 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8128 else if (EQ (coding_type
, Qundecided
))
8129 category
= coding_category_undecided
;
8131 error ("Invalid coding system type: %s",
8132 XSYMBOL (coding_type
)->name
->data
);
8134 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8136 eol_type
= args
[coding_arg_eol_type
];
8137 if (! NILP (eol_type
)
8138 && ! EQ (eol_type
, Qunix
)
8139 && ! EQ (eol_type
, Qdos
)
8140 && ! EQ (eol_type
, Qmac
))
8141 error ("Invalid eol-type");
8143 aliases
= Fcons (name
, Qnil
);
8145 if (NILP (eol_type
))
8147 eol_type
= make_subsidiaries (name
);
8148 for (i
= 0; i
< 3; i
++)
8150 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8152 this_name
= AREF (eol_type
, i
);
8153 this_aliases
= Fcons (this_name
, Qnil
);
8154 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
8155 this_spec
= Fmake_vector (make_number (3), attrs
);
8156 ASET (this_spec
, 1, this_aliases
);
8157 ASET (this_spec
, 2, this_eol_type
);
8158 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8159 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8160 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8161 Vcoding_system_alist
);
8165 spec_vec
= Fmake_vector (make_number (3), attrs
);
8166 ASET (spec_vec
, 1, aliases
);
8167 ASET (spec_vec
, 2, eol_type
);
8169 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
8170 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
8171 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
8172 Vcoding_system_alist
);
8175 int id
= coding_categories
[category
].id
;
8177 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
8178 setup_coding_system (name
, &coding_categories
[category
]);
8184 return Fsignal (Qwrong_number_of_arguments
,
8185 Fcons (intern ("define-coding-system-internal"),
8186 make_number (nargs
)));
8189 /* Fixme: should this record the alias relationships for
8190 diagnostics? Should it update coding-system-list? */
8191 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
8192 Sdefine_coding_system_alias
, 2, 2, 0,
8193 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8194 (alias
, coding_system
)
8195 Lisp_Object alias
, coding_system
;
8197 Lisp_Object spec
, aliases
, eol_type
;
8199 CHECK_SYMBOL (alias
);
8200 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8201 aliases
= AREF (spec
, 1);
8202 while (!NILP (XCDR (aliases
)))
8203 aliases
= XCDR (aliases
);
8204 XCDR (aliases
) = Fcons (alias
, Qnil
);
8206 eol_type
= AREF (spec
, 2);
8207 if (VECTORP (eol_type
))
8209 Lisp_Object subsidiaries
;
8212 subsidiaries
= make_subsidiaries (alias
);
8213 for (i
= 0; i
< 3; i
++)
8214 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
8215 AREF (eol_type
, i
));
8217 ASET (spec
, 2, subsidiaries
);
8220 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
8221 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
8222 Vcoding_system_alist
);
8227 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
8229 doc
: /* Return the base of CODING-SYSTEM.
8230 Any alias or subsidiary coding system is not a base coding system. */)
8232 Lisp_Object coding_system
;
8234 Lisp_Object spec
, attrs
;
8236 if (NILP (coding_system
))
8237 return (Qno_conversion
);
8238 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8239 attrs
= AREF (spec
, 0);
8240 return CODING_ATTR_BASE_NAME (attrs
);
8243 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
8245 doc
: "Return the property list of CODING-SYSTEM.")
8247 Lisp_Object coding_system
;
8249 Lisp_Object spec
, attrs
;
8251 if (NILP (coding_system
))
8252 coding_system
= Qno_conversion
;
8253 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8254 attrs
= AREF (spec
, 0);
8255 return CODING_ATTR_PLIST (attrs
);
8259 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
8261 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
8263 Lisp_Object coding_system
;
8267 if (NILP (coding_system
))
8268 coding_system
= Qno_conversion
;
8269 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8270 return AREF (spec
, 1);
8273 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
8274 Scoding_system_eol_type
, 1, 1, 0,
8275 doc
: /* Return eol-type of CODING-SYSTEM.
8276 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8278 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8279 and CR respectively.
8281 A vector value indicates that a format of end-of-line should be
8282 detected automatically. Nth element of the vector is the subsidiary
8283 coding system whose eol-type is N. */)
8285 Lisp_Object coding_system
;
8287 Lisp_Object spec
, eol_type
;
8290 if (NILP (coding_system
))
8291 coding_system
= Qno_conversion
;
8292 if (! CODING_SYSTEM_P (coding_system
))
8294 spec
= CODING_SYSTEM_SPEC (coding_system
);
8295 eol_type
= AREF (spec
, 2);
8296 if (VECTORP (eol_type
))
8297 return Fcopy_sequence (eol_type
);
8298 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
8299 return make_number (n
);
8305 /*** 9. Post-amble ***/
8312 for (i
= 0; i
< coding_category_max
; i
++)
8314 coding_categories
[i
].id
= -1;
8315 coding_priorities
[i
] = i
;
8318 /* ISO2022 specific initialize routine. */
8319 for (i
= 0; i
< 0x20; i
++)
8320 iso_code_class
[i
] = ISO_control_0
;
8321 for (i
= 0x21; i
< 0x7F; i
++)
8322 iso_code_class
[i
] = ISO_graphic_plane_0
;
8323 for (i
= 0x80; i
< 0xA0; i
++)
8324 iso_code_class
[i
] = ISO_control_1
;
8325 for (i
= 0xA1; i
< 0xFF; i
++)
8326 iso_code_class
[i
] = ISO_graphic_plane_1
;
8327 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
8328 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
8329 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
8330 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
8331 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
8332 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
8333 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
8334 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
8335 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
8336 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
8338 inhibit_pre_post_conversion
= 0;
8340 for (i
= 0; i
< 256; i
++)
8342 emacs_mule_bytes
[i
] = 1;
8344 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
8345 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
8346 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
8347 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
8355 staticpro (&Vcoding_system_hash_table
);
8356 Vcoding_system_hash_table
= Fmakehash (Qeq
);
8358 staticpro (&Vsjis_coding_system
);
8359 Vsjis_coding_system
= Qnil
;
8361 staticpro (&Vbig5_coding_system
);
8362 Vbig5_coding_system
= Qnil
;
8364 staticpro (&Vcode_conversion_work_buf_list
);
8365 Vcode_conversion_work_buf_list
= Qnil
;
8367 staticpro (&Vcode_conversion_reused_work_buf
);
8368 Vcode_conversion_reused_work_buf
= Qnil
;
8370 DEFSYM (Qcharset
, "charset");
8371 DEFSYM (Qtarget_idx
, "target-idx");
8372 DEFSYM (Qcoding_system_history
, "coding-system-history");
8373 Fset (Qcoding_system_history
, Qnil
);
8375 /* Target FILENAME is the first argument. */
8376 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
8377 /* Target FILENAME is the third argument. */
8378 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
8380 DEFSYM (Qcall_process
, "call-process");
8381 /* Target PROGRAM is the first argument. */
8382 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
8384 DEFSYM (Qcall_process_region
, "call-process-region");
8385 /* Target PROGRAM is the third argument. */
8386 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
8388 DEFSYM (Qstart_process
, "start-process");
8389 /* Target PROGRAM is the third argument. */
8390 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
8392 DEFSYM (Qopen_network_stream
, "open-network-stream");
8393 /* Target SERVICE is the fourth argument. */
8394 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
8396 DEFSYM (Qcoding_system
, "coding-system");
8397 DEFSYM (Qcoding_aliases
, "coding-aliases");
8399 DEFSYM (Qeol_type
, "eol-type");
8400 DEFSYM (Qunix
, "unix");
8401 DEFSYM (Qdos
, "dos");
8403 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
8404 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
8405 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
8406 DEFSYM (Qdefault_char
, "default-char");
8407 DEFSYM (Qundecided
, "undecided");
8408 DEFSYM (Qno_conversion
, "no-conversion");
8409 DEFSYM (Qraw_text
, "raw-text");
8411 DEFSYM (Qiso_2022
, "iso-2022");
8413 DEFSYM (Qutf_8
, "utf-8");
8415 DEFSYM (Qutf_16
, "utf-16");
8416 DEFSYM (Qutf_16_be
, "utf-16-be");
8417 DEFSYM (Qutf_16_be_nosig
, "utf-16-be-nosig");
8418 DEFSYM (Qutf_16_le
, "utf-16-l3");
8419 DEFSYM (Qutf_16_le_nosig
, "utf-16-le-nosig");
8420 DEFSYM (Qsignature
, "signature");
8421 DEFSYM (Qendian
, "endian");
8422 DEFSYM (Qbig
, "big");
8423 DEFSYM (Qlittle
, "little");
8425 DEFSYM (Qshift_jis
, "shift-jis");
8426 DEFSYM (Qbig5
, "big5");
8428 DEFSYM (Qcoding_system_p
, "coding-system-p");
8430 DEFSYM (Qcoding_system_error
, "coding-system-error");
8431 Fput (Qcoding_system_error
, Qerror_conditions
,
8432 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
8433 Fput (Qcoding_system_error
, Qerror_message
,
8434 build_string ("Invalid coding system"));
8436 /* Intern this now in case it isn't already done.
8437 Setting this variable twice is harmless.
8438 But don't staticpro it here--that is done in alloc.c. */
8439 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
8441 DEFSYM (Qtranslation_table
, "translation-table");
8442 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
8443 DEFSYM (Qtranslation_table_id
, "translation-table-id");
8444 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
8445 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
8447 DEFSYM (Qvalid_codes
, "valid-codes");
8449 DEFSYM (Qemacs_mule
, "emacs-mule");
8451 Vcoding_category_table
8452 = Fmake_vector (make_number (coding_category_max
), Qnil
);
8453 staticpro (&Vcoding_category_table
);
8454 /* The following are targets of code detection. */
8455 ASET (Vcoding_category_table
, coding_category_iso_7
,
8456 intern ("coding-category-iso-7"));
8457 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
8458 intern ("coding-category-iso-7-tight"));
8459 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
8460 intern ("coding-category-iso-8-1"));
8461 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
8462 intern ("coding-category-iso-8-2"));
8463 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
8464 intern ("coding-category-iso-7-else"));
8465 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
8466 intern ("coding-category-iso-8-else"));
8467 ASET (Vcoding_category_table
, coding_category_utf_8
,
8468 intern ("coding-category-utf-8"));
8469 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
8470 intern ("coding-category-utf-16-be"));
8471 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
8472 intern ("coding-category-utf-16-le"));
8473 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
8474 intern ("coding-category-utf-16-be-nosig"));
8475 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
8476 intern ("coding-category-utf-16-le-nosig"));
8477 ASET (Vcoding_category_table
, coding_category_charset
,
8478 intern ("coding-category-charset"));
8479 ASET (Vcoding_category_table
, coding_category_sjis
,
8480 intern ("coding-category-sjis"));
8481 ASET (Vcoding_category_table
, coding_category_big5
,
8482 intern ("coding-category-big5"));
8483 ASET (Vcoding_category_table
, coding_category_ccl
,
8484 intern ("coding-category-ccl"));
8485 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
8486 intern ("coding-category-emacs-mule"));
8487 /* The following are NOT targets of code detection. */
8488 ASET (Vcoding_category_table
, coding_category_raw_text
,
8489 intern ("coding-category-raw-text"));
8490 ASET (Vcoding_category_table
, coding_category_undecided
,
8491 intern ("coding-category-undecided"));
8493 defsubr (&Scoding_system_p
);
8494 defsubr (&Sread_coding_system
);
8495 defsubr (&Sread_non_nil_coding_system
);
8496 defsubr (&Scheck_coding_system
);
8497 defsubr (&Sdetect_coding_region
);
8498 defsubr (&Sdetect_coding_string
);
8499 defsubr (&Sfind_coding_systems_region_internal
);
8500 defsubr (&Scheck_coding_systems_region
);
8501 defsubr (&Sdecode_coding_region
);
8502 defsubr (&Sencode_coding_region
);
8503 defsubr (&Sdecode_coding_string
);
8504 defsubr (&Sencode_coding_string
);
8505 defsubr (&Sdecode_sjis_char
);
8506 defsubr (&Sencode_sjis_char
);
8507 defsubr (&Sdecode_big5_char
);
8508 defsubr (&Sencode_big5_char
);
8509 defsubr (&Sset_terminal_coding_system_internal
);
8510 defsubr (&Sset_safe_terminal_coding_system_internal
);
8511 defsubr (&Sterminal_coding_system
);
8512 defsubr (&Sset_keyboard_coding_system_internal
);
8513 defsubr (&Skeyboard_coding_system
);
8514 defsubr (&Sfind_operation_coding_system
);
8515 defsubr (&Sset_coding_system_priority
);
8516 defsubr (&Sdefine_coding_system_internal
);
8517 defsubr (&Sdefine_coding_system_alias
);
8518 defsubr (&Scoding_system_base
);
8519 defsubr (&Scoding_system_plist
);
8520 defsubr (&Scoding_system_aliases
);
8521 defsubr (&Scoding_system_eol_type
);
8522 defsubr (&Scoding_system_priority_list
);
8524 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
8525 doc
: /* List of coding systems.
8527 Do not alter the value of this variable manually. This variable should be
8528 updated by the functions `define-coding-system' and
8529 `define-coding-system-alias'. */);
8530 Vcoding_system_list
= Qnil
;
8532 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
8533 doc
: /* Alist of coding system names.
8534 Each element is a one-element list containing a coding system name.
8535 This variable is given to `completing-read' as TABLE argument.
8537 Do not alter the value of this variable manually. This variable should be
8538 updated by the functions `make-coding-system' and
8539 `define-coding-system-alias'. */);
8540 Vcoding_system_alist
= Qnil
;
8542 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
8543 doc
: /* List of coding-categories (symbols) ordered by priority.
8545 On detecting a coding system, Emacs tries code detection algorithms
8546 associated with each coding-category one by one in this order. When
8547 one algorithm agrees with a byte sequence of source text, the coding
8548 system bound to the corresponding coding-category is selected. */);
8552 Vcoding_category_list
= Qnil
;
8553 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8554 Vcoding_category_list
8555 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
8556 Vcoding_category_list
);
8559 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
8560 doc
: /* Specify the coding system for read operations.
8561 It is useful to bind this variable with `let', but do not set it globally.
8562 If the value is a coding system, it is used for decoding on read operation.
8563 If not, an appropriate element is used from one of the coding system alists:
8564 There are three such tables, `file-coding-system-alist',
8565 `process-coding-system-alist', and `network-coding-system-alist'. */);
8566 Vcoding_system_for_read
= Qnil
;
8568 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
8569 doc
: /* Specify the coding system for write operations.
8570 Programs bind this variable with `let', but you should not set it globally.
8571 If the value is a coding system, it is used for encoding of output,
8572 when writing it to a file and when sending it to a file or subprocess.
8574 If this does not specify a coding system, an appropriate element
8575 is used from one of the coding system alists:
8576 There are three such tables, `file-coding-system-alist',
8577 `process-coding-system-alist', and `network-coding-system-alist'.
8578 For output to files, if the above procedure does not specify a coding system,
8579 the value of `buffer-file-coding-system' is used. */);
8580 Vcoding_system_for_write
= Qnil
;
8582 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
8584 Coding system used in the latest file or process I/O. */);
8585 Vlast_coding_system_used
= Qnil
;
8587 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
8589 *Non-nil means always inhibit code conversion of end-of-line format.
8590 See info node `Coding Systems' and info node `Text and Binary' concerning
8591 such conversion. */);
8592 inhibit_eol_conversion
= 0;
8594 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
8596 Non-nil means process buffer inherits coding system of process output.
8597 Bind it to t if the process output is to be treated as if it were a file
8598 read from some filesystem. */);
8599 inherit_process_coding_system
= 0;
8601 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
8603 Alist to decide a coding system to use for a file I/O operation.
8604 The format is ((PATTERN . VAL) ...),
8605 where PATTERN is a regular expression matching a file name,
8606 VAL is a coding system, a cons of coding systems, or a function symbol.
8607 If VAL is a coding system, it is used for both decoding and encoding
8609 If VAL is a cons of coding systems, the car part is used for decoding,
8610 and the cdr part is used for encoding.
8611 If VAL is a function symbol, the function must return a coding system
8612 or a cons of coding systems which are used as above. The function gets
8613 the arguments with which `find-operation-coding-system' was called.
8615 See also the function `find-operation-coding-system'
8616 and the variable `auto-coding-alist'. */);
8617 Vfile_coding_system_alist
= Qnil
;
8619 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
8621 Alist to decide a coding system to use for a process I/O operation.
8622 The format is ((PATTERN . VAL) ...),
8623 where PATTERN is a regular expression matching a program name,
8624 VAL is a coding system, a cons of coding systems, or a function symbol.
8625 If VAL is a coding system, it is used for both decoding what is received
8626 from the program and encoding what is sent to the program.
8627 If VAL is a cons of coding systems, the car part is used for decoding,
8628 and the cdr part is used for encoding.
8629 If VAL is a function symbol, the function must return a coding system
8630 or a cons of coding systems which are used as above.
8632 See also the function `find-operation-coding-system'. */);
8633 Vprocess_coding_system_alist
= Qnil
;
8635 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
8637 Alist to decide a coding system to use for a network I/O operation.
8638 The format is ((PATTERN . VAL) ...),
8639 where PATTERN is a regular expression matching a network service name
8640 or is a port number to connect to,
8641 VAL is a coding system, a cons of coding systems, or a function symbol.
8642 If VAL is a coding system, it is used for both decoding what is received
8643 from the network stream and encoding what is sent to the network stream.
8644 If VAL is a cons of coding systems, the car part is used for decoding,
8645 and the cdr part is used for encoding.
8646 If VAL is a function symbol, the function must return a coding system
8647 or a cons of coding systems which are used as above.
8649 See also the function `find-operation-coding-system'. */);
8650 Vnetwork_coding_system_alist
= Qnil
;
8652 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
8653 doc
: /* Coding system to use with system messages.
8654 Also used for decoding keyboard input on X Window system. */);
8655 Vlocale_coding_system
= Qnil
;
8657 /* The eol mnemonics are reset in startup.el system-dependently. */
8658 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
8660 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8661 eol_mnemonic_unix
= build_string (":");
8663 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
8665 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8666 eol_mnemonic_dos
= build_string ("\\");
8668 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
8670 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8671 eol_mnemonic_mac
= build_string ("/");
8673 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
8675 *String displayed in mode line when end-of-line format is not yet determined. */);
8676 eol_mnemonic_undecided
= build_string (":");
8678 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
8680 *Non-nil enables character translation while encoding and decoding. */);
8681 Venable_character_translation
= Qt
;
8683 DEFVAR_LISP ("standard-translation-table-for-decode",
8684 &Vstandard_translation_table_for_decode
,
8685 doc
: /* Table for translating characters while decoding. */);
8686 Vstandard_translation_table_for_decode
= Qnil
;
8688 DEFVAR_LISP ("standard-translation-table-for-encode",
8689 &Vstandard_translation_table_for_encode
,
8690 doc
: /* Table for translating characters while encoding. */);
8691 Vstandard_translation_table_for_encode
= Qnil
;
8693 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
8694 doc
: /* Alist of charsets vs revision numbers.
8695 While encoding, if a charset (car part of an element) is found,
8696 designate it with the escape sequence identifying revision (cdr part
8697 of the element). */);
8698 Vcharset_revision_table
= Qnil
;
8700 DEFVAR_LISP ("default-process-coding-system",
8701 &Vdefault_process_coding_system
,
8702 doc
: /* Cons of coding systems used for process I/O by default.
8703 The car part is used for decoding a process output,
8704 the cdr part is used for encoding a text to be sent to a process. */);
8705 Vdefault_process_coding_system
= Qnil
;
8707 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
8709 Table of extra Latin codes in the range 128..159 (inclusive).
8710 This is a vector of length 256.
8711 If Nth element is non-nil, the existence of code N in a file
8712 \(or output of subprocess) doesn't prevent it from being detected as
8713 a coding system of ISO 2022 variant which has a flag
8714 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8715 or reading output of a subprocess.
8716 Only the 128th through 159th elements have a meaning. */);
8717 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
8719 DEFVAR_LISP ("select-safe-coding-system-function",
8720 &Vselect_safe_coding_system_function
,
8722 Function to call to select safe coding system for encoding a text.
8724 If set, this function is called to force a user to select a proper
8725 coding system which can encode the text in the case that a default
8726 coding system used in each operation can't encode the text.
8728 The default value is `select-safe-coding-system' (which see). */);
8729 Vselect_safe_coding_system_function
= Qnil
;
8731 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8732 &inhibit_iso_escape_detection
,
8734 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8736 By default, on reading a file, Emacs tries to detect how the text is
8737 encoded. This code detection is sensitive to escape sequences. If
8738 the sequence is valid as ISO2022, the code is determined as one of
8739 the ISO2022 encodings, and the file is decoded by the corresponding
8740 coding system (e.g. `iso-2022-7bit').
8742 However, there may be a case that you want to read escape sequences in
8743 a file as is. In such a case, you can set this variable to non-nil.
8744 Then, as the code detection ignores any escape sequences, no file is
8745 detected as encoded in some ISO2022 encoding. The result is that all
8746 escape sequences become visible in a buffer.
8748 The default value is nil, and it is strongly recommended not to change
8749 it. That is because many Emacs Lisp source files that contain
8750 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8751 in Emacs's distribution, and they won't be decoded correctly on
8752 reading if you suppress escape sequence detection.
8754 The other way to read escape sequences in a file without decoding is
8755 to explicitly specify some coding system that doesn't use ISO2022's
8756 escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */);
8757 inhibit_iso_escape_detection
= 0;
8760 Lisp_Object args
[coding_arg_max
];
8761 Lisp_Object plist
[14];
8764 for (i
= 0; i
< coding_arg_max
; i
++)
8767 plist
[0] = intern (":name");
8768 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
8769 plist
[2] = intern (":mnemonic");
8770 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
8771 plist
[4] = intern (":coding-type");
8772 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
8773 plist
[6] = intern (":ascii-compatible-p");
8774 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
8775 plist
[8] = intern (":default-char");
8776 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
8777 plist
[10] = intern (":docstring");
8778 plist
[11] = build_string ("Do no conversion.\n\
8780 When you visit a file with this coding, the file is read into a\n\
8781 unibyte buffer as is, thus each byte of a file is treated as a\n\
8783 plist
[12] = intern (":eol-type");
8784 plist
[13] = args
[coding_arg_eol_type
] = Qunix
;
8785 args
[coding_arg_plist
] = Flist (14, plist
);
8786 Fdefine_coding_system_internal (coding_arg_max
, args
);
8789 setup_coding_system (Qno_conversion
, &keyboard_coding
);
8790 setup_coding_system (Qno_conversion
, &terminal_coding
);
8791 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
8795 emacs_strerror (error_number
)
8800 synchronize_system_messages_locale ();
8801 str
= strerror (error_number
);
8803 if (! NILP (Vlocale_coding_system
))
8805 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
8806 Vlocale_coding_system
,
8808 str
= (char *) XSTRING (dec
)->data
;