1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
152 Below is the template of these functions. */
/* Template showing the shape shared by the detect_coding_XXX ()
   functions: scan the source bytes of CODING, OR the category bits that
   were recognized into DETECT_INFO->found, and OR CATEGORY_MASK_XXX into
   DETECT_INFO->rejected when the byte sequence is invalid for XXX.
   NOTE(review): the loop header, the labels and the return statements of
   this template are not visible in this excerpt.  */
156 detect_coding_XXX (coding
, detect_info
)
157 struct coding_system
*coding
;
158 struct coding_detection_info
*detect_info
;
160 unsigned char *src
= coding
->source
;
161 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
162 int multibytep
= coding
->src_multibyte
;
163 int consumed_chars
= 0;
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source. */
173 if (! __C_conforms_to_XXX___ (c
))
/* NOTE(review): the `!' in the next test looks inverted --
   detect_coding_utf_8 sets `found' when its leading-octet predicate is
   true.  Confirm the intent before copying this template.  */
175 if (! __C_strongly_suggests_XXX__ (c
))
176 found
= CATEGORY_MASK_XXX
;
178 /* The byte sequence is invalid for XXX. */
179 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
183 /* The source was exhausted successfully. */
184 detect_info
->found
|= found
;
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
201 Below is the template of these functions. */
/* Template decoder.  It reads bytes from CODING->source starting at
   offset CODING->consumed, stores decoded characters into
   CODING->charbuf, and records its progress in CODING->consumed,
   CODING->consumed_char and CODING->charbuf_used.
   NOTE(review): the loop header, the byte-fetch step and the labels of
   this template are not visible in this excerpt.  */
205 decode_coding_XXXX (coding
)
206 struct coding_system
*coding
;
208 unsigned char *src
= coding
->source
+ coding
->consumed
;
209 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source text, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base
;
214 /* A buffer to produce decoded characters. */
215 int *charbuf
= coding
->charbuf
;
216 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
217 int multibytep
= coding
->src_multibyte
;
/* CHARBUF is full once it has reached CHARBUF_END, so the test is `>='
   (the copy loop further down treats `charbuf < charbuf_end' as "room
   is still available").  */
222 if (charbuf
>= charbuf_end
)
223 /* No more room to produce a decoded character. */
230 if (src_base
< src_end
231 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
232 /* If the source ends with a partial byte sequence of a character,
233 treat the remaining bytes as eight-bit raw data. */
234 while (src_base
< src_end
&& charbuf
< charbuf_end
)
235 *charbuf
++ = *src_base
++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
239 /* Remember how many characters we produced. */
240 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of the not-yet-encoded source text.
260 Below is a template of these functions. */
/* Template encoder.  It walks the characters stored in CODING->charbuf
   (CODING->charbuf_used of them), writes the encoded bytes after
   CODING->destination + CODING->produced, and finally updates
   CODING->produced_char and CODING->produced.
   NOTE(review): the loop body and the return statement of this template
   are not visible in this excerpt.  */
263 encode_coding_XXX (coding
)
264 struct coding_system
*coding
;
266 int multibytep
= coding
->dst_multibyte
;
267 int *charbuf
= coding
->charbuf
;
/* CHARBUF is a plain `int *', so the end pointer is CHARBUF advanced by
   the number of characters stored in it.  */
268 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
269 unsigned char *dst
= coding
->destination
+ coding
->produced
;
270 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
/* Stop early enough that one more loop iteration cannot run past
   DST_END.  */
271 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
272 int produced_chars
= 0;
274 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
277 /* Encode C into DST, and increment DST. */
279 label_no_more_destination
:
280 /* How many chars and bytes we produced. */
281 coding
->produced_char
+= produced_chars
;
282 coding
->produced
= dst
- coding
->destination
;
287 /*** 1. Preamble ***/
294 #include "character.h"
297 #include "composite.h"
301 Lisp_Object Vcoding_system_hash_table
;
303 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
304 Lisp_Object Qunix
, Qdos
;
305 extern Lisp_Object Qmac
; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system
;
307 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
308 Lisp_Object Qdefault_char
;
309 Lisp_Object Qno_conversion
, Qundecided
;
310 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
311 Lisp_Object Qsignature
, Qendian
, Qbig
, Qlittle
;
312 Lisp_Object Qcoding_system_history
;
313 Lisp_Object Qvalid_codes
;
315 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
316 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
317 Lisp_Object Qstart_process
, Qopen_network_stream
;
318 Lisp_Object Qtarget_idx
;
320 Lisp_Object Vselect_safe_coding_system_function
;
322 /* Mnemonic string for each format of end-of-line. */
323 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
324 /* Mnemonic string to indicate format of end-of-line is not yet
326 Lisp_Object eol_mnemonic_undecided
;
330 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
332 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
334 /* Coding system emacs-mule and raw-text are for converting only
335 end-of-line format. */
336 Lisp_Object Qemacs_mule
, Qraw_text
;
338 /* Coding-systems are handed between Emacs Lisp programs and C internal
339 routines by the following three variables. */
340 /* Coding-system for reading files and receiving data from process. */
341 Lisp_Object Vcoding_system_for_read
;
342 /* Coding-system for writing files and sending data to process. */
343 Lisp_Object Vcoding_system_for_write
;
344 /* Coding-system actually used in the latest I/O. */
345 Lisp_Object Vlast_coding_system_used
;
347 /* A vector of length 256 which contains information about special
348 Latin codes (especially for dealing with Microsoft codes). */
349 Lisp_Object Vlatin_extra_code_table
;
351 /* Flag to inhibit code conversion of end-of-line format. */
352 int inhibit_eol_conversion
;
354 /* Flag to inhibit ISO2022 escape sequence detection. */
355 int inhibit_iso_escape_detection
;
357 /* Flag to make buffer-file-coding-system inherit from process-coding. */
358 int inherit_process_coding_system
;
360 /* Coding system to be used to encode text for terminal display. */
361 struct coding_system terminal_coding
;
363 /* Coding system to be used to encode text for terminal display when
364 terminal coding system is nil. */
365 struct coding_system safe_terminal_coding
;
367 /* Coding system of what is sent from terminal keyboard. */
368 struct coding_system keyboard_coding
;
370 Lisp_Object Vfile_coding_system_alist
;
371 Lisp_Object Vprocess_coding_system_alist
;
372 Lisp_Object Vnetwork_coding_system_alist
;
374 Lisp_Object Vlocale_coding_system
;
378 /* Flag to tell if we look up translation table on character code
380 Lisp_Object Venable_character_translation
;
381 /* Standard translation table to look up on decoding (reading). */
382 Lisp_Object Vstandard_translation_table_for_decode
;
383 /* Standard translation table to look up on encoding (writing). */
384 Lisp_Object Vstandard_translation_table_for_encode
;
386 Lisp_Object Qtranslation_table
;
387 Lisp_Object Qtranslation_table_id
;
388 Lisp_Object Qtranslation_table_for_decode
;
389 Lisp_Object Qtranslation_table_for_encode
;
391 /* Alist of charsets vs revision number. */
392 static Lisp_Object Vcharset_revision_table
;
394 /* Default coding systems used for process I/O. */
395 Lisp_Object Vdefault_process_coding_system
;
397 /* Global flag to tell that we can't call post-read-conversion and
398 pre-write-conversion functions. Usually the value is zero, but it
399 is set to 1 temporarily while such functions are running. This is
400 to avoid infinite recursive call. */
401 static int inhibit_pre_post_conversion
;
403 /* Two special coding systems. */
404 Lisp_Object Vsjis_coding_system
;
405 Lisp_Object Vbig5_coding_system
;
408 static int detect_coding_utf_8
P_ ((struct coding_system
*,
409 struct coding_detection_info
*info
));
410 static void decode_coding_utf_8
P_ ((struct coding_system
*));
411 static int encode_coding_utf_8
P_ ((struct coding_system
*));
413 static int detect_coding_utf_16
P_ ((struct coding_system
*,
414 struct coding_detection_info
*info
));
415 static void decode_coding_utf_16
P_ ((struct coding_system
*));
416 static int encode_coding_utf_16
P_ ((struct coding_system
*));
418 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
419 struct coding_detection_info
*info
));
420 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
421 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
423 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
424 struct coding_detection_info
*info
));
425 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
426 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
428 static int detect_coding_sjis
P_ ((struct coding_system
*,
429 struct coding_detection_info
*info
));
430 static void decode_coding_sjis
P_ ((struct coding_system
*));
431 static int encode_coding_sjis
P_ ((struct coding_system
*));
433 static int detect_coding_big5
P_ ((struct coding_system
*,
434 struct coding_detection_info
*info
));
435 static void decode_coding_big5
P_ ((struct coding_system
*));
436 static int encode_coding_big5
P_ ((struct coding_system
*));
438 static int detect_coding_ccl
P_ ((struct coding_system
*,
439 struct coding_detection_info
*info
));
440 static void decode_coding_ccl
P_ ((struct coding_system
*));
441 static int encode_coding_ccl
P_ ((struct coding_system
*));
443 static void decode_coding_raw_text
P_ ((struct coding_system
*));
444 static int encode_coding_raw_text
P_ ((struct coding_system
*));
447 /* ISO2022 section */
449 #define CODING_ISO_INITIAL(coding, reg) \
450 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
451 coding_attr_iso_initial), \
455 #define CODING_ISO_REQUEST(coding, charset_id) \
456 ((charset_id <= (coding)->max_charset_id \
457 ? (coding)->safe_charsets[charset_id] \
461 #define CODING_ISO_FLAGS(coding) \
462 ((coding)->spec.iso_2022.flags)
463 #define CODING_ISO_DESIGNATION(coding, reg) \
464 ((coding)->spec.iso_2022.current_designation[reg])
465 #define CODING_ISO_INVOCATION(coding, plane) \
466 ((coding)->spec.iso_2022.current_invocation[plane])
467 #define CODING_ISO_SINGLE_SHIFTING(coding) \
468 ((coding)->spec.iso_2022.single_shifting)
469 #define CODING_ISO_BOL(coding) \
470 ((coding)->spec.iso_2022.bol)
471 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
472 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
474 /* Control characters of ISO2022. */
475 /* code */ /* function */
476 #define ISO_CODE_LF 0x0A /* line-feed */
477 #define ISO_CODE_CR 0x0D /* carriage-return */
478 #define ISO_CODE_SO 0x0E /* shift-out */
479 #define ISO_CODE_SI 0x0F /* shift-in */
480 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
481 #define ISO_CODE_ESC 0x1B /* escape */
482 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
483 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
484 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
486 /* All code (1-byte) of ISO2022 is classified into one of the
488 enum iso_code_class_type
490 ISO_control_0
, /* Control codes in the range
491 0x00..0x1F and 0x7F, except for the
492 following 5 codes. */
493 ISO_carriage_return
, /* ISO_CODE_CR (0x0D) */
494 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
495 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
496 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
497 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
498 ISO_control_1
, /* Control codes in the range
499 0x80..0x9F, except for the
500 following 3 codes. */
501 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
502 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
503 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
504 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
505 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
506 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
507 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
510 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
511 `iso-flags' attribute of an iso2022 coding system. */
513 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
514 instead of the correct short-form sequence (e.g. ESC $ A). */
515 #define CODING_ISO_FLAG_LONG_FORM 0x0001
517 /* If set, reset graphic planes and registers at end-of-line to the
519 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
521 /* If set, reset graphic planes and registers before any control
522 characters to the initial state. */
523 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
525 /* If set, encode by 7-bit environment. */
526 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
528 /* If set, use locking-shift function. */
529 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
531 /* If set, use single-shift function. Overwrite
532 CODING_ISO_FLAG_LOCKING_SHIFT. */
533 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
535 /* If set, use designation escape sequence. */
536 #define CODING_ISO_FLAG_DESIGNATION 0x0040
538 /* If set, produce revision number sequence. */
539 #define CODING_ISO_FLAG_REVISION 0x0080
541 /* If set, produce ISO6429's direction specifying sequence. */
542 #define CODING_ISO_FLAG_DIRECTION 0x0100
544 /* If set, assume designation states are reset at beginning of line on
546 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
548 /* If set, designation sequence should be placed at beginning of line
550 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
552 /* If set, do not encode unsafe characters on output. */
553 #define CODING_ISO_FLAG_SAFE 0x0800
555 /* If set, extra latin codes (128..159) are accepted as a valid code
557 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
559 #define CODING_ISO_FLAG_COMPOSITION 0x2000
561 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
563 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
565 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
567 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
569 /* A character to be produced on output if encoding of the original
570 character is prohibited by CODING_ISO_FLAG_SAFE. */
571 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
575 #define CODING_UTF_16_BOM(coding) \
576 ((coding)->spec.utf_16.bom)
578 #define CODING_UTF_16_ENDIAN(coding) \
579 ((coding)->spec.utf_16.endian)
581 #define CODING_UTF_16_SURROGATE(coding) \
582 ((coding)->spec.utf_16.surrogate)
586 #define CODING_CCL_DECODER(coding) \
587 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
588 #define CODING_CCL_ENCODER(coding) \
589 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
590 #define CODING_CCL_VALIDS(coding) \
591 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
594 /* Index for each coding category in `coding_categories' */
598 coding_category_iso_7
,
599 coding_category_iso_7_tight
,
600 coding_category_iso_8_1
,
601 coding_category_iso_8_2
,
602 coding_category_iso_7_else
,
603 coding_category_iso_8_else
,
604 coding_category_utf_8
,
605 coding_category_utf_16_auto
,
606 coding_category_utf_16_be
,
607 coding_category_utf_16_le
,
608 coding_category_utf_16_be_nosig
,
609 coding_category_utf_16_le_nosig
,
610 coding_category_charset
,
611 coding_category_sjis
,
612 coding_category_big5
,
614 coding_category_emacs_mule
,
615 /* All above are targets of code detection. */
616 coding_category_raw_text
,
617 coding_category_undecided
,
621 /* Definitions of flag bits used in detect_coding_XXXX. */
622 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
623 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
624 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
625 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
626 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
627 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
628 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
629 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
630 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
631 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
632 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
633 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
634 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
635 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
636 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
637 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
638 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
640 /* This value is returned if detect_coding_mask () finds nothing other
641 than ASCII characters. */
642 #define CATEGORY_MASK_ANY \
643 (CATEGORY_MASK_ISO_7 \
644 | CATEGORY_MASK_ISO_7_TIGHT \
645 | CATEGORY_MASK_ISO_8_1 \
646 | CATEGORY_MASK_ISO_8_2 \
647 | CATEGORY_MASK_ISO_7_ELSE \
648 | CATEGORY_MASK_ISO_8_ELSE \
649 | CATEGORY_MASK_UTF_8 \
650 | CATEGORY_MASK_UTF_16_BE \
651 | CATEGORY_MASK_UTF_16_LE \
652 | CATEGORY_MASK_UTF_16_BE_NOSIG \
653 | CATEGORY_MASK_UTF_16_LE_NOSIG \
654 | CATEGORY_MASK_CHARSET \
655 | CATEGORY_MASK_SJIS \
656 | CATEGORY_MASK_BIG5 \
657 | CATEGORY_MASK_CCL \
658 | CATEGORY_MASK_EMACS_MULE)
661 #define CATEGORY_MASK_ISO_7BIT \
662 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
664 #define CATEGORY_MASK_ISO_8BIT \
665 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
667 #define CATEGORY_MASK_ISO_ELSE \
668 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
670 #define CATEGORY_MASK_ISO_ESCAPE \
671 (CATEGORY_MASK_ISO_7 \
672 | CATEGORY_MASK_ISO_7_TIGHT \
673 | CATEGORY_MASK_ISO_7_ELSE \
674 | CATEGORY_MASK_ISO_8_ELSE)
676 #define CATEGORY_MASK_ISO \
677 ( CATEGORY_MASK_ISO_7BIT \
678 | CATEGORY_MASK_ISO_8BIT \
679 | CATEGORY_MASK_ISO_ELSE)
681 #define CATEGORY_MASK_UTF_16 \
682 (CATEGORY_MASK_UTF_16_BE \
683 | CATEGORY_MASK_UTF_16_LE \
684 | CATEGORY_MASK_UTF_16_BE_NOSIG \
685 | CATEGORY_MASK_UTF_16_LE_NOSIG)
688 /* List of symbols `coding-category-xxx' ordered by priority. This
689 variable is exposed to Emacs Lisp. */
690 static Lisp_Object Vcoding_category_list
;
692 /* Table of coding categories (Lisp symbols). This variable is for
694 static Lisp_Object Vcoding_category_table
;
696 /* Table of coding-categories ordered by priority. */
697 static enum coding_category coding_priorities
[coding_category_max
];
699 /* Nth element is a coding context for the coding system bound to the
700 Nth coding category. */
701 static struct coding_system coding_categories
[coding_category_max
];
703 static int detected_mask
[coding_category_raw_text
] =
711 CATEGORY_MASK_UTF_16
,
712 CATEGORY_MASK_UTF_16
,
713 CATEGORY_MASK_UTF_16
,
714 CATEGORY_MASK_UTF_16
,
715 CATEGORY_MASK_UTF_16
,
716 CATEGORY_MASK_CHARSET
,
720 CATEGORY_MASK_EMACS_MULE
723 /*** Commonly used macros and functions ***/
726 #define min(a, b) ((a) < (b) ? (a) : (b))
729 #define max(a, b) ((a) > (b) ? (a) : (b))
732 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
734 attrs = CODING_ID_ATTRS (coding->id); \
735 eol_type = CODING_ID_EOL_TYPE (coding->id); \
736 if (VECTORP (eol_type)) \
738 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
742 /* Safely get one byte from the source text pointed by SRC which ends
743 at SRC_END, and set C to that byte. If there are not enough bytes
744 in the source, it jumps to `no_more_source'. The caller
745 should declare and set these variables appropriately in advance:
746 src, src_end, multibytep
749 #define ONE_MORE_BYTE(c) \
751 if (src == src_end) \
753 if (src_base < src) \
754 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
755 goto no_more_source; \
758 if (multibytep && (c & 0x80)) \
760 if ((c & 0xFE) != 0xC0) \
761 error ("Undecodable char found"); \
762 c = ((c & 1) << 6) | *src++; \
768 #define ONE_MORE_BYTE_NO_CHECK(c) \
771 if (multibytep && (c & 0x80)) \
773 if ((c & 0xFE) != 0xC0) \
774 error ("Undecodable char found"); \
775 c = ((c & 1) << 6) | *src++; \
781 /* Store a byte C in the place pointed by DST and increment DST to the
782 next free point, and increment PRODUCED_CHARS. The caller should
783 assure that C is 0..127, and declare and set the variable `dst'
784 appropriately in advance.
788 #define EMIT_ONE_ASCII_BYTE(c) \
795 /* Like EMIT_ONE_ASCII_BYTE, but store two bytes: C1 and C2. */
797 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
799 produced_chars += 2; \
800 *dst++ = (c1), *dst++ = (c2); \
804 /* Store a byte C in the place pointed by DST and increment DST to the
805 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
806 nonzero, store in an appropriate multibyte form. The caller should
807 declare and set the variables `dst' and `multibytep' appropriately
810 #define EMIT_ONE_BYTE(c) \
817 ch = BYTE8_TO_CHAR (ch); \
818 CHAR_STRING_ADVANCE (ch, dst); \
825 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
827 #define EMIT_TWO_BYTES(c1, c2) \
829 produced_chars += 2; \
836 ch = BYTE8_TO_CHAR (ch); \
837 CHAR_STRING_ADVANCE (ch, dst); \
840 ch = BYTE8_TO_CHAR (ch); \
841 CHAR_STRING_ADVANCE (ch, dst); \
851 #define EMIT_THREE_BYTES(c1, c2, c3) \
853 EMIT_ONE_BYTE (c1); \
854 EMIT_TWO_BYTES (c2, c3); \
858 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
860 EMIT_TWO_BYTES (c1, c2); \
861 EMIT_TWO_BYTES (c3, c4); \
865 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
867 charset_map_loaded = 0; \
868 c = DECODE_CHAR (charset, code); \
869 if (charset_map_loaded) \
871 unsigned char *orig = coding->source; \
874 coding_set_source (coding); \
875 offset = coding->source - orig; \
877 src_base += offset; \
883 #define ASSURE_DESTINATION(bytes) \
885 if (dst + (bytes) >= dst_end) \
887 int more_bytes = charbuf_end - charbuf + (bytes); \
889 dst = alloc_destination (coding, more_bytes, dst); \
890 dst_end = coding->destination + coding->dst_bytes; \
/* Recompute CODING->source from CODING->src_object.

   - Buffer source with a negative src_pos: the source is GAP_END_ADDR
     plus src_pos_byte.
   - Other buffer source: the source is the buffer's BUF_BEG_ADDR plus
     src_pos_byte - 1, advanced by BUF_GAP_SIZE when src_pos_byte is at
     or after BUF_GPT_BYTE.
   - String source: the source is the string's data plus src_pos_byte.
   - Anything else: CODING->source is left as it is.

   NOTE(review): the `else' joining the two buffer cases and the braces
   are not visible in this excerpt.  */
897 coding_set_source (coding
)
898 struct coding_system
*coding
;
900 if (BUFFERP (coding
->src_object
))
902 if (coding
->src_pos
< 0)
903 coding
->source
= GAP_END_ADDR
+ coding
->src_pos_byte
;
/* Ordinary buffer text: address it from the start of the buffer and
   step over the gap when the position is not before it.  */
906 struct buffer
*buf
= XBUFFER (coding
->src_object
);
907 EMACS_INT gpt_byte
= BUF_GPT_BYTE (buf
);
908 unsigned char *beg_addr
= BUF_BEG_ADDR (buf
);
910 coding
->source
= beg_addr
+ coding
->src_pos_byte
- 1;
911 if (coding
->src_pos_byte
>= gpt_byte
)
912 coding
->source
+= BUF_GAP_SIZE (buf
);
915 else if (STRINGP (coding
->src_object
))
917 coding
->source
= (XSTRING (coding
->src_object
)->data
918 + coding
->src_pos_byte
);
921 /* Otherwise, the source is C string and is never relocated
922 automatically. Thus we don't have to update anything. */
/* Recompute CODING->destination and CODING->dst_bytes when
   CODING->dst_object is a buffer.

   With a negative src_pos, the destination is BEG_ADDR plus
   dst_pos_byte - 1, and dst_bytes is the distance from there to
   GAP_END_ADDR minus the not-yet-consumed source bytes
   (src_bytes - consumed).  Otherwise the destination is computed from
   the destination buffer's BUF_BEG_ADDR, and dst_bytes extends to that
   buffer's BUF_GAP_END_ADDR.

   NOTE(review): the first test reads src_pos, not dst_pos.  Presumably a
   negative src_pos marks source text kept at the end of the gap (see how
   coding_set_source uses GAP_END_ADDR), which would explain reserving
   src_bytes - consumed bytes -- confirm against the callers.
   NOTE(review): the `else' and the braces are not visible in this
   excerpt.  */
927 coding_set_destination (coding
)
928 struct coding_system
*coding
;
930 if (BUFFERP (coding
->dst_object
))
932 if (coding
->src_pos
< 0)
934 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
935 coding
->dst_bytes
= (GAP_END_ADDR
936 - (coding
->src_bytes
- coding
->consumed
)
937 - coding
->destination
);
941 /* We are sure that coding->dst_pos_byte is before the gap
943 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
944 + coding
->dst_pos_byte
- 1);
945 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
946 - coding
->destination
);
950 /* Otherwise, the destination is C string and is never relocated
951 automatically. Thus we don't have to update anything. */
/* Grow the memory block at CODING->destination by BYTES bytes with
   xrealloc and add BYTES to CODING->dst_bytes.  CODING->destination is
   overwritten with the value xrealloc returns, so pointers into the old
   block have to be recomputed by the caller (alloc_destination does so
   through a saved offset).  */
957 coding_alloc_by_realloc (coding
, bytes
)
958 struct coding_system
*coding
;
961 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
962 coding
->dst_bytes
+ bytes
);
963 coding
->dst_bytes
+= bytes
;
/* Buffer-destination counterpart of coding_alloc_by_realloc;
   alloc_destination calls it with the number of additional bytes wanted
   in BYTES.

   When source and destination are the same buffer, the unconsumed source
   bytes (src_bytes - consumed) are temporarily counted as buffer text:
   GAP_SIZE is reduced and ZV, Z, ZV_BYTE and Z_BYTE are increased by
   that amount, and the adjustment is undone afterwards.  In the other
   visible path the destination buffer is made current and the previously
   current buffer is restored afterwards.

   NOTE(review): the statement that actually enlarges the gap sits
   between each adjust/restore pair and is not visible in this excerpt;
   neither are the `else' and the braces.  */
967 coding_alloc_by_making_gap (coding
, bytes
)
968 struct coding_system
*coding
;
971 if (BUFFERP (coding
->dst_object
)
972 && EQ (coding
->src_object
, coding
->dst_object
))
974 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
/* Temporarily count the unconsumed source bytes as buffer text.  */
976 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
/* Undo the temporary adjustment made above.  */
978 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
982 Lisp_Object this_buffer
;
/* Remember the current buffer so that it can be restored once the
   destination buffer has been handled.  */
984 this_buffer
= Fcurrent_buffer ();
985 set_buffer_internal (XBUFFER (coding
->dst_object
));
987 set_buffer_internal (XBUFFER (this_buffer
));
/* Enlarge the destination of CODING by NBYTES and recompute DST, a
   pointer into the old destination, so that it points at the same
   offset in the new one.

   A buffer destination is handled by coding_alloc_by_making_gap, any
   other by coding_alloc_by_realloc.  CODING->result is reset to
   CODING_RESULT_SUCCESS, and coding_set_destination is called before
   DST is rebuilt from the saved offset.

   NOTE(review): the declarations of NBYTES and DST, the `else' and the
   return statement are not visible in this excerpt; presumably the
   recomputed DST is returned -- confirm.  */
992 static unsigned char *
993 alloc_destination (coding
, nbytes
, dst
)
994 struct coding_system
*coding
;
/* Keep DST as an offset: CODING->destination may change below.  */
998 EMACS_INT offset
= dst
- coding
->destination
;
1000 if (BUFFERP (coding
->dst_object
))
1001 coding_alloc_by_making_gap (coding
, nbytes
);
1003 coding_alloc_by_realloc (coding
, nbytes
);
1004 coding
->result
= CODING_RESULT_SUCCESS
;
1005 coding_set_destination (coding
);
1006 dst
= coding
->destination
+ offset
;
1010 /** Macros for annotations. */
1012 /* Maximum length of annotation data (sum of annotations for
1013 composition and charset). */
1014 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1016 /* An annotation data is stored in the array coding->charbuf in this
1018 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1019 LENGTH is the number of elements in the annotation.
1020 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1021 FROM and TO specify the range of text annotated. They are relative
1022 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1024 The format of the following elements depends on ANNOTATION_MASK.
1026 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1028 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1029 METHOD is one of enum composition_method.
1030 Optional COMPOSITION-COMPONENTS are characters and composition
1033 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1036 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
1038 *(buf)++ = -(len); \
1039 *(buf)++ = (mask); \
1040 *(buf)++ = (from); \
1042 coding->annotated = 1; \
1045 #define ADD_COMPOSITION_DATA(buf, from, to, method) \
1047 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1052 #define ADD_CHARSET_DATA(buf, from, to, id) \
1054 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1059 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1066 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1067 Check if a text is encoded in UTF-8. If it is, return 1, else
1070 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1071 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1072 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1073 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1074 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1075 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1078 detect_coding_utf_8 (coding
, detect_info
)
1079 struct coding_system
*coding
;
1080 struct coding_detection_info
*detect_info
;
1082 unsigned char *src
= coding
->source
, *src_base
= src
;
1083 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1084 int multibytep
= coding
->src_multibyte
;
1085 int consumed_chars
= 0;
1089 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1090 /* A coding system of this category is always ASCII compatible. */
/* Hence scanning starts after the first CODING->head_ascii bytes.  By
   this point CATEGORY_MASK_UTF_8 has already been recorded in
   DETECT_INFO->checked.  */
1091 src
+= coding
->head_ascii
;
/* Scan loop.  C is tested as the leading octet of a sequence and C1..C4
   as the octets that follow it: each follower is checked with
   UTF_8_EXTRA_OCTET_P, and FOUND becomes CATEGORY_MASK_UTF_8 under the
   leading-octet test for a 2-, 3-, 4- or 5-octet sequence.
   NOTE(review): the loop header, the byte-fetch lines, the declarations
   of FOUND and INCOMPLETE, and the statements that follow each test are
   not visible in this excerpt -- confirm the control flow there.  */
1095 int c
, c1
, c2
, c3
, c4
;
1099 if (UTF_8_1_OCTET_P (c
))
1103 if (! UTF_8_EXTRA_OCTET_P (c1
))
1105 if (UTF_8_2_OCTET_LEADING_P (c
))
1107 found
= CATEGORY_MASK_UTF_8
;
1111 if (! UTF_8_EXTRA_OCTET_P (c2
))
1113 if (UTF_8_3_OCTET_LEADING_P (c
))
1115 found
= CATEGORY_MASK_UTF_8
;
1119 if (! UTF_8_EXTRA_OCTET_P (c3
))
1121 if (UTF_8_4_OCTET_LEADING_P (c
))
1123 found
= CATEGORY_MASK_UTF_8
;
1127 if (! UTF_8_EXTRA_OCTET_P (c4
))
1129 if (UTF_8_5_OCTET_LEADING_P (c
))
1131 found
= CATEGORY_MASK_UTF_8
;
/* Veto the UTF-8 category (presumably reached when the bytes do not
   form a valid sequence -- the jump leading here is not visible).  */
1136 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
/* When INCOMPLETE is set, the category is rejected only if this is the
   last block of the source.  */
1140 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1142 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
/* Report what the scan recognized.  */
1145 detect_info
->found
|= found
;
/* Decode UTF-8 bytes from CODING->source, starting at offset
   CODING->consumed, into the int array CODING->charbuf (capacity
   CODING->charbuf_size).
   Visible behavior: decoding is attempted only while charbuf has not
   reached charbuf_end; the leading byte c1 is classified with the
   UTF_8_*_P macros and the character value C is assembled from c1 and
   the trailing bytes c2..c5 with 6-bit shifts.  For 3-octet sequences
   the surrogate range 0xd800..0xdfff is part of a rejection condition;
   for 5-octet sequences values above MAX_CHAR or below 0x200000 are
   tested.  EOL_TYPE is compared with Qdos and Qmac, and
   CODING_RESULT_INSUFFICIENT_SRC is reported in the Qdos branch before
   jumping to no_more_source.  The fallback near the end (presumably the
   invalid-sequence path -- confirm) restores consumed_chars and stores
   C as itself when ASCII_BYTE_P, otherwise as BYTE8_TO_CHAR (C).
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated from consumed_chars_base, src_base
   and charbuf.
   NOTE(review): several lines of the body are missing from this listing
   (byte fetches, the remaining overlong checks, labels).  */
1151 decode_coding_utf_8 (coding
)
1152 struct coding_system
*coding
;
1154 unsigned char *src
= coding
->source
+ coding
->consumed
;
1155 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1156 unsigned char *src_base
;
1157 int *charbuf
= coding
->charbuf
;
1158 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1159 int consumed_chars
= 0, consumed_chars_base
;
1160 int multibytep
= coding
->src_multibyte
;
1161 Lisp_Object attr
, eol_type
, charset_list
;
1163 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1167 int c
, c1
, c2
, c3
, c4
, c5
;
1170 consumed_chars_base
= consumed_chars
;
1172 if (charbuf
>= charbuf_end
)
1176 if (UTF_8_1_OCTET_P(c1
))
1181 if (EQ (eol_type
, Qdos
))
1185 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
1186 goto no_more_source
;
1191 else if (EQ (eol_type
, Qmac
))
1198 if (! UTF_8_EXTRA_OCTET_P (c2
))
1200 if (UTF_8_2_OCTET_LEADING_P (c1
))
1202 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1203 /* Reject overlong sequences here and below. Encoders
1204 producing them are incorrect, they can be misleading,
1205 and they mess up read/write invariance. */
1212 if (! UTF_8_EXTRA_OCTET_P (c3
))
1214 if (UTF_8_3_OCTET_LEADING_P (c1
))
1216 c
= (((c1
& 0xF) << 12)
1217 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1219 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1225 if (! UTF_8_EXTRA_OCTET_P (c4
))
1227 if (UTF_8_4_OCTET_LEADING_P (c1
))
1229 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1230 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1237 if (! UTF_8_EXTRA_OCTET_P (c5
))
1239 if (UTF_8_5_OCTET_LEADING_P (c1
))
1241 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1242 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1244 if ((c
> MAX_CHAR
) || (c
< 0x200000))
1259 consumed_chars
= consumed_chars_base
;
1261 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1266 coding
->consumed_char
+= consumed_chars_base
;
1267 coding
->consumed
= src_base
- coding
->source
;
1268 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the CODING->charbuf_used characters held in CODING->charbuf
   and append the bytes at CODING->destination + CODING->produced.
   Two loops are visible, each calling ASSURE_DESTINATION before
   handling a character.  The first reserves MAX_MULTIBYTE_LENGTH * 2
   per character, maps CHAR_BYTE8_P characters through CHAR_TO_BYTE8,
   and otherwise builds the sequence in the local buffer `str' with
   CHAR_STRING_ADVANCE before walking it byte by byte.  The second
   reserves MAX_MULTIBYTE_LENGTH and writes straight into DST with
   CHAR_STRING.
   NOTE(review): the condition selecting between the two loops is on a
   line missing from this listing (presumably `multibytep'); confirm.
   On completion sets CODING->result to CODING_RESULT_SUCCESS, adds
   produced_chars to CODING->produced_char, and stores the new byte
   count in CODING->produced.  */
1273 encode_coding_utf_8 (coding
)
1274 struct coding_system
*coding
;
1276 int multibytep
= coding
->dst_multibyte
;
1277 int *charbuf
= coding
->charbuf
;
1278 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1279 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1280 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1281 int produced_chars
= 0;
1286 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1288 while (charbuf
< charbuf_end
)
1290 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1292 ASSURE_DESTINATION (safe_room
);
1294 if (CHAR_BYTE8_P (c
))
1296 c
= CHAR_TO_BYTE8 (c
);
1301 CHAR_STRING_ADVANCE (c
, pend
);
1302 for (p
= str
; p
< pend
; p
++)
1309 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1311 while (charbuf
< charbuf_end
)
1313 ASSURE_DESTINATION (safe_room
);
1315 dst
+= CHAR_STRING (c
, dst
);
1319 coding
->result
= CODING_RESULT_SUCCESS
;
1320 coding
->produced_char
+= produced_chars
;
1321 coding
->produced
= dst
- coding
->destination
;
1326 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1327 Check if a text is encoded in one of UTF-16 based coding systems.
1328 If it is, return 1, else return 0. */
/* Classifiers for a 16-bit UTF-16 code unit VAL.
   UTF_16_HIGH_SURROGATE_P: VAL is in 0xD800..0xDBFF.
   UTF_16_LOW_SURROGATE_P: VAL is in 0xDC00..0xDFFF.
   UTF_16_INVALID_P: VAL is 0xFFFE, 0xFFFF, or a low surrogate.  Note
   that this macro evaluates VAL up to three times.  */
1330 #define UTF_16_HIGH_SURROGATE_P(val) \
1331 (((val) & 0xFC00) == 0xD800)
1333 #define UTF_16_LOW_SURROGATE_P(val) \
1334 (((val) & 0xFC00) == 0xDC00)
1336 #define UTF_16_INVALID_P(val) \
1337 (((val) == 0xFFFE) \
1338 || ((val) == 0xFFFF) \
1339 || UTF_16_LOW_SURROGATE_P (val))
/* Detector for the UTF-16 categories.
   Sets CATEGORY_MASK_UTF_16 in DETECT_INFO->checked.  When CODING->mode
   has CODING_MODE_LAST_BLOCK and CODING->src_bytes is odd, the whole
   CATEGORY_MASK_UTF_16 is added to DETECT_INFO->rejected.  Two bytes
   c1, c2 (fetched on lines not shown in this listing) are compared with
   the byte order marks: FF FE records CATEGORY_MASK_UTF_16_LE as found
   and CATEGORY_MASK_UTF_16_BE as rejected; FE FF records the
   opposite.
   NOTE(review): return statements and the byte fetches are missing from
   this listing.  */
1343 detect_coding_utf_16 (coding
, detect_info
)
1344 struct coding_system
*coding
;
1345 struct coding_detection_info
*detect_info
;
1347 unsigned char *src
= coding
->source
, *src_base
= src
;
1348 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1349 int multibytep
= coding
->src_multibyte
;
1350 int consumed_chars
= 0;
1353 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
1355 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1356 && (coding
->src_bytes
& 1))
1358 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
1364 if ((c1
== 0xFF) && (c2
== 0xFE))
1366 detect_info
->found
|= CATEGORY_MASK_UTF_16_LE
;
1367 detect_info
->rejected
|= CATEGORY_MASK_UTF_16_BE
;
1369 else if ((c1
== 0xFE) && (c2
== 0xFF))
1371 detect_info
->found
|= CATEGORY_MASK_UTF_16_BE
;
1372 detect_info
->rejected
|= CATEGORY_MASK_UTF_16_LE
;
/* Decode UTF-16 code units from CODING->source, starting at offset
   CODING->consumed, into CODING->charbuf.
   The BOM mode, byte order and a pending surrogate are read from CODING
   through CODING_UTF_16_BOM, CODING_UTF_16_ENDIAN and
   CODING_UTF_16_SURROGATE, and are written back whenever they change,
   so this state is kept in CODING between calls.
   BOM handling (only when BOM is not utf_16_without_bom): with
   utf_16_with_bom the first unit is checked against the mark expected
   for ENDIAN; otherwise the byte order is chosen from the unit read
   (0xFFFE selects little endian, big endian is the other visible
   choice) and the BOM mode becomes utf_16_with_bom.
   Main loop: a unit is handled only after testing
   charbuf + 2 >= charbuf_end; each unit C is assembled from c1, c2
   according to ENDIAN.  A high surrogate is stored as the pending
   surrogate; when a surrogate is pending and C is a low surrogate the
   pair is combined as ((surrogate - 0xD800) << 10) | (C - 0xDC00) and
   the pending surrogate is cleared.  When C is not a low surrogate the
   pending surrogate is split back into bytes c1, c2.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.
   NOTE(review): the stores into charbuf and several branches are on
   lines missing from this listing.  */
1379 decode_coding_utf_16 (coding
)
1380 struct coding_system
*coding
;
1382 unsigned char *src
= coding
->source
+ coding
->consumed
;
1383 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1384 unsigned char *src_base
;
1385 int *charbuf
= coding
->charbuf
;
1386 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1387 int consumed_chars
= 0, consumed_chars_base
;
1388 int multibytep
= coding
->src_multibyte
;
1389 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1390 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1391 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1392 Lisp_Object attr
, eol_type
, charset_list
;
1394 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1396 if (bom
!= utf_16_without_bom
)
1404 if (bom
== utf_16_with_bom
)
1406 if (endian
== utf_16_big_endian
1407 ? c
!= 0xFEFF : c
!= 0xFFFE)
1409 /* We are sure that there's enough room at CHARBUF. */
1418 CODING_UTF_16_ENDIAN (coding
)
1419 = endian
= utf_16_big_endian
;
1420 else if (c
== 0xFFFE)
1421 CODING_UTF_16_ENDIAN (coding
)
1422 = endian
= utf_16_little_endian
;
1425 CODING_UTF_16_ENDIAN (coding
)
1426 = endian
= utf_16_big_endian
;
1430 CODING_UTF_16_BOM (coding
) = utf_16_with_bom
;
1438 consumed_chars_base
= consumed_chars
;
1440 if (charbuf
+ 2 >= charbuf_end
)
1445 c
= (endian
== utf_16_big_endian
1446 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1449 if (! UTF_16_LOW_SURROGATE_P (c
))
1451 if (endian
== utf_16_big_endian
)
1452 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1454 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1458 if (UTF_16_HIGH_SURROGATE_P (c
))
1459 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1465 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1466 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1472 if (UTF_16_HIGH_SURROGATE_P (c
))
1473 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1480 coding
->consumed_char
+= consumed_chars_base
;
1481 coding
->consumed
= src_base
- coding
->source
;
1482 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the CODING->charbuf_used characters held in CODING->charbuf as
   UTF-16 and append the bytes at CODING->destination +
   CODING->produced.
   If the BOM mode is utf_16_with_bom, a byte order mark (FE FF or
   FF FE) is emitted and the mode stored in CODING is changed to
   utf_16_without_bom, so the test above it fails on later calls with
   the same CODING.  Each character is then written either as one
   two-byte unit or as a surrogate pair c1 = (C >> 10) + 0xD800,
   c2 = (C & 0x3FF) + 0xDC00 in four bytes, in both byte orders.
   On completion sets CODING->result to CODING_RESULT_SUCCESS and
   updates CODING->produced and CODING->produced_char.
   NOTE(review): the tests choosing byte order and unit count, and any
   adjustment of C before the surrogate split, are on lines missing from
   this listing.  */
1486 encode_coding_utf_16 (coding
)
1487 struct coding_system
*coding
;
1489 int multibytep
= coding
->dst_multibyte
;
1490 int *charbuf
= coding
->charbuf
;
1491 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1492 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1493 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1495 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1496 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1497 int produced_chars
= 0;
1498 Lisp_Object attrs
, eol_type
, charset_list
;
1501 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1503 if (bom
== utf_16_with_bom
)
1505 ASSURE_DESTINATION (safe_room
);
1507 EMIT_TWO_BYTES (0xFE, 0xFF);
1509 EMIT_TWO_BYTES (0xFF, 0xFE);
1510 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1513 while (charbuf
< charbuf_end
)
1515 ASSURE_DESTINATION (safe_room
);
/* Characters at or beyond MAX_UNICODE_CHAR are replaced by the coding
   system's default character.  NOTE(review): if MAX_UNICODE_CHAR is the
   largest valid code point, `>=' also replaces that valid character;
   confirm whether `>' was intended.  */
1517 if (c
>= MAX_UNICODE_CHAR
)
1518 c
= coding
->default_char
;
1523 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1525 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1532 c1
= (c
>> 10) + 0xD800;
1533 c2
= (c
& 0x3FF) + 0xDC00;
1535 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1537 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1540 coding
->result
= CODING_RESULT_SUCCESS
;
1541 coding
->produced
= dst
- coding
->destination
;
1542 coding
->produced_char
+= produced_chars
;
1547 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1549 /* Emacs' internal format for representation of multiple character
1550 sets is a kind of multi-byte encoding, i.e. characters are
1551 represented by variable-length sequences of one-byte codes.
1553 ASCII characters and control characters (e.g. `tab', `newline') are
1554 represented by one-byte sequences which are their ASCII codes, in
1555 the range 0x00 through 0x7F.
1557 8-bit characters of the range 0x80..0x9F are represented by
1558 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1561 8-bit characters of the range 0xA0..0xFF are represented by
1562 one-byte sequences which are their 8-bit code.
1564 The other characters are represented by a sequence of `base
1565 leading-code', optional `extended leading-code', and one or two
1566 `position-code's. The length of the sequence is determined by the
1567 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1568 whereas extended leading-code and position-code take the range 0xA0
1569 through 0xFF. See `charset.h' for more details about leading-code
1572 --- CODE RANGE of Emacs' internal format ---
1576 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1577 eight-bit-graphic 0xA0..0xFF
1578 ELSE 0x81..0x9D + [0xA0..0xFF]+
1579 ---------------------------------------------
1581 As this is the internal character representation, the format is
1582 usually not used externally (i.e. in a file or in a data sent to a
1583 process). But, it is possible to have a text externally in this
1584 format (i.e. by encoding by the coding system `emacs-mule').
1586 In that case, a sequence of one-byte codes has a slightly different
1589 At first, all characters in eight-bit-control are represented by
1590 one-byte sequences which are their 8-bit code.
1592 Next, character composition data are represented by the byte
1593 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1595 METHOD is 0xF2 plus one of composition method (enum
1596 composition_method),
1598 BYTES is 0xA0 plus a byte length of this composition data,
1600 CHARS is 0xA0 plus a number of characters composed by this
1603 COMPONENTs are characters of multibyte form or composition
1604 rules encoded by two bytes of ASCII codes.
1606 In addition, for backward compatibility, the following formats are
1607 also recognized as composition data on decoding.
1610 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1613 MSEQ is a multibyte form but in this special format:
1614 ASCII: 0xA0 ASCII_CODE+0x80,
1615 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1616 RULE is a one byte code of the range 0xA0..0xF0 that
1617 represents a composition rule.
/* Table with one entry per byte value.  The visible users index it with
   the first byte of an emacs-mule sequence: emacs_mule_char switches on
   the entry, detect_coding_emacs_mule compares it with the number of
   bytes scanned, and decode_coding_emacs_mule treats entries greater
   than 1 as multibyte leading codes -- so each entry is presumably the
   byte length of the sequence that the byte starts; confirm at the
   initialization site.  */
1620 char emacs_mule_bytes
[256];
/* Parse one emacs-mule multibyte sequence starting at SRC and decode it
   to a character.
   The shape of the sequence is selected by emacs_mule_bytes[c]; the
   charset is looked up in emacs_mule_charset[c], and a missing entry is
   tested for in every visible branch.  The leading codes
   EMACS_MULE_LEADING_CODE_PRIVATE_11 and ..._PRIVATE_12 are
   distinguished in one branch.  For two-byte position codes the first
   byte contributes (c & 0x7F) << 8 to CODE.  One branch picks
   charset_ascii or charset_eight_bit depending on ASCII_BYTE_P (code).
   The character is produced by DECODE_CHAR (charset, code).
   Outputs: *NBYTES receives the number of bytes consumed
   (src - src_base) and *NCHARS receives consumed_chars.
   NOTE(review): the use of ID, the byte fetches and the return
   statements are on lines missing from this listing.  Callers in this
   file pass either NULL or the address of an int for ID, so it is
   presumably an optional output for the charset id; confirm.  */
1623 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1624 struct coding_system
*coding
;
1626 int *nbytes
, *nchars
, *id
;
1628 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1629 int multibytep
= coding
->src_multibyte
;
1630 unsigned char *src_base
= src
;
1631 struct charset
*charset
;
1634 int consumed_chars
= 0;
1637 switch (emacs_mule_bytes
[c
])
1640 if (! (charset
= emacs_mule_charset
[c
]))
1647 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1648 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1651 if (! (charset
= emacs_mule_charset
[c
]))
1658 if (! (charset
= emacs_mule_charset
[c
]))
1661 code
= (c
& 0x7F) << 8;
1669 if (! (charset
= emacs_mule_charset
[c
]))
1672 code
= (c
& 0x7F) << 8;
1679 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1680 ? charset_ascii
: charset_eight_bit
);
1686 c
= DECODE_CHAR (charset
, code
);
1689 *nbytes
= src
- src_base
;
1690 *nchars
= consumed_chars
;
1703 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1704 Check if a text is encoded in `emacs-mule'. If it is, return 1,
/* Detector for the emacs-mule category.
   Sets CATEGORY_MASK_EMACS_MULE in DETECT_INFO->checked and skips the
   first CODING->head_ascii bytes.  Three cases are visible in the scan:
   a composition sequence, which is only skipped and checked for a
   minimum length (see the comment in the body); the bytes ISO_CODE_ESC,
   ISO_CODE_SI and ISO_CODE_SO, which take part in a test whose other
   operand is not shown; and an ordinary multibyte sequence, whose
   scanned length is compared with emacs_mule_bytes[*src_base].  The
   local `found' is set to CATEGORY_MASK_EMACS_MULE in the composition
   and multibyte cases.  The category is added to DETECT_INFO->rejected
   at two visible places: one statement near the end of the scan
   (presumably the invalid-sequence exit -- confirm) and one guarded by
   `incomplete' together with CODING_MODE_LAST_BLOCK.  Finally `found'
   is merged into DETECT_INFO->found.
   NOTE(review): loop structure, byte fetches and return statements are
   on lines missing from this listing.  */
1708 detect_coding_emacs_mule (coding
, detect_info
)
1709 struct coding_system
*coding
;
1710 struct coding_detection_info
*detect_info
;
1712 unsigned char *src
= coding
->source
, *src_base
= src
;
1713 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1714 int multibytep
= coding
->src_multibyte
;
1715 int consumed_chars
= 0;
1720 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1721 /* A coding system of this category is always ASCII compatible. */
1722 src
+= coding
->head_ascii
;
1732 /* Perhaps the start of composite character. We simply skip
1733 it because analyzing it is too heavy for detecting. But,
1734 at least, we check that the composite character
1735 consists of more than 4 bytes. */
1736 unsigned char *src_base
;
1746 if (src
- src_base
<= 4)
1748 found
= CATEGORY_MASK_EMACS_MULE
;
1756 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1761 unsigned char *src_base
= src
- 1;
1768 if (src
- src_base
!= emacs_mule_bytes
[*src_base
])
1770 found
= CATEGORY_MASK_EMACS_MULE
;
1773 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1777 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1779 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1782 detect_info
->found
|= found
;
1787 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1789 /* Decode a character represented as a component of composition
1790 sequence of Emacs 20/21 style at SRC. Set C to that character and
1791 update SRC to the head of next character (or an encoded composition
1792 rule). If SRC doesn't point to a composition component, set C to -1.
1793 If SRC points to an invalid byte sequence, global exit by a return
/* Statement macro for use inside a decoder body.  It relies on the
   expansion site providing `src', `src_end', `c', `coding',
   `consumed_chars' and the label `invalid_code'.  It tests for end of
   input (src == src_end), decodes one character with emacs_mule_char
   (passing NULL for the charset-id output), jumps to invalid_code on
   failure, and adds the reported character count to consumed_chars.
   NOTE(review): the lines that store through BUF and advance SRC are
   missing from this listing.  */
1796 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1800 int nbytes, nchars; \
1802 if (src == src_end) \
1804 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1809 goto invalid_code; \
1813 consumed_chars += nchars; \
1818 /* Decode a composition rule represented as a component of composition
1819 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1820 and increment BUF. If SRC points to an invalid byte sequence, set C
/* Statement macro for use inside a decoder body (needs `src',
   `src_end' and the label `invalid_code' at the expansion site).
   Reads one byte into a local C, jumping to invalid_code when the
   input is exhausted or when C is outside 0..80.  C is then split into
   gref = C / 9 and nref = C % 9, and COMPOSITION_ENCODE_RULE (gref,
   nref) is stored through BUF, which is advanced by one element.
   NOTE(review): any adjustment applied to C between the read and the
   range check is on a line missing from this listing.  */
1823 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1825 int c, gref, nref; \
1827 if (src >= src_end) \
1828 goto invalid_code; \
1829 ONE_MORE_BYTE_NO_CHECK (c); \
1831 if (c < 0 || c >= 81) \
1832 goto invalid_code; \
1834 gref = c / 9, nref = c % 9; \
1835 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1839 /* Decode a composition rule represented as a component of composition
1840 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1841 and increment BUF. If SRC points to an invalid byte sequence, set C
/* Statement macro for use inside a decoder body (needs `src',
   `src_end' and the label `invalid_code' at the expansion site).
   Requires two more input bytes (src + 1 < src_end), reads them into
   gref and nref, jumps to invalid_code when either is outside 0..80,
   and stores COMPOSITION_ENCODE_RULE (gref, nref) through BUF, which is
   advanced by one element.
   NOTE(review): the declarations of gref/nref and any adjustment of the
   bytes read are on lines missing from this listing.  */
1844 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1848 if (src + 1>= src_end) \
1849 goto invalid_code; \
1850 ONE_MORE_BYTE_NO_CHECK (gref); \
1852 ONE_MORE_BYTE_NO_CHECK (nref); \
1854 if (gref < 0 || gref >= 81 \
1855 || nref < 0 || nref >= 81) \
1856 goto invalid_code; \
1857 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
/* Statement macro used by decode_coding_emacs_mule for an Emacs 21
   style composition header.  C is the METHOD byte already read; the
   method is C - 0xF2.  Two more bytes give nbytes and nchars (each
   byte minus 0xA0).  A composition annotation for FROM..FROM + nchars
   is started in charbuf with ADD_COMPOSITION_DATA.  For methods other
   than COMPOSITION_RELATIVE, components are decoded straight into
   charbuf until consumed_chars reaches consumed_chars_base + nbytes:
   odd-numbered components are rules (DECODE_EMACS_MULE_COMPOSITION_
   RULE_21) unless the method is COMPOSITION_WITH_ALTCHARS, the others
   are characters.  Afterwards the annotation's first element
   (charbuf_base[0], the negated length) is decreased by the component
   count I.
   NOTE(review): FROM is computed here from coding->produced, whereas
   the two Emacs 20 composition macros use coding->produced_char; this
   looks like an inconsistency -- confirm which field is intended.
   NOTE(review): some lines of the macro are missing from this
   listing.  */
1861 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1863 /* Emacs 21 style format. The first three bytes at SRC are \
1864 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1865 the byte length of this composition information, CHARS is the \
1866 number of characters composed by this composition. */ \
1867 enum composition_method method = c - 0xF2; \
1868 int *charbuf_base = charbuf; \
1870 int consumed_chars_limit; \
1871 int nbytes, nchars; \
1873 ONE_MORE_BYTE (c); \
1874 nbytes = c - 0xA0; \
1876 goto invalid_code; \
1877 ONE_MORE_BYTE (c); \
1878 nchars = c - 0xA0; \
1879 from = coding->produced + char_offset; \
1880 to = from + nchars; \
1881 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1882 consumed_chars_limit = consumed_chars_base + nbytes; \
1883 if (method != COMPOSITION_RELATIVE) \
1886 while (consumed_chars < consumed_chars_limit) \
1888 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1889 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1891 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1894 if (consumed_chars < consumed_chars_limit) \
1895 goto invalid_code; \
1896 charbuf_base[0] -= i; \
/* Statement macro used by decode_coding_emacs_mule for an Emacs 20
   style relative composition.  After skipping one byte, up to
   MAX_COMPOSITION_COMPONENTS characters are decoded into the local
   array `components' through BUF.  A composition annotation with
   method COMPOSITION_RELATIVE is then started in charbuf with
   ADD_COMPOSITION_DATA, and the I collected components are copied
   into charbuf.
   NOTE(review): the loop exit condition, the validity test before
   `goto invalid_code' and the computation of TO are on lines missing
   from this listing.  */
1901 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1903 /* Emacs 20 style format for relative composition. */ \
1904 /* Store multibyte form of characters to be composed. */ \
1905 enum composition_method method = COMPOSITION_RELATIVE; \
1906 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1907 int *buf = components; \
1912 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1913 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1914 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1916 goto invalid_code; \
1917 from = coding->produced_char + char_offset; \
1919 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1920 for (j = 0; j < i; j++) \
1921 *charbuf++ = components[j]; \
/* Statement macro used by decode_coding_emacs_mule for an Emacs 20
   style rule-base composition.  One character is decoded into the
   local array `components', followed by up to
   MAX_COMPOSITION_COMPONENTS (rule, character) pairs.  The sequence is
   rejected (invalid_code) when I < 1 or when an even number of elements
   was collected.  A composition annotation with method
   COMPOSITION_WITH_RULE is then emitted, all collected elements are
   copied into charbuf, and finally every second element (indices 0, 2,
   4, ...) is copied again.
   NOTE(review): the room check jumps to no_more_source when
   `charbuf + i + (i / 2) + 1 < charbuf_end', i.e. when there IS room;
   decode_coding_emacs_mule itself stops on `charbuf >= charbuf_end',
   so this comparison looks inverted -- confirm.
   NOTE(review): ADD_COMPOSITION_DATA is given BUF (which points into
   the local `components' array) while the relative-composition macro
   passes charbuf; the annotation header therefore appears to be written
   into the local array instead of charbuf -- confirm.
   NOTE(review): some lines of the macro are missing from this
   listing.  */
1925 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1927 /* Emacs 20 style format for rule-base composition. */ \
1928 /* Store multibyte form of characters to be composed. */ \
1929 enum composition_method method = COMPOSITION_WITH_RULE; \
1930 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1931 int *buf = components; \
1935 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1936 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1938 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1939 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1941 if (i < 1 || (buf - components) % 2 == 0) \
1942 goto invalid_code; \
1943 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1944 goto no_more_source; \
1945 from = coding->produced_char + char_offset; \
1947 ADD_COMPOSITION_DATA (buf, from, to, method); \
1948 for (j = 0; j < i; j++) \
1949 *charbuf++ = components[j]; \
1950 for (j = 0; j < i; j += 2) \
1951 *charbuf++ = components[j]; \
/* Decode emacs-mule bytes from CODING->source, starting at offset
   CODING->consumed, into CODING->charbuf.
   The usable end of charbuf is CODING->charbuf_size minus
   MAX_ANNOTATION_LENGTH, which leaves room for annotations.
   Visible behavior: EOL_TYPE is compared with Qdos and Qmac, and
   CODING_RESULT_INSUFFICIENT_SRC is reported in the Qdos branch before
   jumping to no_more_source.  Composition sequences are dispatched to
   DECODE_EMACS_MULE_21_COMPOSITION when C - 0xF2 lies between
   COMPOSITION_RELATIVE and COMPOSITION_WITH_RULE_ALTCHARS, and
   otherwise to one of the two Emacs 20 macros.  A byte below 0xA0
   whose emacs_mule_bytes entry exceeds 1 starts a multibyte character
   decoded by emacs_mule_char, which also reports a charset id.
   Charset annotations are emitted with ADD_CHARSET_DATA for the range
   last_offset..char_offset whenever last_id is not charset_ascii, both
   inside the loop and once more after it.  The fallback near the end
   (presumably the invalid-sequence path -- confirm) restores
   consumed_chars and stores C as itself when ASCII_BYTE_P, otherwise as
   BYTE8_TO_CHAR (C).
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.
   NOTE(review): many lines of the body are missing from this listing
   (loop head, byte fetches, labels, updates of last_id and
   char_offset).  */
1956 decode_coding_emacs_mule (coding
)
1957 struct coding_system
*coding
;
1959 unsigned char *src
= coding
->source
+ coding
->consumed
;
1960 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1961 unsigned char *src_base
;
1962 int *charbuf
= coding
->charbuf
;
1963 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
1964 int consumed_chars
= 0, consumed_chars_base
;
1965 int multibytep
= coding
->src_multibyte
;
1966 Lisp_Object attrs
, eol_type
, charset_list
;
1967 int char_offset
= coding
->produced_char
;
1968 int last_offset
= char_offset
;
1969 int last_id
= charset_ascii
;
1971 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1978 consumed_chars_base
= consumed_chars
;
1980 if (charbuf
>= charbuf_end
)
1989 if (EQ (eol_type
, Qdos
))
1993 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
1994 goto no_more_source
;
1999 else if (EQ (eol_type
, Qmac
))
2008 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2009 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2010 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2012 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2014 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2018 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2024 consumed_chars
= consumed_chars_base
;
2025 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2034 if (last_id
!= charset_ascii
)
2035 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2037 last_offset
= char_offset
;
2041 consumed_chars
+= nchars
;
2048 consumed_chars
= consumed_chars_base
;
2050 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
2056 if (last_id
!= charset_ascii
)
2057 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2058 coding
->consumed_char
+= consumed_chars_base
;
2059 coding
->consumed
= src_base
- coding
->source
;
2060 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Fill the two-element array CODES with the leading code(s) for the
   emacs-mule charset id ID.  In the first visible branch ID itself is
   the only leading code and CODES[1] is 0.  Otherwise CODES[0] is an
   extension prefix chosen by range -- 0x9A for ID < 0xE0, 0x9B for
   ID < 0xF0, 0x9C for ID < 0xF5, 0x9D for the rest -- and CODES[1] is
   ID.  ID and CODES are evaluated several times and are not
   parenthesized in the expansion, so pass plain identifiers.
   NOTE(review): the first condition of the chain is on a line missing
   from this listing.  */
2064 #define EMACS_MULE_LEADING_CODES(id, codes) \
2067 codes[0] = id, codes[1] = 0; \
2068 else if (id < 0xE0) \
2069 codes[0] = 0x9A, codes[1] = id; \
2070 else if (id < 0xF0) \
2071 codes[0] = 0x9B, codes[1] = id; \
2072 else if (id < 0xF5) \
2073 codes[0] = 0x9C, codes[1] = id; \
2075 codes[0] = 0x9D, codes[1] = id; \
/* Encode the CODING->charbuf_used elements of CODING->charbuf in the
   emacs-mule format and append the bytes at CODING->destination +
   CODING->produced.
   Annotations in charbuf are handled first: composition annotations
   are not implemented yet; a charset annotation supplies
   preferred_charset_id from charbuf[3], which is reset to -1 when an
   Fmemq lookup of that id yields nil.  Ordinary elements: ASCII
   characters are emitted as one byte; CHAR_BYTE8_P characters are
   converted with CHAR_TO_BYTE8; other characters are mapped to a
   charset (the preferred one when it contains the character, otherwise
   via char_charset over CHARSET_LIST), falling back to
   CODING->default_char.  The output is the leading code(s) from
   EMACS_MULE_LEADING_CODES followed by CODE as one byte or as two
   bytes (high byte first).
   On completion sets CODING->result to CODING_RESULT_SUCCESS and
   updates CODING->produced_char and CODING->produced.
   NOTE(review): many lines of the body are missing from this listing
   (element fetch, the annotation switch head, the dimension test,
   declarations of `code', `dimension', `emacs_mule_id' and
   `safe_room').  */
2080 encode_coding_emacs_mule (coding
)
2081 struct coding_system
*coding
;
2083 int multibytep
= coding
->dst_multibyte
;
2084 int *charbuf
= coding
->charbuf
;
2085 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2086 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2087 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2089 int produced_chars
= 0;
2090 Lisp_Object attrs
, eol_type
, charset_list
;
2092 int preferred_charset_id
= -1;
2094 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2096 while (charbuf
< charbuf_end
)
2098 ASSURE_DESTINATION (safe_room
);
2103 /* Handle an annotation. */
2106 case CODING_ANNOTATE_COMPOSITION_MASK
:
2107 /* Not yet implemented. */
2109 case CODING_ANNOTATE_CHARSET_MASK
:
2110 preferred_charset_id
= charbuf
[3];
2111 if (preferred_charset_id
>= 0
2112 && NILP (Fmemq (make_number (preferred_charset_id
),
2114 preferred_charset_id
= -1;
2123 if (ASCII_CHAR_P (c
))
2124 EMIT_ONE_ASCII_BYTE (c
);
2125 else if (CHAR_BYTE8_P (c
))
2127 c
= CHAR_TO_BYTE8 (c
);
2132 struct charset
*charset
;
2136 unsigned char leading_codes
[2];
2138 if (preferred_charset_id
>= 0)
2140 charset
= CHARSET_FROM_ID (preferred_charset_id
);
2141 if (! CHAR_CHARSET_P (c
, charset
))
/* NOTE(review): this lookup passes NULL where the other char_charset
   calls pass &code, so CODE is not filled in here although it is
   emitted below; confirm that CODE is set elsewhere on this path.  */
2142 charset
= char_charset (c
, charset_list
, NULL
);
2145 charset
= char_charset (c
, charset_list
, &code
);
2148 c
= coding
->default_char
;
2149 if (ASCII_CHAR_P (c
))
2151 EMIT_ONE_ASCII_BYTE (c
);
2154 charset
= char_charset (c
, charset_list
, &code
);
2156 dimension
= CHARSET_DIMENSION (charset
);
2157 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2158 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2159 EMIT_ONE_BYTE (leading_codes
[0]);
2160 if (leading_codes
[1])
2161 EMIT_ONE_BYTE (leading_codes
[1]);
2163 EMIT_ONE_BYTE (code
);
2166 EMIT_ONE_BYTE (code
>> 8);
2167 EMIT_ONE_BYTE (code
& 0xFF);
2171 coding
->result
= CODING_RESULT_SUCCESS
;
2172 coding
->produced_char
+= produced_chars
;
2173 coding
->produced
= dst
- coding
->destination
;
2178 /*** 7. ISO2022 handlers ***/
2180 /* The following note describes the coding system ISO2022 briefly.
2181 Since the intention of this note is to help understand the
2182 functions in this file, some parts are NOT ACCURATE or are OVERLY
2183 SIMPLIFIED. For thorough understanding, please refer to the
2184 original document of ISO2022. This is equivalent to the standard
2185 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2187 ISO2022 provides many mechanisms to encode several character sets
2188 in 7-bit and 8-bit environments. For 7-bit environments, all text
2189 is encoded using bytes less than 128. This may make the encoded
2190 text a little bit longer, but the text passes more easily through
2191 several types of gateway, some of which strip off the MSB (Most
2194 There are two kinds of character sets: control character sets and
2195 graphic character sets. The former contain control characters such
2196 as `newline' and `escape' to provide control functions (control
2197 functions are also provided by escape sequences). The latter
2198 contain graphic characters such as 'A' and '-'. Emacs recognizes
2199 two control character sets and many graphic character sets.
2201 Graphic character sets are classified into one of the following
2202 four classes, according to the number of bytes (DIMENSION) and
2203 number of characters in one dimension (CHARS) of the set:
2204 - DIMENSION1_CHARS94
2205 - DIMENSION1_CHARS96
2206 - DIMENSION2_CHARS94
2207 - DIMENSION2_CHARS96
2209 In addition, each character set is assigned an identification tag,
2210 unique for each set, called the "final character" (denoted as <F>
2211 hereafter). The <F> of each character set is decided by ECMA(*)
2212 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2213 (0x30..0x3F are for private use only).
2215 Note (*): ECMA = European Computer Manufacturers Association
2217 Here are examples of graphic character sets [NAME(<F>)]:
2218 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2219 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2220 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2221 o DIMENSION2_CHARS96 -- none for the moment
2223 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2224 C0 [0x00..0x1F] -- control character plane 0
2225 GL [0x20..0x7F] -- graphic character plane 0
2226 C1 [0x80..0x9F] -- control character plane 1
2227 GR [0xA0..0xFF] -- graphic character plane 1
2229 A control character set is directly designated and invoked to C0 or
2230 C1 by an escape sequence. The most common case is that:
2231 - ISO646's control character set is designated/invoked to C0, and
2232 - ISO6429's control character set is designated/invoked to C1,
2233 and usually these designations/invocations are omitted in encoded
2234 text. In a 7-bit environment, only C0 can be used, and a control
2235 character for C1 is encoded by an appropriate escape sequence to
2236 fit into the environment. All control characters for C1 are
2237 defined to have corresponding escape sequences.
2239 A graphic character set is at first designated to one of four
2240 graphic registers (G0 through G3), then these graphic registers are
2241 invoked to GL or GR. These designations and invocations can be
2242 done independently. The most common case is that G0 is invoked to
2243 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2244 these invocations and designations are omitted in encoded text.
2245 In a 7-bit environment, only GL can be used.
2247 When a graphic character set of CHARS94 is invoked to GL, codes
2248 0x20 and 0x7F of the GL area work as control characters SPACE and
2249 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2252 There are two ways of invocation: locking-shift and single-shift.
2253 With locking-shift, the invocation lasts until the next different
2254 invocation, whereas with single-shift, the invocation affects the
2255 following character only and doesn't affect the locking-shift
2256 state. Invocations are done by the following control characters or
2259 ----------------------------------------------------------------------
2260 abbrev function cntrl escape seq description
2261 ----------------------------------------------------------------------
2262 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2263 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2264 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2265 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2266 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2267 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2268 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2269 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2270 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2271 ----------------------------------------------------------------------
2272 (*) These are not used by any known coding system.
2274 Control characters for these functions are defined by macros
2275 ISO_CODE_XXX in `coding.h'.
2277 Designations are done by the following escape sequences:
2278 ----------------------------------------------------------------------
2279 escape sequence description
2280 ----------------------------------------------------------------------
2281 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2282 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2283 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2284 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2285 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2286 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2287 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2288 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2289 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2290 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2291 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2292 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2293 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2294 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2295 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2296 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2297 ----------------------------------------------------------------------
2299 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2300 of dimension 1, chars 94, and final character <F>, etc...
2302 Note (*): Although these designations are not allowed in ISO2022,
2303 Emacs accepts them on decoding, and produces them on encoding
2304 CHARS96 character sets in a coding system which is characterized as
2305 7-bit environment, non-locking-shift, and non-single-shift.
2307 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2308 '(' must be omitted. We refer to this as "short-form" hereafter.
2310 Now you may notice that there are a lot of ways of encoding the
2311 same multilingual text in ISO2022. Actually, there exist many
2312 coding systems such as Compound Text (used in X11's inter client
2313 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2314 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2315 localized platforms), and all of these are variants of ISO2022.
2317 In addition to the above, Emacs handles two more kinds of escape
2318 sequences: ISO6429's direction specification and Emacs' private
2319 sequence for specifying character composition.
2321 ISO6429's direction specification takes the following form:
2322 o CSI ']' -- end of the current direction
2323 o CSI '0' ']' -- end of the current direction
2324 o CSI '1' ']' -- start of left-to-right text
2325 o CSI '2' ']' -- start of right-to-left text
2326 The control character CSI (0x9B: control sequence introducer) is
2327 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2329 Character composition specification takes the following form:
2330 o ESC '0' -- start relative composition
2331 o ESC '1' -- end composition
2332 o ESC '2' -- start rule-base composition (*)
2333 o ESC '3' -- start relative composition with alternate chars (**)
2334 o ESC '4' -- start rule-base composition with alternate chars (**)
2335 Since these are not standard escape sequences of any ISO standard,
2336 the use of them with these meanings is restricted to Emacs only.
2338 (*) This form is used only in Emacs 20.7 and older versions,
2339 but newer versions can safely decode it.
2340 (**) This form is used only in Emacs 21.1 and newer versions,
2341 and older versions can't decode it.
2343 Here's a list of example usages of these composition escape
2344 sequences (categorized by `enum composition_method').
2346 COMPOSITION_RELATIVE:
2347 ESC 0 CHAR [ CHAR ] ESC 1
2348 COMPOSITION_WITH_RULE:
2349 ESC 2 CHAR [ RULE CHAR ] ESC 1
2350 COMPOSITION_WITH_ALTCHARS:
2351 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2352 COMPOSITION_WITH_RULE_ALTCHARS:
2353 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table of 256 enum iso_code_class_type values -- presumably one
   ISO 2022 code class per possible byte value; the code that fills and
   reads it is outside this listing, so confirm there.  */
2355 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if charset ID is usable with CODING: ID must not exceed
   CODING->max_charset_id and its entry in CODING->safe_charsets must be
   non-negative.  The range test comes first, so the table is never
   indexed past max_charset_id.  Both arguments are evaluated twice, and
   ID is used unparenthesized as the array index, so pass simple
   expressions without side effects.  */
2357 #define SAFE_CHARSET_P(coding, id) \
2358 ((id) <= (coding)->max_charset_id \
2359 && (coding)->safe_charsets[id] >= 0)
/* Nonzero if CODING_ISO_INITIAL (..., 1) of the coding system stored in
   coding_categories[CATEGORY] is non-negative -- presumably meaning
   that a charset is initially designated to graphic register G1, which
   is what shift-out invokes; confirm against the definition of
   CODING_ISO_INITIAL.  CATEGORY is used unparenthesized as an array
   index.  */
2362 #define SHIFT_OUT_OK(category) \
2363 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
/* Build the safe-charsets table of the ISO 2022 coding system whose
   attribute vector is ATTRS and store it in the
   coding_attr_safe_charsets slot.
   Steps visible here:
   - If the coding_attr_iso_flags value has
     CODING_ISO_FLAG_FULL_SUPPORT and the charset list is not
     Viso_2022_charset_list, the charset list is replaced by
     Viso_2022_charset_list and the safe-charsets slot is reset to Qnil.
   - The safe-charsets slot is then tested with STRINGP (presumably an
     early return when it already holds a string -- confirm).
   - The largest charset id in the list is computed and a string of
     max_charset_id + 1 elements is created.
   - For every charset in the list the element at its id is set to the
     register given for it in the coding_attr_iso_request alist, or
     else to reg96 or reg94 (the car and cdr of coding_attr_iso_usage)
     according to the charset's iso_chars_96 flag.
   NOTE(review): the fill value passed to Fmake_string, the
   declarations of several locals and some conditions are on lines
   missing from this listing.  */
2366 setup_iso_safe_charsets (attrs
)
2369 Lisp_Object charset_list
, safe_charsets
;
2370 Lisp_Object request
;
2371 Lisp_Object reg_usage
;
2374 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2377 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2378 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2379 && ! EQ (charset_list
, Viso_2022_charset_list
))
2381 CODING_ATTR_CHARSET_LIST (attrs
)
2382 = charset_list
= Viso_2022_charset_list
;
2383 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2386 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2390 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2392 int id
= XINT (XCAR (tail
));
2393 if (max_charset_id
< id
)
2394 max_charset_id
= id
;
2397 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2399 request
= AREF (attrs
, coding_attr_iso_request
);
2400 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2401 reg94
= XINT (XCAR (reg_usage
));
2402 reg96
= XINT (XCDR (reg_usage
));
2404 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2408 struct charset
*charset
;
2411 charset
= CHARSET_FROM_ID (XINT (id
));
2412 reg
= Fcdr (Fassq (id
, request
));
2414 XSTRING (safe_charsets
)->data
[XINT (id
)] = XINT (reg
);
2415 else if (charset
->iso_chars_96
)
2418 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg96
;
2423 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg94
;
2426 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2430 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2431 Check if a text is encoded in one of ISO-2022 based coding systems.
2432 If it is, return 1, else return 0. */
2435 detect_coding_iso_2022 (coding
, detect_info
)
2436 struct coding_system
*coding
;
2437 struct coding_detection_info
*detect_info
;
2439 unsigned char *src
= coding
->source
, *src_base
= src
;
2440 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2441 int multibytep
= coding
->src_multibyte
;
2442 int single_shifting
= 0;
2445 int consumed_chars
= 0;
2450 detect_info
->checked
|= CATEGORY_MASK_ISO
;
2452 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2454 struct coding_system
*this = &(coding_categories
[i
]);
2455 Lisp_Object attrs
, val
;
2457 attrs
= CODING_ID_ATTRS (this->id
);
2458 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2459 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2460 setup_iso_safe_charsets (attrs
);
2461 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2462 this->max_charset_id
= XSTRING (val
)->size
- 1;
2463 this->safe_charsets
= (char *) XSTRING (val
)->data
;
2466 /* A coding system of this category is always ASCII compatible. */
2467 src
+= coding
->head_ascii
;
2469 while (rejected
!= CATEGORY_MASK_ISO
)
2475 if (inhibit_iso_escape_detection
)
2477 single_shifting
= 0;
2479 if (c
>= '(' && c
<= '/')
2481 /* Designation sequence for a charset of dimension 1. */
2483 if (c1
< ' ' || c1
>= 0x80
2484 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2485 /* Invalid designation sequence. Just ignore. */
2490 /* Designation sequence for a charset of dimension 2. */
2492 if (c
>= '@' && c
<= 'B')
2493 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2494 id
= iso_charset_table
[1][0][c
];
2495 else if (c
>= '(' && c
<= '/')
2498 if (c1
< ' ' || c1
>= 0x80
2499 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2500 /* Invalid designation sequence. Just ignore. */
2504 /* Invalid designation sequence. Just ignore it. */
2507 else if (c
== 'N' || c
== 'O')
2509 /* ESC <Fe> for SS2 or SS3. */
2510 single_shifting
= 1;
2511 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2514 else if (c
>= '0' && c
<= '4')
2516 /* ESC <Fp> for start/end composition. */
2517 found
|= CATEGORY_MASK_ISO
;
2522 /* Invalid escape sequence. Just ignore it. */
2526 /* We found a valid designation sequence for CHARSET. */
2527 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2528 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2530 found
|= CATEGORY_MASK_ISO_7
;
2532 rejected
|= CATEGORY_MASK_ISO_7
;
2533 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2535 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2537 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2538 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2540 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2542 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2543 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2545 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2547 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2552 /* Locking shift out/in. */
2553 if (inhibit_iso_escape_detection
)
2555 single_shifting
= 0;
2556 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2557 found
|= CATEGORY_MASK_ISO_ELSE
;
2561 /* Control sequence introducer. */
2562 single_shifting
= 0;
2563 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2564 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2565 goto check_extra_latin
;
2571 if (inhibit_iso_escape_detection
)
2573 single_shifting
= 1;
2574 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2575 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2576 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2577 found
|= CATEGORY_MASK_ISO_8_1
;
2578 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2579 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2580 found
|= CATEGORY_MASK_ISO_8_2
;
2581 goto check_extra_latin
;
2586 single_shifting
= 0;
2591 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2592 found
|= CATEGORY_MASK_ISO_8_1
;
2593 /* Check the length of succeeding codes of the range
2594 0xA0..0xFF. If the byte length is even, we include
2595 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2596 only when we are not single shifting. */
2597 if (! single_shifting
2598 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2601 while (src
< src_end
)
2609 if (i
& 1 && src
< src_end
)
2610 rejected
|= CATEGORY_MASK_ISO_8_2
;
2612 found
|= CATEGORY_MASK_ISO_8_2
;
2617 single_shifting
= 0;
2618 if (! VECTORP (Vlatin_extra_code_table
)
2619 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2621 rejected
= CATEGORY_MASK_ISO
;
2624 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2625 & CODING_ISO_FLAG_LATIN_EXTRA
)
2626 found
|= CATEGORY_MASK_ISO_8_1
;
2628 rejected
|= CATEGORY_MASK_ISO_8_1
;
2629 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2630 & CODING_ISO_FLAG_LATIN_EXTRA
)
2631 found
|= CATEGORY_MASK_ISO_8_2
;
2633 rejected
|= CATEGORY_MASK_ISO_8_2
;
2636 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
2640 detect_info
->rejected
|= rejected
;
2641 detect_info
->found
|= (found
& ~rejected
);
2646 /* Set designation state into CODING. */
2647 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2651 if (final < '0' || final >= 128 \
2652 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2653 || !SAFE_CHARSET_P (coding, id)) \
2655 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2656 goto invalid_code; \
2658 prev = CODING_ISO_DESIGNATION (coding, reg); \
2659 if (id == charset_jisx0201_roman) \
2661 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2662 id = charset_ascii; \
2664 else if (id == charset_jisx0208_1978) \
2666 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2667 id = charset_jisx0208; \
2669 CODING_ISO_DESIGNATION (coding, reg) = id; \
2670 /* If there was an invalid designation to REG previously, and this \
2671 designation is ASCII to REG, we should keep this designation \
2673 if (prev == -2 && id == charset_ascii) \
2674 goto invalid_code; \
2678 #define MAYBE_FINISH_COMPOSITION() \
2681 if (composition_state == COMPOSING_NO) \
2683 /* It is assured that we have enough room for producing \
2684 characters stored in the table `components'. */ \
2685 if (charbuf + component_idx > charbuf_end) \
2686 goto no_more_source; \
2687 composition_state = COMPOSING_NO; \
2688 if (method == COMPOSITION_RELATIVE \
2689 || method == COMPOSITION_WITH_ALTCHARS) \
2691 for (i = 0; i < component_idx; i++) \
2692 *charbuf++ = components[i]; \
2693 char_offset += component_idx; \
2697 for (i = 0; i < component_idx; i += 2) \
2698 *charbuf++ = components[i]; \
2699 char_offset += (component_idx / 2) + 1; \
2704 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2705 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2706 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2707 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2708 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2711 #define DECODE_COMPOSITION_START(c1) \
2714 && composition_state == COMPOSING_COMPONENT_RULE) \
2716 component_len = component_idx; \
2717 composition_state = COMPOSING_CHAR; \
2723 MAYBE_FINISH_COMPOSITION (); \
2724 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2725 goto no_more_source; \
2726 for (p = src; p < src_end - 1; p++) \
2727 if (*p == ISO_CODE_ESC && p[1] == '1') \
2729 if (p == src_end - 1) \
2731 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2732 goto invalid_code; \
2733 goto no_more_source; \
2736 /* This is surely the start of a composition. */ \
2737 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2738 : c1 == '2' ? COMPOSITION_WITH_RULE \
2739 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2740 : COMPOSITION_WITH_RULE_ALTCHARS); \
2741 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2742 : COMPOSING_COMPONENT_CHAR); \
2743 component_idx = component_len = 0; \
2748 /* Handle composition end sequence ESC 1. */
2750 #define DECODE_COMPOSITION_END() \
2752 int nchars = (component_len > 0 ? component_idx - component_len \
2753 : method == COMPOSITION_RELATIVE ? component_idx \
2754 : (component_idx + 1) / 2); \
2756 int *saved_charbuf = charbuf; \
2757 int from = coding->produced_char + char_offset; \
2758 int to = from + nchars; \
2760 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
2761 if (method != COMPOSITION_RELATIVE) \
2763 if (component_len == 0) \
2764 for (i = 0; i < component_idx; i++) \
2765 *charbuf++ = components[i]; \
2767 for (i = 0; i < component_len; i++) \
2768 *charbuf++ = components[i]; \
2769 *saved_charbuf = saved_charbuf - charbuf; \
2771 if (method == COMPOSITION_WITH_RULE) \
2772 for (i = 0; i < component_idx; i += 2, char_offset++) \
2773 *charbuf++ = components[i]; \
2775 for (i = component_len; i < component_idx; i++, char_offset++) \
2776 *charbuf++ = components[i]; \
2777 coding->annotated = 1; \
2778 composition_state = COMPOSING_NO; \
2782 /* Decode a composition rule from the byte C1 (and maybe one more byte
2783 from SRC) and store one encoded composition rule in
2784 coding->cmp_data. */
2786 #define DECODE_COMPOSITION_RULE(c1) \
2789 if (c1 < 81) /* old format (before ver.21) */ \
2791 int gref = (c1) / 9; \
2792 int nref = (c1) % 9; \
2793 if (gref == 4) gref = 10; \
2794 if (nref == 4) nref = 10; \
2795 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2797 else if (c1 < 93) /* new format (after ver.21) */ \
2799 ONE_MORE_BYTE (c2); \
2800 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2807 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2810 decode_coding_iso_2022 (coding
)
2811 struct coding_system
*coding
;
2813 unsigned char *src
= coding
->source
+ coding
->consumed
;
2814 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2815 unsigned char *src_base
;
2816 int *charbuf
= coding
->charbuf
;
2818 = charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2819 int consumed_chars
= 0, consumed_chars_base
;
2820 int multibytep
= coding
->src_multibyte
;
2821 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2822 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2823 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2824 struct charset
*charset
;
2826 /* For handling composition sequence. */
2827 #define COMPOSING_NO 0
2828 #define COMPOSING_CHAR 1
2829 #define COMPOSING_RULE 2
2830 #define COMPOSING_COMPONENT_CHAR 3
2831 #define COMPOSING_COMPONENT_RULE 4
2833 int composition_state
= COMPOSING_NO
;
2834 enum composition_method method
;
2835 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2838 Lisp_Object attrs
, eol_type
, charset_list
;
2839 int char_offset
= coding
->produced_char
;
2840 int last_offset
= char_offset
;
2841 int last_id
= charset_ascii
;
2843 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2844 setup_iso_safe_charsets (attrs
);
2851 consumed_chars_base
= consumed_chars
;
2853 if (charbuf
>= charbuf_end
)
2858 /* We produce at most one character. */
2859 switch (iso_code_class
[c1
])
2861 case ISO_0x20_or_0x7F
:
2862 if (composition_state
!= COMPOSING_NO
)
2864 if (composition_state
== COMPOSING_RULE
2865 || composition_state
== COMPOSING_COMPONENT_RULE
)
2867 DECODE_COMPOSITION_RULE (c1
);
2868 components
[component_idx
++] = c1
;
2869 composition_state
--;
2873 if (charset_id_0
< 0
2874 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2875 /* This is SPACE or DEL. */
2876 charset
= CHARSET_FROM_ID (charset_ascii
);
2878 charset
= CHARSET_FROM_ID (charset_id_0
);
2881 case ISO_graphic_plane_0
:
2882 if (composition_state
!= COMPOSING_NO
)
2884 if (composition_state
== COMPOSING_RULE
2885 || composition_state
== COMPOSING_COMPONENT_RULE
)
2887 DECODE_COMPOSITION_RULE (c1
);
2888 components
[component_idx
++] = c1
;
2889 composition_state
--;
2893 charset
= CHARSET_FROM_ID (charset_id_0
);
2896 case ISO_0xA0_or_0xFF
:
2897 if (charset_id_1
< 0
2898 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2899 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2901 /* This is a graphic character, we fall down ... */
2903 case ISO_graphic_plane_1
:
2904 if (charset_id_1
< 0)
2906 charset
= CHARSET_FROM_ID (charset_id_1
);
2909 case ISO_carriage_return
:
2912 if (EQ (eol_type
, Qdos
))
2916 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
2917 goto no_more_source
;
2922 else if (EQ (eol_type
, Qmac
))
2928 MAYBE_FINISH_COMPOSITION ();
2929 charset
= CHARSET_FROM_ID (charset_ascii
);
2933 MAYBE_FINISH_COMPOSITION ();
2937 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2938 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2940 CODING_ISO_INVOCATION (coding
, 0) = 1;
2941 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2945 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2947 CODING_ISO_INVOCATION (coding
, 0) = 0;
2948 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2951 case ISO_single_shift_2_7
:
2952 case ISO_single_shift_2
:
2953 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2955 /* SS2 is handled as an escape sequence of ESC 'N' */
2957 goto label_escape_sequence
;
2959 case ISO_single_shift_3
:
2960 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2962 /* SS3 is handled as an escape sequence of ESC 'O' */
2964 goto label_escape_sequence
;
2966 case ISO_control_sequence_introducer
:
2967 /* CSI is handled as an escape sequence of ESC '[' ... */
2969 goto label_escape_sequence
;
2973 label_escape_sequence
:
2974 /* Escape sequences handled here are invocation,
2975 designation, direction specification, and character
2976 composition specification. */
2979 case '&': /* revision of following character set */
2981 if (!(c1
>= '@' && c1
<= '~'))
2984 if (c1
!= ISO_CODE_ESC
)
2987 goto label_escape_sequence
;
2989 case '$': /* designation of 2-byte character set */
2990 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2993 if (c1
>= '@' && c1
<= 'B')
2994 { /* designation of JISX0208.1978, GB2312.1980,
2996 DECODE_DESIGNATION (0, 2, 0, c1
);
2998 else if (c1
>= 0x28 && c1
<= 0x2B)
2999 { /* designation of DIMENSION2_CHARS94 character set */
3001 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
3003 else if (c1
>= 0x2C && c1
<= 0x2F)
3004 { /* designation of DIMENSION2_CHARS96 character set */
3006 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
3010 /* We must update these variables now. */
3011 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3012 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3015 case 'n': /* invocation of locking-shift-2 */
3016 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3017 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3019 CODING_ISO_INVOCATION (coding
, 0) = 2;
3020 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3023 case 'o': /* invocation of locking-shift-3 */
3024 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3025 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3027 CODING_ISO_INVOCATION (coding
, 0) = 3;
3028 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3031 case 'N': /* invocation of single-shift-2 */
3032 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3033 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3035 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
3037 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3041 case 'O': /* invocation of single-shift-3 */
3042 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3043 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3045 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
3047 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3051 case '0': case '2': case '3': case '4': /* start composition */
3052 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3054 DECODE_COMPOSITION_START (c1
);
3057 case '1': /* end composition */
3058 if (composition_state
== COMPOSING_NO
)
3060 DECODE_COMPOSITION_END ();
3063 case '[': /* specification of direction */
3064 if (! CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
)
3066 /* For the moment, nested direction is not supported.
3067 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3068 left-to-right, and nonzero means right-to-left. */
3072 case ']': /* end of the current direction */
3073 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3075 case '0': /* end of the current direction */
3076 case '1': /* start of left-to-right direction */
3079 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3084 case '2': /* start of right-to-left direction */
3087 coding
->mode
|= CODING_MODE_DIRECTION
;
3098 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3100 if (c1
>= 0x28 && c1
<= 0x2B)
3101 { /* designation of DIMENSION1_CHARS94 character set */
3103 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
3105 else if (c1
>= 0x2C && c1
<= 0x2F)
3106 { /* designation of DIMENSION1_CHARS96 character set */
3108 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
3112 /* We must update these variables now. */
3113 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3114 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3119 if (charset
->id
!= charset_ascii
3120 && last_id
!= charset
->id
)
3122 if (last_id
!= charset_ascii
)
3123 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3124 last_id
= charset
->id
;
3125 last_offset
= char_offset
;
3128 /* Now we know CHARSET and 1st position code C1 of a character.
3129 Produce a decoded character while getting 2nd position code
3132 if (CHARSET_DIMENSION (charset
) > 1)
3135 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3136 /* C2 is not in a valid range. */
3138 c1
= (c1
<< 8) | (c2
& 0x7F);
3139 if (CHARSET_DIMENSION (charset
) > 2)
3142 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3143 /* C2 is not in a valid range. */
3145 c1
= (c1
<< 8) | (c2
& 0x7F);
3149 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3152 MAYBE_FINISH_COMPOSITION ();
3153 for (; src_base
< src
; src_base
++, char_offset
++)
3155 if (ASCII_BYTE_P (*src_base
))
3156 *charbuf
++ = *src_base
;
3158 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3162 else if (composition_state
== COMPOSING_NO
)
3169 components
[component_idx
++] = c
;
3170 if (method
== COMPOSITION_WITH_RULE
3171 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3172 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3173 composition_state
++;
3178 MAYBE_FINISH_COMPOSITION ();
3180 consumed_chars
= consumed_chars_base
;
3182 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3188 if (last_id
!= charset_ascii
)
3189 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3190 coding
->consumed_char
+= consumed_chars_base
;
3191 coding
->consumed
= src_base
- coding
->source
;
3192 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3196 /* ISO2022 encoding stuff. */
3199 It is not enough to say just "ISO2022" on encoding, we have to
3200 specify more details. In Emacs, each coding system of ISO2022
3201 variant has the following specifications:
3202 1. Initial designation to G0 thru G3.
3203 2. Allows short-form designation?
3204 3. ASCII should be designated to G0 before control characters?
3205 4. ASCII should be designated to G0 at end of line?
3206 5. 7-bit environment or 8-bit environment?
3207 6. Use locking-shift?
3208 7. Use Single-shift?
3209 And the following two are only for Japanese:
3210 8. Use ASCII in place of JIS0201-1976-Roman?
3211 9. Use JISX0208-1983 in place of JISX0208-1978?
3212 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3213 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3217 /* Produce codes (escape sequence) for designating CHARSET to graphic
3218 register REG at DST, and increment DST. If <final-char> of CHARSET is
3219 '@', 'A', or 'B' and the coding system CODING allows, produce
3220 designation sequence of short-form. */
3222 #define ENCODE_DESIGNATION(charset, reg, coding) \
3224 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3225 char *intermediate_char_94 = "()*+"; \
3226 char *intermediate_char_96 = ",-./"; \
3227 int revision = -1; \
3230 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3231 revision = CHARSET_ISO_REVISION (charset); \
3233 if (revision >= 0) \
3235 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3236 EMIT_ONE_BYTE ('@' + revision); \
3238 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3239 if (CHARSET_DIMENSION (charset) == 1) \
3241 if (! CHARSET_ISO_CHARS_96 (charset)) \
3242 c = intermediate_char_94[reg]; \
3244 c = intermediate_char_96[reg]; \
3245 EMIT_ONE_ASCII_BYTE (c); \
3249 EMIT_ONE_ASCII_BYTE ('$'); \
3250 if (! CHARSET_ISO_CHARS_96 (charset)) \
3252 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3254 || final_char < '@' || final_char > 'B') \
3255 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3258 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3260 EMIT_ONE_ASCII_BYTE (final_char); \
3262 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
/* The following two macros produce codes (control character or escape
   sequence) for ISO2022 single-shift functions (single-shift-2 and
   single-shift-3).  When CODING has the 7-bit flag set, the two-byte
   escape sequence is emitted; otherwise the single control byte is.
   Either way CODING is then marked as being in single-shifting
   state.  */

#define ENCODE_SINGLE_SHIFT_2						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      EMIT_ONE_BYTE (ISO_CODE_SS2);					\
    else								\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');				\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)

#define ENCODE_SINGLE_SHIFT_3						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      EMIT_ONE_BYTE (ISO_CODE_SS3);					\
    else								\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');				\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING the graphic register (0 thru 3) that is now
   invoked to graphic plane 0.  */

#define ENCODE_SHIFT_IN				\
  do {						\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);		\
    CODING_ISO_INVOCATION (coding, 0) = 0;	\
  } while (0)

#define ENCODE_SHIFT_OUT			\
  do {						\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);		\
    CODING_ISO_INVOCATION (coding, 0) = 1;	\
  } while (0)

#define ENCODE_LOCKING_SHIFT_2			\
  do {						\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');	\
    CODING_ISO_INVOCATION (coding, 0) = 2;	\
  } while (0)

/* Locking-shift-3 is ESC 'o'.  ESC 'n' is locking-shift-2, and the
   decoder handles 'n' and 'o' as the invocations of register 2 and
   register 3 respectively, so the final byte here must be 'o'.  */
#define ENCODE_LOCKING_SHIFT_3			\
  do {						\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');	\
    CODING_ISO_INVOCATION (coding, 0) = 3;	\
  } while (0)
3322 /* Produce codes for a DIMENSION1 character whose character set is
3323 CHARSET and whose position-code is C1. Designation and invocation
3324 sequences are also produced in advance if necessary. */
3326 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3328 int id = CHARSET_ID (charset); \
3330 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3331 && id == charset_ascii) \
3333 id = charset_jisx0201_roman; \
3334 charset = CHARSET_FROM_ID (id); \
3337 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3339 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3340 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3342 EMIT_ONE_BYTE (c1 | 0x80); \
3343 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3346 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3348 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3351 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3353 EMIT_ONE_BYTE (c1 | 0x80); \
3357 /* Since CHARSET is not yet invoked to any graphic planes, we \
3358 must invoke it, or, at first, designate it to some graphic \
3359 register. Then repeat the loop to actually produce the \
3361 dst = encode_invocation_designation (charset, coding, dst, \
3366 /* Produce codes for a DIMENSION2 character whose character set is
3367 CHARSET and whose position-codes are C1 and C2. Designation and
3368 invocation codes are also produced in advance if necessary. */
3370 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3372 int id = CHARSET_ID (charset); \
3374 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3375 && id == charset_jisx0208) \
3377 id = charset_jisx0208_1978; \
3378 charset = CHARSET_FROM_ID (id); \
3381 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3383 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3384 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3386 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3387 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3390 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3392 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3395 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3397 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3401 /* Since CHARSET is not yet invoked to any graphic planes, we \
3402 must invoke it, or, at first, designate it to some graphic \
3403 register. Then repeat the loop to actually produce the \
3405 dst = encode_invocation_designation (charset, coding, dst, \
/* Encode character C of CHARSET.  The code point is obtained with
   ENCODE_CHAR and handed to the emitter matching CHARSET_DIMENSION
   (CHARSET): the whole code for dimension 1, otherwise the code split
   into its upper part (code >> 8) and its low byte (code & 0xFF).  */
#define ENCODE_ISO_CHARACTER(charset, c)				   \
  do {									   \
    int code = ENCODE_CHAR ((charset),(c));				   \
									   \
    if (CHARSET_DIMENSION (charset) != 1)				   \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else								   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);		   \
  } while (0)
3421 /* Produce designation and invocation codes at a place pointed by DST
3422 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3426 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3427 struct charset
*charset
;
3428 struct coding_system
*coding
;
3432 int multibytep
= coding
->dst_multibyte
;
3433 int produced_chars
= *p_nchars
;
3434 int reg
; /* graphic register number */
3435 int id
= CHARSET_ID (charset
);
3437 /* At first, check designations. */
3438 for (reg
= 0; reg
< 4; reg
++)
3439 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3444 /* CHARSET is not yet designated to any graphic registers. */
3445 /* At first check the requested designation. */
3446 reg
= CODING_ISO_REQUEST (coding
, id
);
3448 /* Since CHARSET requests no special designation, designate it
3449 to graphic register 0. */
3452 ENCODE_DESIGNATION (charset
, reg
, coding
);
3455 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3456 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3458 /* Since the graphic register REG is not invoked to any graphic
3459 planes, invoke it to graphic plane 0. */
3462 case 0: /* graphic register 0 */
3466 case 1: /* graphic register 1 */
3470 case 2: /* graphic register 2 */
3471 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3472 ENCODE_SINGLE_SHIFT_2
;
3474 ENCODE_LOCKING_SHIFT_2
;
3477 case 3: /* graphic register 3 */
3478 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3479 ENCODE_SINGLE_SHIFT_3
;
3481 ENCODE_LOCKING_SHIFT_3
;
3486 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating the
   direction of text.  */

/* Emit the control sequence introducer: ESC '[' when CODING has the
   7-bit flag set, the single byte CSI otherwise.  The flag is tested
   with `&' because CODING_ISO_FLAGS (coding) is a set of flag bits;
   comparing the whole value with `==' would match only when no other
   flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)		\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)

/* Emit CSI '2' ']' (start of right-to-left text).  The introducer
   macro is object-like, so it is used without an argument list.  */
#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)

/* Emit CSI '0' ']' (end of the current direction).  */
#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
3515 /* Produce codes for designation and invocation to reset the graphic
3516 planes and registers to initial state. */
3517 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3520 struct charset *charset; \
3522 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3524 for (reg = 0; reg < 4; reg++) \
3525 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3526 && (CODING_ISO_DESIGNATION (coding, reg) \
3527 != CODING_ISO_INITIAL (coding, reg))) \
3529 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3530 ENCODE_DESIGNATION (charset, reg, coding); \
3535 /* Produce designation sequences of charsets in the line started from
3536 SRC to a place pointed by DST, and return updated DST.
3538 If the current block ends before any end-of-line, we may fail to
3539 find all the necessary designations. */
3541 static unsigned char *
3542 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3543 struct coding_system
*coding
;
3544 int *charbuf
, *charbuf_end
;
3547 struct charset
*charset
;
3548 /* Table of charsets to be designated to each graphic register. */
3550 int c
, found
= 0, reg
;
3551 int produced_chars
= 0;
3552 int multibytep
= coding
->dst_multibyte
;
3554 Lisp_Object charset_list
;
3556 attrs
= CODING_ID_ATTRS (coding
->id
);
3557 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3558 if (EQ (charset_list
, Qiso_2022
))
3559 charset_list
= Viso_2022_charset_list
;
3561 for (reg
= 0; reg
< 4; reg
++)
3571 charset
= char_charset (c
, charset_list
, NULL
);
3572 id
= CHARSET_ID (charset
);
3573 reg
= CODING_ISO_REQUEST (coding
, id
);
3574 if (reg
>= 0 && r
[reg
] < 0)
3583 for (reg
= 0; reg
< 4; reg
++)
3585 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3586 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3592 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
/* Encode the characters held in CODING->charbuf into
   CODING->destination using ISO-2022 designation/invocation
   (ENCODE_ISO_CHARACTER), producing designation sequences at the
   beginning of a line when CODING_ISO_FLAG_DESIGNATE_AT_BOL is set.
   On completion CODING->result, CODING_ISO_BOL (CODING),
   CODING->produced_char and CODING->produced are updated.  */
3595 encode_coding_iso_2022 (coding
)
3596 struct coding_system
*coding
;
3598 int multibytep
= coding
->dst_multibyte
;
3599 int *charbuf
= coding
->charbuf
;
3600 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3601 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3602 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3605 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3606 && CODING_ISO_BOL (coding
));
3607 int produced_chars
= 0;
3608 Lisp_Object attrs
, eol_type
, charset_list
;
3609 int ascii_compatible
;
3611 int preferred_charset_id
= -1;
3613 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3614 setup_iso_safe_charsets (attrs
);
3615 /* Charset list may have been changed. */
3616 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3617 coding
->safe_charsets
3618 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs
))->data
;
3620 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
/* Process the character buffer until it is exhausted.  */
3622 while (charbuf
< charbuf_end
)
3624 ASSURE_DESTINATION (safe_room
);
3626 if (bol_designation
)
3628 unsigned char *dst_prev
= dst
;
3630 /* We have to produce designation sequences if any now. */
3631 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3632 bol_designation
= 0;
3633 /* We are sure that designation sequences are all ASCII bytes. */
3634 produced_chars
+= dst
- dst_prev
;
3641 /* Handle an annotation. */
3644 case CODING_ANNOTATE_COMPOSITION_MASK
:
3645 /* Not yet implemented. */
3647 case CODING_ANNOTATE_CHARSET_MASK
:
3648 preferred_charset_id
= charbuf
[3];
3649 if (preferred_charset_id
>= 0
3650 && NILP (Fmemq (make_number (preferred_charset_id
),
3652 preferred_charset_id
= -1;
3661 /* Now encode the character C. */
3662 if (c
< 0x20 || c
== 0x7F)
3665 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3667 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3668 ENCODE_RESET_PLANE_AND_REGISTER ();
3669 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3673 for (i
= 0; i
< 4; i
++)
3674 CODING_ISO_DESIGNATION (coding
, i
)
3675 = CODING_ISO_INITIAL (coding
, i
);
3678 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3680 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3681 ENCODE_RESET_PLANE_AND_REGISTER ();
3682 EMIT_ONE_ASCII_BYTE (c
);
3684 else if (ASCII_CHAR_P (c
))
3686 if (ascii_compatible
)
3687 EMIT_ONE_ASCII_BYTE (c
);
3690 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3691 ENCODE_ISO_CHARACTER (charset
, c
);
3694 else if (CHAR_BYTE8_P (c
))
3696 c
= CHAR_TO_BYTE8 (c
);
3701 struct charset
*charset
;
3703 if (preferred_charset_id
>= 0)
3705 charset
= CHARSET_FROM_ID (preferred_charset_id
);
3706 if (! CHAR_CHARSET_P (c
, charset
))
3707 charset
= char_charset (c
, charset_list
, NULL
);
3710 charset
= char_charset (c
, charset_list
, NULL
);
3713 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3715 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3716 charset
= CHARSET_FROM_ID (charset_ascii
);
3720 c
= coding
->default_char
;
3721 charset
= char_charset (c
, charset_list
, NULL
);
3724 ENCODE_ISO_CHARACTER (charset
, c
);
/* On the last block, reset plane and register state when the coding
   system asks for a reset at end of line.  */
3728 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3729 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3731 ASSURE_DESTINATION (safe_room
);
3732 ENCODE_RESET_PLANE_AND_REGISTER ();
3734 coding
->result
= CODING_RESULT_SUCCESS
;
3735 CODING_ISO_BOL (coding
) = bol_designation
;
3736 coding
->produced_char
+= produced_chars
;
3737 coding
->produced
= dst
- coding
->destination
;
3742 /*** 8. SJIS and BIG5 handlers ***/
3744 /* Although SJIS and BIG5 are not ISO coding systems, they are used
3745 quite widely. So, for the moment, Emacs supports them in the bare
3746 C code. But, in the future, they may be supported only by CCL. */
3748 /* SJIS is a coding system encoding three character sets: ASCII, right
3749 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3750 as is. A character of charset katakana-jisx0201 is encoded by
3751 "position-code + 0x80". A character of charset japanese-jisx0208
3752 is encoded in two bytes, but the two position-codes are divided and
3753 shifted so that they fit in the ranges below.
3755 --- CODE RANGE of SJIS ---
3756 (character set) (range)
3758 KATAKANA-JISX0201 0xA0 .. 0xDF
3759 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3760 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3761 -------------------------------
3765 /* BIG5 is a coding system encoding two character sets: ASCII and
3766 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3767 character set and is encoded in two bytes.
3769 --- CODE RANGE of BIG5 ---
3770 (character set) (range)
3772 Big5 (1st byte) 0xA1 .. 0xFE
3773 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3774 --------------------------
3778 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3779 Check if a text is encoded in SJIS. If it is, return
3780 CATEGORY_MASK_SJIS, else return 0. */
/* Detector for the SJIS category.  Adds CATEGORY_MASK_SJIS to
   DETECT_INFO->checked, advances SRC by CODING->head_ascii, and
   records the verdict by or-ing the mask into DETECT_INFO->found or
   DETECT_INFO->rejected.  */
3783 detect_coding_sjis (coding
, detect_info
)
3784 struct coding_system
*coding
;
3785 struct coding_detection_info
*detect_info
;
3787 unsigned char *src
= coding
->source
, *src_base
= src
;
3788 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3789 int multibytep
= coding
->src_multibyte
;
3790 int consumed_chars
= 0;
3795 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3796 /* A coding system of this category is always ASCII compatible. */
3797 src
+= coding
->head_ascii
;
/* 0x81..0x9F and 0xE0..0xEF are the JISX0208 1st-byte ranges of the
   SJIS code range table.  */
3806 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
/* Values below 0x40, equal to 0x7F, or above 0xFC lie outside the
   2nd-byte ranges of that table.  */
3809 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3811 found
= CATEGORY_MASK_SJIS
;
/* 0xA0..0xDF is the KATAKANA-JISX0201 range of that table.  */
3813 else if (c
>= 0xA0 && c
< 0xE0)
3814 found
= CATEGORY_MASK_SJIS
;
3818 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3822 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3824 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3827 detect_info
->found
|= found
;
3831 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3832 Check if a text is encoded in BIG5. If it is, return
3833 CATEGORY_MASK_BIG5, else return 0. */
/* Detector for the BIG5 category.  Adds CATEGORY_MASK_BIG5 to
   DETECT_INFO->checked, advances SRC by CODING->head_ascii, and
   records the verdict by or-ing the mask into DETECT_INFO->found or
   DETECT_INFO->rejected.  */
3836 detect_coding_big5 (coding
, detect_info
)
3837 struct coding_system
*coding
;
3838 struct coding_detection_info
*detect_info
;
3840 unsigned char *src
= coding
->source
, *src_base
= src
;
3841 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3842 int multibytep
= coding
->src_multibyte
;
3843 int consumed_chars
= 0;
3848 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
3849 /* A coding system of this category is always ASCII compatible. */
3850 src
+= coding
->head_ascii
;
/* Values below 0x40 or in 0x7F..0xA0 lie outside the 2nd-byte ranges
   of the BIG5 code range table.  */
3862 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3864 found
= CATEGORY_MASK_BIG5
;
3869 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3873 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3875 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3878 detect_info
->found
|= found
;
3882 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3883 decode_coding_sjis decodes SJIS text; decode_coding_big5 decodes BIG5 text. */
/* Decode SJIS bytes from CODING->source, starting at
   CODING->consumed, into CODING->charbuf.  Each character is decoded
   with CODING_DECODE_CHAR using one of three charsets (roman, kana,
   kanji).  Changes of charset are recorded with ADD_CHARSET_DATA.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.  */
3886 decode_coding_sjis (coding
)
3887 struct coding_system
*coding
;
3889 unsigned char *src
= coding
->source
+ coding
->consumed
;
3890 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3891 unsigned char *src_base
;
3892 int *charbuf
= coding
->charbuf
;
/* MAX_ANNOTATION_LENGTH elements at the end of the buffer are kept
   out of the range used for decoded characters.  */
3893 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
3894 int consumed_chars
= 0, consumed_chars_base
;
3895 int multibytep
= coding
->src_multibyte
;
3896 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3897 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3898 int char_offset
= coding
->produced_char
;
3899 int last_offset
= char_offset
;
3900 int last_id
= charset_ascii
;
3902 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* VAL supplies the charset IDs in the order roman, kana, kanji.  */
3905 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3906 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3907 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3914 consumed_chars_base
= consumed_chars
;
3916 if (charbuf
>= charbuf_end
)
3923 if (EQ (eol_type
, Qdos
))
3927 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
3928 goto no_more_source
;
3933 else if (EQ (eol_type
, Qmac
))
3938 struct charset
*charset
;
3941 charset
= charset_roman
;
3946 if (c
< 0xA0 || c
>= 0xE0)
3948 /* SJIS -> JISX0208 */
3950 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
3954 charset
= charset_kanji
;
3958 /* SJIS -> JISX0201-Kana */
3960 charset
= charset_kana
;
3963 if (charset
->id
!= charset_ascii
3964 && last_id
!= charset
->id
)
3966 if (last_id
!= charset_ascii
)
3967 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3968 last_id
= charset
->id
;
3969 last_offset
= char_offset
;
3971 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
3979 consumed_chars
= consumed_chars_base
;
3981 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
/* Record the last non-ASCII charset run, then publish the progress
   made back to CODING.  */
3987 if (last_id
!= charset_ascii
)
3988 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3989 coding
->consumed_char
+= consumed_chars_base
;
3990 coding
->consumed
= src_base
- coding
->source
;
3991 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Decode BIG5 bytes from CODING->source, starting at
   CODING->consumed, into CODING->charbuf.  Each character is decoded
   with CODING_DECODE_CHAR using one of two charsets (roman, big5).
   Changes of charset are recorded with ADD_CHARSET_DATA.  On exit
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used
   are updated.  */
3995 decode_coding_big5 (coding
)
3996 struct coding_system
*coding
;
3998 unsigned char *src
= coding
->source
+ coding
->consumed
;
3999 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4000 unsigned char *src_base
;
4001 int *charbuf
= coding
->charbuf
;
/* MAX_ANNOTATION_LENGTH elements at the end of the buffer are kept
   out of the range used for decoded characters.  */
4002 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4003 int consumed_chars
= 0, consumed_chars_base
;
4004 int multibytep
= coding
->src_multibyte
;
4005 struct charset
*charset_roman
, *charset_big5
;
4006 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4007 int char_offset
= coding
->produced_char
;
4008 int last_offset
= char_offset
;
4009 int last_id
= charset_ascii
;
4011 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* VAL supplies the charset IDs in the order roman, big5.  */
4013 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4014 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4021 consumed_chars_base
= consumed_chars
;
4023 if (charbuf
>= charbuf_end
)
4030 if (EQ (eol_type
, Qdos
))
4034 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4035 goto no_more_source
;
4040 else if (EQ (eol_type
, Qmac
))
4045 struct charset
*charset
;
4047 charset
= charset_roman
;
/* 0xA1..0xFE is the Big5 1st-byte range of the BIG5 code range
   table.  */
4051 if (c
< 0xA1 || c
> 0xFE)
/* The 2nd-byte ranges of that table are 0x40..0x7E and 0xA1..0xFE.  */
4054 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4057 charset
= charset_big5
;
4059 if (charset
->id
!= charset_ascii
4060 && last_id
!= charset
->id
)
4062 if (last_id
!= charset_ascii
)
4063 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4064 last_id
= charset
->id
;
4065 last_offset
= char_offset
;
4067 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4076 consumed_chars
= consumed_chars_base
;
4078 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
/* Record the last non-ASCII charset run, then publish the progress
   made back to CODING.  */
4084 if (last_id
!= charset_ascii
)
4085 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4086 coding
->consumed_char
+= consumed_chars_base
;
4087 coding
->consumed
= src_base
- coding
->source
;
4088 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4091 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4092 This function can encode charsets `ascii', `katakana-jisx0201',
4093 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4094 are sure that all these charsets are registered as official charset
4095 (i.e. do not have extended leading-codes). Characters of other
4096 charsets are produced without any encoding. encode_coding_sjis
4097 encodes SJIS text and encode_coding_big5 encodes BIG5 text. */
/* Encode the characters in CODING->charbuf into CODING->destination
   as SJIS.  ASCII characters are emitted as is when the coding system
   is ASCII compatible, and eight-bit characters are converted with
   CHAR_TO_BYTE8.  For other characters the charset and code point
   are looked up with char_charset: a kanji code is emitted as two
   bytes, a kana code as one byte with bit 0x80 set, and anything else
   as the low seven bits of the code.  On exit CODING->result,
   CODING->produced_char and CODING->produced are updated.  */
4100 encode_coding_sjis (coding
)
4101 struct coding_system
*coding
;
4103 int multibytep
= coding
->dst_multibyte
;
4104 int *charbuf
= coding
->charbuf
;
4105 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4106 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4107 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4109 int produced_chars
= 0;
4110 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4111 int ascii_compatible
;
4112 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4115 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* VAL supplies the charset IDs in the order roman, kana, kanji.  */
4117 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4118 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4119 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4121 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4123 while (charbuf
< charbuf_end
)
4125 ASSURE_DESTINATION (safe_room
);
4127 /* Now encode the character C. */
4128 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4129 EMIT_ONE_ASCII_BYTE (c
);
4130 else if (CHAR_BYTE8_P (c
))
4132 c
= CHAR_TO_BYTE8 (c
);
4138 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
/* Substitution path: in safe-encoding mode use
   CODING_INHIBIT_CHARACTER_SUBSTITUTION with the ASCII charset,
   otherwise retry with CODING->default_char.  */
4142 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4144 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4145 charset
= CHARSET_FROM_ID (charset_ascii
);
4149 c
= coding
->default_char
;
4150 charset
= char_charset (c
, charset_list
, &code
);
4153 if (code
== CHARSET_INVALID_CODE (charset
))
4155 if (charset
== charset_kanji
)
4159 c1
= code
>> 8, c2
= code
& 0xFF;
4160 EMIT_TWO_BYTES (c1
, c2
);
4162 else if (charset
== charset_kana
)
4163 EMIT_ONE_BYTE (code
| 0x80);
4165 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4168 coding
->result
= CODING_RESULT_SUCCESS
;
4169 coding
->produced_char
+= produced_chars
;
4170 coding
->produced
= dst
- coding
->destination
;
/* Encode the characters in CODING->charbuf into CODING->destination
   as BIG5.  ASCII characters are emitted as is when the coding system
   is ASCII compatible, and eight-bit characters are converted with
   CHAR_TO_BYTE8.  For other characters the charset and code point
   are looked up with char_charset: a big5 code is emitted as two
   bytes, anything else as the low seven bits of the code.  On exit
   CODING->result, CODING->produced_char and CODING->produced are
   updated.  */
4175 encode_coding_big5 (coding
)
4176 struct coding_system
*coding
;
4178 int multibytep
= coding
->dst_multibyte
;
4179 int *charbuf
= coding
->charbuf
;
4180 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4181 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4182 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4184 int produced_chars
= 0;
4185 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4186 int ascii_compatible
;
4187 struct charset
*charset_roman
, *charset_big5
;
4190 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* VAL supplies the charset IDs in the order roman, big5.  */
4192 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4193 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4194 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4196 while (charbuf
< charbuf_end
)
4198 ASSURE_DESTINATION (safe_room
);
4200 /* Now encode the character C. */
4201 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4202 EMIT_ONE_ASCII_BYTE (c
);
4203 else if (CHAR_BYTE8_P (c
))
4205 c
= CHAR_TO_BYTE8 (c
);
4211 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
/* Substitution path: in safe-encoding mode use
   CODING_INHIBIT_CHARACTER_SUBSTITUTION with the ASCII charset,
   otherwise retry with CODING->default_char.  */
4215 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4217 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4218 charset
= CHARSET_FROM_ID (charset_ascii
);
4222 c
= coding
->default_char
;
4223 charset
= char_charset (c
, charset_list
, &code
);
4226 if (code
== CHARSET_INVALID_CODE (charset
))
4228 if (charset
== charset_big5
)
4232 c1
= code
>> 8, c2
= code
& 0xFF;
4233 EMIT_TWO_BYTES (c1
, c2
);
4236 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4239 coding
->result
= CODING_RESULT_SUCCESS
;
4240 coding
->produced_char
+= produced_chars
;
4241 coding
->produced
= dst
- coding
->destination
;
4246 /*** 9. CCL handlers ***/
4248 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4249 Check if a text is encoded in a coding system of which
4250 encoder/decoder are written in CCL program. If it is, return
4251 CATEGORY_MASK_CCL, else return 0. */
/* Detector for the CCL category.  Adds CATEGORY_MASK_CCL to
   DETECT_INFO->checked and records the verdict by or-ing the mask
   into DETECT_INFO->found or DETECT_INFO->rejected.  Source bytes are
   looked up in the table VALIDS.  */
4254 detect_coding_ccl (coding
, detect_info
)
4255 struct coding_system
*coding
;
4256 struct coding_detection_info
*detect_info
;
4258 unsigned char *src
= coding
->source
, *src_base
= src
;
4259 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4260 int multibytep
= coding
->src_multibyte
;
4261 int consumed_chars
= 0;
/* VALIDS and HEAD_ASCII are read from the CODING passed by the
   caller, before CODING is redirected below.  */
4263 unsigned char *valids
= CODING_CCL_VALIDS (coding
);
4264 int head_ascii
= coding
->head_ascii
;
4267 detect_info
->checked
|= CATEGORY_MASK_CCL
;
/* From here on CODING refers to the entry registered for
   coding_category_ccl.  */
4269 coding
= &coding_categories
[coding_category_ccl
];
4270 attrs
= CODING_ID_ATTRS (coding
->id
);
4271 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4280 if ((valids
[c
] > 1))
4281 found
= CATEGORY_MASK_CCL
;
4283 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4287 detect_info
->found
|= found
;
/* Decode by running the CCL decoder program of CODING
   (CODING_CCL_DECODER).  The source is gathered, at most 1024
   elements at a time, into SOURCE_CHARBUF, either as characters read
   with STRING_CHAR_ADVANCE (their byte offsets are kept in
   SOURCE_BYTEIDX) or as single bytes.  It is then handed to
   ccl_driver, which writes into CODING->charbuf.  The final
   ccl.status is translated into CODING->result.  CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated on exit.  */
4292 decode_coding_ccl (coding
)
4293 struct coding_system
*coding
;
4295 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4296 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4297 int *charbuf
= coding
->charbuf
;
4298 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4299 int consumed_chars
= 0;
4300 int multibytep
= coding
->src_multibyte
;
4301 struct ccl_program ccl
;
4302 int source_charbuf
[1024];
4303 int source_byteidx
[1024];
4304 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4306 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4307 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4309 while (src
< src_end
)
4311 const unsigned char *p
= src
;
4312 int *source
, *source_end
;
/* Fill SOURCE_CHARBUF with characters, remembering where each one
   starts relative to SRC.  */
4316 while (i
< 1024 && p
< src_end
)
4318 source_byteidx
[i
] = p
- src
;
4319 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
/* Alternative fill: one element per source byte.  */
4322 while (i
< 1024 && p
< src_end
)
4323 source_charbuf
[i
++] = *p
++;
4325 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4328 source
= source_charbuf
;
4329 source_end
= source
+ i
;
4330 while (source
< source_end
)
4332 ccl_driver (&ccl
, source
, charbuf
,
4333 source_end
- source
, charbuf_end
- charbuf
,
4335 source
+= ccl
.consumed
;
4336 charbuf
+= ccl
.produced
;
4337 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
/* Some gathered elements were left unconsumed: advance SRC to the
   byte offset recorded for the first of them.  */
4340 if (source
< source_end
)
4341 src
+= source_byteidx
[source
- source_charbuf
];
4344 consumed_chars
+= source
- source_charbuf
;
/* NOTE(review): ccl.status is compared with
   CODING_RESULT_INSUFFICIENT_SRC here, whereas every other test of
   ccl.status in this function uses a CCL_STAT_* constant -- confirm
   the intended constant.  */
4346 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4347 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4353 case CCL_STAT_SUSPEND_BY_SRC
:
4354 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4356 case CCL_STAT_SUSPEND_BY_DST
:
4359 case CCL_STAT_INVALID_CMD
:
4360 coding
->result
= CODING_RESULT_INTERRUPT
;
4363 coding
->result
= CODING_RESULT_SUCCESS
;
4366 coding
->consumed_char
+= consumed_chars
;
4367 coding
->consumed
= src
- coding
->source
;
4368 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode CODING->charbuf by running the CCL encoder program of
   CODING (CODING_CCL_ENCODER).  ccl_driver writes its output into the
   local DESTINATION_CHARBUF (1024 elements), and the low eight bits
   of each produced element are then copied to CODING->destination.
   The final ccl.status is translated into CODING->result.
   CODING->produced_char and CODING->produced are updated on exit.  */
4372 encode_coding_ccl (coding
)
4373 struct coding_system
*coding
;
4375 struct ccl_program ccl
;
4376 int multibytep
= coding
->dst_multibyte
;
4377 int *charbuf
= coding
->charbuf
;
4378 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4379 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4380 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4381 unsigned char *adjusted_dst_end
= dst_end
- 1;
4382 int destination_charbuf
[1024];
4383 int i
, produced_chars
= 0;
4384 Lisp_Object attrs
, eol_type
, charset_list
;
4386 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4387 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4389 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4390 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4392 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
4394 int dst_bytes
= dst_end
- dst
;
/* 1024 is the capacity of DESTINATION_CHARBUF.  */
4395 if (dst_bytes
> 1024)
4398 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4399 charbuf_end
- charbuf
, dst_bytes
, charset_list
);
4400 charbuf
+= ccl
.consumed
;
4402 for (i
= 0; i
< ccl
.produced
; i
++)
4403 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4406 for (i
= 0; i
< ccl
.produced
; i
++)
4407 *dst
++ = destination_charbuf
[i
] & 0xFF;
4408 produced_chars
+= ccl
.produced
;
4414 case CCL_STAT_SUSPEND_BY_SRC
:
4415 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4417 case CCL_STAT_SUSPEND_BY_DST
:
4418 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
4421 case CCL_STAT_INVALID_CMD
:
4422 coding
->result
= CODING_RESULT_INTERRUPT
;
4425 coding
->result
= CODING_RESULT_SUCCESS
;
4429 coding
->produced_char
+= produced_chars
;
4430 coding
->produced
= dst
- coding
->destination
;
4436 /*** 10, 11. no-conversion handlers ***/
4438 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Raw-text "decoder": no bytes are converted here.  It sets
   CODING->chars_at_source to 1, resets CODING->consumed_char and
   CODING->consumed to 0, and reports CODING_RESULT_SUCCESS.  */
4441 decode_coding_raw_text (coding
)
4442 struct coding_system
*coding
;
4444 coding
->chars_at_source
= 1;
4445 coding
->consumed_char
= 0;
4446 coding
->consumed
= 0;
4447 coding
->result
= CODING_RESULT_SUCCESS
;
/* Encode the characters in CODING->charbuf as raw text, i.e. copy
   them to CODING->destination without charset conversion.  ASCII
   characters are emitted as is, eight-bit characters are turned back
   into bytes with CHAR_TO_BYTE8, and any other character is emitted
   in its multibyte form (CHAR_STRING_ADVANCE).  On exit
   CODING->result, CODING->produced_char and CODING->produced are
   updated.  */
4451 encode_coding_raw_text (coding
)
4452 struct coding_system
*coding
;
4454 int multibytep
= coding
->dst_multibyte
;
4455 int *charbuf
= coding
->charbuf
;
4456 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4457 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4458 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4459 int produced_chars
= 0;
4464 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4466 if (coding
->src_multibyte
)
4467 while (charbuf
< charbuf_end
)
4469 ASSURE_DESTINATION (safe_room
);
4471 if (ASCII_CHAR_P (c
))
4472 EMIT_ONE_ASCII_BYTE (c
);
4473 else if (CHAR_BYTE8_P (c
))
4475 c
= CHAR_TO_BYTE8 (c
);
4480 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4482 CHAR_STRING_ADVANCE (c
, p1
);
4485 EMIT_ONE_BYTE (*p0
);
4491 while (charbuf
< charbuf_end
)
4493 ASSURE_DESTINATION (safe_room
);
4500 if (coding
->src_multibyte
)
4502 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4504 while (charbuf
< charbuf_end
)
4506 ASSURE_DESTINATION (safe_room
);
4508 if (ASCII_CHAR_P (c
))
4510 else if (CHAR_BYTE8_P (c
))
4511 *dst
++ = CHAR_TO_BYTE8 (c
);
4513 CHAR_STRING_ADVANCE (c
, dst
);
4519 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4520 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4521 *dst
++ = *charbuf
++;
/* Count what this call wrote.  DST started at
   CODING->destination + CODING->produced, so that is the base to
   subtract; subtracting the end of the buffer (dst_bytes) would give
   a non-positive count.  */
4522 produced_chars
= dst
- (coding
->destination
+ coding
->produced
);
4525 coding
->result
= CODING_RESULT_SUCCESS
;
4526 coding
->produced_char
+= produced_chars
;
4527 coding
->produced
= dst
- coding
->destination
;
4531 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4532 Check if a text is encoded in a charset-based coding system. If it
4533 is, return 1, else return 0. */
/* Detector for the charset category.  Adds CATEGORY_MASK_CHARSET to
   DETECT_INFO->checked and records the verdict by or-ing the mask
   into DETECT_INFO->found or DETECT_INFO->rejected.  Source bytes are
   looked up in the charset-valids table of the coding system
   registered for coding_category_charset.  */
4536 detect_coding_charset (coding
, detect_info
)
4537 struct coding_system
*coding
;
4538 struct coding_detection_info
*detect_info
;
4540 unsigned char *src
= coding
->source
, *src_base
= src
;
4541 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4542 int multibytep
= coding
->src_multibyte
;
4543 int consumed_chars
= 0;
4544 Lisp_Object attrs
, valids
;
4547 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
/* From here on CODING refers to the entry registered for
   coding_category_charset.  SRC and SRC_END above were taken from
   the CODING passed by the caller.  */
4549 coding
= &coding_categories
[coding_category_charset
];
4550 attrs
= CODING_ID_ATTRS (coding
->id
);
4551 valids
= AREF (attrs
, coding_attr_charset_valids
);
4553 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4554 src
+= coding
->head_ascii
;
4561 if (NILP (AREF (valids
, c
)))
4564 found
= CATEGORY_MASK_CHARSET
;
4566 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4570 detect_info
->found
|= found
;
/* Decode bytes from CODING->source, starting at CODING->consumed,
   into CODING->charbuf for a charset-based coding system.  The table
   VALIDS (attribute coding_attr_charset_valids) is indexed by a
   source byte; its entry is either a single charset ID or a list of
   charset IDs.  The charset's dimension is obtained with
   CHARSET_DIMENSION, the code is accumulated one byte at a time
   (code << 8 | c), and the result is decoded with CODING_DECODE_CHAR.
   Changes of charset are recorded with ADD_CHARSET_DATA.  On exit
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used
   are updated.  */
4575 decode_coding_charset (coding
)
4576 struct coding_system
*coding
;
4578 unsigned char *src
= coding
->source
+ coding
->consumed
;
4579 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4580 unsigned char *src_base
;
4581 int *charbuf
= coding
->charbuf
;
4582 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4583 int consumed_chars
= 0, consumed_chars_base
;
4584 int multibytep
= coding
->src_multibyte
;
4585 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4586 int char_offset
= coding
->produced_char
;
4587 int last_offset
= char_offset
;
4588 int last_id
= charset_ascii
;
4590 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4591 valids
= AREF (attrs
, coding_attr_charset_valids
);
4598 consumed_chars_base
= consumed_chars
;
4600 if (charbuf
>= charbuf_end
)
4606 /* Here we assume that no charset maps '\r' to something
4608 if (EQ (eol_type
, Qdos
))
4612 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4613 goto no_more_source
;
4618 else if (EQ (eol_type
, Qmac
))
4624 struct charset
*charset
;
4629 val
= AREF (valids
, c
);
4634 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4635 dim
= CHARSET_DIMENSION (charset
);
4639 code
= (code
<< 8) | c
;
4642 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4647 /* VAL is a list of charset IDs. It is assured that the
4648 list is sorted by charset dimensions (smaller one
4652 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4653 dim
= CHARSET_DIMENSION (charset
);
4657 code
= (code
<< 8) | c
;
4660 CODING_DECODE_CHAR (coding
, src
, src_base
,
4661 src_end
, charset
, code
, c
);
4669 if (charset
->id
!= charset_ascii
4670 && last_id
!= charset
->id
)
4672 if (last_id
!= charset_ascii
)
4673 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4674 last_id
= charset
->id
;
4675 last_offset
= char_offset
;
4684 consumed_chars
= consumed_chars_base
;
4686 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4692 if (last_id
!= charset_ascii
)
4693 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4694 coding
->consumed_char
+= consumed_chars_base
;
4695 coding
->consumed
= src_base
- coding
->source
;
4696 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf into CODING->destination
   for a charset-based coding system.  ASCII characters are emitted
   as is when the coding system is ASCII compatible, and eight-bit
   characters are converted with CHAR_TO_BYTE8.  For other characters
   the charset and code point are looked up with char_charset, and
   the code is emitted as one to four bytes according to
   CHARSET_DIMENSION, most significant byte first.  On exit
   CODING->result, CODING->produced_char and CODING->produced are
   updated.  */
4700 encode_coding_charset (coding
)
4701 struct coding_system
*coding
;
4703 int multibytep
= coding
->dst_multibyte
;
4704 int *charbuf
= coding
->charbuf
;
4705 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4706 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4707 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4708 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4709 int produced_chars
= 0;
4710 Lisp_Object attrs
, eol_type
, charset_list
;
4711 int ascii_compatible
;
4714 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4715 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4717 while (charbuf
< charbuf_end
)
4719 struct charset
*charset
;
4722 ASSURE_DESTINATION (safe_room
);
4724 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4725 EMIT_ONE_ASCII_BYTE (c
);
4726 else if (CHAR_BYTE8_P (c
))
4728 c
= CHAR_TO_BYTE8 (c
);
4733 charset
= char_charset (c
, charset_list
, &code
);
4736 if (CHARSET_DIMENSION (charset
) == 1)
4737 EMIT_ONE_BYTE (code
);
4738 else if (CHARSET_DIMENSION (charset
) == 2)
4739 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4740 else if (CHARSET_DIMENSION (charset
) == 3)
4741 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4743 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4744 (code
>> 8) & 0xFF, code
& 0xFF);
4748 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4749 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4751 c
= coding
->default_char
;
4757 coding
->result
= CODING_RESULT_SUCCESS
;
4758 coding
->produced_char
+= produced_chars
;
4759 coding
->produced
= dst
- coding
->destination
;
4764 /*** 10. C library functions ***/
4766 /* Setup coding context CODING from information about CODING_SYSTEM.
4767 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4768 CODING_SYSTEM is invalid, signal an error. */
/* Initialize CODING from the attributes of CODING_SYSTEM.  A nil
   CODING_SYSTEM is replaced by Qno_conversion, and the ID is obtained
   with CHECK_CODING_SYSTEM_GET_ID.  The function then fills in
   CODING->head_ascii, ->common_flags, ->max_charset_id,
   ->safe_charsets and ->default_char, and selects the detector,
   decoder and encoder functions according to the coding type
   (undecided, iso-2022, charset, utf-8, utf-16, ccl, emacs-mule,
   shift-jis, big5, or otherwise raw-text).  */
4771 setup_coding_system (coding_system
, coding
)
4772 Lisp_Object coding_system
;
4773 struct coding_system
*coding
;
4776 Lisp_Object eol_type
;
4777 Lisp_Object coding_type
;
4780 if (NILP (coding_system
))
4781 coding_system
= Qno_conversion
;
4783 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4785 attrs
= CODING_ID_ATTRS (coding
->id
);
4786 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4789 coding
->head_ascii
= -1;
/* An eol type that is still a vector sets
   CODING_REQUIRE_DETECTION_MASK.  */
4790 coding
->common_flags
4791 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4793 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4794 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4795 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4796 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4798 coding_type
= CODING_ATTR_TYPE (attrs
);
4799 if (EQ (coding_type
, Qundecided
))
4801 coding
->detector
= NULL
;
4802 coding
->decoder
= decode_coding_raw_text
;
4803 coding
->encoder
= encode_coding_raw_text
;
4804 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4806 else if (EQ (coding_type
, Qiso_2022
))
4809 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4810 enum coding_category category
= XINT (CODING_ATTR_CATEGORY (attrs
));
4812 /* Invoke graphic register 0 to plane 0. */
4813 CODING_ISO_INVOCATION (coding
, 0) = 0;
4814 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4815 CODING_ISO_INVOCATION (coding
, 1)
4816 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4817 /* Setup the initial status of designation. */
4818 for (i
= 0; i
< 4; i
++)
4819 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4820 /* Not single shifting initially. */
4821 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4822 /* Beginning of buffer should also be regarded as bol. */
4823 CODING_ISO_BOL (coding
) = 1;
4824 coding
->detector
= detect_coding_iso_2022
;
4825 coding
->decoder
= decode_coding_iso_2022
;
4826 coding
->encoder
= encode_coding_iso_2022
;
4827 if (flags
& CODING_ISO_FLAG_SAFE
)
4828 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4829 coding
->common_flags
4830 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4831 | CODING_REQUIRE_FLUSHING_MASK
);
4832 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4833 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4834 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
4835 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
4836 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
/* setup_iso_safe_charsets is run on ATTRS first, so the safe-charsets
   attribute is fetched again.  */
4838 setup_iso_safe_charsets (attrs
);
4839 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4840 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4841 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4843 CODING_ISO_FLAGS (coding
) = flags
;
4845 else if (EQ (coding_type
, Qcharset
))
4847 coding
->detector
= detect_coding_charset
;
4848 coding
->decoder
= decode_coding_charset
;
4849 coding
->encoder
= encode_coding_charset
;
4850 coding
->common_flags
4851 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4853 else if (EQ (coding_type
, Qutf_8
))
4855 coding
->detector
= detect_coding_utf_8
;
4856 coding
->decoder
= decode_coding_utf_8
;
4857 coding
->encoder
= encode_coding_utf_8
;
4858 coding
->common_flags
4859 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4861 else if (EQ (coding_type
, Qutf_16
))
/* BOM handling: a cons selects utf_16_detect_bom, t selects
   utf_16_with_bom, anything else utf_16_without_bom.  */
4863 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4864 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4865 : EQ (val
, Qt
) ? utf_16_with_bom
4866 : utf_16_without_bom
);
4867 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4868 CODING_UTF_16_ENDIAN (coding
) = (NILP (val
) ? utf_16_big_endian
4869 : utf_16_little_endian
);
4870 CODING_UTF_16_SURROGATE (coding
) = 0;
4871 coding
->detector
= detect_coding_utf_16
;
4872 coding
->decoder
= decode_coding_utf_16
;
4873 coding
->encoder
= encode_coding_utf_16
;
4874 coding
->common_flags
4875 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4877 else if (EQ (coding_type
, Qccl
))
4879 coding
->detector
= detect_coding_ccl
;
4880 coding
->decoder
= decode_coding_ccl
;
4881 coding
->encoder
= encode_coding_ccl
;
4882 coding
->common_flags
4883 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4884 | CODING_REQUIRE_FLUSHING_MASK
);
4886 else if (EQ (coding_type
, Qemacs_mule
))
4888 coding
->detector
= detect_coding_emacs_mule
;
4889 coding
->decoder
= decode_coding_emacs_mule
;
4890 coding
->encoder
= encode_coding_emacs_mule
;
4891 coding
->common_flags
4892 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4893 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4894 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4896 Lisp_Object tail
, safe_charsets
;
4897 int max_charset_id
= 0;
/* First pass over Vemacs_mule_charset_list: find the largest charset
   ID, which sizes the safe-charsets string.  */
4899 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4901 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4902 max_charset_id
= XFASTINT (XCAR (tail
));
4903 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
/* Second pass: store 0 at the index of every listed charset ID.  */
4905 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4907 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
4908 coding
->max_charset_id
= max_charset_id
;
4909 coding
->safe_charsets
= (char *) XSTRING (safe_charsets
)->data
;
4912 else if (EQ (coding_type
, Qshift_jis
))
4914 coding
->detector
= detect_coding_sjis
;
4915 coding
->decoder
= decode_coding_sjis
;
4916 coding
->encoder
= encode_coding_sjis
;
4917 coding
->common_flags
4918 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4920 else if (EQ (coding_type
, Qbig5
))
4922 coding
->detector
= detect_coding_big5
;
4923 coding
->decoder
= decode_coding_big5
;
4924 coding
->encoder
= encode_coding_big5
;
4925 coding
->common_flags
4926 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4928 else /* EQ (coding_type, Qraw_text) */
4930 coding
->detector
= NULL
;
4931 coding
->decoder
= decode_coding_raw_text
;
4932 coding
->encoder
= encode_coding_raw_text
;
4933 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4939 /* Return raw-text or one of its subsidiaries that has the same
4940 eol_type as CODING-SYSTEM. */
/* Map CODING_SYSTEM to a raw-text coding system.  If the type of
   CODING_SYSTEM is already raw-text, CODING_SYSTEM itself is
   returned.  Otherwise the eol type of CODING_SYSTEM (element 2 of
   its spec) selects an element of raw-text's own eol-type vector:
   index 0 for Qunix, index 1 for Qdos, and index 2 for anything
   else.  */
4943 raw_text_coding_system (coding_system
)
4944 Lisp_Object coding_system
;
4946 Lisp_Object spec
, attrs
;
4947 Lisp_Object eol_type
, raw_text_eol_type
;
4949 spec
= CODING_SYSTEM_SPEC (coding_system
);
4950 attrs
= AREF (spec
, 0);
4952 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
4953 return coding_system
;
4955 eol_type
= AREF (spec
, 2);
4956 if (VECTORP (eol_type
))
/* SPEC is reused here for the spec of Qraw_text itself.  */
4958 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
4959 raw_text_eol_type
= AREF (spec
, 2);
4960 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
4961 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
4962 : AREF (raw_text_eol_type
, 2));
4966 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4967 does, return the subsidiary that has the same eol-spec as
4968 PARENT. Otherwise, return CODING_SYSTEM. */
/* NOTE(review): in the code below the "parent" eol type is read from
   buffer_defaults.buffer_file_coding_system; the PARENT argument is
   not referenced -- confirm whether that is intended.  */
4971 coding_inherit_eol_type (coding_system
, parent
)
4972 Lisp_Object coding_system
, parent
;
4974 Lisp_Object spec
, attrs
, eol_type
;
/* Element 0 of the spec is stored in ATTRS and element 2 in EOL_TYPE;
   only EOL_TYPE is consulted below.  */
4976 spec
= CODING_SYSTEM_SPEC (coding_system
);
4977 attrs
= AREF (spec
, 0);
4978 eol_type
= AREF (spec
, 2);
/* Only a vector-valued eol type is replaced by one of its elements.  */
4979 if (VECTORP (eol_type
))
4981 Lisp_Object parent_spec
;
4982 Lisp_Object parent_eol_type
;
/* The spec consulted here is that of the default buffer's
   buffer-file-coding-system.  */
4985 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
4986 parent_eol_type
= AREF (parent_spec
, 2);
/* Pick element 0 for unix, 1 for dos, 2 for mac.  For any other
   value CODING_SYSTEM is left as it was.  */
4987 if (EQ (parent_eol_type
, Qunix
))
4988 coding_system
= AREF (eol_type
, 0);
4989 else if (EQ (parent_eol_type
, Qdos
))
4990 coding_system
= AREF (eol_type
, 1);
4991 else if (EQ (parent_eol_type
, Qmac
))
4992 coding_system
= AREF (eol_type
, 2);
4994 return coding_system
;
4997 /* Emacs has a mechanism to automatically detect a coding system if it
4998 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4999 it's impossible to distinguish some coding systems accurately
5000 because they use the same range of codes. So, at first, coding
5001 systems are categorized as follows:
5003 o coding-category-emacs-mule
5005 The category for a coding system which has the same code range
5006 as Emacs' internal format. Assigned the coding-system (Lisp
5007 symbol) `emacs-mule' by default.
5009 o coding-category-sjis
5011 The category for a coding system which has the same code range
5012 as SJIS. Assigned the coding-system (Lisp
5013 symbol) `japanese-shift-jis' by default.
5015 o coding-category-iso-7
5017 The category for a coding system which has the same code range
5018 as ISO2022 of 7-bit environment. This doesn't use any locking
5019 shift and single shift functions. This can encode/decode all
5020 charsets. Assigned the coding-system (Lisp symbol)
5021 `iso-2022-7bit' by default.
5023 o coding-category-iso-7-tight
5025 Same as coding-category-iso-7 except that this can
5026 encode/decode only the specified charsets.
5028 o coding-category-iso-8-1
5030 The category for a coding system which has the same code range
5031 as ISO2022 of 8-bit environment and graphic plane 1 used only
5032 for DIMENSION1 charset. This doesn't use any locking shift
5033 and single shift functions. Assigned the coding-system (Lisp
5034 symbol) `iso-latin-1' by default.
5036 o coding-category-iso-8-2
5038 The category for a coding system which has the same code range
5039 as ISO2022 of 8-bit environment and graphic plane 1 used only
5040 for DIMENSION2 charset. This doesn't use any locking shift
5041 and single shift functions. Assigned the coding-system (Lisp
5042 symbol) `japanese-iso-8bit' by default.
5044 o coding-category-iso-7-else
5046 The category for a coding system which has the same code range
5047 as ISO2022 of 7-bit environment but uses locking shift or
5048 single shift functions. Assigned the coding-system (Lisp
5049 symbol) `iso-2022-7bit-lock' by default.
5051 o coding-category-iso-8-else
5053 The category for a coding system which has the same code range
5054 as ISO2022 of 8-bit environment but uses locking shift or
5055 single shift functions. Assigned the coding-system (Lisp
5056 symbol) `iso-2022-8bit-ss2' by default.
5058 o coding-category-big5
5060 The category for a coding system which has the same code range
5061 as BIG5. Assigned the coding-system (Lisp symbol)
5062 `cn-big5' by default.
5064 o coding-category-utf-8
5066 The category for a coding system which has the same code range
5067 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5068 symbol) `utf-8' by default.
5070 o coding-category-utf-16-be
5072 The category for a coding system in which a text has an
5073 Unicode signature (cf. Unicode Standard) in the order of BIG
5074 endian at the head. Assigned the coding-system (Lisp symbol)
5075 `utf-16-be' by default.
5077 o coding-category-utf-16-le
5079 The category for a coding system in which a text has an
5080 Unicode signature (cf. Unicode Standard) in the order of
5081 LITTLE endian at the head. Assigned the coding-system (Lisp
5082 symbol) `utf-16-le' by default.
5084 o coding-category-ccl
5086 The category for a coding system of which encoder/decoder is
5087 written in CCL programs. The default value is nil, i.e., no
5088 coding system is assigned.
5090 o coding-category-binary
5092 The category for a coding system not categorized in any of the
5093 above. Assigned the coding-system (Lisp symbol)
5094 `no-conversion' by default.
5096 Each of them is a Lisp symbol and the value is an actual
5097 `coding-system' (this is also a Lisp symbol) assigned by a user.
5098 What Emacs does actually is to detect a category of coding system.
5099 Then, it uses a `coding-system' assigned to it. If Emacs can't
5100 decide only one possible category, it selects a category of the
5101 highest priority. Priorities of categories are also specified by a
5102 user in a Lisp variable `coding-category-list'.
5106 #define EOL_SEEN_NONE 0
5107 #define EOL_SEEN_LF 1
5108 #define EOL_SEEN_CR 2
5109 #define EOL_SEEN_CRLF 4
5111 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5112 SOURCE is encoded. If CATEGORY is one of
5113 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5114 two-byte, else they are encoded by one-byte.
5116 Return one of EOL_SEEN_XXX. */
5118 #define MAX_EOL_CHECK_COUNT 3
/* Classify the line ends found in the SRC_BYTES bytes at SOURCE.
   EOL_SEEN starts as EOL_SEEN_NONE and takes the type of the first
   line end met; a later line end of a different type turns it into
   EOL_SEEN_LF.  TOTAL counts line ends up to MAX_EOL_CHECK_COUNT.  */
5121 detect_eol (source
, src_bytes
, category
)
5122 unsigned char *source
;
5123 EMACS_INT src_bytes
;
5124 enum coding_category category
;
5126 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5129 int eol_seen
= EOL_SEEN_NONE
;
/* UTF-16 categories: each character is examined as a two-byte unit.  */
5131 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
/* NOTE(review): CATEGORY is compared with the bitwise OR of two enum
   constants.  If the intent is "CATEGORY is one of the two
   little-endian variants", this needs two equality tests -- confirm.  */
5135 msb
= category
== (coding_category_utf_16_le
5136 | coding_category_utf_16_le_nosig
);
/* At least two bytes must remain to form one unit.  */
5139 while (src
+ 1 < src_end
)
/* A line-end candidate has a zero byte at index MSB and CR or LF in C.  */
5142 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5147 this_eol
= EOL_SEEN_LF
;
/* A CR is reported as CRLF only when a complete two-byte LF unit
   follows it; otherwise it is a bare CR.  */
5148 else if (src
+ 3 >= src_end
5149 || src
[msb
+ 2] != 0
5150 || src
[lsb
+ 2] != '\n')
5151 this_eol
= EOL_SEEN_CR
;
5153 this_eol
= EOL_SEEN_CRLF
;
5155 if (eol_seen
== EOL_SEEN_NONE
)
5156 /* This is the first end-of-line. */
5157 eol_seen
= this_eol
;
5158 else if (eol_seen
!= this_eol
)
5160 /* The found type is different from what was found before. */
5161 eol_seen
= EOL_SEEN_LF
;
/* Count this line end against the MAX_EOL_CHECK_COUNT limit.  */
5164 if (++total
== MAX_EOL_CHECK_COUNT
)
/* Every other category: line-end characters are single bytes.  */
5172 while (src
< src_end
)
5175 if (c
== '\n' || c
== '\r')
5180 this_eol
= EOL_SEEN_LF
;
/* A CR at the very end of the text, or not followed by LF, is a
   bare CR.  */
5181 else if (src
>= src_end
|| *src
!= '\n')
5182 this_eol
= EOL_SEEN_CR
;
/* The comma expression also steps SRC past the LF of the CRLF pair.  */
5184 this_eol
= EOL_SEEN_CRLF
, src
++;
5186 if (eol_seen
== EOL_SEEN_NONE
)
5187 /* This is the first end-of-line. */
5188 eol_seen
= this_eol
;
5189 else if (eol_seen
!= this_eol
)
5191 /* The found type is different from what was found before. */
5192 eol_seen
= EOL_SEEN_LF
;
/* Count this line end against the MAX_EOL_CHECK_COUNT limit.  */
5195 if (++total
== MAX_EOL_CHECK_COUNT
)
/* Replace CODING->id by the id of one element of CODING's eol-type
   vector, chosen from the EOL_SEEN_XXX bits set in EOL_SEEN.  The bits
   are tested in the order LF, CRLF, CR, so when several bits are set
   the first match in that order decides.  If none of the three bits is
   set, CODING->id is left untouched.  */
5205 adjust_coding_eol_type (coding
, eol_seen
)
5206 struct coding_system
*coding
;
5209 Lisp_Object eol_type
;
/* EOL_TYPE is indexed with AREF below, so it is expected to be a
   vector when this function is called.  */
5211 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
/* LF selects element 0.  */
5212 if (eol_seen
& EOL_SEEN_LF
)
5213 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
/* CRLF selects element 1.  */
5214 else if (eol_seen
& EOL_SEEN_CRLF
)
5215 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
/* CR selects element 2.  */
5216 else if (eol_seen
& EOL_SEEN_CR
)
5217 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5220 /* Detect how a text specified in CODING is encoded. If a coding
5221 system is detected, update fields of CODING by the detected coding
5225 detect_coding (coding
)
5226 struct coding_system
*coding
;
5228 unsigned char *src
, *src_end
;
5229 Lisp_Object attrs
, coding_type
;
5231 coding
->consumed
= coding
->consumed_char
= 0;
5232 coding
->produced
= coding
->produced_char
= 0;
5233 coding_set_source (coding
);
5235 src_end
= coding
->source
+ coding
->src_bytes
;
5237 /* If we have not yet decided the text encoding type, detect it
5239 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
5243 for (src
= coding
->source
; src
< src_end
; src
++)
5246 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
5248 || c
== ISO_CODE_SO
)))
5251 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5253 if (coding
->head_ascii
< coding
->src_bytes
)
5255 struct coding_detection_info detect_info
;
5256 enum coding_category category
;
5257 struct coding_system
*this;
5259 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
5260 for (i
= 0; i
< coding_category_raw_text
; i
++)
5262 category
= coding_priorities
[i
];
5263 this = coding_categories
+ category
;
5266 /* No coding system of this category is defined. */
5267 detect_info
.rejected
|= (1 << category
);
5269 else if (category
>= coding_category_raw_text
)
5271 else if (detect_info
.checked
& (1 << category
))
5273 if (detect_info
.found
& (1 << category
))
5276 else if ((*(this->detector
)) (coding
, &detect_info
)
5277 && detect_info
.found
& (1 << category
))
5280 if (i
< coding_category_raw_text
)
5281 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5282 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5283 setup_coding_system (Qraw_text
, coding
);
5284 else if (detect_info
.rejected
)
5285 for (i
= 0; i
< coding_category_raw_text
; i
++)
5286 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5288 this = coding_categories
+ coding_priorities
[i
];
5289 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5295 attrs
= CODING_ID_ATTRS (coding
->id
);
5296 coding_type
= CODING_ATTR_TYPE (attrs
);
5298 /* If we have not yet decided the EOL type, detect it now. But, the
5299 detection is impossible for a CCL based coding system, in which
5300 case, we detect the EOL type after decoding. */
5301 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
))
5302 && ! EQ (coding_type
, Qccl
))
5304 int eol_seen
= detect_eol (coding
->source
, coding
->src_bytes
,
5305 XINT (CODING_ATTR_CATEGORY (attrs
)));
5307 if (eol_seen
!= EOL_SEEN_NONE
)
5308 adjust_coding_eol_type (coding
, eol_seen
);
5315 struct coding_system
*coding
;
5317 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
)))
5319 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5320 unsigned char *pend
= p
+ coding
->produced
;
5321 int eol_seen
= EOL_SEEN_NONE
;
5323 for (; p
< pend
; p
++)
5326 eol_seen
|= EOL_SEEN_LF
;
5327 else if (*p
== '\r')
5329 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5331 eol_seen
|= EOL_SEEN_CRLF
;
5335 eol_seen
|= EOL_SEEN_CR
;
5338 if (eol_seen
!= EOL_SEEN_NONE
)
5339 adjust_coding_eol_type (coding
, eol_seen
);
5342 if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qmac
))
5344 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5345 unsigned char *pend
= p
+ coding
->produced
;
5347 for (; p
< pend
; p
++)
5351 else if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qdos
))
5353 unsigned char *p
, *pbeg
, *pend
;
5354 Lisp_Object undo_list
;
5356 move_gap_both (coding
->dst_pos
+ coding
->produced_char
,
5357 coding
->dst_pos_byte
+ coding
->produced
);
5358 undo_list
= current_buffer
->undo_list
;
5359 current_buffer
->undo_list
= Qt
;
5360 del_range_2 (coding
->dst_pos
, coding
->dst_pos_byte
, GPT
, GPT_BYTE
, 0);
5361 current_buffer
->undo_list
= undo_list
;
5363 pend
= pbeg
+ coding
->produced
;
5365 for (p
= pend
- 1; p
>= pbeg
; p
--)
5368 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
- p
- 1);
5371 coding
->produced_char
-= coding
->produced
- (pend
- pbeg
);
5372 coding
->produced
= pend
- pbeg
;
5373 insert_from_gap (coding
->produced_char
, coding
->produced
);
/* Pass the entries of CODING->charbuf, from its start up to
   CODING->charbuf_used, through translate_char with TABLE, storing
   each result back in place.  */
5378 translate_chars (coding
, table
)
5379 struct coding_system
*coding
;
5382 int *charbuf
= coding
->charbuf
;
5383 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
/* NOTE(review): presumably nothing is translated when the characters
   are still at the source rather than in charbuf -- confirm.  */
5386 if (coding
->chars_at_source
)
5389 while (charbuf
< charbuf_end
)
/* Overwrite the current slot with the translation of C, then advance.  */
5395 *charbuf
++ = translate_char (table
, c
);
5400 produce_chars (coding
)
5401 struct coding_system
*coding
;
5403 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5404 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5406 int produced_chars
= 0;
5408 if (! coding
->chars_at_source
)
5410 /* Characters are in coding->charbuf. */
5411 int *buf
= coding
->charbuf
;
5412 int *buf_end
= buf
+ coding
->charbuf_used
;
5413 unsigned char *adjusted_dst_end
;
5415 if (BUFFERP (coding
->src_object
)
5416 && EQ (coding
->src_object
, coding
->dst_object
))
5417 dst_end
= coding
->source
+ coding
->consumed
;
5418 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5420 while (buf
< buf_end
)
5424 if (dst
>= adjusted_dst_end
)
5426 dst
= alloc_destination (coding
,
5427 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5429 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5430 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5434 if (coding
->dst_multibyte
5435 || ! CHAR_BYTE8_P (c
))
5436 CHAR_STRING_ADVANCE (c
, dst
);
5438 *dst
++ = CHAR_TO_BYTE8 (c
);
5442 /* This is an annotation datum. */
5448 unsigned char *src
= coding
->source
;
5449 unsigned char *src_end
= src
+ coding
->src_bytes
;
5450 Lisp_Object eol_type
;
5452 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5454 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5456 if (coding
->src_multibyte
)
5463 unsigned char *src_base
= src
;
5469 if (EQ (eol_type
, Qdos
))
5473 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
5474 goto no_more_source
;
5479 else if (EQ (eol_type
, Qmac
))
5484 coding
->consumed
= src
- coding
->source
;
5486 if (EQ (coding
->src_object
, coding
->dst_object
))
5490 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5492 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5493 coding_set_source (coding
);
5494 src
= coding
->source
+ coding
->consumed
;
5495 src_end
= coding
->source
+ coding
->src_bytes
;
5505 while (src
< src_end
)
5512 if (EQ (eol_type
, Qdos
))
5518 else if (EQ (eol_type
, Qmac
))
5521 if (dst
>= dst_end
- 1)
5523 coding
->consumed
= src
- coding
->source
;
5525 if (EQ (coding
->src_object
, coding
->dst_object
))
5527 if (dst
>= dst_end
- 1)
5529 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5531 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5532 coding_set_source (coding
);
5533 src
= coding
->source
+ coding
->consumed
;
5534 src_end
= coding
->source
+ coding
->src_bytes
;
5542 if (!EQ (coding
->src_object
, coding
->dst_object
))
5544 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5548 EMACS_INT offset
= src
- coding
->source
;
5550 dst
= alloc_destination (coding
, require
, dst
);
5551 coding_set_source (coding
);
5552 src
= coding
->source
+ offset
;
5553 src_end
= coding
->source
+ coding
->src_bytes
;
5556 produced_chars
= coding
->src_chars
;
5557 while (src
< src_end
)
5563 if (EQ (eol_type
, Qdos
))
5570 else if (EQ (eol_type
, Qmac
))
5576 coding
->consumed
= coding
->src_bytes
;
5577 coding
->consumed_char
= coding
->src_chars
;
5580 produced
= dst
- (coding
->destination
+ coding
->produced
);
5581 if (BUFFERP (coding
->dst_object
))
5582 insert_from_gap (produced_chars
, produced
);
5583 coding
->produced
+= produced
;
5584 coding
->produced_char
+= produced_chars
;
5585 return produced_chars
;
5588 /* Compose text in CODING->object according to the annotation data at
5589 CHARBUF. CHARBUF is an array:
5590 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5594 produce_composition (coding
, charbuf
)
5595 struct coding_system
*coding
;
5600 enum composition_method method
;
5601 Lisp_Object components
;
5604 from
= coding
->dst_pos
+ charbuf
[2];
5605 to
= coding
->dst_pos
+ charbuf
[3];
5606 method
= (enum composition_method
) (charbuf
[4]);
5608 if (method
== COMPOSITION_RELATIVE
)
5612 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5617 for (i
= 0; i
< len
; i
++)
5618 args
[i
] = make_number (charbuf
[i
]);
5619 components
= (method
== COMPOSITION_WITH_ALTCHARS
5620 ? Fstring (len
, args
) : Fvector (len
, args
));
5622 compose_text (from
, to
, components
, Qnil
, coding
->dst_object
);
5626 /* Put `charset' property on text in CODING->object according to
5627 the annotation data at CHARBUF. CHARBUF is an array:
5628 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5632 produce_charset (coding
, charbuf
)
5633 struct coding_system
*coding
;
5636 EMACS_INT from
= coding
->dst_pos
+ charbuf
[2];
5637 EMACS_INT to
= coding
->dst_pos
+ charbuf
[3];
5638 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[4]);
5640 Fput_text_property (make_number (from
), make_number (to
),
5641 Qcharset
, CHARSET_NAME (charset
),
5642 coding
->dst_object
);
5646 #define CHARBUF_SIZE 0x4000
5648 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5650 int size = CHARBUF_SIZE;; \
5652 coding->charbuf = NULL; \
5653 while (size > 1024) \
5655 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5656 if (coding->charbuf) \
5660 if (! coding->charbuf) \
5662 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5663 return coding->result; \
5665 coding->charbuf_size = size; \
5670 produce_annotation (coding
)
5671 struct coding_system
*coding
;
5673 int *charbuf
= coding
->charbuf
;
5674 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5676 if (NILP (coding
->dst_object
))
5679 while (charbuf
< charbuf_end
)
5685 int len
= -*charbuf
;
5688 case CODING_ANNOTATE_COMPOSITION_MASK
:
5689 produce_composition (coding
, charbuf
);
5691 case CODING_ANNOTATE_CHARSET_MASK
:
5692 produce_charset (coding
, charbuf
);
5702 /* Decode the data at CODING->src_object into CODING->dst_object.
5703 CODING->src_object is a buffer, a string, or nil.
5704 CODING->dst_object is a buffer.
5706 If CODING->src_object is a buffer, it must be the current buffer.
5707 In this case, if CODING->src_pos is positive, it is a position of
5708 the source text in the buffer, otherwise, the source text is in the
5709 gap area of the buffer, and CODING->src_pos specifies the offset of
5710 the text from GPT (which must be the same as PT). If this is the
5711 same buffer as CODING->dst_object, CODING->src_pos must be
5714 If CODING->src_object is a string, CODING->src_pos is an index to
5717 If CODING->src_object is nil, CODING->source must already point to
5718 the non-relocatable memory area. In this case, CODING->src_pos is
5719 an offset from CODING->source.
5721 The decoded data is inserted at the current point of the buffer
5726 decode_coding (coding
)
5727 struct coding_system
*coding
;
5731 if (BUFFERP (coding
->src_object
)
5732 && coding
->src_pos
> 0
5733 && coding
->src_pos
< GPT
5734 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5735 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
5737 if (BUFFERP (coding
->dst_object
))
5739 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5740 set_buffer_internal (XBUFFER (coding
->dst_object
));
5742 move_gap_both (PT
, PT_BYTE
);
5745 coding
->consumed
= coding
->consumed_char
= 0;
5746 coding
->produced
= coding
->produced_char
= 0;
5747 coding
->chars_at_source
= 0;
5748 coding
->result
= CODING_RESULT_SUCCESS
;
5751 ALLOC_CONVERSION_WORK_AREA (coding
);
5753 attrs
= CODING_ID_ATTRS (coding
->id
);
5757 coding_set_source (coding
);
5758 coding
->annotated
= 0;
5759 (*(coding
->decoder
)) (coding
);
5760 if (!NILP (CODING_ATTR_DECODE_TBL (attrs
)))
5761 translate_chars (coding
, CODING_ATTR_DECODE_TBL (attrs
));
5762 else if (!NILP (Vstandard_translation_table_for_decode
))
5763 translate_chars (coding
, Vstandard_translation_table_for_decode
);
5764 coding_set_destination (coding
);
5765 produce_chars (coding
);
5766 if (coding
->annotated
)
5767 produce_annotation (coding
);
5769 while (coding
->consumed
< coding
->src_bytes
5770 && ! coding
->result
);
5772 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qccl
)
5773 && SYMBOLP (CODING_ID_EOL_TYPE (coding
->id
))
5774 && ! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5775 decode_eol (coding
);
5777 coding
->carryover_bytes
= 0;
5778 if (coding
->consumed
< coding
->src_bytes
)
5780 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5783 coding_set_source (coding
);
5784 coding_set_destination (coding
);
5785 src
= coding
->source
+ coding
->consumed
;
5787 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5789 /* Flush out unprocessed data as binary chars. We are sure
5790 that the number of data is less than the size of
5792 while (nbytes
-- > 0)
5796 coding
->charbuf
[coding
->charbuf_used
++] = (c
& 0x80 ? - c
: c
);
5798 produce_chars (coding
);
5802 /* Record unprocessed bytes in coding->carryover. We are
5803 sure that the number of data is less than the size of
5804 coding->carryover. */
5805 unsigned char *p
= coding
->carryover
;
5807 coding
->carryover_bytes
= nbytes
;
5808 while (nbytes
-- > 0)
5811 coding
->consumed
= coding
->src_bytes
;
5814 return coding
->result
;
5818 /* Extract an annotation datum from a composition starting at POS and
5819 ending before LIMIT of CODING->src_object (buffer or string), store
5820 the data in BUF, set *STOP to a starting position of the next
5821 composition (if any) or to LIMIT, and return the address of the
5822 next element of BUF.
5824 If such an annotation is not found, set *STOP to a starting
5825 position of a composition after POS (if any) or to LIMIT, and
5829 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
5830 EMACS_INT pos
, limit
;
5831 struct coding_system
*coding
;
5835 EMACS_INT start
, end
;
5838 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
5841 else if (start
> pos
)
5847 /* We found a composition. Store the corresponding
5848 annotation data in BUF. */
5850 enum composition_method method
= COMPOSITION_METHOD (prop
);
5851 int nchars
= COMPOSITION_LENGTH (prop
);
5853 ADD_COMPOSITION_DATA (buf
, 0, nchars
, method
);
5854 if (method
!= COMPOSITION_RELATIVE
)
5856 Lisp_Object components
;
5859 components
= COMPOSITION_COMPONENTS (prop
);
5860 if (VECTORP (components
))
5862 len
= XVECTOR (components
)->size
;
5863 for (i
= 0; i
< len
; i
++)
5864 *buf
++ = XINT (AREF (components
, i
));
5866 else if (STRINGP (components
))
5868 len
= XSTRING (components
)->size
;
5872 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
5876 else if (INTEGERP (components
))
5879 *buf
++ = XINT (components
);
5881 else if (CONSP (components
))
5883 for (len
= 0; CONSP (components
);
5884 len
++, components
= XCDR (components
))
5885 *buf
++ = XINT (XCAR (components
));
5893 if (find_composition (end
, limit
, &start
, &end
, &prop
,
5904 /* Extract an annotation datum from a text property `charset' at POS of
5905 CODING->src_object (buffer or string), store the data in BUF, set
5906 *STOP to the position where the value of `charset' property changes
5907 (limiting by LIMIT), and return the address of the next element of
5910 If the property value is nil, set *STOP to the position where the
5911 property value is non-nil (limiting by LIMIT), and return BUF. */
/* Look up the `charset' text property at POS in CODING->src_object,
   append charset annotation data to BUF with ADD_CHARSET_DATA, and
   store in *STOP the position of the next change of that property,
   searching no further than LIMIT.  */
5914 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
5915 EMACS_INT pos
, limit
;
5916 struct coding_system
*coding
;
5920 Lisp_Object val
, next
;
/* VAL is the value of the `charset' property at POS.  */
5923 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
/* Only a non-nil value that satisfies CHARSETP yields a charset id.  */
5924 if (! NILP (val
) && CHARSETP (val
))
5925 id
= XINT (CHARSET_SYMBOL_ID (val
));
/* The two zeros are passed in the argument slots after BUF; ID is the
   last argument.  */
5928 ADD_CHARSET_DATA (buf
, 0, 0, id
);
/* Find where the property value next changes, bounded by LIMIT.  */
5929 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
5931 make_number (limit
));
5932 *stop
= XINT (next
);
/* Read characters from CODING->source, starting after the bytes
   already consumed, into CODING->charbuf, calling the composition and
   charset annotation handlers at their stop positions.  On exit the
   consumed, consumed_char and charbuf_used fields of CODING are
   updated and chars_at_source is cleared.  */
5938 consume_chars (coding
)
5939 struct coding_system
*coding
;
5941 int *buf
= coding
->charbuf
;
5942 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
/* Resume right after the bytes and characters consumed so far.  */
5943 const unsigned char *src
= coding
->source
+ coding
->consumed
;
5944 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
5945 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
5946 int multibytep
= coding
->src_multibyte
;
5947 Lisp_Object eol_type
;
5949 EMACS_INT stop
, stop_composition
, stop_charset
;
5952 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5953 if (VECTORP (eol_type
))
5956 /* Note: composition handling is not yet implemented. */
5957 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* The composition flag was cleared just above, so this test cannot
   succeed as the code stands.  */
5959 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
5960 stop
= stop_composition
= pos
;
5962 stop
= stop_composition
= end_pos
;
/* With charset annotation enabled, the first stop is POS itself.  */
5963 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
5964 stop
= stop_charset
= pos
;
5966 stop_charset
= end_pos
;
5968 /* Compensate for CRLF and annotation. */
5969 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
5970 while (buf
< buf_end
)
/* Each handler appends its data to BUF and moves its own stop
   position forward.  */
5978 if (pos
== stop_composition
)
5979 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
5980 buf
, &stop_composition
);
5981 if (pos
== stop_charset
)
5982 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
5983 buf
, &stop_charset
);
/* The next stop is the nearer of the two.  */
5984 stop
= (stop_composition
< stop_charset
5985 ? stop_composition
: stop_charset
);
5991 c
= STRING_CHAR_ADVANCE (src
);
/* A CR gets special treatment when CODING_MODE_SELECTIVE_DISPLAY is
   set in CODING->mode.  */
5992 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
5994 if (! EQ (eol_type
, Qunix
))
5998 if (EQ (eol_type
, Qdos
))
/* Record how far the source was read and how much of charbuf is
   filled.  */
6008 coding
->consumed
= src
- coding
->source
;
6009 coding
->consumed_char
= pos
- coding
->src_pos
;
6010 coding
->charbuf_used
= buf
- coding
->charbuf
;
6011 coding
->chars_at_source
= 0;
6015 /* Encode the text at CODING->src_object into CODING->dst_object.
6016 CODING->src_object is a buffer or a string.
6017 CODING->dst_object is a buffer or nil.
6019 If CODING->src_object is a buffer, it must be the current buffer.
6020 In this case, if CODING->src_pos is positive, it is a position of
6021 the source text in the buffer, otherwise, the source text is in the
6022 gap area of the buffer, and coding->src_pos specifies the offset of
6023 the text from GPT (which must be the same as PT). If this is the
6024 same buffer as CODING->dst_object, CODING->src_pos must be
6025 negative and CODING should not have `pre-write-conversion'.
6027 If CODING->src_object is a string, CODING should not have
6028 `pre-write-conversion'.
6030 If CODING->dst_object is a buffer, the encoded data is inserted at
6031 the current point of that buffer.
6033 If CODING->dst_object is nil, the encoded data is placed at the
6034 memory area specified by CODING->destination. */
/* Run the encoding loop for CODING: repeatedly fill the work area
   with consume_chars, apply a translation table if one is set, and
   call CODING->encoder, until CODING->src_chars characters have been
   consumed.  Return CODING->result.  */
6037 encode_coding (coding
)
6038 struct coding_system
*coding
;
6042 attrs
= CODING_ID_ATTRS (coding
->id
);
/* When the destination is a buffer, make it current and take the
   destination multibyteness from it.  */
6044 if (BUFFERP (coding
->dst_object
))
6046 set_buffer_internal (XBUFFER (coding
->dst_object
));
6047 coding
->dst_multibyte
6048 = ! NILP (current_buffer
->enable_multibyte_characters
);
/* Reset the progress counters and the result code.  */
6051 coding
->consumed
= coding
->consumed_char
= 0;
6052 coding
->produced
= coding
->produced_char
= 0;
6053 coding
->result
= CODING_RESULT_SUCCESS
;
6056 ALLOC_CONVERSION_WORK_AREA (coding
);
6059 coding_set_source (coding
);
6060 consume_chars (coding
);
/* The coding system's own encode table takes precedence over the
   standard one; at most one of the two is applied.  */
6062 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs
)))
6063 translate_chars (coding
, CODING_ATTR_ENCODE_TBL (attrs
));
6064 else if (!NILP (Vstandard_translation_table_for_encode
))
6065 translate_chars (coding
, Vstandard_translation_table_for_encode
);
6067 coding_set_destination (coding
);
6068 (*(coding
->encoder
)) (coding
);
/* Repeat while source characters remain unconsumed.  */
6069 } while (coding
->consumed_char
< coding
->src_chars
);
/* For a buffer destination, hand the produced characters and bytes
   to insert_from_gap.  */
6071 if (BUFFERP (coding
->dst_object
))
6072 insert_from_gap (coding
->produced_char
, coding
->produced
);
6074 return (coding
->result
);
6079 /* List of currently used working buffers. */
6080 Lisp_Object Vcode_conversion_work_buf_list
;
6082 /* A working buffer used by the top level conversion. */
6083 Lisp_Object Vcode_conversion_reused_work_buf
;
6086 /* Return a working buffer that can be freely used by the following
6087 code conversion. MULTIBYTEP specifies the multibyteness of the
6091 make_conversion_work_buffer (multibytep
)
6094 struct buffer
*current
= current_buffer
;
6097 if (NILP (Vcode_conversion_work_buf_list
))
6099 if (NILP (Vcode_conversion_reused_work_buf
))
6100 Vcode_conversion_reused_work_buf
6101 = Fget_buffer_create (build_string (" *code-conversion-work*"));
6102 Vcode_conversion_work_buf_list
6103 = Fcons (Vcode_conversion_reused_work_buf
, Qnil
);
6107 int depth
= XINT (Flength (Vcode_conversion_work_buf_list
));
6110 sprintf (str
, " *code-conversion-work*<%d>", depth
);
6111 Vcode_conversion_work_buf_list
6112 = Fcons (Fget_buffer_create (build_string (str
)),
6113 Vcode_conversion_work_buf_list
);
6116 buf
= XCAR (Vcode_conversion_work_buf_list
);
6117 set_buffer_internal (XBUFFER (buf
));
6118 current_buffer
->undo_list
= Qt
;
6120 Fset_buffer_multibyte (multibytep
? Qt
: Qnil
, Qnil
);
6121 set_buffer_internal (current
);
6125 static struct coding_system
*saved_coding
;
6128 code_conversion_restore (info
)
6131 int depth
= XINT (Flength (Vcode_conversion_work_buf_list
));
6136 buf
= XCAR (Vcode_conversion_work_buf_list
);
6137 Vcode_conversion_work_buf_list
= XCDR (Vcode_conversion_work_buf_list
);
6138 if (depth
> 1 && !NILP (Fbuffer_live_p (buf
)))
6142 if (EQ (saved_coding
->dst_object
, Qt
)
6143 && saved_coding
->destination
)
6144 xfree (saved_coding
->destination
);
6146 return save_excursion_restore (info
);
6151 decode_coding_gap (coding
, chars
, bytes
)
6152 struct coding_system
*coding
;
6153 EMACS_INT chars
, bytes
;
6155 int count
= specpdl_ptr
- specpdl
;
6157 saved_coding
= coding
;
6158 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6160 coding
->src_object
= Fcurrent_buffer ();
6161 coding
->src_chars
= chars
;
6162 coding
->src_bytes
= bytes
;
6163 coding
->src_pos
= -chars
;
6164 coding
->src_pos_byte
= -bytes
;
6165 coding
->src_multibyte
= chars
< bytes
;
6166 coding
->dst_object
= coding
->src_object
;
6167 coding
->dst_pos
= PT
;
6168 coding
->dst_pos_byte
= PT_BYTE
;
6169 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
6170 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6172 if (CODING_REQUIRE_DETECTION (coding
))
6173 detect_coding (coding
);
6175 decode_coding (coding
);
6177 unbind_to (count
, Qnil
);
6178 return coding
->result
;
6182 encode_coding_gap (coding
, chars
, bytes
)
6183 struct coding_system
*coding
;
6184 EMACS_INT chars
, bytes
;
6186 int count
= specpdl_ptr
- specpdl
;
6189 saved_coding
= coding
;
6190 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6192 buffer
= Fcurrent_buffer ();
6193 coding
->src_object
= buffer
;
6194 coding
->src_chars
= chars
;
6195 coding
->src_bytes
= bytes
;
6196 coding
->src_pos
= -chars
;
6197 coding
->src_pos_byte
= -bytes
;
6198 coding
->src_multibyte
= chars
< bytes
;
6199 coding
->dst_object
= coding
->src_object
;
6200 coding
->dst_pos
= PT
;
6201 coding
->dst_pos_byte
= PT_BYTE
;
6203 encode_coding (coding
);
6205 unbind_to (count
, Qnil
);
6206 return coding
->result
;
6210 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6211 SRC_OBJECT into DST_OBJECT by coding context CODING.
6213 SRC_OBJECT is a buffer, a string, or Qnil.
6215 If it is a buffer, the text is at point of the buffer. FROM and TO
6216 are positions in the buffer.
6218 If it is a string, the text is at the beginning of the string.
6219 FROM and TO are indices to the string.
6221 If it is nil, the text is at coding->source. FROM and TO are
6222 indices to coding->source.
6224 DST_OBJECT is a buffer, Qt, or Qnil.
6226 If it is a buffer, the decoded text is inserted at point of the
6227 buffer. If the buffer is the same as SRC_OBJECT, the source text
6230 If it is Qt, a string is made from the decoded text, and
6231 set in CODING->dst_object.
6233 If it is Qnil, the decoded text is stored at CODING->destination.
6234 The caller must allocate CODING->dst_bytes bytes at
6235 CODING->destination by xmalloc. If the decoded text is longer than
6236 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
/* Entry point for decoding a range of SRC_OBJECT into DST_OBJECT.
   It fills in the source and destination fields of CODING, runs
   detect_coding when CODING_REQUIRE_DETECTION says so, calls
   decode_coding, then runs the coding system's post-read function
   (if any) and delivers the result in the form DST_OBJECT asks for.  */
6240 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6242 struct coding_system
*coding
;
6243 Lisp_Object src_object
;
6244 EMACS_INT from
, from_byte
, to
, to_byte
;
6245 Lisp_Object dst_object
;
6247 int count
= specpdl_ptr
- specpdl
;
6248 unsigned char *destination
;
6249 EMACS_INT dst_bytes
;
6250 EMACS_INT chars
= to
- from
;
6251 EMACS_INT bytes
= to_byte
- from_byte
;
6254 saved_coding
= coding
;
6255 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
/* With a nil destination, remember the output area and its size
   as found in CODING.  */
6257 if (NILP (dst_object
))
6259 destination
= coding
->destination
;
6260 dst_bytes
= coding
->dst_bytes
;
6263 coding
->src_object
= src_object
;
6264 coding
->src_chars
= chars
;
6265 coding
->src_bytes
= bytes
;
6266 coding
->src_multibyte
= chars
< bytes
;
6268 if (STRINGP (src_object
))
6270 coding
->src_pos
= from
;
6271 coding
->src_pos_byte
= from_byte
;
6273 else if (BUFFERP (src_object
))
6275 set_buffer_internal (XBUFFER (src_object
));
6277 move_gap_both (from
, from_byte
);
/* Source and destination are the same buffer: the source range is
   deleted from the buffer and the source position is recorded as the
   negated character / byte counts.  */
6278 if (EQ (src_object
, dst_object
))
6280 TEMP_SET_PT_BOTH (from
, from_byte
);
6281 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6282 coding
->src_pos
= -chars
;
6283 coding
->src_pos_byte
= -bytes
;
6287 coding
->src_pos
= from
;
6288 coding
->src_pos_byte
= from_byte
;
6292 if (CODING_REQUIRE_DETECTION (coding
))
6293 detect_coding (coding
);
6294 attrs
= CODING_ID_ATTRS (coding
->id
);
/* Decode into a conversion work buffer when a post-read function is
   defined or a string result (Qt) was requested.  */
6296 if (! NILP (CODING_ATTR_POST_READ (attrs
))
6297 || EQ (dst_object
, Qt
))
6299 coding
->dst_object
= make_conversion_work_buffer (1);
6300 coding
->dst_pos
= BEG
;
6301 coding
->dst_pos_byte
= BEG_BYTE
;
6302 coding
->dst_multibyte
= 1;
6304 else if (BUFFERP (dst_object
))
6306 coding
->dst_object
= dst_object
;
6307 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6308 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6309 coding
->dst_multibyte
6310 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6314 coding
->dst_object
= Qnil
;
6315 coding
->dst_multibyte
= 1;
6318 decode_coding (coding
);
6320 if (BUFFERP (coding
->dst_object
))
6321 set_buffer_internal (XBUFFER (coding
->dst_object
));
/* Run the post-read function with the number of produced characters
   as its argument.  Afterwards the produced counts are adjusted by
   however much Z and Z_BYTE changed during the call.  */
6323 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6325 struct gcpro gcpro1
, gcpro2
;
6326 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6329 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6330 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6331 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6332 make_number (coding
->produced_char
));
6335 coding
->produced_char
+= Z
- prev_Z
;
6336 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
/* String result requested: take the contents of the current buffer.  */
6339 if (EQ (dst_object
, Qt
))
6341 coding
->dst_object
= Fbuffer_string ();
/* Nil destination, but decoding went through a buffer: the code below
   reallocates DESTINATION when DST_BYTES is smaller than the produced
   size and copies the produced bytes out of the buffer with bcopy.
   NOTE(review): the brace lines are missing here, so it cannot be
   seen whether the copy also happens when DESTINATION was already
   large enough -- confirm.  */
6343 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6345 set_buffer_internal (XBUFFER (coding
->dst_object
));
6346 if (dst_bytes
< coding
->produced
)
6349 = (unsigned char *) xrealloc (destination
, coding
->produced
);
6352 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
6353 unbind_to (count
, Qnil
);
6356 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6357 move_gap_both (BEGV
, BEGV_BYTE
);
6358 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6359 coding
->destination
= destination
;
6363 unbind_to (count
, Qnil
);
/* Encode the text in the range FROM/FROM_BYTE .. TO/TO_BYTE of
   SRC_OBJECT into DST_OBJECT by coding context CODING.

   SRC_OBJECT is tested for being a string or a buffer; otherwise the
   pre-write path reads the text from CODING->source.  DST_OBJECT is
   tested for being a buffer or Qt; with Qt the encoded text ends up
   as a string in CODING->dst_object.  If the coding system has a
   pre-write function, the source text is first copied into a work
   buffer and that function is called on it before encoding.  */
6368 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6370 struct coding_system
*coding
;
6371 Lisp_Object src_object
;
6372 EMACS_INT from
, from_byte
, to
, to_byte
;
6373 Lisp_Object dst_object
;
6375 int count
= specpdl_ptr
- specpdl
;
6376 EMACS_INT chars
= to
- from
;
6377 EMACS_INT bytes
= to_byte
- from_byte
;
6380 saved_coding
= coding
;
6381 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6383 coding
->src_object
= src_object
;
6384 coding
->src_chars
= chars
;
6385 coding
->src_bytes
= bytes
;
6386 coding
->src_multibyte
= chars
< bytes
;
6388 attrs
= CODING_ID_ATTRS (coding
->id
);
/* A pre-write function is defined: copy the source text into a work
   buffer, call the function with (BEG, Z) of that buffer, and then
   encode the whole of whatever buffer is current afterwards.  */
6390 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6392 coding
->src_object
= make_conversion_work_buffer (coding
->src_multibyte
);
6393 set_buffer_internal (XBUFFER (coding
->src_object
));
6394 if (STRINGP (src_object
))
6395 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6396 else if (BUFFERP (src_object
))
6397 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6399 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
/* Encoding in place: remove the original text from the source buffer
   now that a copy sits in the work buffer.  */
6401 if (EQ (src_object
, dst_object
))
6403 set_buffer_internal (XBUFFER (src_object
));
6404 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6405 set_buffer_internal (XBUFFER (coding
->src_object
));
6408 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6409 make_number (BEG
), make_number (Z
));
/* The source object is re-read from the current buffer after the
   call, and all source counts are recomputed from it.  */
6410 coding
->src_object
= Fcurrent_buffer ();
6412 move_gap_both (BEG
, BEG_BYTE
);
6413 coding
->src_chars
= Z
- BEG
;
6414 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6415 coding
->src_pos
= BEG
;
6416 coding
->src_pos_byte
= BEG_BYTE
;
6417 coding
->src_multibyte
= Z
< Z_BYTE
;
6419 else if (STRINGP (src_object
))
6421 coding
->src_pos
= from
;
6422 coding
->src_pos_byte
= from_byte
;
6424 else if (BUFFERP (src_object
))
6426 set_buffer_internal (XBUFFER (src_object
));
/* Encoding in place without a pre-write function: the value returned
   by del_range_1 for the deleted range becomes the source object,
   read from position 0.  */
6427 if (EQ (src_object
, dst_object
))
6429 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
6430 coding
->src_pos
= 0;
6431 coding
->src_pos_byte
= 0;
/* Move the gap when it lies inside the source range.  */
6435 if (from
< GPT
&& to
>= GPT
)
6436 move_gap_both (from
, from_byte
);
6437 coding
->src_pos
= from
;
6438 coding
->src_pos_byte
= from_byte
;
6442 if (BUFFERP (dst_object
))
6444 coding
->dst_object
= dst_object
;
6445 if (EQ (src_object
, dst_object
))
6447 coding
->dst_pos
= from
;
6448 coding
->dst_pos_byte
= from_byte
;
6452 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6453 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6455 coding
->dst_multibyte
6456 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
/* String result requested: allocate an initial output area of
   src_chars bytes (at least one byte).  */
6458 else if (EQ (dst_object
, Qt
))
6460 coding
->dst_object
= Qnil
;
6461 coding
->dst_bytes
= coding
->src_chars
;
6462 if (coding
->dst_bytes
== 0)
6463 coding
->dst_bytes
= 1;
6464 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6465 coding
->dst_multibyte
= 0;
6469 coding
->dst_object
= Qnil
;
6470 coding
->dst_multibyte
= 0;
6473 encode_coding (coding
);
/* For a string result, build the string either from the buffer the
   encoder wrote to or from the allocated output area; the output
   area is released with xfree.  */
6475 if (EQ (dst_object
, Qt
))
6477 if (BUFFERP (coding
->dst_object
))
6478 coding
->dst_object
= Fbuffer_string ();
6482 = make_unibyte_string ((char *) coding
->destination
,
6484 xfree (coding
->destination
);
6488 unbind_to (count
, Qnil
);
/* Return the name (via CODING_ID_NAME) of the coding system registered
   for the coding category that comes first in coding_priorities.  */
6493 preferred_coding_system ()
6495 int id
= coding_categories
[coding_priorities
[0]].id
;
6497 return CODING_ID_NAME (id
);
6502 /*** 8. Emacs Lisp library functions ***/
/* Lisp predicate `coding-system-p': Qt when the argument is nil or
   satisfies CODING_SYSTEM_P, Qnil otherwise.  */
6504 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6505 doc
: /* Return t if OBJECT is nil or a coding-system.
6506 See the documentation of `define-coding-system' for information
6507 about coding-system objects. */)
6511 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
/* Lisp primitive `read-non-nil-coding-system': read a coding system
   name with Fcompleting_read over Vcoding_system_alist (using
   Qcoding_system_history), repeating while the input is empty, and
   return the interned symbol.  */
6514 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6515 Sread_non_nil_coding_system
, 1, 1, 0,
6516 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6523 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6524 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
/* Loop condition: ask again as long as the user entered nothing.  */
6526 while (XSTRING (val
)->size
== 0);
6527 return (Fintern (val
, Qnil
));
/* Lisp primitive `read-coding-system': like the non-nil variant, but
   asks only once, passes DEFAULT-CODING-SYSTEM to Fcompleting_read as
   the default, and returns Qnil for empty input.  */
6530 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6531 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6532 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6533 (prompt
, default_coding_system
)
6534 Lisp_Object prompt
, default_coding_system
;
/* A symbol default is replaced by its name string before it is handed
   to Fcompleting_read.  */
6537 if (SYMBOLP (default_coding_system
))
6538 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
6539 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6540 Qt
, Qnil
, Qcoding_system_history
,
6541 default_coding_system
, Qnil
);
6542 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
/* Lisp primitive `check-coding-system': CODING-SYSTEM must be a
   symbol.  It is returned unchanged when Fcoding_system_p accepts it;
   otherwise Qcoding_system_error is signaled with CODING-SYSTEM as
   the error data.  */
6545 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6547 doc
: /* Check validity of CODING-SYSTEM.
6548 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6550 Lisp_Object coding_system
;
6552 CHECK_SYMBOL (coding_system
);
6553 if (!NILP (Fcoding_system_p (coding_system
)))
6554 return coding_system
;
6556 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6560 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6561 HIGHEST is nonzero, return the coding system of the highest
6562 priority among the detected coding systems. Otherwise return a
6563 list of detected coding systems sorted by their priorities. If
6564 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6565 multibyte form but contains only ASCII and eight-bit chars.
6566 Otherwise, the bytes are raw bytes.
6568 CODING-SYSTEM controls the detection as below:
6570 If it is nil, detect both text-format and eol-format. If the
6571 text-format part of CODING-SYSTEM is already specified
6572 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6573 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6574 detect only text-format. */
/* Overview of the steps visible below:
   1. set up a temporary coding context for CODING_SYSTEM (Qundecided
      when nil) over the bytes at SRC;
   2. if the category of that coding system is
      coding_category_undecided, run the per-category detectors in
      priority order and build VAL, a list of candidate coding-system
      ids;
   3. determine the end-of-line format (separately for the UTF-16
      big- and little-endian categories) and replace each id in VAL by
      the matching coding-system name;
   4. return the first element of VAL when HIGHEST is nonzero, else
      the whole list.  */
6577 detect_coding_system (src
, src_bytes
, highest
, multibytep
, coding_system
)
6579 int src_bytes
, highest
;
6581 Lisp_Object coding_system
;
6583 unsigned char *src_end
= src
+ src_bytes
;
6584 int mask
= CATEGORY_MASK_ANY
;
6585 Lisp_Object attrs
, eol_type
;
6587 struct coding_system coding
;
6589 struct coding_detection_info detect_info
;
6591 if (NILP (coding_system
))
6592 coding_system
= Qundecided
;
6593 setup_coding_system (coding_system
, &coding
);
6594 attrs
= CODING_ID_ATTRS (coding
.id
);
6595 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6596 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
6598 coding
.source
= src
;
6599 coding
.src_bytes
= src_bytes
;
6600 coding
.src_multibyte
= multibytep
;
6601 coding
.consumed
= 0;
6602 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
/* Start with no category checked, found or rejected.  */
6604 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
6606 /* At first, detect text-format if necessary. */
6607 if (XINT (CODING_ATTR_CATEGORY (attrs
)) == coding_category_undecided
)
6609 enum coding_category category
;
6610 struct coding_system
*this;
/* Scan forward over SRC; the offset where the scan stops is stored in
   coding.head_ascii below.  */
6613 for (; src
< src_end
; src
++)
6617 || (c
< 0x20 && (c
== ISO_CODE_ESC
6619 || c
== ISO_CODE_SO
)))
6622 coding
.head_ascii
= src
- coding
.source
;
/* Try the categories that precede raw-text, in priority order.  */
6625 for (i
= 0; i
< coding_category_raw_text
; i
++)
6627 category
= coding_priorities
[i
];
6628 this = coding_categories
+ category
;
6632 /* No coding system of this category is defined. */
6633 detect_info
.rejected
|= (1 << category
);
6635 else if (category
>= coding_category_raw_text
)
6637 else if (detect_info
.checked
& (1 << category
))
6640 && (detect_info
.found
& (1 << category
)))
6645 if ((*(this->detector
)) (&coding
, &detect_info
)
6647 && (detect_info
.found
& (1 << category
)))
/* Every category was rejected: the only candidate is raw-text.  */
6653 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
6655 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
6656 id
= coding_categories
[coding_category_raw_text
].id
;
6657 val
= Fcons (make_number (id
), Qnil
);
/* Nothing was rejected and nothing was found: the candidate is the
   coding system of the undecided category.  */
6659 else if (! detect_info
.rejected
&& ! detect_info
.found
)
6661 detect_info
.found
= CATEGORY_MASK_ANY
;
6662 id
= coding_categories
[coding_category_undecided
].id
;
6663 val
= Fcons (make_number (id
), Qnil
);
6667 if (detect_info
.found
)
6669 detect_info
.found
= 1 << category
;
6670 val
= Fcons (make_number (this->id
), Qnil
);
/* Pick a category, in priority order, that has not been rejected.  */
6673 for (i
= 0; i
< coding_category_raw_text
; i
++)
6674 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
6676 detect_info
.found
= 1 << coding_priorities
[i
];
6677 id
= coding_categories
[coding_priorities
[i
]].id
;
6678 val
= Fcons (make_number (id
), Qnil
);
6684 int mask
= detect_info
.rejected
| detect_info
.found
;
/* Both loops below walk the priorities backwards and cons onto the
   front of VAL.  The first adds categories that were neither rejected
   nor found; the second, running afterwards, adds the found ones.  */
6688 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6690 category
= coding_priorities
[i
];
6691 if (! (mask
& (1 << category
)))
6693 found
|= 1 << category
;
6694 id
= coding_categories
[category
].id
;
6695 val
= Fcons (make_number (id
), val
);
6698 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6700 category
= coding_priorities
[i
];
6701 if (detect_info
.found
& (1 << category
))
6703 id
= coding_categories
[category
].id
;
6704 val
= Fcons (make_number (id
), val
);
6707 detect_info
.found
|= found
;
/* The coding system given by the caller is the single candidate.  */
6712 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
6713 val
= Fcons (make_number (coding
.id
), Qnil
);
6716 /* Then, detect eol-format if necessary. */
6718 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
;
/* A vector eol_type leads to detect_eol calls on the data: once for
   the non-UTF-16 candidates and once per UTF-16 byte order that is
   among the candidates.  */
6721 if (VECTORP (eol_type
))
6723 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
6724 normal_eol
= detect_eol (coding
.source
, src_bytes
,
6725 coding_category_raw_text
);
6726 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
6727 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
6728 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
6729 coding_category_utf_16_be
);
6730 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
6731 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
6732 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
6733 coding_category_utf_16_le
);
/* Otherwise the eol type is taken directly from EOL_TYPE.  */
6737 if (EQ (eol_type
, Qunix
))
6738 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
6739 else if (EQ (eol_type
, Qdos
))
6740 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
6742 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
/* Replace each id in VAL by a coding-system name.  When the
   candidate's own eol type is a vector, element 0, 1 or 2 of it is
   used for LF, CRLF or CR respectively.  */
6745 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
6747 enum coding_category category
;
6750 id
= XINT (XCAR (tail
));
6751 attrs
= CODING_ID_ATTRS (id
);
6752 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
6753 eol_type
= CODING_ID_EOL_TYPE (id
);
6754 if (VECTORP (eol_type
))
6756 if (category
== coding_category_utf_16_be
6757 || category
== coding_category_utf_16_be_nosig
)
6758 this_eol
= utf_16_be_eol
;
6759 else if (category
== coding_category_utf_16_le
6760 || category
== coding_category_utf_16_le_nosig
)
6761 this_eol
= utf_16_le_eol
;
6763 this_eol
= normal_eol
;
6765 if (this_eol
== EOL_SEEN_LF
)
6766 XSETCAR (tail
, AREF (eol_type
, 0));
6767 else if (this_eol
== EOL_SEEN_CRLF
)
6768 XSETCAR (tail
, AREF (eol_type
, 1));
6769 else if (this_eol
== EOL_SEEN_CR
)
6770 XSETCAR (tail
, AREF (eol_type
, 2));
6772 XSETCAR (tail
, CODING_ID_NAME (id
));
6775 XSETCAR (tail
, CODING_ID_NAME (id
));
6779 return (highest
? XCAR (val
) : val
);
/* Lisp primitive `detect-coding-region': validate START and END,
   convert them to byte positions, move the gap when it lies inside
   the region, and hand the region's bytes to detect_coding_system.  */
6783 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
6785 doc
: /* Detect coding system of the text in the region between START and END.
6786 Return a list of possible coding systems ordered by priority.
6788 If only ASCII characters are found, it returns a list of single element
6789 `undecided' or its subsidiary coding system according to a detected
6792 If optional argument HIGHEST is non-nil, return the coding system of
6793 highest priority. */)
6794 (start
, end
, highest
)
6795 Lisp_Object start
, end
, highest
;
6798 int from_byte
, to_byte
;
6800 CHECK_NUMBER_COERCE_MARKER (start
);
6801 CHECK_NUMBER_COERCE_MARKER (end
);
6803 validate_region (&start
, &end
);
6804 from
= XINT (start
), to
= XINT (end
);
6805 from_byte
= CHAR_TO_BYTE (from
);
6806 to_byte
= CHAR_TO_BYTE (to
);
/* The gap is moved to the end of the region when the region
   straddles it, so the bytes passed below are contiguous.  */
6808 if (from
< GPT
&& to
>= GPT
)
6809 move_gap_both (to
, to_byte
);
6811 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
6812 to_byte
- from_byte
,
6814 !NILP (current_buffer
6815 ->enable_multibyte_characters
),
/* Lisp primitive `detect-coding-string': check that STRING is a
   string and run detect_coding_system over its data, passing its byte
   length and its multibyte flag.  */
6819 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
6821 doc
: /* Detect coding system of the text in STRING.
6822 Return a list of possible coding systems ordered by priority.
6824 If only ASCII characters are found, it returns a list of single element
6825 `undecided' or its subsidiary coding system according to a detected
6828 If optional argument HIGHEST is non-nil, return the coding system of
6829 highest priority. */)
6831 Lisp_Object string
, highest
;
6833 CHECK_STRING (string
);
6835 return detect_coding_system (XSTRING (string
)->data
,
6836 STRING_BYTES (XSTRING (string
)),
6838 STRING_MULTIBYTE (string
),
/* Walk the charset list of the coding-system attribute vector ATTRS
   and test character C against each charset with CHAR_CHARSET_P.
   The value is nonzero when the walk stopped before the end of the
   list (TAIL is non-nil).
   NOTE(review): the statement under the CHAR_CHARSET_P test is
   missing here; presumably it leaves the loop on the first match --
   confirm.  */
6844 char_encodable_p (c
, attrs
)
6849 struct charset
*charset
;
6851 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
6852 CONSP (tail
); tail
= XCDR (tail
))
6854 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
6855 if (CHAR_CHARSET_P (c
, charset
))
6858 return (! NILP (tail
));
6862 /* Return a list of coding systems that safely encode the text between
6863 START and END. If EXCLUDE is non-nil, it is a list of coding
6864 systems not to check. The returned list doesn't contain any such
6865 coding systems. In any case, if the text contains only ASCII or is
6866 unibyte, return t. */
/* Implementation outline, as far as it is visible below:
   - START may be a string (its bytes are examined) or START and END
     are buffer positions;
   - CODING_ATTRS_LIST collects the attribute vectors of the coding
     systems in Vcoding_system_list whose name equals their base name
     and whose type is not Qundecided;
   - ASCII bytes at both ends of the text are skipped, then every
     remaining character is tested with char_encodable_p and
     attribute vectors that fail are removed from the list in place;
   - the base names of the surviving entries are returned.  */
6868 DEFUN ("find-coding-systems-region-internal",
6869 Ffind_coding_systems_region_internal
,
6870 Sfind_coding_systems_region_internal
, 2, 3, 0,
6871 doc
: /* Internal use only. */)
6872 (start
, end
, exclude
)
6873 Lisp_Object start
, end
, exclude
;
6875 Lisp_Object coding_attrs_list
, safe_codings
;
6876 EMACS_INT start_byte
, end_byte
;
6877 const unsigned char *p
, *pbeg
, *pend
;
6879 Lisp_Object tail
, elt
;
6881 if (STRINGP (start
))
/* Early-exit test for a string that is unibyte or whose character
   count equals its byte count.  */
6883 if (!STRING_MULTIBYTE (start
)
6884 || XSTRING (start
)->size
== STRING_BYTES (XSTRING (start
)))
6887 end_byte
= STRING_BYTES (XSTRING (start
));
6891 CHECK_NUMBER_COERCE_MARKER (start
);
6892 CHECK_NUMBER_COERCE_MARKER (end
);
6893 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6894 args_out_of_range (start
, end
);
6895 if (NILP (current_buffer
->enable_multibyte_characters
))
6897 start_byte
= CHAR_TO_BYTE (XINT (start
));
6898 end_byte
= CHAR_TO_BYTE (XINT (end
));
6899 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* The gap lies strictly inside the region: move it to whichever end
   of the region is nearer.  */
6902 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
6904 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
6905 move_gap_both (XINT (start
), start_byte
);
6907 move_gap_both (XINT (end
), end_byte
);
6911 coding_attrs_list
= Qnil
;
6912 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6914 || NILP (Fmemq (XCAR (tail
), exclude
)))
6918 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
6919 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
6920 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
6921 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
6924 if (STRINGP (start
))
6925 p
= pbeg
= XSTRING (start
)->data
;
6927 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6928 pend
= p
+ (end_byte
- start_byte
);
/* Skip the ASCII bytes at the head and at the tail of the text.  */
6930 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
6931 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6935 if (ASCII_BYTE_P (*p
))
6939 c
= STRING_CHAR_ADVANCE (p
);
6941 charset_map_loaded
= 0;
6942 for (tail
= coding_attrs_list
; CONSP (tail
);)
6947 else if (char_encodable_p (c
, elt
))
/* Unlink this entry in place: copy the next cell over it when there
   is one, otherwise blank its car with Qnil.  */
6949 else if (CONSP (XCDR (tail
)))
6951 XSETCAR (tail
, XCAR (XCDR (tail
)));
6952 XSETCDR (tail
, XCDR (XCDR (tail
)));
6956 XSETCAR (tail
, Qnil
);
/* charset_map_loaded was reset before the inner loop; when it is
   nonzero afterwards, PBEG, P and PEND are recomputed from the saved
   offsets.
   NOTE(review): presumably loading a charset map can relocate the
   text being scanned -- confirm.  */
6960 if (charset_map_loaded
)
6962 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6964 if (STRINGP (start
))
6965 pbeg
= XSTRING (start
)->data
;
6967 pbeg
= BYTE_POS_ADDR (start_byte
);
6968 p
= pbeg
+ p_offset
;
6969 pend
= pbeg
+ pend_offset
;
/* Collect the base names of the entries that were not blanked.  */
6974 safe_codings
= Qnil
;
6975 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
6976 if (! NILP (XCAR (tail
)))
6977 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
6979 return safe_codings
;
/* Implementation outline, as far as it is visible below: LIST is
   built with one entry per element of CODING-SYSTEM-LIST, holding the
   coding system, its attribute vector and, later, the positions of
   characters it cannot encode.  ASCII bytes at both ends of the text
   are skipped, each remaining character is tested with
   char_encodable_p against every entry, and failing positions are
   pushed onto the entry.  Finally a result is assembled from the
   entries that collected at least one position, with the positions
   put back in order by Fnreverse.  */
6983 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
6984 Scheck_coding_systems_region
, 3, 3, 0,
6985 doc
: /* Check if the region is encodable by coding systems.
6987 START and END are buffer positions specifying the region.
6988 CODING-SYSTEM-LIST is a list of coding systems to check.
6990 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6991 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
6992 whole region, POS0, POS1, ... are buffer positions where non-encodable
6993 characters are found.
6995 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6998 START may be a string. In that case, check if the string is
6999 encodable, and the value contains indices to the string instead of
7000 buffer positions. END is ignored. */)
7001 (start
, end
, coding_system_list
)
7002 Lisp_Object start
, end
, coding_system_list
;
7005 EMACS_INT start_byte
, end_byte
;
7007 const unsigned char *p
, *pbeg
, *pend
;
7009 Lisp_Object tail
, elt
;
7011 if (STRINGP (start
))
/* NOTE(review): this test combines `!STRING_MULTIBYTE' with `&&' and
   `!=', whereas the parallel test in
   find-coding-systems-region-internal uses `||' and `=='.  As
   written it looks like it can never be true for a unibyte string,
   so the early exit is probably dead -- confirm which form is
   intended.  */
7013 if (!STRING_MULTIBYTE (start
)
7014 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
7017 end_byte
= STRING_BYTES (XSTRING (start
));
7022 CHECK_NUMBER_COERCE_MARKER (start
);
7023 CHECK_NUMBER_COERCE_MARKER (end
);
7024 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7025 args_out_of_range (start
, end
);
7026 if (NILP (current_buffer
->enable_multibyte_characters
))
7028 start_byte
= CHAR_TO_BYTE (XINT (start
));
7029 end_byte
= CHAR_TO_BYTE (XINT (end
));
7030 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* The gap lies strictly inside the region: move it to whichever end
   of the region is nearer.  */
7033 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7035 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7036 move_gap_both (XINT (start
), start_byte
);
7038 move_gap_both (XINT (end
), end_byte
);
7044 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7047 list
= Fcons (Fcons (elt
, Fcons (AREF (CODING_SYSTEM_SPEC (elt
), 0),
7052 if (STRINGP (start
))
7053 p
= pbeg
= XSTRING (start
)->data
;
7055 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7056 pend
= p
+ (end_byte
- start_byte
);
/* Skip leading ASCII bytes, advancing POS along with P, and trim
   trailing ASCII bytes.  */
7058 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7059 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7063 if (ASCII_BYTE_P (*p
))
7067 c
= STRING_CHAR_ADVANCE (p
);
7069 charset_map_loaded
= 0;
7070 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
/* ELT is the tail of the entry after the coding system: its car is
   the attribute vector, its cdr the list of positions so far.  */
7072 elt
= XCDR (XCAR (tail
));
7073 if (! char_encodable_p (c
, XCAR (elt
)))
7074 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
/* Recompute the scan pointers from saved offsets when
   charset_map_loaded became nonzero during the loop above.  */
7076 if (charset_map_loaded
)
7078 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7080 if (STRINGP (start
))
7081 pbeg
= XSTRING (start
)->data
;
7083 pbeg
= BYTE_POS_ADDR (start_byte
);
7084 p
= pbeg
+ p_offset
;
7085 pend
= pbeg
+ pend_offset
;
/* Keep only the entries that recorded at least one position.  */
7093 for (; CONSP (tail
); tail
= XCDR (tail
))
7096 if (CONSP (XCDR (XCDR (elt
))))
7097 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
/* Shared worker for `decode-coding-region' and
   `encode-coding-region'.  A nil CODING_SYSTEM is replaced by
   Qno_conversion.  The source is always the current buffer.  A nil
   DST_OBJECT means the source buffer itself; otherwise it must be Qt
   or a buffer.  An error is raised when the conversion result is not
   CODING_RESULT_SUCCESS.  The value is the number of produced
   characters when the destination is a buffer, else
   coding.dst_object.

   ENCODEP and NORECORD are the same flags that code_convert_string
   takes.
   NOTE(review): the lines testing these two flags are missing here;
   presumably ENCODEP chooses between the encode_coding_object and
   decode_coding_object calls and NORECORD guards the assignment to
   Vlast_coding_system_used -- confirm.  */
7107 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7108 Lisp_Object start
, end
, coding_system
, dst_object
;
7109 int encodep
, norecord
;
7111 struct coding_system coding
;
7112 EMACS_INT from
, from_byte
, to
, to_byte
;
7113 Lisp_Object src_object
;
7115 CHECK_NUMBER_COERCE_MARKER (start
);
7116 CHECK_NUMBER_COERCE_MARKER (end
);
7117 if (NILP (coding_system
))
7118 coding_system
= Qno_conversion
;
7120 CHECK_CODING_SYSTEM (coding_system
);
7121 src_object
= Fcurrent_buffer ();
7122 if (NILP (dst_object
))
7123 dst_object
= src_object
;
7124 else if (! EQ (dst_object
, Qt
))
7125 CHECK_BUFFER (dst_object
);
7127 validate_region (&start
, &end
);
7128 from
= XFASTINT (start
);
7129 from_byte
= CHAR_TO_BYTE (from
);
7130 to
= XFASTINT (end
);
7131 to_byte
= CHAR_TO_BYTE (to
);
7133 setup_coding_system (coding_system
, &coding
);
/* The whole region is converted in one call, so it is flagged as the
   last block.  */
7134 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7137 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7140 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7143 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7145 if (coding
.result
!= CODING_RESULT_SUCCESS
)
7146 error ("Code conversion error: %d", coding
.result
);
7148 return (BUFFERP (dst_object
)
7149 ? make_number (coding
.produced_char
)
7150 : coding
.dst_object
);
/* Lisp command `decode-coding-region': thin wrapper that calls
   code_convert_region with ENCODEP = 0 and NORECORD = 0.  */
7154 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7155 3, 4, "r\nzCoding system: ",
7156 doc
: /* Decode the current region from the specified coding system.
7157 When called from a program, takes four arguments:
7158 START, END, CODING-SYSTEM, and DESTINATION.
7159 START and END are buffer positions.
7161 Optional 4th argument DESTINATION specifies where the decoded text goes.
7162 If nil, the region between START and END is replaced by the decoded text.
7163 If buffer, the decoded text is inserted in the buffer.
7164 If t, the decoded text is returned.
7166 This function sets `last-coding-system-used' to the precise coding system
7167 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7168 not fully specified.)
7169 It returns the length of the decoded text. */)
7170 (start
, end
, coding_system
, destination
)
7171 Lisp_Object start
, end
, coding_system
, destination
;
7173 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
/* Lisp command `encode-coding-region': thin wrapper that calls
   code_convert_region with ENCODEP = 1 and NORECORD = 0.  */
7176 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7177 3, 4, "r\nzCoding system: ",
7178 doc
: /* Encode the current region by specified coding system.
7179 When called from a program, takes three arguments:
7180 START, END, and CODING-SYSTEM. START and END are buffer positions.
7182 Optional 4th argument DESTINATION specifies where the encoded text goes.
7183 If nil, the region between START and END is replaced by the encoded text.
7184 If buffer, the encoded text is inserted in the buffer.
7185 If t, the encoded text is returned.
7187 This function sets `last-coding-system-used' to the precise coding system
7188 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7189 not fully specified.)
7190 It returns the length of the encoded text. */)
7191 (start
, end
, coding_system
, destination
)
7192 Lisp_Object start
, end
, coding_system
, destination
;
7194 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
7198 code_convert_string (string
, coding_system
, dst_object
,
7199 encodep
, nocopy
, norecord
)
7200 Lisp_Object string
, coding_system
, dst_object
;
7201 int encodep
, nocopy
, norecord
;
7203 struct coding_system coding
;
7204 EMACS_INT chars
, bytes
;
7206 CHECK_STRING (string
);
7207 if (NILP (coding_system
))
7210 Vlast_coding_system_used
= Qno_conversion
;
7211 if (NILP (dst_object
))
7212 return (nocopy
? Fcopy_sequence (string
) : string
);
7215 if (NILP (coding_system
))
7216 coding_system
= Qno_conversion
;
7218 CHECK_CODING_SYSTEM (coding_system
);
7219 if (NILP (dst_object
))
7221 else if (! EQ (dst_object
, Qt
))
7222 CHECK_BUFFER (dst_object
);
7224 setup_coding_system (coding_system
, &coding
);
7225 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7226 chars
= XSTRING (string
)->size
;
7227 bytes
= STRING_BYTES (XSTRING (string
));
7229 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7231 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7233 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7235 if (coding
.result
!= CODING_RESULT_SUCCESS
)
7236 error ("Code conversion error: %d", coding
.result
);
7238 return (BUFFERP (dst_object
)
7239 ? make_number (coding
.produced_char
)
7240 : coding
.dst_object
);
7244 /* Encode or decode STRING according to CODING_SYSTEM.
7245 Do not set Vlast_coding_system_used.
7247 This function is called only from macros DECODE_FILE and
7248 ENCODE_FILE, thus we ignore character composition. */
/* Thin wrapper: call code_convert_string with DST_OBJECT = Qt,
   NOCOPY = 0 and NORECORD = 1, and return its value.  */
7251 code_convert_string_norecord (string
, coding_system
, encodep
)
7252 Lisp_Object string
, coding_system
;
7255 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
/* Lisp primitive `decode-coding-string': thin wrapper that calls
   code_convert_string with ENCODEP = 0, NOCOPY taken from the
   non-nil-ness of the Lisp argument, and NORECORD = 0.  */
7259 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
7261 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7263 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7264 if the decoding operation is trivial.
7266 Optional fourth arg BUFFER non-nil means that the decoded text is
7267 inserted in BUFFER instead of returned as a string. In this case,
7268 the return value is BUFFER.
7270 This function sets `last-coding-system-used' to the precise coding system
7271 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7272 not fully specified.) */)
7273 (string
, coding_system
, nocopy
, buffer
)
7274 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7276 return code_convert_string (string
, coding_system
, buffer
,
7277 0, ! NILP (nocopy
), 0);
/* Lisp primitive `encode-coding-string': thin wrapper that calls
   code_convert_string with ENCODEP = 1, NOCOPY taken from the
   non-nil-ness of the Lisp argument, and NORECORD = 1.

   NOTE(review): NORECORD is 1 here but 0 in `decode-coding-string',
   while the docstring below says `last-coding-system-used' is set --
   confirm which of the two is intended.  */
7280 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
7282 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
7284 Optional third arg NOCOPY non-nil means it is OK to return STRING
7285 itself if the encoding operation is trivial.
7287 Optional fourth arg BUFFER non-nil means that the encoded text is
7288 inserted in BUFFER instead of returned as a string. In this case,
7289 the return value is BUFFER.
7291 This function sets `last-coding-system-used' to the precise coding system
7292 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7293 not fully specified.) */)
7294 (string
, coding_system
, nocopy
, buffer
)
7295 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7297 return code_convert_string (string
, coding_system
, buffer
,
7298 1, ! NILP (nocopy
), 1);
7302 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
7303 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
7304 Return the corresponding character. */)
7308 Lisp_Object spec
, attrs
, val
;
7309 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
7312 CHECK_NATNUM (code
);
7313 c
= XFASTINT (code
);
7314 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7315 attrs
= AREF (spec
, 0);
7317 if (ASCII_BYTE_P (c
)
7318 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7321 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7322 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7323 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7324 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7327 charset
= charset_roman
;
7328 else if (c
>= 0xA0 && c
< 0xDF)
7330 charset
= charset_kana
;
7335 int s1
= c
>> 8, s2
= c
& 0xFF;
7337 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
7338 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
7339 error ("Invalid code: %d", code
);
7341 charset
= charset_kanji
;
7343 c
= DECODE_CHAR (charset
, c
);
7345 error ("Invalid code: %d", code
);
7346 return make_number (c
);
/* Lisp primitive `encode-sjis-char': the argument must be a
   character.  The charset list of Vsjis_coding_system is searched
   with char_charset, which also yields the code; an error is raised
   when that code equals CHARSET_INVALID_CODE for the charset.  The
   resulting code is returned as a Lisp integer.  */
7350 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
7351 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
7352 Return the corresponding code in SJIS. */)
7356 Lisp_Object spec
, attrs
, charset_list
;
7358 struct charset
*charset
;
7361 CHECK_CHARACTER (ch
);
7363 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7364 attrs
= AREF (spec
, 0);
/* Special case: an ASCII character under an ASCII-compatible coding
   system.  */
7366 if (ASCII_CHAR_P (c
)
7367 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7370 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7371 charset
= char_charset (c
, charset_list
, &code
);
7372 if (code
== CHARSET_INVALID_CODE (charset
))
7373 error ("Can't encode by shift_jis encoding: %d", c
);
7376 return make_number (code
);
7379 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
7380 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
7381 Return the corresponding character. */)
7385 Lisp_Object spec
, attrs
, val
;
7386 struct charset
*charset_roman
, *charset_big5
, *charset
;
7389 CHECK_NATNUM (code
);
7390 c
= XFASTINT (code
);
7391 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7392 attrs
= AREF (spec
, 0);
7394 if (ASCII_BYTE_P (c
)
7395 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7398 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7399 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7400 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7403 charset
= charset_roman
;
7406 int b1
= c
>> 8, b2
= c
& 0x7F;
7407 if (b1
< 0xA1 || b1
> 0xFE
7408 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
7409 error ("Invalid code: %d", code
);
7410 charset
= charset_big5
;
7412 c
= DECODE_CHAR (charset
, (unsigned )c
);
7414 error ("Invalid code: %d", code
);
7415 return make_number (c
);
7418 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
7419 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
7420 Return the corresponding character code in Big5. */)
7424 Lisp_Object spec
, attrs
, charset_list
;
7425 struct charset
*charset
;
7429 CHECK_CHARACTER (ch
);
7431 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7432 attrs
= AREF (spec
, 0);
7433 if (ASCII_CHAR_P (c
)
7434 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7437 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7438 charset
= char_charset (c
, charset_list
, &code
);
7439 if (code
== CHARSET_INVALID_CODE (charset
))
7440 error ("Can't encode by Big5 encoding: %d", c
);
7442 return make_number (code
);
7446 DEFUN ("set-terminal-coding-system-internal",
7447 Fset_terminal_coding_system_internal
,
7448 Sset_terminal_coding_system_internal
, 1, 1, 0,
7449 doc
: /* Internal use only. */)
7451 Lisp_Object coding_system
;
7453 CHECK_SYMBOL (coding_system
);
7454 setup_coding_system (Fcheck_coding_system (coding_system
),
7457 /* We had better not send unsafe characters to terminal. */
7458 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
7459 /* Characer composition should be disabled. */
7460 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7461 terminal_coding
.src_multibyte
= 1;
7462 terminal_coding
.dst_multibyte
= 0;
7466 DEFUN ("set-safe-terminal-coding-system-internal",
7467 Fset_safe_terminal_coding_system_internal
,
7468 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
7469 doc
: /* Internal use only. */)
7471 Lisp_Object coding_system
;
7473 CHECK_SYMBOL (coding_system
);
7474 setup_coding_system (Fcheck_coding_system (coding_system
),
7475 &safe_terminal_coding
);
7476 /* Characer composition should be disabled. */
7477 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7478 safe_terminal_coding
.src_multibyte
= 1;
7479 safe_terminal_coding
.dst_multibyte
= 0;
7483 DEFUN ("terminal-coding-system",
7484 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
7485 doc
: /* Return coding system specified for terminal output. */)
7488 return CODING_ID_NAME (terminal_coding
.id
);
7491 DEFUN ("set-keyboard-coding-system-internal",
7492 Fset_keyboard_coding_system_internal
,
7493 Sset_keyboard_coding_system_internal
, 1, 1, 0,
7494 doc
: /* Internal use only. */)
7496 Lisp_Object coding_system
;
7498 CHECK_SYMBOL (coding_system
);
7499 setup_coding_system (Fcheck_coding_system (coding_system
),
7501 /* Characer composition should be disabled. */
7502 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7506 DEFUN ("keyboard-coding-system",
7507 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
7508 doc
: /* Return coding system specified for decoding keyboard input. */)
7511 return CODING_ID_NAME (keyboard_coding
.id
);
7515 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7516 Sfind_operation_coding_system
, 1, MANY
, 0,
7517 doc
: /* Choose a coding system for an operation based on the target name.
7518 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7519 DECODING-SYSTEM is the coding system to use for decoding
7520 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7521 for encoding (in case OPERATION does encoding).
7523 The first argument OPERATION specifies an I/O primitive:
7524 For file I/O, `insert-file-contents' or `write-region'.
7525 For process I/O, `call-process', `call-process-region', or `start-process'.
7526 For network I/O, `open-network-stream'.
7528 The remaining arguments should be the same arguments that were passed
7529 to the primitive. Depending on which primitive, one of those arguments
7530 is selected as the TARGET. For example, if OPERATION does file I/O,
7531 whichever argument specifies the file name is TARGET.
7533 TARGET has a meaning which depends on OPERATION:
7534 For file I/O, TARGET is a file name.
7535 For process I/O, TARGET is a process name.
7536 For network I/O, TARGET is a service name or a port number
7538 This function looks up what specified for TARGET in,
7539 `file-coding-system-alist', `process-coding-system-alist',
7540 or `network-coding-system-alist' depending on OPERATION.
7541 They may specify a coding system, a cons of coding systems,
7542 or a function symbol to call.
7543 In the last case, we call the function with one argument,
7544 which is a list of all the arguments given to this function.
7546 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7551 Lisp_Object operation
, target_idx
, target
, val
;
7552 register Lisp_Object chain
;
7555 error ("Too few arguments");
7556 operation
= args
[0];
7557 if (!SYMBOLP (operation
)
7558 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7559 error ("Invalid first arguement");
7560 if (nargs
< 1 + XINT (target_idx
))
7561 error ("Too few arguments for operation: %s",
7562 XSYMBOL (operation
)->name
->data
);
7563 target
= args
[XINT (target_idx
) + 1];
7564 if (!(STRINGP (target
)
7565 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7566 error ("Invalid %dth argument", XINT (target_idx
) + 1);
7568 chain
= ((EQ (operation
, Qinsert_file_contents
)
7569 || EQ (operation
, Qwrite_region
))
7570 ? Vfile_coding_system_alist
7571 : (EQ (operation
, Qopen_network_stream
)
7572 ? Vnetwork_coding_system_alist
7573 : Vprocess_coding_system_alist
));
7577 for (; CONSP (chain
); chain
= XCDR (chain
))
7583 && ((STRINGP (target
)
7584 && STRINGP (XCAR (elt
))
7585 && fast_string_match (XCAR (elt
), target
) >= 0)
7586 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7589 /* Here, if VAL is both a valid coding system and a valid
7590 function symbol, we return VAL as a coding system. */
7593 if (! SYMBOLP (val
))
7595 if (! NILP (Fcoding_system_p (val
)))
7596 return Fcons (val
, val
);
7597 if (! NILP (Ffboundp (val
)))
7599 val
= call1 (val
, Flist (nargs
, args
));
7602 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
7603 return Fcons (val
, val
);
7611 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
7612 Sset_coding_system_priority
, 0, MANY
, 0,
7613 doc
: /* Assign higher priority to the coding systems given as arguments.
7614 usage: (set-coding-system-priority CODING-SYSTEM ...) */)
7620 int changed
[coding_category_max
];
7621 enum coding_category priorities
[coding_category_max
];
7623 bzero (changed
, sizeof changed
);
7625 for (i
= j
= 0; i
< nargs
; i
++)
7627 enum coding_category category
;
7628 Lisp_Object spec
, attrs
;
7630 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
7631 attrs
= AREF (spec
, 0);
7632 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7633 if (changed
[category
])
7634 /* Ignore this coding system because a coding system of the
7635 same category already had a higher priority. */
7637 changed
[category
] = 1;
7638 priorities
[j
++] = category
;
7639 if (coding_categories
[category
].id
>= 0
7640 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
7641 setup_coding_system (args
[i
], &coding_categories
[category
]);
7644 /* Now we have decided top J priorities. Reflect the order of the
7645 original priorities to the remaining priorities. */
7647 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
7649 while (j
< coding_category_max
7650 && changed
[coding_priorities
[j
]])
7652 if (j
== coding_category_max
)
7654 priorities
[i
] = coding_priorities
[j
];
7657 bcopy (priorities
, coding_priorities
, sizeof priorities
);
7661 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
7662 Scoding_system_priority_list
, 0, 1, 0,
7663 doc
: /* Return a list of coding systems ordered by their priorities.
7664 HIGHESTP non-nil means just return the highest priority one. */)
7666 Lisp_Object highestp
;
7671 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
7673 enum coding_category category
= coding_priorities
[i
];
7674 int id
= coding_categories
[category
].id
;
7679 attrs
= CODING_ID_ATTRS (id
);
7680 if (! NILP (highestp
))
7681 return CODING_ATTR_BASE_NAME (attrs
);
7682 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
7684 return Fnreverse (val
);
7687 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
7690 make_subsidiaries (base
)
7693 Lisp_Object subsidiaries
;
7694 int base_name_len
= STRING_BYTES (XSYMBOL (base
)->name
);
7695 char *buf
= (char *) alloca (base_name_len
+ 6);
7698 bcopy (XSYMBOL (base
)->name
->data
, buf
, base_name_len
);
7699 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
7700 for (i
= 0; i
< 3; i
++)
7702 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
7703 ASET (subsidiaries
, i
, intern (buf
));
7705 return subsidiaries
;
7709 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
7710 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
7711 doc
: /* For internal use only.
7712 usage: (define-coding-system-internal ...) */)
7718 Lisp_Object spec_vec
; /* [ ATTRS ALIASE EOL_TYPE ] */
7719 Lisp_Object attrs
; /* Vector of attributes. */
7720 Lisp_Object eol_type
;
7721 Lisp_Object aliases
;
7722 Lisp_Object coding_type
, charset_list
, safe_charsets
;
7723 enum coding_category category
;
7724 Lisp_Object tail
, val
;
7725 int max_charset_id
= 0;
7728 if (nargs
< coding_arg_max
)
7731 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
7733 name
= args
[coding_arg_name
];
7734 CHECK_SYMBOL (name
);
7735 CODING_ATTR_BASE_NAME (attrs
) = name
;
7737 val
= args
[coding_arg_mnemonic
];
7738 if (! STRINGP (val
))
7739 CHECK_CHARACTER (val
);
7740 CODING_ATTR_MNEMONIC (attrs
) = val
;
7742 coding_type
= args
[coding_arg_coding_type
];
7743 CHECK_SYMBOL (coding_type
);
7744 CODING_ATTR_TYPE (attrs
) = coding_type
;
7746 charset_list
= args
[coding_arg_charset_list
];
7747 if (SYMBOLP (charset_list
))
7749 if (EQ (charset_list
, Qiso_2022
))
7751 if (! EQ (coding_type
, Qiso_2022
))
7752 error ("Invalid charset-list");
7753 charset_list
= Viso_2022_charset_list
;
7755 else if (EQ (charset_list
, Qemacs_mule
))
7757 if (! EQ (coding_type
, Qemacs_mule
))
7758 error ("Invalid charset-list");
7759 charset_list
= Vemacs_mule_charset_list
;
7761 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7762 if (max_charset_id
< XFASTINT (XCAR (tail
)))
7763 max_charset_id
= XFASTINT (XCAR (tail
));
7767 charset_list
= Fcopy_sequence (charset_list
);
7768 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
7770 struct charset
*charset
;
7773 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7774 if (EQ (coding_type
, Qiso_2022
)
7775 ? CHARSET_ISO_FINAL (charset
) < 0
7776 : EQ (coding_type
, Qemacs_mule
)
7777 ? CHARSET_EMACS_MULE_ID (charset
) < 0
7779 error ("Can't handle charset `%s'",
7780 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7782 XCAR (tail
) = make_number (charset
->id
);
7783 if (max_charset_id
< charset
->id
)
7784 max_charset_id
= charset
->id
;
7787 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
7789 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
7791 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7792 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
7793 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
7795 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
7797 val
= args
[coding_arg_decode_translation_table
];
7799 CHECK_CHAR_TABLE (val
);
7800 CODING_ATTR_DECODE_TBL (attrs
) = val
;
7802 val
= args
[coding_arg_encode_translation_table
];
7804 CHECK_CHAR_TABLE (val
);
7805 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
7807 val
= args
[coding_arg_post_read_conversion
];
7809 CODING_ATTR_POST_READ (attrs
) = val
;
7811 val
= args
[coding_arg_pre_write_conversion
];
7813 CODING_ATTR_PRE_WRITE (attrs
) = val
;
7815 val
= args
[coding_arg_default_char
];
7817 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
7820 CHECK_CHARACTER (val
);
7821 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
7824 val
= args
[coding_arg_plist
];
7826 CODING_ATTR_PLIST (attrs
) = val
;
7828 if (EQ (coding_type
, Qcharset
))
7830 /* Generate a lisp vector of 256 elements. Each element is nil,
7831 integer, or a list of charset IDs.
7833 If Nth element is nil, the byte code N is invalid in this
7836 If Nth element is a number NUM, N is the first byte of a
7837 charset whose ID is NUM.
7839 If Nth element is a list of charset IDs, N is the first byte
7840 of one of them. The list is sorted by dimensions of the
7841 charsets. A charset of smaller dimension comes first.
7843 val
= Fmake_vector (make_number (256), Qnil
);
7845 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7847 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
7848 int dim
= CHARSET_DIMENSION (charset
);
7849 int idx
= (dim
- 1) * 4;
7851 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
7852 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
7854 for (i
= charset
->code_space
[idx
];
7855 i
<= charset
->code_space
[idx
+ 1]; i
++)
7857 Lisp_Object tmp
, tmp2
;
7860 tmp
= AREF (val
, i
);
7863 else if (NUMBERP (tmp
))
7865 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
7867 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
7869 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
7873 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
7875 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
7880 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
7883 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
7884 XSETCAR (tmp2
, XCAR (tail
));
7890 ASET (attrs
, coding_attr_charset_valids
, val
);
7891 category
= coding_category_charset
;
7893 else if (EQ (coding_type
, Qccl
))
7897 if (nargs
< coding_arg_ccl_max
)
7900 val
= args
[coding_arg_ccl_decoder
];
7901 CHECK_CCL_PROGRAM (val
);
7903 val
= Fcopy_sequence (val
);
7904 ASET (attrs
, coding_attr_ccl_decoder
, val
);
7906 val
= args
[coding_arg_ccl_encoder
];
7907 CHECK_CCL_PROGRAM (val
);
7909 val
= Fcopy_sequence (val
);
7910 ASET (attrs
, coding_attr_ccl_encoder
, val
);
7912 val
= args
[coding_arg_ccl_valids
];
7913 valids
= Fmake_string (make_number (256), make_number (0));
7914 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
7921 from
= to
= XINT (val
);
7922 if (from
< 0 || from
> 255)
7923 args_out_of_range_3 (val
, make_number (0), make_number (255));
7928 CHECK_NUMBER (XCAR (val
));
7929 CHECK_NUMBER (XCDR (val
));
7930 from
= XINT (XCAR (val
));
7931 if (from
< 0 || from
> 255)
7932 args_out_of_range_3 (XCAR (val
),
7933 make_number (0), make_number (255));
7934 to
= XINT (XCDR (val
));
7935 if (to
< from
|| to
> 255)
7936 args_out_of_range_3 (XCDR (val
),
7937 XCAR (val
), make_number (255));
7939 for (i
= from
; i
<= to
; i
++)
7940 XSTRING (valids
)->data
[i
] = 1;
7942 ASET (attrs
, coding_attr_ccl_valids
, valids
);
7944 category
= coding_category_ccl
;
7946 else if (EQ (coding_type
, Qutf_16
))
7948 Lisp_Object bom
, endian
;
7950 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
7952 if (nargs
< coding_arg_utf16_max
)
7955 bom
= args
[coding_arg_utf16_bom
];
7956 if (! NILP (bom
) && ! EQ (bom
, Qt
))
7959 CHECK_CODING_SYSTEM (XCAR (bom
));
7960 CHECK_CODING_SYSTEM (XCDR (bom
));
7962 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
7964 endian
= args
[coding_arg_utf16_endian
];
7965 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
7967 category
= (CONSP (bom
)
7968 ? coding_category_utf_16_auto
7971 ? coding_category_utf_16_be_nosig
7972 : coding_category_utf_16_le_nosig
)
7974 ? coding_category_utf_16_be
7975 : coding_category_utf_16_le
));
7977 else if (EQ (coding_type
, Qiso_2022
))
7979 Lisp_Object initial
, reg_usage
, request
, flags
;
7982 if (nargs
< coding_arg_iso2022_max
)
7985 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
7986 CHECK_VECTOR (initial
);
7987 for (i
= 0; i
< 4; i
++)
7989 val
= Faref (initial
, make_number (i
));
7992 struct charset
*charset
;
7994 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7995 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
7996 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
7997 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8000 ASET (initial
, i
, make_number (-1));
8003 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
8004 CHECK_CONS (reg_usage
);
8005 CHECK_NATNUM (XCAR (reg_usage
));
8006 CHECK_NATNUM (XCDR (reg_usage
));
8008 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8009 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8015 CHECK_CHARSET_GET_ID (XCAR (val
), id
);
8016 CHECK_NATNUM (XCDR (val
));
8017 if (XINT (XCDR (val
)) >= 4)
8018 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8019 XCAR (val
) = make_number (id
);
8022 flags
= args
[coding_arg_iso2022_flags
];
8023 CHECK_NATNUM (flags
);
8025 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8026 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8028 ASET (attrs
, coding_attr_iso_initial
, initial
);
8029 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8030 ASET (attrs
, coding_attr_iso_request
, request
);
8031 ASET (attrs
, coding_attr_iso_flags
, flags
);
8032 setup_iso_safe_charsets (attrs
);
8034 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8035 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8036 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8037 ? coding_category_iso_7_else
8038 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8039 ? coding_category_iso_7
8040 : coding_category_iso_7_tight
);
8043 int id
= XINT (AREF (initial
, 1));
8045 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8046 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8048 ? coding_category_iso_8_else
8049 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8050 ? coding_category_iso_8_1
8051 : coding_category_iso_8_2
);
8053 if (category
!= coding_category_iso_8_1
8054 && category
!= coding_category_iso_8_2
)
8055 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8057 else if (EQ (coding_type
, Qemacs_mule
))
8059 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8060 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8061 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8062 category
= coding_category_emacs_mule
;
8064 else if (EQ (coding_type
, Qshift_jis
))
8067 struct charset
*charset
;
8069 if (XINT (Flength (charset_list
)) != 3)
8070 error ("There should be just three charsets");
8072 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8073 if (CHARSET_DIMENSION (charset
) != 1)
8074 error ("Dimension of charset %s is not one",
8075 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8076 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8077 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8079 charset_list
= XCDR (charset_list
);
8080 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8081 if (CHARSET_DIMENSION (charset
) != 1)
8082 error ("Dimension of charset %s is not one",
8083 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8085 charset_list
= XCDR (charset_list
);
8086 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8087 if (CHARSET_DIMENSION (charset
) != 2)
8088 error ("Dimension of charset %s is not two",
8089 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8091 category
= coding_category_sjis
;
8092 Vsjis_coding_system
= name
;
8094 else if (EQ (coding_type
, Qbig5
))
8096 struct charset
*charset
;
8098 if (XINT (Flength (charset_list
)) != 2)
8099 error ("There should be just two charsets");
8101 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8102 if (CHARSET_DIMENSION (charset
) != 1)
8103 error ("Dimension of charset %s is not one",
8104 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8105 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8106 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8108 charset_list
= XCDR (charset_list
);
8109 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8110 if (CHARSET_DIMENSION (charset
) != 2)
8111 error ("Dimension of charset %s is not two",
8112 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8114 category
= coding_category_big5
;
8115 Vbig5_coding_system
= name
;
8117 else if (EQ (coding_type
, Qraw_text
))
8119 category
= coding_category_raw_text
;
8120 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8122 else if (EQ (coding_type
, Qutf_8
))
8124 category
= coding_category_utf_8
;
8125 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8127 else if (EQ (coding_type
, Qundecided
))
8128 category
= coding_category_undecided
;
8130 error ("Invalid coding system type: %s",
8131 XSYMBOL (coding_type
)->name
->data
);
8133 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8135 eol_type
= args
[coding_arg_eol_type
];
8136 if (! NILP (eol_type
)
8137 && ! EQ (eol_type
, Qunix
)
8138 && ! EQ (eol_type
, Qdos
)
8139 && ! EQ (eol_type
, Qmac
))
8140 error ("Invalid eol-type");
8142 aliases
= Fcons (name
, Qnil
);
8144 if (NILP (eol_type
))
8146 eol_type
= make_subsidiaries (name
);
8147 for (i
= 0; i
< 3; i
++)
8149 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8151 this_name
= AREF (eol_type
, i
);
8152 this_aliases
= Fcons (this_name
, Qnil
);
8153 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
8154 this_spec
= Fmake_vector (make_number (3), attrs
);
8155 ASET (this_spec
, 1, this_aliases
);
8156 ASET (this_spec
, 2, this_eol_type
);
8157 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8158 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8159 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8160 Vcoding_system_alist
);
8164 spec_vec
= Fmake_vector (make_number (3), attrs
);
8165 ASET (spec_vec
, 1, aliases
);
8166 ASET (spec_vec
, 2, eol_type
);
8168 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
8169 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
8170 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
8171 Vcoding_system_alist
);
8174 int id
= coding_categories
[category
].id
;
8176 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
8177 setup_coding_system (name
, &coding_categories
[category
]);
8183 return Fsignal (Qwrong_number_of_arguments
,
8184 Fcons (intern ("define-coding-system-internal"),
8185 make_number (nargs
)));
8188 /* Fixme: should this record the alias relationships for
8189 diagnostics? Should it update coding-system-list? */
8190 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
8191 Sdefine_coding_system_alias
, 2, 2, 0,
8192 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8193 (alias
, coding_system
)
8194 Lisp_Object alias
, coding_system
;
8196 Lisp_Object spec
, aliases
, eol_type
;
8198 CHECK_SYMBOL (alias
);
8199 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8200 aliases
= AREF (spec
, 1);
8201 while (!NILP (XCDR (aliases
)))
8202 aliases
= XCDR (aliases
);
8203 XCDR (aliases
) = Fcons (alias
, Qnil
);
8205 eol_type
= AREF (spec
, 2);
8206 if (VECTORP (eol_type
))
8208 Lisp_Object subsidiaries
;
8211 subsidiaries
= make_subsidiaries (alias
);
8212 for (i
= 0; i
< 3; i
++)
8213 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
8214 AREF (eol_type
, i
));
8216 ASET (spec
, 2, subsidiaries
);
8219 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
8220 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
8221 Vcoding_system_alist
);
8226 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
8228 doc
: /* Return the base of CODING-SYSTEM.
8229 Any alias or subsidiary coding system is not a base coding system. */)
8231 Lisp_Object coding_system
;
8233 Lisp_Object spec
, attrs
;
8235 if (NILP (coding_system
))
8236 return (Qno_conversion
);
8237 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8238 attrs
= AREF (spec
, 0);
8239 return CODING_ATTR_BASE_NAME (attrs
);
8242 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
8244 doc
: "Return the property list of CODING-SYSTEM.")
8246 Lisp_Object coding_system
;
8248 Lisp_Object spec
, attrs
;
8250 if (NILP (coding_system
))
8251 coding_system
= Qno_conversion
;
8252 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8253 attrs
= AREF (spec
, 0);
8254 return CODING_ATTR_PLIST (attrs
);
8258 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
8260 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
8262 Lisp_Object coding_system
;
8266 if (NILP (coding_system
))
8267 coding_system
= Qno_conversion
;
8268 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8269 return AREF (spec
, 1);
8272 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
8273 Scoding_system_eol_type
, 1, 1, 0,
8274 doc
: /* Return eol-type of CODING-SYSTEM.
8275 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8277 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8278 and CR respectively.
8280 A vector value indicates that a format of end-of-line should be
8281 detected automatically. Nth element of the vector is the subsidiary
8282 coding system whose eol-type is N. */)
8284 Lisp_Object coding_system
;
8286 Lisp_Object spec
, eol_type
;
8289 if (NILP (coding_system
))
8290 coding_system
= Qno_conversion
;
8291 if (! CODING_SYSTEM_P (coding_system
))
8293 spec
= CODING_SYSTEM_SPEC (coding_system
);
8294 eol_type
= AREF (spec
, 2);
8295 if (VECTORP (eol_type
))
8296 return Fcopy_sequence (eol_type
);
8297 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
8298 return make_number (n
);
8304 /*** 9. Post-amble ***/
8311 for (i
= 0; i
< coding_category_max
; i
++)
8313 coding_categories
[i
].id
= -1;
8314 coding_priorities
[i
] = i
;
8317 /* ISO2022 specific initialize routine. */
8318 for (i
= 0; i
< 0x20; i
++)
8319 iso_code_class
[i
] = ISO_control_0
;
8320 for (i
= 0x21; i
< 0x7F; i
++)
8321 iso_code_class
[i
] = ISO_graphic_plane_0
;
8322 for (i
= 0x80; i
< 0xA0; i
++)
8323 iso_code_class
[i
] = ISO_control_1
;
8324 for (i
= 0xA1; i
< 0xFF; i
++)
8325 iso_code_class
[i
] = ISO_graphic_plane_1
;
8326 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
8327 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
8328 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
8329 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
8330 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
8331 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
8332 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
8333 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
8334 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
8335 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
8337 inhibit_pre_post_conversion
= 0;
8339 for (i
= 0; i
< 256; i
++)
8341 emacs_mule_bytes
[i
] = 1;
8343 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
8344 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
8345 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
8346 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
8354 staticpro (&Vcoding_system_hash_table
);
8355 Vcoding_system_hash_table
= Fmakehash (Qeq
);
8357 staticpro (&Vsjis_coding_system
);
8358 Vsjis_coding_system
= Qnil
;
8360 staticpro (&Vbig5_coding_system
);
8361 Vbig5_coding_system
= Qnil
;
8363 staticpro (&Vcode_conversion_work_buf_list
);
8364 Vcode_conversion_work_buf_list
= Qnil
;
8366 staticpro (&Vcode_conversion_reused_work_buf
);
8367 Vcode_conversion_reused_work_buf
= Qnil
;
8369 DEFSYM (Qcharset
, "charset");
8370 DEFSYM (Qtarget_idx
, "target-idx");
8371 DEFSYM (Qcoding_system_history
, "coding-system-history");
8372 Fset (Qcoding_system_history
, Qnil
);
8374 /* Target FILENAME is the first argument. */
8375 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
8376 /* Target FILENAME is the third argument. */
8377 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
8379 DEFSYM (Qcall_process
, "call-process");
8380 /* Target PROGRAM is the first argument. */
8381 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
8383 DEFSYM (Qcall_process_region
, "call-process-region");
8384 /* Target PROGRAM is the third argument. */
8385 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
8387 DEFSYM (Qstart_process
, "start-process");
8388 /* Target PROGRAM is the third argument. */
8389 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
8391 DEFSYM (Qopen_network_stream
, "open-network-stream");
8392 /* Target SERVICE is the fourth argument. */
8393 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
8395 DEFSYM (Qcoding_system
, "coding-system");
8396 DEFSYM (Qcoding_aliases
, "coding-aliases");
8398 DEFSYM (Qeol_type
, "eol-type");
8399 DEFSYM (Qunix
, "unix");
8400 DEFSYM (Qdos
, "dos");
8402 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
8403 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
8404 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
8405 DEFSYM (Qdefault_char
, "default-char");
8406 DEFSYM (Qundecided
, "undecided");
8407 DEFSYM (Qno_conversion
, "no-conversion");
8408 DEFSYM (Qraw_text
, "raw-text");
8410 DEFSYM (Qiso_2022
, "iso-2022");
8412 DEFSYM (Qutf_8
, "utf-8");
8414 DEFSYM (Qutf_16
, "utf-16");
8415 DEFSYM (Qsignature
, "signature");
8416 DEFSYM (Qendian
, "endian");
8417 DEFSYM (Qbig
, "big");
8418 DEFSYM (Qlittle
, "little");
8420 DEFSYM (Qshift_jis
, "shift-jis");
8421 DEFSYM (Qbig5
, "big5");
8423 DEFSYM (Qcoding_system_p
, "coding-system-p");
8425 DEFSYM (Qcoding_system_error
, "coding-system-error");
8426 Fput (Qcoding_system_error
, Qerror_conditions
,
8427 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
8428 Fput (Qcoding_system_error
, Qerror_message
,
8429 build_string ("Invalid coding system"));
8431 /* Intern this now in case it isn't already done.
8432 Setting this variable twice is harmless.
8433 But don't staticpro it here--that is done in alloc.c. */
8434 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
8436 DEFSYM (Qtranslation_table
, "translation-table");
8437 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
8438 DEFSYM (Qtranslation_table_id
, "translation-table-id");
8439 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
8440 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
8442 DEFSYM (Qvalid_codes
, "valid-codes");
8444 DEFSYM (Qemacs_mule
, "emacs-mule");
8446 Vcoding_category_table
8447 = Fmake_vector (make_number (coding_category_max
), Qnil
);
8448 staticpro (&Vcoding_category_table
);
8449 /* Followings are target of code detection. */
8450 ASET (Vcoding_category_table
, coding_category_iso_7
,
8451 intern ("coding-category-iso-7"));
8452 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
8453 intern ("coding-category-iso-7-tight"));
8454 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
8455 intern ("coding-category-iso-8-1"));
8456 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
8457 intern ("coding-category-iso-8-2"));
8458 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
8459 intern ("coding-category-iso-7-else"));
8460 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
8461 intern ("coding-category-iso-8-else"));
8462 ASET (Vcoding_category_table
, coding_category_utf_8
,
8463 intern ("coding-category-utf-8"));
8464 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
8465 intern ("coding-category-utf-16-be"));
8466 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
8467 intern ("coding-category-utf-16-le"));
8468 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
8469 intern ("coding-category-utf-16-be-nosig"));
8470 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
8471 intern ("coding-category-utf-16-le-nosig"));
8472 ASET (Vcoding_category_table
, coding_category_charset
,
8473 intern ("coding-category-charset"));
8474 ASET (Vcoding_category_table
, coding_category_sjis
,
8475 intern ("coding-category-sjis"));
8476 ASET (Vcoding_category_table
, coding_category_big5
,
8477 intern ("coding-category-big5"));
8478 ASET (Vcoding_category_table
, coding_category_ccl
,
8479 intern ("coding-category-ccl"));
8480 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
8481 intern ("coding-category-emacs-mule"));
8482 /* The following are NOT targets of code detection. */
8483 ASET (Vcoding_category_table
, coding_category_raw_text
,
8484 intern ("coding-category-raw-text"));
8485 ASET (Vcoding_category_table
, coding_category_undecided
,
8486 intern ("coding-category-undecided"));
8488 defsubr (&Scoding_system_p
);
8489 defsubr (&Sread_coding_system
);
8490 defsubr (&Sread_non_nil_coding_system
);
8491 defsubr (&Scheck_coding_system
);
8492 defsubr (&Sdetect_coding_region
);
8493 defsubr (&Sdetect_coding_string
);
8494 defsubr (&Sfind_coding_systems_region_internal
);
8495 defsubr (&Scheck_coding_systems_region
);
8496 defsubr (&Sdecode_coding_region
);
8497 defsubr (&Sencode_coding_region
);
8498 defsubr (&Sdecode_coding_string
);
8499 defsubr (&Sencode_coding_string
);
8500 defsubr (&Sdecode_sjis_char
);
8501 defsubr (&Sencode_sjis_char
);
8502 defsubr (&Sdecode_big5_char
);
8503 defsubr (&Sencode_big5_char
);
8504 defsubr (&Sset_terminal_coding_system_internal
);
8505 defsubr (&Sset_safe_terminal_coding_system_internal
);
8506 defsubr (&Sterminal_coding_system
);
8507 defsubr (&Sset_keyboard_coding_system_internal
);
8508 defsubr (&Skeyboard_coding_system
);
8509 defsubr (&Sfind_operation_coding_system
);
8510 defsubr (&Sset_coding_system_priority
);
8511 defsubr (&Sdefine_coding_system_internal
);
8512 defsubr (&Sdefine_coding_system_alias
);
8513 defsubr (&Scoding_system_base
);
8514 defsubr (&Scoding_system_plist
);
8515 defsubr (&Scoding_system_aliases
);
8516 defsubr (&Scoding_system_eol_type
);
8517 defsubr (&Scoding_system_priority_list
);
8519 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
8520 doc
: /* List of coding systems.
8522 Do not alter the value of this variable manually. This variable should be
8523 updated by the functions `define-coding-system' and
8524 `define-coding-system-alias'. */);
8525 Vcoding_system_list
= Qnil
;
8527 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
8528 doc
: /* Alist of coding system names.
8529 Each element is a one-element list containing a coding system name.
8530 This variable is given to `completing-read' as TABLE argument.
8532 Do not alter the value of this variable manually. This variable should be
8533 updated by the functions `make-coding-system' and
8534 `define-coding-system-alias'. */);
8535 Vcoding_system_alist
= Qnil
;
8537 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
8538 doc
: /* List of coding-categories (symbols) ordered by priority.
8540 On detecting a coding system, Emacs tries code detection algorithms
8541 associated with each coding-category one by one in this order. When
8542 one algorithm agrees with a byte sequence of source text, the coding
8543 system bound to the corresponding coding-category is selected. */);
8547 Vcoding_category_list
= Qnil
;
8548 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8549 Vcoding_category_list
8550 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
8551 Vcoding_category_list
);
8554 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
8555 doc
: /* Specify the coding system for read operations.
8556 It is useful to bind this variable with `let', but do not set it globally.
8557 If the value is a coding system, it is used for decoding on read operation.
8558 If not, an appropriate element is used from one of the coding system alists:
8559 There are three such tables, `file-coding-system-alist',
8560 `process-coding-system-alist', and `network-coding-system-alist'. */);
8561 Vcoding_system_for_read
= Qnil
;
8563 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
8564 doc
: /* Specify the coding system for write operations.
8565 Programs bind this variable with `let', but you should not set it globally.
8566 If the value is a coding system, it is used for encoding of output,
8567 when writing it to a file and when sending it to a file or subprocess.
8569 If this does not specify a coding system, an appropriate element
8570 is used from one of the coding system alists:
8571 There are three such tables, `file-coding-system-alist',
8572 `process-coding-system-alist', and `network-coding-system-alist'.
8573 For output to files, if the above procedure does not specify a coding system,
8574 the value of `buffer-file-coding-system' is used. */);
8575 Vcoding_system_for_write
= Qnil
;
8577 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
8579 Coding system used in the latest file or process I/O. */);
8580 Vlast_coding_system_used
= Qnil
;
8582 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
8584 *Non-nil means always inhibit code conversion of end-of-line format.
8585 See info node `Coding Systems' and info node `Text and Binary' concerning
8586 such conversion. */);
8587 inhibit_eol_conversion
= 0;
8589 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
8591 Non-nil means process buffer inherits coding system of process output.
8592 Bind it to t if the process output is to be treated as if it were a file
8593 read from some filesystem. */);
8594 inherit_process_coding_system
= 0;
8596 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
8598 Alist to decide a coding system to use for a file I/O operation.
8599 The format is ((PATTERN . VAL) ...),
8600 where PATTERN is a regular expression matching a file name,
8601 VAL is a coding system, a cons of coding systems, or a function symbol.
8602 If VAL is a coding system, it is used for both decoding and encoding
8604 If VAL is a cons of coding systems, the car part is used for decoding,
8605 and the cdr part is used for encoding.
8606 If VAL is a function symbol, the function must return a coding system
8607 or a cons of coding systems which are used as above. The function gets
8608 the arguments with which `find-operation-coding-system' was called.
8610 See also the function `find-operation-coding-system'
8611 and the variable `auto-coding-alist'. */);
8612 Vfile_coding_system_alist
= Qnil
;
8614 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
8616 Alist to decide a coding system to use for a process I/O operation.
8617 The format is ((PATTERN . VAL) ...),
8618 where PATTERN is a regular expression matching a program name,
8619 VAL is a coding system, a cons of coding systems, or a function symbol.
8620 If VAL is a coding system, it is used for both decoding what is received
8621 from the program and encoding what is sent to the program.
8622 If VAL is a cons of coding systems, the car part is used for decoding,
8623 and the cdr part is used for encoding.
8624 If VAL is a function symbol, the function must return a coding system
8625 or a cons of coding systems which are used as above.
8627 See also the function `find-operation-coding-system'. */);
8628 Vprocess_coding_system_alist
= Qnil
;
8630 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
8632 Alist to decide a coding system to use for a network I/O operation.
8633 The format is ((PATTERN . VAL) ...),
8634 where PATTERN is a regular expression matching a network service name
8635 or is a port number to connect to,
8636 VAL is a coding system, a cons of coding systems, or a function symbol.
8637 If VAL is a coding system, it is used for both decoding what is received
8638 from the network stream and encoding what is sent to the network stream.
8639 If VAL is a cons of coding systems, the car part is used for decoding,
8640 and the cdr part is used for encoding.
8641 If VAL is a function symbol, the function must return a coding system
8642 or a cons of coding systems which are used as above.
8644 See also the function `find-operation-coding-system'. */);
8645 Vnetwork_coding_system_alist
= Qnil
;
8647 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
8648 doc
: /* Coding system to use with system messages.
8649 Also used for decoding keyboard input on X Window system. */);
8650 Vlocale_coding_system
= Qnil
;
8652 /* The eol mnemonics are reset in startup.el system-dependently. */
8653 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
8655 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8656 eol_mnemonic_unix
= build_string (":");
8658 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
8660 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8661 eol_mnemonic_dos
= build_string ("\\");
8663 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
8665 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8666 eol_mnemonic_mac
= build_string ("/");
8668 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
8670 *String displayed in mode line when end-of-line format is not yet determined. */);
8671 eol_mnemonic_undecided
= build_string (":");
8673 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
8675 *Non-nil enables character translation while encoding and decoding. */);
8676 Venable_character_translation
= Qt
;
8678 DEFVAR_LISP ("standard-translation-table-for-decode",
8679 &Vstandard_translation_table_for_decode
,
8680 doc
: /* Table for translating characters while decoding. */);
8681 Vstandard_translation_table_for_decode
= Qnil
;
8683 DEFVAR_LISP ("standard-translation-table-for-encode",
8684 &Vstandard_translation_table_for_encode
,
8685 doc
: /* Table for translating characters while encoding. */);
8686 Vstandard_translation_table_for_encode
= Qnil
;
8688 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
8689 doc
: /* Alist of charsets vs revision numbers.
8690 While encoding, if a charset (car part of an element) is found,
8691 designate it with the escape sequence identifying revision (cdr part
8692 of the element). */);
8693 Vcharset_revision_table
= Qnil
;
8695 DEFVAR_LISP ("default-process-coding-system",
8696 &Vdefault_process_coding_system
,
8697 doc
: /* Cons of coding systems used for process I/O by default.
8698 The car part is used for decoding a process output,
8699 the cdr part is used for encoding a text to be sent to a process. */);
8700 Vdefault_process_coding_system
= Qnil
;
8702 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
8704 Table of extra Latin codes in the range 128..159 (inclusive).
8705 This is a vector of length 256.
8706 If Nth element is non-nil, the existence of code N in a file
8707 \(or output of subprocess) doesn't prevent it from being detected as
8708 a coding system of ISO 2022 variant which has a flag
8709 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8710 or reading output of a subprocess.
8711 Only the 128th through 159th elements have a meaning. */);
8712 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
8714 DEFVAR_LISP ("select-safe-coding-system-function",
8715 &Vselect_safe_coding_system_function
,
8717 Function to call to select safe coding system for encoding a text.
8719 If set, this function is called to force a user to select a proper
8720 coding system which can encode the text in the case that a default
8721 coding system used in each operation can't encode the text.
8723 The default value is `select-safe-coding-system' (which see). */);
8724 Vselect_safe_coding_system_function
= Qnil
;
8726 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8727 &inhibit_iso_escape_detection
,
8729 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8731 By default, on reading a file, Emacs tries to detect how the text is
8732 encoded. This code detection is sensitive to escape sequences. If
8733 the sequence is valid as ISO2022, the code is determined as one of
8734 the ISO2022 encodings, and the file is decoded by the corresponding
8735 coding system (e.g. `iso-2022-7bit').
8737 However, there may be a case that you want to read escape sequences in
8738 a file as is. In such a case, you can set this variable to non-nil.
8739 Then, as the code detection ignores any escape sequences, no file is
8740 detected as encoded in some ISO2022 encoding. The result is that all
8741 escape sequences become visible in a buffer.
8743 The default value is nil, and it is strongly recommended not to change
8744 it. That is because many Emacs Lisp source files that contain
8745 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8746 in Emacs's distribution, and they won't be decoded correctly on
8747 reading if you suppress escape sequence detection.
8749 The other way to read escape sequences in a file without decoding is
8750 to explicitly specify some coding system that doesn't use ISO2022's
8751 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
8752 inhibit_iso_escape_detection
= 0;
8755 Lisp_Object args
[coding_arg_max
];
8756 Lisp_Object plist
[14];
8759 for (i
= 0; i
< coding_arg_max
; i
++)
8762 plist
[0] = intern (":name");
8763 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
8764 plist
[2] = intern (":mnemonic");
8765 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
8766 plist
[4] = intern (":coding-type");
8767 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
8768 plist
[6] = intern (":ascii-compatible-p");
8769 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
8770 plist
[8] = intern (":default-char");
8771 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
8772 plist
[10] = intern (":docstring");
8773 plist
[11] = build_string ("Do no conversion.\n\
8775 When you visit a file with this coding, the file is read into a\n\
8776 unibyte buffer as is, thus each byte of a file is treated as a\n\
8778 plist
[12] = intern (":eol-type");
8779 plist
[13] = args
[coding_arg_eol_type
] = Qunix
;
8780 args
[coding_arg_plist
] = Flist (14, plist
);
8781 Fdefine_coding_system_internal (coding_arg_max
, args
);
8784 setup_coding_system (Qno_conversion
, &keyboard_coding
);
8785 setup_coding_system (Qno_conversion
, &terminal_coding
);
8786 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
8790 emacs_strerror (error_number
)
8795 synchronize_system_messages_locale ();
8796 str
= strerror (error_number
);
8798 if (! NILP (Vlocale_coding_system
))
8800 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
8801 Vlocale_coding_system
,
8803 str
= (char *) XSTRING (dec
)->data
;