1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. At
59 the C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion from
61 a coding system symbol to its attributes vector is done by looking up
62 the symbol in Vcoding_system_hash_table.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
152 Below is the template of these functions. */
156 detect_coding_XXX (coding
, detect_info
)
157 struct coding_system
*coding
;
158 struct coding_detection_info
*detect_info
;
160 unsigned char *src
= coding
->source
;
161 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
162 int multibytep
= coding
->src_multibyte
;
163 int consumed_chars
= 0;
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
173 if (! __C_conforms_to_XXX___ (c
))
175 if (! __C_strongly_suggests_XXX__ (c
))
176 found
= CATEGORY_MASK_XXX
;
178 /* The byte sequence is invalid for XXX. */
179 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
183 /* The source was exhausted successfully. */
184 detect_info
->found
|= found
;
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
201 Below is the template of these functions. */
205 decode_coding_XXXX (coding
)
206 struct coding_system
*coding
;
208 unsigned char *src
= coding
->source
+ coding
->consumed
;
209 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there are not enough source bytes, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base
;
214 /* A buffer to produce decoded characters. */
215 int *charbuf
= coding
->charbuf
;
216 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
217 int multibytep
= coding
->src_multibyte
;
222 if (charbuf
< charbuf_end
)
223 /* No more room to produce a decoded character. */
230 if (src_base
< src_end
231 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base
< src_end
&& charbuf
< charbuf_end
)
235 *charbuf
++ = *src_base
++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
239 /* Remember how many characters we produced. */
240 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
256 DST_BYTES zero means that the source area and the destination area
257 overlap, which means that we can produce encoded text until it
258 reaches the head of the not-yet-encoded source text.
260 Below is a template of these functions. */
263 encode_coding_XXX (coding
)
264 struct coding_system
*coding
;
266 int multibytep
= coding
->dst_multibyte
;
267 int *charbuf
= coding
->charbuf
;
268 int *charbuf_end
= charbuf
->charbuf
+ coding
->charbuf_used
;
269 unsigned char *dst
= coding
->destination
+ coding
->produced
;
270 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
271 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
272 int produced_chars
= 0;
274 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
277 /* Encode C into DST, and increment DST. */
279 label_no_more_destination
:
280 /* How many chars and bytes we produced. */
281 coding
->produced_char
+= produced_chars
;
282 coding
->produced
= dst
- coding
->destination
;
287 /*** 1. Preamble ***/
294 #include "character.h"
297 #include "composite.h"
301 Lisp_Object Vcoding_system_hash_table
;
303 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
304 Lisp_Object Qunix
, Qdos
;
305 extern Lisp_Object Qmac
; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system
;
307 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
308 Lisp_Object Qdefault_char
;
309 Lisp_Object Qno_conversion
, Qundecided
;
310 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
311 Lisp_Object Qsignature
, Qendian
, Qbig
, Qlittle
;
312 Lisp_Object Qcoding_system_history
;
313 Lisp_Object Qvalid_codes
;
315 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
316 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
317 Lisp_Object Qstart_process
, Qopen_network_stream
;
318 Lisp_Object Qtarget_idx
;
320 Lisp_Object Vselect_safe_coding_system_function
;
322 /* Mnemonic string for each format of end-of-line. */
323 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
324 /* Mnemonic string to indicate format of end-of-line is not yet
326 Lisp_Object eol_mnemonic_undecided
;
330 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
332 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
334 /* Coding system emacs-mule and raw-text are for converting only
335 end-of-line format. */
336 Lisp_Object Qemacs_mule
, Qraw_text
;
338 /* Coding-systems are handed between Emacs Lisp programs and C internal
339 routines by the following three variables. */
340 /* Coding-system for reading files and receiving data from process. */
341 Lisp_Object Vcoding_system_for_read
;
342 /* Coding-system for writing files and sending data to process. */
343 Lisp_Object Vcoding_system_for_write
;
344 /* Coding-system actually used in the latest I/O. */
345 Lisp_Object Vlast_coding_system_used
;
347 /* A vector of length 256 which contains information about special
348 Latin codes (especially for dealing with Microsoft codes). */
349 Lisp_Object Vlatin_extra_code_table
;
351 /* Flag to inhibit code conversion of end-of-line format. */
352 int inhibit_eol_conversion
;
354 /* Flag to inhibit ISO2022 escape sequence detection. */
355 int inhibit_iso_escape_detection
;
357 /* Flag to make buffer-file-coding-system inherit from process-coding. */
358 int inherit_process_coding_system
;
360 /* Coding system to be used to encode text for terminal display. */
361 struct coding_system terminal_coding
;
363 /* Coding system to be used to encode text for terminal display when
364 terminal coding system is nil. */
365 struct coding_system safe_terminal_coding
;
367 /* Coding system of what is sent from terminal keyboard. */
368 struct coding_system keyboard_coding
;
370 Lisp_Object Vfile_coding_system_alist
;
371 Lisp_Object Vprocess_coding_system_alist
;
372 Lisp_Object Vnetwork_coding_system_alist
;
374 Lisp_Object Vlocale_coding_system
;
378 /* Flag to tell if we look up translation table on character code
380 Lisp_Object Venable_character_translation
;
381 /* Standard translation table to look up on decoding (reading). */
382 Lisp_Object Vstandard_translation_table_for_decode
;
383 /* Standard translation table to look up on encoding (writing). */
384 Lisp_Object Vstandard_translation_table_for_encode
;
386 Lisp_Object Qtranslation_table
;
387 Lisp_Object Qtranslation_table_id
;
388 Lisp_Object Qtranslation_table_for_decode
;
389 Lisp_Object Qtranslation_table_for_encode
;
391 /* Alist of charsets vs revision number. */
392 static Lisp_Object Vcharset_revision_table
;
394 /* Default coding systems used for process I/O. */
395 Lisp_Object Vdefault_process_coding_system
;
397 /* Global flag to tell that we can't call post-read-conversion and
398 pre-write-conversion functions. Usually the value is zero, but it
399 is set to 1 temporarily while such functions are running. This is
400 to avoid infinitely recursive calls. */
401 static int inhibit_pre_post_conversion
;
403 /* Two special coding systems. */
404 Lisp_Object Vsjis_coding_system
;
405 Lisp_Object Vbig5_coding_system
;
408 static int detect_coding_utf_8
P_ ((struct coding_system
*,
409 struct coding_detection_info
*info
));
410 static void decode_coding_utf_8
P_ ((struct coding_system
*));
411 static int encode_coding_utf_8
P_ ((struct coding_system
*));
413 static int detect_coding_utf_16
P_ ((struct coding_system
*,
414 struct coding_detection_info
*info
));
415 static void decode_coding_utf_16
P_ ((struct coding_system
*));
416 static int encode_coding_utf_16
P_ ((struct coding_system
*));
418 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
419 struct coding_detection_info
*info
));
420 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
421 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
423 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
424 struct coding_detection_info
*info
));
425 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
426 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
428 static int detect_coding_sjis
P_ ((struct coding_system
*,
429 struct coding_detection_info
*info
));
430 static void decode_coding_sjis
P_ ((struct coding_system
*));
431 static int encode_coding_sjis
P_ ((struct coding_system
*));
433 static int detect_coding_big5
P_ ((struct coding_system
*,
434 struct coding_detection_info
*info
));
435 static void decode_coding_big5
P_ ((struct coding_system
*));
436 static int encode_coding_big5
P_ ((struct coding_system
*));
438 static int detect_coding_ccl
P_ ((struct coding_system
*,
439 struct coding_detection_info
*info
));
440 static void decode_coding_ccl
P_ ((struct coding_system
*));
441 static int encode_coding_ccl
P_ ((struct coding_system
*));
443 static void decode_coding_raw_text
P_ ((struct coding_system
*));
444 static int encode_coding_raw_text
P_ ((struct coding_system
*));
447 /* ISO2022 section */
449 #define CODING_ISO_INITIAL(coding, reg) \
450 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
451 coding_attr_iso_initial), \
455 #define CODING_ISO_REQUEST(coding, charset_id) \
456 ((charset_id <= (coding)->max_charset_id \
457 ? (coding)->safe_charsets[charset_id] \
/* Accessors for the ISO-2022 specific members of CODING, i.e. the
   members of CODING->spec.iso_2022.  Each macro expands to the member
   expression itself.  */

/* The `flags' member (presumably a combination of the
   CODING_ISO_FLAG_XXX bits -- confirm against the setup code).  */
461 #define CODING_ISO_FLAGS(coding) \
462 ((coding)->spec.iso_2022.flags)
/* Element REG of the `current_designation' array.  */
463 #define CODING_ISO_DESIGNATION(coding, reg) \
464 ((coding)->spec.iso_2022.current_designation[reg])
/* Element PLANE of the `current_invocation' array.  */
465 #define CODING_ISO_INVOCATION(coding, plane) \
466 ((coding)->spec.iso_2022.current_invocation[plane])
/* The `single_shifting' member.  */
467 #define CODING_ISO_SINGLE_SHIFTING(coding) \
468 ((coding)->spec.iso_2022.single_shifting)
/* The `bol' member.  */
469 #define CODING_ISO_BOL(coding) \
470 ((coding)->spec.iso_2022.bol)
/* The designation of whichever register is currently invoked to PLANE:
   CODING_ISO_INVOCATION selects the register, CODING_ISO_DESIGNATION
   then looks that register up.  */
471 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
472 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
474 /* Control characters of ISO2022. */
475 /* code */ /* function */
476 #define ISO_CODE_LF 0x0A /* line-feed */
477 #define ISO_CODE_CR 0x0D /* carriage-return */
478 #define ISO_CODE_SO 0x0E /* shift-out */
479 #define ISO_CODE_SI 0x0F /* shift-in */
480 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
481 #define ISO_CODE_ESC 0x1B /* escape */
482 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
483 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
484 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
486 /* All code (1-byte) of ISO2022 is classified into one of the
488 enum iso_code_class_type
490 ISO_control_0
, /* Control codes in the range
491 0x00..0x1F and 0x7F, except for the
492 following 5 codes. */
493 ISO_carriage_return
, /* ISO_CODE_CR (0x0D) */
494 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
495 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
496 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
497 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
498 ISO_control_1
, /* Control codes in the range
499 0x80..0x9F, except for the
500 following 3 codes. */
501 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
502 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
503 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
504 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
505 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
506 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
507 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
510 /** The macros CODING_ISO_FLAG_XXX define the flag bits of the
511 `iso-flags' attribute of an iso2022 coding system. */
513 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
514 instead of the correct short-form sequence (e.g. ESC $ A). */
515 #define CODING_ISO_FLAG_LONG_FORM 0x0001
517 /* If set, reset graphic planes and registers at end-of-line to the
519 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
521 /* If set, reset graphic planes and registers before any control
522 characters to the initial state. */
523 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
525 /* If set, encode by 7-bit environment. */
526 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
528 /* If set, use locking-shift function. */
529 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
531 /* If set, use single-shift function. Overrides
532 CODING_ISO_FLAG_LOCKING_SHIFT. */
533 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
535 /* If set, use designation escape sequence. */
536 #define CODING_ISO_FLAG_DESIGNATION 0x0040
538 /* If set, produce revision number sequence. */
539 #define CODING_ISO_FLAG_REVISION 0x0080
541 /* If set, produce ISO6429's direction specifying sequence. */
542 #define CODING_ISO_FLAG_DIRECTION 0x0100
544 /* If set, assume designation states are reset at beginning of line on
546 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
548 /* If set, designation sequence should be placed at beginning of line
550 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
552 /* If set, do not encode unsafe characters on output. */
553 #define CODING_ISO_FLAG_SAFE 0x0800
555 /* If set, extra latin codes (128..159) are accepted as a valid code
557 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
559 #define CODING_ISO_FLAG_COMPOSITION 0x2000
561 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
563 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
565 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
567 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
569 /* A character to be produced on output if encoding of the original
570 character is prohibited by CODING_ISO_FLAG_SAFE. */
571 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
/* Accessors for the UTF-16 specific members of CODING, i.e. the
   members of CODING->spec.utf_16.  Each macro expands to the member
   expression itself.  */

/* The `bom' member.  */
575 #define CODING_UTF_16_BOM(coding) \
576 ((coding)->spec.utf_16.bom)
/* The `endian' member.  */
578 #define CODING_UTF_16_ENDIAN(coding) \
579 ((coding)->spec.utf_16.endian)
/* The `surrogate' member.  */
581 #define CODING_UTF_16_SURROGATE(coding) \
582 ((coding)->spec.utf_16.surrogate)
586 #define CODING_CCL_DECODER(coding) \
587 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
588 #define CODING_CCL_ENCODER(coding) \
589 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
590 #define CODING_CCL_VALIDS(coding) \
591 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
594 /* Index for each coding category in `coding_categories' */
598 coding_category_iso_7
,
599 coding_category_iso_7_tight
,
600 coding_category_iso_8_1
,
601 coding_category_iso_8_2
,
602 coding_category_iso_7_else
,
603 coding_category_iso_8_else
,
604 coding_category_utf_8
,
605 coding_category_utf_16_auto
,
606 coding_category_utf_16_be
,
607 coding_category_utf_16_le
,
608 coding_category_utf_16_be_nosig
,
609 coding_category_utf_16_le_nosig
,
610 coding_category_charset
,
611 coding_category_sjis
,
612 coding_category_big5
,
614 coding_category_emacs_mule
,
615 /* All above are targets of code detection. */
616 coding_category_raw_text
,
617 coding_category_undecided
,
621 /* Definitions of flag bits used in detect_coding_XXXX. */
622 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
623 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
624 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
625 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
626 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
627 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
628 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
629 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
630 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
631 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
632 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
633 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
634 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
635 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
636 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
637 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
638 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
640 /* This value is returned if detect_coding_mask () finds nothing other
641 than ASCII characters. */
642 #define CATEGORY_MASK_ANY \
643 (CATEGORY_MASK_ISO_7 \
644 | CATEGORY_MASK_ISO_7_TIGHT \
645 | CATEGORY_MASK_ISO_8_1 \
646 | CATEGORY_MASK_ISO_8_2 \
647 | CATEGORY_MASK_ISO_7_ELSE \
648 | CATEGORY_MASK_ISO_8_ELSE \
649 | CATEGORY_MASK_UTF_8 \
650 | CATEGORY_MASK_UTF_16_BE \
651 | CATEGORY_MASK_UTF_16_LE \
652 | CATEGORY_MASK_UTF_16_BE_NOSIG \
653 | CATEGORY_MASK_UTF_16_LE_NOSIG \
654 | CATEGORY_MASK_CHARSET \
655 | CATEGORY_MASK_SJIS \
656 | CATEGORY_MASK_BIG5 \
657 | CATEGORY_MASK_CCL \
658 | CATEGORY_MASK_EMACS_MULE)
/* Unions of the single-category CATEGORY_MASK_XXX bits, for testing
   several related categories at once.  */

/* Both 7-bit ISO categories (iso_7 and iso_7_tight).  */
661 #define CATEGORY_MASK_ISO_7BIT \
662 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
/* Both 8-bit ISO categories (iso_8_1 and iso_8_2).  */
664 #define CATEGORY_MASK_ISO_8BIT \
665 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
/* Both "else" ISO categories (iso_7_else and iso_8_else).  */
667 #define CATEGORY_MASK_ISO_ELSE \
668 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
/* Every ISO category except iso_8_1 and iso_8_2 (presumably the ones
   recognized by escape sequences -- confirm in
   detect_coding_iso_2022).  */
670 #define CATEGORY_MASK_ISO_ESCAPE \
671 (CATEGORY_MASK_ISO_7 \
672 | CATEGORY_MASK_ISO_7_TIGHT \
673 | CATEGORY_MASK_ISO_7_ELSE \
674 | CATEGORY_MASK_ISO_8_ELSE)
/* All six ISO categories.  */
676 #define CATEGORY_MASK_ISO \
677 ( CATEGORY_MASK_ISO_7BIT \
678 | CATEGORY_MASK_ISO_8BIT \
679 | CATEGORY_MASK_ISO_ELSE)
/* The UTF-16 BE and LE categories together with their _NOSIG
   variants.  */
681 #define CATEGORY_MASK_UTF_16 \
682 (CATEGORY_MASK_UTF_16_BE \
683 | CATEGORY_MASK_UTF_16_LE \
684 | CATEGORY_MASK_UTF_16_BE_NOSIG \
685 | CATEGORY_MASK_UTF_16_LE_NOSIG)
688 /* List of symbols `coding-category-xxx' ordered by priority. This
689 variable is exposed to Emacs Lisp. */
690 static Lisp_Object Vcoding_category_list
;
692 /* Table of coding categories (Lisp symbols). This variable is for
694 static Lisp_Object Vcoding_category_table
;
696 /* Table of coding-categories ordered by priority. */
697 static enum coding_category coding_priorities
[coding_category_max
];
699 /* Nth element is a coding context for the coding system bound to the
700 Nth coding category. */
701 static struct coding_system coding_categories
[coding_category_max
];
703 static int detected_mask
[coding_category_raw_text
] =
711 CATEGORY_MASK_UTF_16
,
712 CATEGORY_MASK_UTF_16
,
713 CATEGORY_MASK_UTF_16
,
714 CATEGORY_MASK_UTF_16
,
715 CATEGORY_MASK_UTF_16
,
716 CATEGORY_MASK_CHARSET
,
720 CATEGORY_MASK_EMACS_MULE
723 /*** Commonly used macros and functions ***/
/* Return the smaller of A and B.  This is a macro: whichever argument
   is selected is evaluated twice, so do not pass expressions with
   side effects.  */
726 #define min(a, b) ((a) < (b) ? (a) : (b))
/* Return the larger of A and B.  The same double-evaluation caveat
   applies.  */
729 #define max(a, b) ((a) > (b) ? (a) : (b))
732 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
734 attrs = CODING_ID_ATTRS (coding->id); \
735 eol_type = CODING_ID_EOL_TYPE (coding->id); \
736 if (VECTORP (eol_type)) \
738 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
742 /* Safely get one byte from the source text pointed by SRC which ends
743 at SRC_END, and set C to that byte. If there are not enough bytes
744 in the source, it jumps to `no_more_source'. The caller
745 should declare and set these variables appropriately in advance:
746 src, src_end, multibytep
749 #define ONE_MORE_BYTE(c) \
751 if (src == src_end) \
753 if (src_base < src) \
754 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
755 goto no_more_source; \
758 if (multibytep && (c & 0x80)) \
760 if ((c & 0xFE) != 0xC0) \
761 error ("Undecodable char found"); \
762 c = ((c & 1) << 6) | *src++; \
768 #define ONE_MORE_BYTE_NO_CHECK(c) \
771 if (multibytep && (c & 0x80)) \
773 if ((c & 0xFE) != 0xC0) \
774 error ("Undecodable char found"); \
775 c = ((c & 1) << 6) | *src++; \
781 /* Store a byte C in the place pointed by DST and increment DST to the
782 next free point, and increment PRODUCED_CHARS. The caller should
783 ensure that C is 0..127, and declare and set the variable `dst'
784 appropriately in advance.
788 #define EMIT_ONE_ASCII_BYTE(c) \
795 /* Like EMIT_ONE_ASCII_BYTE, but store two bytes: C1 and C2. */
797 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
799 produced_chars += 2; \
800 *dst++ = (c1), *dst++ = (c2); \
804 /* Store a byte C in the place pointed to by DST and increment DST to the
805 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
806 nonzero, store in an appropriate multibyte form. The caller should
807 declare and set the variables `dst' and `multibytep' appropriately
810 #define EMIT_ONE_BYTE(c) \
817 ch = BYTE8_TO_CHAR (ch); \
818 CHAR_STRING_ADVANCE (ch, dst); \
825 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
827 #define EMIT_TWO_BYTES(c1, c2) \
829 produced_chars += 2; \
836 ch = BYTE8_TO_CHAR (ch); \
837 CHAR_STRING_ADVANCE (ch, dst); \
840 ch = BYTE8_TO_CHAR (ch); \
841 CHAR_STRING_ADVANCE (ch, dst); \
851 #define EMIT_THREE_BYTES(c1, c2, c3) \
853 EMIT_ONE_BYTE (c1); \
854 EMIT_TWO_BYTES (c2, c3); \
858 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
860 EMIT_TWO_BYTES (c1, c2); \
861 EMIT_TWO_BYTES (c3, c4); \
865 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
867 charset_map_loaded = 0; \
868 c = DECODE_CHAR (charset, code); \
869 if (charset_map_loaded) \
871 unsigned char *orig = coding->source; \
874 coding_set_source (coding); \
875 offset = coding->source - orig; \
877 src_base += offset; \
883 #define ASSURE_DESTINATION(bytes) \
885 if (dst + (bytes) >= dst_end) \
887 int more_bytes = charbuf_end - charbuf + (bytes); \
889 dst = alloc_destination (coding, more_bytes, dst); \
890 dst_end = coding->destination + coding->dst_bytes; \
897 coding_set_source (coding
)
898 struct coding_system
*coding
;
900 if (BUFFERP (coding
->src_object
))
902 struct buffer
*buf
= XBUFFER (coding
->src_object
);
904 if (coding
->src_pos
< 0)
905 coding
->source
= BUF_GAP_END_ADDR (buf
) + coding
->src_pos_byte
;
907 coding
->source
= BUF_BYTE_ADDRESS (buf
, coding
->src_pos_byte
);
909 else if (STRINGP (coding
->src_object
))
911 coding
->source
= (XSTRING (coding
->src_object
)->data
912 + coding
->src_pos_byte
);
915 /* Otherwise, the source is C string and is never relocated
916 automatically. Thus we don't have to update anything. */
921 coding_set_destination (coding
)
922 struct coding_system
*coding
;
924 if (BUFFERP (coding
->dst_object
))
926 if (coding
->src_pos
< 0)
928 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
929 coding
->dst_bytes
= (GAP_END_ADDR
930 - (coding
->src_bytes
- coding
->consumed
)
931 - coding
->destination
);
935 /* We are sure that coding->dst_pos_byte is before the gap
937 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
938 + coding
->dst_pos_byte
- 1);
939 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
940 - coding
->destination
);
944 /* Otherwise, the destination is C string and is never relocated
945 automatically. Thus we don't have to update anything. */
951 coding_alloc_by_realloc (coding
, bytes
)
952 struct coding_system
*coding
;
955 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
956 coding
->dst_bytes
+ bytes
);
957 coding
->dst_bytes
+= bytes
;
961 coding_alloc_by_making_gap (coding
, bytes
)
962 struct coding_system
*coding
;
965 if (BUFFERP (coding
->dst_object
)
966 && EQ (coding
->src_object
, coding
->dst_object
))
968 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
970 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
972 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
976 Lisp_Object this_buffer
;
978 this_buffer
= Fcurrent_buffer ();
979 set_buffer_internal (XBUFFER (coding
->dst_object
));
981 set_buffer_internal (XBUFFER (this_buffer
));
986 static unsigned char *
987 alloc_destination (coding
, nbytes
, dst
)
988 struct coding_system
*coding
;
992 EMACS_INT offset
= dst
- coding
->destination
;
994 if (BUFFERP (coding
->dst_object
))
995 coding_alloc_by_making_gap (coding
, nbytes
);
997 coding_alloc_by_realloc (coding
, nbytes
);
998 coding
->result
= CODING_RESULT_SUCCESS
;
999 coding_set_destination (coding
);
1000 dst
= coding
->destination
+ offset
;
1004 /** Macros for annotations. */
1006 /* Maximum length of annotation data (sum of annotations for
1007 composition and charset). */
1008 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1010 /* An annotation data is stored in the array coding->charbuf in this
1012 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1013 LENGTH is the number of elements in the annotation.
1014 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1015 FROM and TO specify the range of text annotated. They are relative
1016 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1018 The format of the following elements depends on ANNOTATION_MASK.
1020 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1022 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1023 METHOD is one of enum composition_method.
1024 Optional COMPOSITION-COMPONENTS are characters and composition
1027 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1030 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
1032 *(buf)++ = -(len); \
1033 *(buf)++ = (mask); \
1034 *(buf)++ = (from); \
1036 coding->annotated = 1; \
1039 #define ADD_COMPOSITION_DATA(buf, from, to, method) \
1041 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1046 #define ADD_CHARSET_DATA(buf, from, to, id) \
1048 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1053 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1060 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1061 Check if a text is encoded in UTF-8. If it is, return 1, else
/* Octet classification predicates for UTF-8 style byte sequences.
   Each evaluates its argument C exactly once and yields nonzero when
   C matches the bit pattern noted.  */

/* 0xxxxxxx: C is below 0x80, a complete one-octet code.  */
1064 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
/* 10xxxxxx: a trailing (extra) octet of a multi-octet sequence.  */
1065 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
/* 110xxxxx: the leading octet of a 2-octet sequence.  */
1066 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* 1110xxxx: the leading octet of a 3-octet sequence.  */
1067 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* 11110xxx: the leading octet of a 4-octet sequence.  */
1068 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* 111110xx: the leading octet of a 5-octet sequence.  */
1069 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1072 detect_coding_utf_8 (coding
, detect_info
)
1073 struct coding_system
*coding
;
1074 struct coding_detection_info
*detect_info
;
1076 unsigned char *src
= coding
->source
, *src_base
= src
;
1077 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1078 int multibytep
= coding
->src_multibyte
;
1079 int consumed_chars
= 0;
1083 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1084 /* A coding system of this category is always ASCII compatible. */
1085 src
+= coding
->head_ascii
;
1089 int c
, c1
, c2
, c3
, c4
;
1093 if (UTF_8_1_OCTET_P (c
))
1097 if (! UTF_8_EXTRA_OCTET_P (c1
))
1099 if (UTF_8_2_OCTET_LEADING_P (c
))
1101 found
= CATEGORY_MASK_UTF_8
;
1105 if (! UTF_8_EXTRA_OCTET_P (c2
))
1107 if (UTF_8_3_OCTET_LEADING_P (c
))
1109 found
= CATEGORY_MASK_UTF_8
;
1113 if (! UTF_8_EXTRA_OCTET_P (c3
))
1115 if (UTF_8_4_OCTET_LEADING_P (c
))
1117 found
= CATEGORY_MASK_UTF_8
;
1121 if (! UTF_8_EXTRA_OCTET_P (c4
))
1123 if (UTF_8_5_OCTET_LEADING_P (c
))
1125 found
= CATEGORY_MASK_UTF_8
;
1130 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1134 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1136 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1139 detect_info
->found
|= found
;
1145 decode_coding_utf_8 (coding
)
1146 struct coding_system
*coding
;
1148 unsigned char *src
= coding
->source
+ coding
->consumed
;
1149 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1150 unsigned char *src_base
;
1151 int *charbuf
= coding
->charbuf
;
1152 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1153 int consumed_chars
= 0, consumed_chars_base
;
1154 int multibytep
= coding
->src_multibyte
;
1155 Lisp_Object attr
, eol_type
, charset_list
;
1157 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1161 int c
, c1
, c2
, c3
, c4
, c5
;
1164 consumed_chars_base
= consumed_chars
;
1166 if (charbuf
>= charbuf_end
)
1170 if (UTF_8_1_OCTET_P(c1
))
1175 if (EQ (eol_type
, Qdos
))
1179 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
1180 goto no_more_source
;
1185 else if (EQ (eol_type
, Qmac
))
1192 if (! UTF_8_EXTRA_OCTET_P (c2
))
1194 if (UTF_8_2_OCTET_LEADING_P (c1
))
1196 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1197 /* Reject overlong sequences here and below. Encoders
1198 producing them are incorrect, they can be misleading,
1199 and they mess up read/write invariance. */
1206 if (! UTF_8_EXTRA_OCTET_P (c3
))
1208 if (UTF_8_3_OCTET_LEADING_P (c1
))
1210 c
= (((c1
& 0xF) << 12)
1211 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1213 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1219 if (! UTF_8_EXTRA_OCTET_P (c4
))
1221 if (UTF_8_4_OCTET_LEADING_P (c1
))
1223 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1224 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1231 if (! UTF_8_EXTRA_OCTET_P (c5
))
1233 if (UTF_8_5_OCTET_LEADING_P (c1
))
1235 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1236 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1238 if ((c
> MAX_CHAR
) || (c
< 0x200000))
1253 consumed_chars
= consumed_chars_base
;
1255 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1260 coding
->consumed_char
+= consumed_chars_base
;
1261 coding
->consumed
= src_base
- coding
->source
;
1262 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1267 encode_coding_utf_8 (coding
)
1268 struct coding_system
*coding
;
1270 int multibytep
= coding
->dst_multibyte
;
1271 int *charbuf
= coding
->charbuf
;
1272 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1273 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1274 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1275 int produced_chars
= 0;
1280 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1282 while (charbuf
< charbuf_end
)
1284 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1286 ASSURE_DESTINATION (safe_room
);
1288 if (CHAR_BYTE8_P (c
))
1290 c
= CHAR_TO_BYTE8 (c
);
1295 CHAR_STRING_ADVANCE (c
, pend
);
1296 for (p
= str
; p
< pend
; p
++)
1303 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1305 while (charbuf
< charbuf_end
)
1307 ASSURE_DESTINATION (safe_room
);
1309 dst
+= CHAR_STRING (c
, dst
);
1313 coding
->result
= CODING_RESULT_SUCCESS
;
1314 coding
->produced_char
+= produced_chars
;
1315 coding
->produced
= dst
- coding
->destination
;
1320 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1321 Check if a text is encoded in one of UTF-16 based coding systems.
1322 If it is, return 1, else return 0. */
/* Nonzero if the 16-bit code unit VAL lies in 0xD800..0xDBFF
   (top six bits are 110110).  */
#define UTF_16_HIGH_SURROGATE_P(val) (0xD800 == ((val) & 0xFC00))

/* Nonzero if the 16-bit code unit VAL lies in 0xDC00..0xDFFF
   (top six bits are 110111).  */
#define UTF_16_LOW_SURROGATE_P(val) (0xDC00 == ((val) & 0xFC00))

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  VAL may be
   evaluated up to three times, so it must be free of side effects.  */
#define UTF_16_INVALID_P(val)		\
  (0xFFFE == (val)			\
   || 0xFFFF == (val)			\
   || UTF_16_LOW_SURROGATE_P (val))
1337 detect_coding_utf_16 (coding
, detect_info
)
1338 struct coding_system
*coding
;
1339 struct coding_detection_info
*detect_info
;
1341 unsigned char *src
= coding
->source
, *src_base
= src
;
1342 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1343 int multibytep
= coding
->src_multibyte
;
1344 int consumed_chars
= 0;
1347 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
1349 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1350 && (coding
->src_bytes
& 1))
1352 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
1358 if ((c1
== 0xFF) && (c2
== 0xFE))
1360 detect_info
->found
|= CATEGORY_MASK_UTF_16_LE
;
1361 detect_info
->rejected
|= CATEGORY_MASK_UTF_16_BE
;
1363 else if ((c1
== 0xFE) && (c2
== 0xFF))
1365 detect_info
->found
|= CATEGORY_MASK_UTF_16_BE
;
1366 detect_info
->rejected
|= CATEGORY_MASK_UTF_16_LE
;
1373 decode_coding_utf_16 (coding
)
1374 struct coding_system
*coding
;
1376 unsigned char *src
= coding
->source
+ coding
->consumed
;
1377 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1378 unsigned char *src_base
;
1379 int *charbuf
= coding
->charbuf
;
1380 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1381 int consumed_chars
= 0, consumed_chars_base
;
1382 int multibytep
= coding
->src_multibyte
;
1383 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1384 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1385 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1386 Lisp_Object attr
, eol_type
, charset_list
;
1388 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1390 if (bom
!= utf_16_without_bom
)
1398 if (bom
== utf_16_with_bom
)
1400 if (endian
== utf_16_big_endian
1401 ? c
!= 0xFEFF : c
!= 0xFFFE)
1403 /* We are sure that there's enough room at CHARBUF. */
1412 CODING_UTF_16_ENDIAN (coding
)
1413 = endian
= utf_16_big_endian
;
1414 else if (c
== 0xFFFE)
1415 CODING_UTF_16_ENDIAN (coding
)
1416 = endian
= utf_16_little_endian
;
1419 CODING_UTF_16_ENDIAN (coding
)
1420 = endian
= utf_16_big_endian
;
1424 CODING_UTF_16_BOM (coding
) = utf_16_with_bom
;
1432 consumed_chars_base
= consumed_chars
;
1434 if (charbuf
+ 2 >= charbuf_end
)
1439 c
= (endian
== utf_16_big_endian
1440 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1443 if (! UTF_16_LOW_SURROGATE_P (c
))
1445 if (endian
== utf_16_big_endian
)
1446 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1448 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1452 if (UTF_16_HIGH_SURROGATE_P (c
))
1453 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1459 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1460 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1466 if (UTF_16_HIGH_SURROGATE_P (c
))
1467 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1474 coding
->consumed_char
+= consumed_chars_base
;
1475 coding
->consumed
= src_base
- coding
->source
;
1476 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1480 encode_coding_utf_16 (coding
)
1481 struct coding_system
*coding
;
1483 int multibytep
= coding
->dst_multibyte
;
1484 int *charbuf
= coding
->charbuf
;
1485 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1486 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1487 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1489 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1490 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1491 int produced_chars
= 0;
1492 Lisp_Object attrs
, eol_type
, charset_list
;
1495 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1497 if (bom
== utf_16_with_bom
)
1499 ASSURE_DESTINATION (safe_room
);
1501 EMIT_TWO_BYTES (0xFE, 0xFF);
1503 EMIT_TWO_BYTES (0xFF, 0xFE);
1504 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1507 while (charbuf
< charbuf_end
)
1509 ASSURE_DESTINATION (safe_room
);
1511 if (c
>= MAX_UNICODE_CHAR
)
1512 c
= coding
->default_char
;
1517 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1519 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1526 c1
= (c
>> 10) + 0xD800;
1527 c2
= (c
& 0x3FF) + 0xDC00;
1529 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1531 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1534 coding
->result
= CODING_RESULT_SUCCESS
;
1535 coding
->produced
= dst
- coding
->destination
;
1536 coding
->produced_char
+= produced_chars
;
1541 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1543 /* Emacs' internal format for representation of multiple character
1544 sets is a kind of multi-byte encoding, i.e. characters are
1545 represented by variable-length sequences of one-byte codes.
1547 ASCII characters and control characters (e.g. `tab', `newline') are
1548 represented by one-byte sequences which are their ASCII codes, in
1549 the range 0x00 through 0x7F.
1551 8-bit characters of the range 0x80..0x9F are represented by
1552 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1555 8-bit characters of the range 0xA0..0xFF are represented by
1556 one-byte sequences which are their 8-bit code.
1558 The other characters are represented by a sequence of `base
1559 leading-code', optional `extended leading-code', and one or two
1560 `position-code's. The length of the sequence is determined by the
1561 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1562 whereas extended leading-code and position-code take the range 0xA0
1563 through 0xFF. See `charset.h' for more details about leading-code
1566 --- CODE RANGE of Emacs' internal format ---
1570 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1571 eight-bit-graphic 0xA0..0xFF
1572 ELSE 0x81..0x9D + [0xA0..0xFF]+
1573 ---------------------------------------------
1575 As this is the internal character representation, the format is
1576 usually not used externally (i.e. in a file or in a data sent to a
1577 process). But, it is possible to have a text externally in this
1578 format (i.e. by encoding by the coding system `emacs-mule').
1580 In that case, a sequence of one-byte codes has a slightly different
1583 At first, all characters in eight-bit-control are represented by
1584 one-byte sequences which are their 8-bit code.
1586 Next, character composition data are represented by the byte
1587 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1589 METHOD is 0xF2 plus one of composition method (enum
1590 composition_method),
1592 BYTES is 0xA0 plus a byte length of this composition data,
1594 CHARS is 0xA0 plus a number of characters composed by this
1597 COMPONENTs are characters of multibyte form or composition
1598 rules encoded by two-byte of ASCII codes.
1600 In addition, for backward compatibility, the following formats are
1601 also recognized as composition data on decoding.
1604 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1607 MSEQ is a multibyte form but in one of these special formats:
1608 ASCII: 0xA0 ASCII_CODE+0x80,
1609 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1610 RULE is a one byte code of the range 0xA0..0xF0 that
1611 represents a composition rule.
1614 char emacs_mule_bytes
[256];
1617 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1618 struct coding_system
*coding
;
1620 int *nbytes
, *nchars
, *id
;
1622 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1623 int multibytep
= coding
->src_multibyte
;
1624 unsigned char *src_base
= src
;
1625 struct charset
*charset
;
1628 int consumed_chars
= 0;
1631 switch (emacs_mule_bytes
[c
])
1634 if (! (charset
= emacs_mule_charset
[c
]))
1641 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1642 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1645 if (! (charset
= emacs_mule_charset
[c
]))
1652 if (! (charset
= emacs_mule_charset
[c
]))
1655 code
= (c
& 0x7F) << 8;
1663 if (! (charset
= emacs_mule_charset
[c
]))
1666 code
= (c
& 0x7F) << 8;
1673 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1674 ? charset_ascii
: charset_eight_bit
);
1680 c
= DECODE_CHAR (charset
, code
);
1683 *nbytes
= src
- src_base
;
1684 *nchars
= consumed_chars
;
1697 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1698 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1702 detect_coding_emacs_mule (coding
, detect_info
)
1703 struct coding_system
*coding
;
1704 struct coding_detection_info
*detect_info
;
1706 unsigned char *src
= coding
->source
, *src_base
= src
;
1707 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1708 int multibytep
= coding
->src_multibyte
;
1709 int consumed_chars
= 0;
1714 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1715 /* A coding system of this category is always ASCII compatible. */
1716 src
+= coding
->head_ascii
;
1726 /* Perhaps the start of a composite character. We simply skip
1727 it because analyzing it is too heavy for detecting. But,
1728 at least, we check that the composite character
1729 consists of more than 4 bytes. */
1730 unsigned char *src_base
;
1740 if (src
- src_base
<= 4)
1742 found
= CATEGORY_MASK_EMACS_MULE
;
1750 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1755 unsigned char *src_base
= src
- 1;
1762 if (src
- src_base
!= emacs_mule_bytes
[*src_base
])
1764 found
= CATEGORY_MASK_EMACS_MULE
;
1767 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1771 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1773 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1776 detect_info
->found
|= found
;
1781 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1783 /* Decode a character represented as a component of composition
1784 sequence of Emacs 20/21 style at SRC. Set C to that character and
1785 update SRC to the head of next character (or an encoded composition
1786 rule). If SRC doesn't point to a composition component, set C to -1.
1787 If SRC points to an invalid byte sequence, global exit by a return
1790 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1794 int nbytes, nchars; \
1796 if (src == src_end) \
1798 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1803 goto invalid_code; \
1807 consumed_chars += nchars; \
1812 /* Decode a composition rule represented as a component of composition
1813 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1814 and increment BUF. If SRC points an invalid byte sequence, set C
1817 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1819 int c, gref, nref; \
1821 if (src >= src_end) \
1822 goto invalid_code; \
1823 ONE_MORE_BYTE_NO_CHECK (c); \
1825 if (c < 0 || c >= 81) \
1826 goto invalid_code; \
1828 gref = c / 9, nref = c % 9; \
1829 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1833 /* Decode a composition rule represented as a component of composition
1834 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1835 and increment BUF. If SRC points an invalid byte sequence, set C
1838 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1842 if (src + 1>= src_end) \
1843 goto invalid_code; \
1844 ONE_MORE_BYTE_NO_CHECK (gref); \
1846 ONE_MORE_BYTE_NO_CHECK (nref); \
1848 if (gref < 0 || gref >= 81 \
1849 || nref < 0 || nref >= 81) \
1850 goto invalid_code; \
1851 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1855 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1857 /* Emacs 21 style format. The first three bytes at SRC are \
1858 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1859 the byte length of this composition information, CHARS is the \
1860 number of characters composed by this composition. */ \
1861 enum composition_method method = c - 0xF2; \
1862 int *charbuf_base = charbuf; \
1864 int consumed_chars_limit; \
1865 int nbytes, nchars; \
1867 ONE_MORE_BYTE (c); \
1868 nbytes = c - 0xA0; \
1870 goto invalid_code; \
1871 ONE_MORE_BYTE (c); \
1872 nchars = c - 0xA0; \
1873 from = coding->produced + char_offset; \
1874 to = from + nchars; \
1875 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1876 consumed_chars_limit = consumed_chars_base + nbytes; \
1877 if (method != COMPOSITION_RELATIVE) \
1880 while (consumed_chars < consumed_chars_limit) \
1882 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1883 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1885 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1888 if (consumed_chars < consumed_chars_limit) \
1889 goto invalid_code; \
1890 charbuf_base[0] -= i; \
1895 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1897 /* Emacs 20 style format for relative composition. */ \
1898 /* Store multibyte form of characters to be composed. */ \
1899 enum composition_method method = COMPOSITION_RELATIVE; \
1900 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1901 int *buf = components; \
1906 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1907 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1908 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1910 goto invalid_code; \
1911 from = coding->produced_char + char_offset; \
1913 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1914 for (j = 0; j < i; j++) \
1915 *charbuf++ = components[j]; \
1919 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1921 /* Emacs 20 style format for rule-base composition. */ \
1922 /* Store multibyte form of characters to be composed. */ \
1923 enum composition_method method = COMPOSITION_WITH_RULE; \
1924 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1925 int *buf = components; \
1929 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1930 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1932 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1933 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1935 if (i < 1 || (buf - components) % 2 == 0) \
1936 goto invalid_code; \
1937 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1938 goto no_more_source; \
1939 from = coding->produced_char + char_offset; \
1941 ADD_COMPOSITION_DATA (buf, from, to, method); \
1942 for (j = 0; j < i; j++) \
1943 *charbuf++ = components[j]; \
1944 for (j = 0; j < i; j += 2) \
1945 *charbuf++ = components[j]; \
1950 decode_coding_emacs_mule (coding
)
1951 struct coding_system
*coding
;
1953 unsigned char *src
= coding
->source
+ coding
->consumed
;
1954 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1955 unsigned char *src_base
;
1956 int *charbuf
= coding
->charbuf
;
1957 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
1958 int consumed_chars
= 0, consumed_chars_base
;
1959 int multibytep
= coding
->src_multibyte
;
1960 Lisp_Object attrs
, eol_type
, charset_list
;
1961 int char_offset
= coding
->produced_char
;
1962 int last_offset
= char_offset
;
1963 int last_id
= charset_ascii
;
1965 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1972 consumed_chars_base
= consumed_chars
;
1974 if (charbuf
>= charbuf_end
)
1983 if (EQ (eol_type
, Qdos
))
1987 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
1988 goto no_more_source
;
1993 else if (EQ (eol_type
, Qmac
))
2002 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2003 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2004 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2006 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2008 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2012 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2018 consumed_chars
= consumed_chars_base
;
2019 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2028 if (last_id
!= charset_ascii
)
2029 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2031 last_offset
= char_offset
;
2035 consumed_chars
+= nchars
;
2042 consumed_chars
= consumed_chars_base
;
2044 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
2050 if (last_id
!= charset_ascii
)
2051 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2052 coding
->consumed_char
+= consumed_chars_base
;
2053 coding
->consumed
= src_base
- coding
->source
;
2054 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
2058 #define EMACS_MULE_LEADING_CODES(id, codes) \
2061 codes[0] = id, codes[1] = 0; \
2062 else if (id < 0xE0) \
2063 codes[0] = 0x9A, codes[1] = id; \
2064 else if (id < 0xF0) \
2065 codes[0] = 0x9B, codes[1] = id; \
2066 else if (id < 0xF5) \
2067 codes[0] = 0x9C, codes[1] = id; \
2069 codes[0] = 0x9D, codes[1] = id; \
2074 encode_coding_emacs_mule (coding
)
2075 struct coding_system
*coding
;
2077 int multibytep
= coding
->dst_multibyte
;
2078 int *charbuf
= coding
->charbuf
;
2079 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2080 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2081 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2083 int produced_chars
= 0;
2084 Lisp_Object attrs
, eol_type
, charset_list
;
2086 int preferred_charset_id
= -1;
2088 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2090 while (charbuf
< charbuf_end
)
2092 ASSURE_DESTINATION (safe_room
);
2097 /* Handle an annotation. */
2100 case CODING_ANNOTATE_COMPOSITION_MASK
:
2101 /* Not yet implemented. */
2103 case CODING_ANNOTATE_CHARSET_MASK
:
2104 preferred_charset_id
= charbuf
[3];
2105 if (preferred_charset_id
>= 0
2106 && NILP (Fmemq (make_number (preferred_charset_id
),
2108 preferred_charset_id
= -1;
2117 if (ASCII_CHAR_P (c
))
2118 EMIT_ONE_ASCII_BYTE (c
);
2119 else if (CHAR_BYTE8_P (c
))
2121 c
= CHAR_TO_BYTE8 (c
);
2126 struct charset
*charset
;
2130 unsigned char leading_codes
[2];
2132 if (preferred_charset_id
>= 0)
2134 charset
= CHARSET_FROM_ID (preferred_charset_id
);
2135 if (! CHAR_CHARSET_P (c
, charset
))
2136 charset
= char_charset (c
, charset_list
, NULL
);
2139 charset
= char_charset (c
, charset_list
, &code
);
2142 c
= coding
->default_char
;
2143 if (ASCII_CHAR_P (c
))
2145 EMIT_ONE_ASCII_BYTE (c
);
2148 charset
= char_charset (c
, charset_list
, &code
);
2150 dimension
= CHARSET_DIMENSION (charset
);
2151 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2152 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2153 EMIT_ONE_BYTE (leading_codes
[0]);
2154 if (leading_codes
[1])
2155 EMIT_ONE_BYTE (leading_codes
[1]);
2157 EMIT_ONE_BYTE (code
);
2160 EMIT_ONE_BYTE (code
>> 8);
2161 EMIT_ONE_BYTE (code
& 0xFF);
2165 coding
->result
= CODING_RESULT_SUCCESS
;
2166 coding
->produced_char
+= produced_chars
;
2167 coding
->produced
= dst
- coding
->destination
;
2172 /*** 7. ISO2022 handlers ***/
2174 /* The following note describes the coding system ISO2022 briefly.
2175 Since the intention of this note is to help understand the
2176 functions in this file, some parts are NOT ACCURATE or are OVERLY
2177 SIMPLIFIED. For thorough understanding, please refer to the
2178 original document of ISO2022. This is equivalent to the standard
2179 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2181 ISO2022 provides many mechanisms to encode several character sets
2182 in 7-bit and 8-bit environments. For 7-bit environments, all text
2183 is encoded using bytes less than 128. This may make the encoded
2184 text a little bit longer, but the text passes more easily through
2185 several types of gateway, some of which strip off the MSB (Most
2188 There are two kinds of character sets: control character sets and
2189 graphic character sets. The former contain control characters such
2190 as `newline' and `escape' to provide control functions (control
2191 functions are also provided by escape sequences). The latter
2192 contain graphic characters such as 'A' and '-'. Emacs recognizes
2193 two control character sets and many graphic character sets.
2195 Graphic character sets are classified into one of the following
2196 four classes, according to the number of bytes (DIMENSION) and
2197 number of characters in one dimension (CHARS) of the set:
2198 - DIMENSION1_CHARS94
2199 - DIMENSION1_CHARS96
2200 - DIMENSION2_CHARS94
2201 - DIMENSION2_CHARS96
2203 In addition, each character set is assigned an identification tag,
2204 unique for each set, called the "final character" (denoted as <F>
2205 hereafter). The <F> of each character set is decided by ECMA(*)
2206 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2207 (0x30..0x3F are for private use only).
2209 Note (*): ECMA = European Computer Manufacturers Association
2211 Here are examples of graphic character sets [NAME(<F>)]:
2212 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2213 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2214 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2215 o DIMENSION2_CHARS96 -- none for the moment
2217 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2218 C0 [0x00..0x1F] -- control character plane 0
2219 GL [0x20..0x7F] -- graphic character plane 0
2220 C1 [0x80..0x9F] -- control character plane 1
2221 GR [0xA0..0xFF] -- graphic character plane 1
2223 A control character set is directly designated and invoked to C0 or
2224 C1 by an escape sequence. The most common case is that:
2225 - ISO646's control character set is designated/invoked to C0, and
2226 - ISO6429's control character set is designated/invoked to C1,
2227 and usually these designations/invocations are omitted in encoded
2228 text. In a 7-bit environment, only C0 can be used, and a control
2229 character for C1 is encoded by an appropriate escape sequence to
2230 fit into the environment. All control characters for C1 are
2231 defined to have corresponding escape sequences.
2233 A graphic character set is at first designated to one of four
2234 graphic registers (G0 through G3), then these graphic registers are
2235 invoked to GL or GR. These designations and invocations can be
2236 done independently. The most common case is that G0 is invoked to
2237 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2238 these invocations and designations are omitted in encoded text.
2239 In a 7-bit environment, only GL can be used.
2241 When a graphic character set of CHARS94 is invoked to GL, codes
2242 0x20 and 0x7F of the GL area work as control characters SPACE and
2243 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2246 There are two ways of invocation: locking-shift and single-shift.
2247 With locking-shift, the invocation lasts until the next different
2248 invocation, whereas with single-shift, the invocation affects the
2249 following character only and doesn't affect the locking-shift
2250 state. Invocations are done by the following control characters or
2253 ----------------------------------------------------------------------
2254 abbrev function cntrl escape seq description
2255 ----------------------------------------------------------------------
2256 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2257 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2258 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2259 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2260 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2261 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2262 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2263 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2264 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2265 ----------------------------------------------------------------------
2266 (*) These are not used by any known coding system.
2268 Control characters for these functions are defined by macros
2269 ISO_CODE_XXX in `coding.h'.
2271 Designations are done by the following escape sequences:
2272 ----------------------------------------------------------------------
2273 escape sequence description
2274 ----------------------------------------------------------------------
2275 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2276 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2277 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2278 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2279 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2280 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2281 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2282 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2283 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2284 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2285 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2286 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2287 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2288 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2289 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2290 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2291 ----------------------------------------------------------------------
2293 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2294 of dimension 1, chars 94, and final character <F>, etc...
2296 Note (*): Although these designations are not allowed in ISO2022,
2297 Emacs accepts them on decoding, and produces them on encoding
2298 CHARS96 character sets in a coding system which is characterized as
2299 7-bit environment, non-locking-shift, and non-single-shift.
2301 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2302 '(' must be omitted. We refer to this as "short-form" hereafter.
2304 Now you may notice that there are a lot of ways of encoding the
2305 same multilingual text in ISO2022. Actually, there exist many
2306 coding systems such as Compound Text (used in X11's inter client
2307 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2308 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2309 localized platforms), and all of these are variants of ISO2022.
2311 In addition to the above, Emacs handles two more kinds of escape
2312 sequences: ISO6429's direction specification and Emacs' private
2313 sequence for specifying character composition.
2315 ISO6429's direction specification takes the following form:
2316 o CSI ']' -- end of the current direction
2317 o CSI '0' ']' -- end of the current direction
2318 o CSI '1' ']' -- start of left-to-right text
2319 o CSI '2' ']' -- start of right-to-left text
2320 The control character CSI (0x9B: control sequence introducer) is
2321 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2323 Character composition specification takes the following form:
2324 o ESC '0' -- start relative composition
2325 o ESC '1' -- end composition
2326 o ESC '2' -- start rule-base composition (*)
2327 o ESC '3' -- start relative composition with alternate chars (**)
2328 o ESC '4' -- start rule-base composition with alternate chars (**)
2329 Since these are not standard escape sequences of any ISO standard,
2330 the use of them with these meanings is restricted to Emacs only.
2332 (*) This form is used only in Emacs 20.7 and older versions,
2333 but newer versions can safely decode it.
2334 (**) This form is used only in Emacs 21.1 and newer versions,
2335 and older versions can't decode it.
2337 Here's a list of example usages of these composition escape
2338 sequences (categorized by `enum composition_method').
2340 COMPOSITION_RELATIVE:
2341 ESC 0 CHAR [ CHAR ] ESC 1
2342 COMPOSITION_WITH_RULE:
2343 ESC 2 CHAR [ RULE CHAR ] ESC 1
2344 COMPOSITION_WITH_ALTCHARS:
2345 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2346 COMPOSITION_WITH_RULE_ALTCHARS:
2347 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2349 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if charset ID does not exceed CODING's max_charset_id and
   the entry for ID in CODING's safe_charsets table is non-negative.
   The bound is tested first, so the table is never indexed beyond
   max_charset_id.  ID and CODING are each evaluated more than once.
   NOTE(review): the table entry presumably names the graphic register
   usable for the charset -- confirm against setup_iso_safe_charsets.  */
#define SAFE_CHARSET_P(coding, id)		\
  ((coding)->max_charset_id >= (id)		\
   && 0 <= (coding)->safe_charsets[id])

/* Nonzero if the coding system recorded in coding_categories[CATEGORY]
   has a non-negative CODING_ISO_INITIAL value for index 1.
   NOTE(review): index 1 presumably denotes graphic register G1, the
   register that shift-out invokes -- confirm.  */
#define SHIFT_OUT_OK(category)			\
  (0 <= CODING_ISO_INITIAL (&coding_categories[category], 1))
2360 setup_iso_safe_charsets (attrs
)
2363 Lisp_Object charset_list
, safe_charsets
;
2364 Lisp_Object request
;
2365 Lisp_Object reg_usage
;
2368 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2371 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2372 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2373 && ! EQ (charset_list
, Viso_2022_charset_list
))
2375 CODING_ATTR_CHARSET_LIST (attrs
)
2376 = charset_list
= Viso_2022_charset_list
;
2377 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2380 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2384 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2386 int id
= XINT (XCAR (tail
));
2387 if (max_charset_id
< id
)
2388 max_charset_id
= id
;
2391 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2393 request
= AREF (attrs
, coding_attr_iso_request
);
2394 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2395 reg94
= XINT (XCAR (reg_usage
));
2396 reg96
= XINT (XCDR (reg_usage
));
2398 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2402 struct charset
*charset
;
2405 charset
= CHARSET_FROM_ID (XINT (id
));
2406 reg
= Fcdr (Fassq (id
, request
));
2408 XSTRING (safe_charsets
)->data
[XINT (id
)] = XINT (reg
);
2409 else if (charset
->iso_chars_96
)
2412 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg96
;
2417 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg94
;
2420 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2424 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2425 Check if a text is encoded in one of ISO-2022 based coding systems.
2426 If it is, return 1, else return 0. */
2429 detect_coding_iso_2022 (coding
, detect_info
)
2430 struct coding_system
*coding
;
2431 struct coding_detection_info
*detect_info
;
2433 unsigned char *src
= coding
->source
, *src_base
= src
;
2434 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2435 int multibytep
= coding
->src_multibyte
;
2436 int single_shifting
= 0;
2439 int consumed_chars
= 0;
2444 detect_info
->checked
|= CATEGORY_MASK_ISO
;
2446 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2448 struct coding_system
*this = &(coding_categories
[i
]);
2449 Lisp_Object attrs
, val
;
2451 attrs
= CODING_ID_ATTRS (this->id
);
2452 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2453 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2454 setup_iso_safe_charsets (attrs
);
2455 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2456 this->max_charset_id
= XSTRING (val
)->size
- 1;
2457 this->safe_charsets
= (char *) XSTRING (val
)->data
;
2460 /* A coding system of this category is always ASCII compatible. */
2461 src
+= coding
->head_ascii
;
2463 while (rejected
!= CATEGORY_MASK_ISO
)
2469 if (inhibit_iso_escape_detection
)
2471 single_shifting
= 0;
2473 if (c
>= '(' && c
<= '/')
2475 /* Designation sequence for a charset of dimension 1. */
2477 if (c1
< ' ' || c1
>= 0x80
2478 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2479 /* Invalid designation sequence. Just ignore. */
2484 /* Designation sequence for a charset of dimension 2. */
2486 if (c
>= '@' && c
<= 'B')
2487 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2488 id
= iso_charset_table
[1][0][c
];
2489 else if (c
>= '(' && c
<= '/')
2492 if (c1
< ' ' || c1
>= 0x80
2493 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2494 /* Invalid designation sequence. Just ignore. */
2498 /* Invalid designation sequence. Just ignore it. */
2501 else if (c
== 'N' || c
== 'O')
2503 /* ESC <Fe> for SS2 or SS3. */
2504 single_shifting
= 1;
2505 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2508 else if (c
>= '0' && c
<= '4')
2510 /* ESC <Fp> for start/end composition. */
2511 found
|= CATEGORY_MASK_ISO
;
2516 /* Invalid escape sequence. Just ignore it. */
2520 /* We found a valid designation sequence for CHARSET. */
2521 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2522 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2524 found
|= CATEGORY_MASK_ISO_7
;
2526 rejected
|= CATEGORY_MASK_ISO_7
;
2527 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2529 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2531 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2532 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2534 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2536 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2537 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2539 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2541 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2546 /* Locking shift out/in. */
2547 if (inhibit_iso_escape_detection
)
2549 single_shifting
= 0;
2550 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2551 found
|= CATEGORY_MASK_ISO_ELSE
;
2555 /* Control sequence introducer. */
2556 single_shifting
= 0;
2557 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2558 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2559 goto check_extra_latin
;
2565 if (inhibit_iso_escape_detection
)
2567 single_shifting
= 1;
2568 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2569 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2570 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2571 found
|= CATEGORY_MASK_ISO_8_1
;
2572 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2573 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2574 found
|= CATEGORY_MASK_ISO_8_2
;
2575 goto check_extra_latin
;
2580 single_shifting
= 0;
2585 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2586 found
|= CATEGORY_MASK_ISO_8_1
;
2587 /* Check the length of succeeding codes of the range
2588 0xA0..0xFF. If the byte length is even, we include
2589 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2590 only when we are not single shifting. */
2591 if (! single_shifting
2592 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2595 while (src
< src_end
)
2603 if (i
& 1 && src
< src_end
)
2604 rejected
|= CATEGORY_MASK_ISO_8_2
;
2606 found
|= CATEGORY_MASK_ISO_8_2
;
2611 single_shifting
= 0;
2612 if (! VECTORP (Vlatin_extra_code_table
)
2613 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2615 rejected
= CATEGORY_MASK_ISO
;
2618 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2619 & CODING_ISO_FLAG_LATIN_EXTRA
)
2620 found
|= CATEGORY_MASK_ISO_8_1
;
2622 rejected
|= CATEGORY_MASK_ISO_8_1
;
2623 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2624 & CODING_ISO_FLAG_LATIN_EXTRA
)
2625 found
|= CATEGORY_MASK_ISO_8_2
;
2627 rejected
|= CATEGORY_MASK_ISO_8_2
;
2630 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
2634 detect_info
->rejected
|= rejected
;
2635 detect_info
->found
|= (found
& ~rejected
);
2640 /* Set designation state into CODING. */
2641 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2645 if (final < '0' || final >= 128 \
2646 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2647 || !SAFE_CHARSET_P (coding, id)) \
2649 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2650 goto invalid_code; \
2652 prev = CODING_ISO_DESIGNATION (coding, reg); \
2653 if (id == charset_jisx0201_roman) \
2655 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2656 id = charset_ascii; \
2658 else if (id == charset_jisx0208_1978) \
2660 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2661 id = charset_jisx0208; \
2663 CODING_ISO_DESIGNATION (coding, reg) = id; \
2664 /* If there was an invalid designation to REG previously, and this \
2665 designation is ASCII to REG, we should keep this designation \
2667 if (prev == -2 && id == charset_ascii) \
2668 goto invalid_code; \
/* If a composition is in progress, give it up: reset
   composition_state to COMPOSING_NO and flush what has been collected
   in `components' to CHARBUF as ordinary characters, advancing
   char_offset by the number of characters stored.  For
   COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every element of
   `components' is stored; for the other methods only every second
   element is stored (the elements in between are presumably
   composition rules).  Jump to no_more_source if CHARBUF lacks room.
   Do nothing when no composition is in progress.

   The macro uses `composition_state', `method', `components',
   `component_idx', `charbuf', `charbuf_end' and `char_offset' of the
   expansion site.  */
#define MAYBE_FINISH_COMPOSITION()					\
  do {									\
    int i;								\
    if (composition_state == COMPOSING_NO)				\
      break;								\
    /* Make sure that we have enough room for producing the		\
       characters stored in the table `components'.  */		\
    if (charbuf + component_idx > charbuf_end)				\
      goto no_more_source;						\
    composition_state = COMPOSING_NO;					\
    if (method == COMPOSITION_RELATIVE					\
        || method == COMPOSITION_WITH_ALTCHARS)			\
      {									\
        for (i = 0; i < component_idx; i++)				\
          *charbuf++ = components[i];					\
        char_offset += component_idx;					\
      }									\
    else								\
      {									\
        for (i = 0; i < component_idx; i += 2)				\
          *charbuf++ = components[i];					\
        /* The loop above stores exactly (component_idx + 1) / 2	\
           characters; count that many, also when component_idx	\
           is even or zero.  */					\
        char_offset += (component_idx + 1) / 2;			\
      }									\
  } while (0)
2698 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2699 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2700 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2701 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2702 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2705 #define DECODE_COMPOSITION_START(c1) \
2708 && composition_state == COMPOSING_COMPONENT_RULE) \
2710 component_len = component_idx; \
2711 composition_state = COMPOSING_CHAR; \
2717 MAYBE_FINISH_COMPOSITION (); \
2718 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2719 goto no_more_source; \
2720 for (p = src; p < src_end - 1; p++) \
2721 if (*p == ISO_CODE_ESC && p[1] == '1') \
2723 if (p == src_end - 1) \
2725 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2726 goto invalid_code; \
2727 goto no_more_source; \
2730 /* This is surely the start of a composition. */ \
2731 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2732 : c1 == '2' ? COMPOSITION_WITH_RULE \
2733 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2734 : COMPOSITION_WITH_RULE_ALTCHARS); \
2735 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2736 : COMPOSING_COMPONENT_CHAR); \
2737 component_idx = component_len = 0; \
2742 /* Handle composition end sequence ESC 1. */
2744 #define DECODE_COMPOSITION_END() \
2746 int nchars = (component_len > 0 ? component_idx - component_len \
2747 : method == COMPOSITION_RELATIVE ? component_idx \
2748 : (component_idx + 1) / 2); \
2750 int *saved_charbuf = charbuf; \
2751 int from = coding->produced_char + char_offset; \
2752 int to = from + nchars; \
2754 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
2755 if (method != COMPOSITION_RELATIVE) \
2757 if (component_len == 0) \
2758 for (i = 0; i < component_idx; i++) \
2759 *charbuf++ = components[i]; \
2761 for (i = 0; i < component_len; i++) \
2762 *charbuf++ = components[i]; \
2763 *saved_charbuf = saved_charbuf - charbuf; \
2765 if (method == COMPOSITION_WITH_RULE) \
2766 for (i = 0; i < component_idx; i += 2, char_offset++) \
2767 *charbuf++ = components[i]; \
2769 for (i = component_len; i < component_idx; i++, char_offset++) \
2770 *charbuf++ = components[i]; \
2771 coding->annotated = 1; \
2772 composition_state = COMPOSING_NO; \
2776 /* Decode a composition rule from the byte C1 (and maybe one more byte
2777 from SRC) and store one encoded composition rule in
2778 coding->cmp_data. */
2780 #define DECODE_COMPOSITION_RULE(c1) \
2783 if (c1 < 81) /* old format (before ver.21) */ \
2785 int gref = (c1) / 9; \
2786 int nref = (c1) % 9; \
2787 if (gref == 4) gref = 10; \
2788 if (nref == 4) nref = 10; \
2789 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2791 else if (c1 < 93) /* new format (after ver.21) */ \
2793 ONE_MORE_BYTE (c2); \
2794 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2801 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2804 decode_coding_iso_2022 (coding
)
2805 struct coding_system
*coding
;
2807 unsigned char *src
= coding
->source
+ coding
->consumed
;
2808 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2809 unsigned char *src_base
;
2810 int *charbuf
= coding
->charbuf
;
2812 = charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2813 int consumed_chars
= 0, consumed_chars_base
;
2814 int multibytep
= coding
->src_multibyte
;
2815 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2816 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2817 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2818 struct charset
*charset
;
2820 /* For handling composition sequence. */
2821 #define COMPOSING_NO 0
2822 #define COMPOSING_CHAR 1
2823 #define COMPOSING_RULE 2
2824 #define COMPOSING_COMPONENT_CHAR 3
2825 #define COMPOSING_COMPONENT_RULE 4
2827 int composition_state
= COMPOSING_NO
;
2828 enum composition_method method
;
2829 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2832 Lisp_Object attrs
, eol_type
, charset_list
;
2833 int char_offset
= coding
->produced_char
;
2834 int last_offset
= char_offset
;
2835 int last_id
= charset_ascii
;
2837 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2838 setup_iso_safe_charsets (attrs
);
2845 consumed_chars_base
= consumed_chars
;
2847 if (charbuf
>= charbuf_end
)
2852 /* We produce at most one character. */
2853 switch (iso_code_class
[c1
])
2855 case ISO_0x20_or_0x7F
:
2856 if (composition_state
!= COMPOSING_NO
)
2858 if (composition_state
== COMPOSING_RULE
2859 || composition_state
== COMPOSING_COMPONENT_RULE
)
2861 DECODE_COMPOSITION_RULE (c1
);
2862 components
[component_idx
++] = c1
;
2863 composition_state
--;
2867 if (charset_id_0
< 0
2868 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2869 /* This is SPACE or DEL. */
2870 charset
= CHARSET_FROM_ID (charset_ascii
);
2872 charset
= CHARSET_FROM_ID (charset_id_0
);
2875 case ISO_graphic_plane_0
:
2876 if (composition_state
!= COMPOSING_NO
)
2878 if (composition_state
== COMPOSING_RULE
2879 || composition_state
== COMPOSING_COMPONENT_RULE
)
2881 DECODE_COMPOSITION_RULE (c1
);
2882 components
[component_idx
++] = c1
;
2883 composition_state
--;
2887 charset
= CHARSET_FROM_ID (charset_id_0
);
2890 case ISO_0xA0_or_0xFF
:
2891 if (charset_id_1
< 0
2892 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2893 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2895 /* This is a graphic character, we fall down ... */
2897 case ISO_graphic_plane_1
:
2898 if (charset_id_1
< 0)
2900 charset
= CHARSET_FROM_ID (charset_id_1
);
2903 case ISO_carriage_return
:
2906 if (EQ (eol_type
, Qdos
))
2910 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
2911 goto no_more_source
;
2916 else if (EQ (eol_type
, Qmac
))
2922 MAYBE_FINISH_COMPOSITION ();
2923 charset
= CHARSET_FROM_ID (charset_ascii
);
2927 MAYBE_FINISH_COMPOSITION ();
2931 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2932 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2934 CODING_ISO_INVOCATION (coding
, 0) = 1;
2935 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2939 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2941 CODING_ISO_INVOCATION (coding
, 0) = 0;
2942 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2945 case ISO_single_shift_2_7
:
2946 case ISO_single_shift_2
:
2947 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2949 /* SS2 is handled as an escape sequence of ESC 'N' */
2951 goto label_escape_sequence
;
2953 case ISO_single_shift_3
:
2954 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2956 /* SS3 is handled as an escape sequence of ESC 'O' */
2958 goto label_escape_sequence
;
2960 case ISO_control_sequence_introducer
:
2961 /* CSI is handled as an escape sequence of ESC '[' ... */
2963 goto label_escape_sequence
;
2967 label_escape_sequence
:
2968 /* Escape sequences handled here are invocation,
2969 designation, direction specification, and character
2970 composition specification. */
2973 case '&': /* revision of following character set */
2975 if (!(c1
>= '@' && c1
<= '~'))
2978 if (c1
!= ISO_CODE_ESC
)
2981 goto label_escape_sequence
;
2983 case '$': /* designation of 2-byte character set */
2984 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2987 if (c1
>= '@' && c1
<= 'B')
2988 { /* designation of JISX0208.1978, GB2312.1980,
2990 DECODE_DESIGNATION (0, 2, 0, c1
);
2992 else if (c1
>= 0x28 && c1
<= 0x2B)
2993 { /* designation of DIMENSION2_CHARS94 character set */
2995 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
2997 else if (c1
>= 0x2C && c1
<= 0x2F)
2998 { /* designation of DIMENSION2_CHARS96 character set */
3000 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
3004 /* We must update these variables now. */
3005 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3006 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3009 case 'n': /* invocation of locking-shift-2 */
3010 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3011 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3013 CODING_ISO_INVOCATION (coding
, 0) = 2;
3014 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3017 case 'o': /* invocation of locking-shift-3 */
3018 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3019 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3021 CODING_ISO_INVOCATION (coding
, 0) = 3;
3022 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3025 case 'N': /* invocation of single-shift-2 */
3026 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3027 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3029 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
3031 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3035 case 'O': /* invocation of single-shift-3 */
3036 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3037 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3039 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
3041 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3045 case '0': case '2': case '3': case '4': /* start composition */
3046 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3048 DECODE_COMPOSITION_START (c1
);
3051 case '1': /* end composition */
3052 if (composition_state
== COMPOSING_NO
)
3054 DECODE_COMPOSITION_END ();
3057 case '[': /* specification of direction */
3058 if (! CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
)
3060 /* For the moment, nested direction is not supported.
3061 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3062 left-to-right, and nonzero means right-to-left. */
3066 case ']': /* end of the current direction */
3067 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3069 case '0': /* end of the current direction */
3070 case '1': /* start of left-to-right direction */
3073 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3078 case '2': /* start of right-to-left direction */
3081 coding
->mode
|= CODING_MODE_DIRECTION
;
3092 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3094 if (c1
>= 0x28 && c1
<= 0x2B)
3095 { /* designation of DIMENSION1_CHARS94 character set */
3097 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
3099 else if (c1
>= 0x2C && c1
<= 0x2F)
3100 { /* designation of DIMENSION1_CHARS96 character set */
3102 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
3106 /* We must update these variables now. */
3107 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3108 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3113 if (charset
->id
!= charset_ascii
3114 && last_id
!= charset
->id
)
3116 if (last_id
!= charset_ascii
)
3117 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3118 last_id
= charset
->id
;
3119 last_offset
= char_offset
;
3122 /* Now we know CHARSET and 1st position code C1 of a character.
3123 Produce a decoded character while getting 2nd position code
3126 if (CHARSET_DIMENSION (charset
) > 1)
3129 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3130 /* C2 is not in a valid range. */
3132 c1
= (c1
<< 8) | (c2
& 0x7F);
3133 if (CHARSET_DIMENSION (charset
) > 2)
3136 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3137 /* C2 is not in a valid range. */
3139 c1
= (c1
<< 8) | (c2
& 0x7F);
3143 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3146 MAYBE_FINISH_COMPOSITION ();
3147 for (; src_base
< src
; src_base
++, char_offset
++)
3149 if (ASCII_BYTE_P (*src_base
))
3150 *charbuf
++ = *src_base
;
3152 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3156 else if (composition_state
== COMPOSING_NO
)
3163 components
[component_idx
++] = c
;
3164 if (method
== COMPOSITION_WITH_RULE
3165 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3166 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3167 composition_state
++;
3172 MAYBE_FINISH_COMPOSITION ();
3174 consumed_chars
= consumed_chars_base
;
3176 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3182 if (last_id
!= charset_ascii
)
3183 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3184 coding
->consumed_char
+= consumed_chars_base
;
3185 coding
->consumed
= src_base
- coding
->source
;
3186 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3190 /* ISO2022 encoding stuff. */
3193 It is not enough to say just "ISO2022" on encoding, we have to
3194 specify more details. In Emacs, each coding system of ISO2022
3195 variant has the following specifications:
3196 1. Initial designation to G0 thru G3.
3197 2. Allows short-form designation?
3198 3. ASCII should be designated to G0 before control characters?
3199 4. ASCII should be designated to G0 at end of line?
3200 5. 7-bit environment or 8-bit environment?
3201 6. Use locking-shift?
3202 7. Use Single-shift?
3203 And the following two are only for Japanese:
3204 8. Use ASCII in place of JIS0201-1976-Roman?
3205 9. Use JISX0208-1983 in place of JISX0208-1978?
3206 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3207 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3211 /* Produce codes (escape sequence) for designating CHARSET to graphic
3212 register REG at DST, and increment DST. If <final-char> of CHARSET is
3213 '@', 'A', or 'B' and the coding system CODING allows, produce
3214 designation sequence of short-form. */
3216 #define ENCODE_DESIGNATION(charset, reg, coding) \
3218 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3219 char *intermediate_char_94 = "()*+"; \
3220 char *intermediate_char_96 = ",-./"; \
3221 int revision = -1; \
3224 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3225 revision = CHARSET_ISO_REVISION (charset); \
3227 if (revision >= 0) \
3229 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3230 EMIT_ONE_BYTE ('@' + revision); \
3232 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3233 if (CHARSET_DIMENSION (charset) == 1) \
3235 if (! CHARSET_ISO_CHARS_96 (charset)) \
3236 c = intermediate_char_94[reg]; \
3238 c = intermediate_char_96[reg]; \
3239 EMIT_ONE_ASCII_BYTE (c); \
3243 EMIT_ONE_ASCII_BYTE ('$'); \
3244 if (! CHARSET_ISO_CHARS_96 (charset)) \
3246 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3248 || final_char < '@' || final_char > 'B') \
3249 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3252 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3254 EMIT_ONE_ASCII_BYTE (final_char); \
3256 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
/* Emit the ISO2022 single-shift-2 (SS2) or single-shift-3 (SS3)
   function and record in CODING that a single shift is in effect.
   When CODING_ISO_FLAG_SEVEN_BITS is set in CODING's flags the
   two-byte escape sequence is emitted (ESC 'N' for SS2, ESC 'O' for
   SS3); otherwise the one-byte control character ISO_CODE_SS2 or
   ISO_CODE_SS3 is emitted.  */

#define ENCODE_SINGLE_SHIFT_2						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      EMIT_ONE_BYTE (ISO_CODE_SS2);					\
    else								\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');				\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)


#define ENCODE_SINGLE_SHIFT_3						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      EMIT_ONE_BYTE (ISO_CODE_SS3);					\
    else								\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');				\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3), and record in
   CODING which graphic register is now invoked to graphic plane 0.

   The escape sequences must agree with what the decoder in this file
   accepts: ESC 'n' is locking-shift-2 and ESC 'o' is
   locking-shift-3.  */

/* Emit SI and invoke graphic register 0 to graphic plane 0.  */
#define ENCODE_SHIFT_IN					\
  do {							\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);			\
    CODING_ISO_INVOCATION (coding, 0) = 0;		\
  } while (0)


/* Emit SO and invoke graphic register 1 to graphic plane 0.  */
#define ENCODE_SHIFT_OUT				\
  do {							\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);			\
    CODING_ISO_INVOCATION (coding, 0) = 1;		\
  } while (0)


/* Emit ESC 'n' and invoke graphic register 2 to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');		\
    CODING_ISO_INVOCATION (coding, 0) = 2;		\
  } while (0)


/* Emit ESC 'o' and invoke graphic register 3 to graphic plane 0.
   (ESC 'n' would be read back as locking-shift-2.)  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_ROMAN is set
   and CHARSET is ASCII, it is replaced by JISX0201-Roman.  C1 is
   parenthesized wherever it is used, so a compound expression may be
   passed safely.  The macro uses `coding', `dst' and `produced_chars'
   of the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			\
  do {									\
    int id = CHARSET_ID (charset);					\
									\
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)		\
        && id == charset_ascii)						\
      {									\
        id = charset_jisx0201_roman;					\
        charset = CHARSET_FROM_ID (id);					\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
        /* A single shift is pending; it covers just this one		\
           character.  */						\
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);				\
        else								\
          EMIT_ONE_BYTE ((c1) | 0x80);					\
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
        break;								\
      }									\
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))		\
      {									\
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);				\
        break;								\
      }									\
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))		\
      {									\
        EMIT_ONE_BYTE ((c1) | 0x80);					\
        break;								\
      }									\
    else								\
      /* Since CHARSET is not yet invoked to any graphic planes, we	\
         must invoke it, or, at first, designate it to some graphic	\
         register.  Then repeat the loop to actually produce the	\
         character.  */							\
      dst = encode_invocation_designation (charset, coding, dst,	\
                                           &produced_chars);		\
  } while (1)
/* Emit the two position codes C1 and C2 of a character belonging to
   the DIMENSION2 character set CHARSET, first producing whatever
   designation and invocation codes are needed to make CHARSET
   usable.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_OLDJIS is set
   and CHARSET is JISX0208, it is replaced by JISX0208.1978.  The
   macro uses `coding', `dst' and `produced_chars' of the expansion
   site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)		\
  do {									\
    int id = CHARSET_ID (charset);					\
									\
    if (id == charset_jisx0208						\
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))	\
      {									\
        id = charset_jisx0208_1978;					\
        charset = CHARSET_FROM_ID (id);					\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
        else								\
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
        break;								\
      }									\
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))			\
      {									\
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
        break;								\
      }									\
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))			\
      {									\
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
        break;								\
      }									\
    /* CHARSET is not invoked to either graphic plane yet.  Invoke	\
       it (designating it to a graphic register first if needed),	\
       then go round the loop again to emit the character.  */	\
    dst = encode_invocation_designation (charset, coding, dst,		\
                                         &produced_chars);		\
  } while (1)
/* Encode character C of character set CHARSET.  C is converted to
   its code in CHARSET by ENCODE_CHAR.  If CHARSET's dimension is 1,
   that code is passed to ENCODE_ISO_CHARACTER_DIMENSION1 as is;
   otherwise it is split into `code >> 8' and its low eight bits and
   passed to ENCODE_ISO_CHARACTER_DIMENSION2.  */
#define ENCODE_ISO_CHARACTER(charset, c)				   \
  do {									   \
    int code = ENCODE_CHAR ((charset),(c));				   \
									   \
    if (CHARSET_DIMENSION (charset) != 1)				   \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else								   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);		   \
  } while (0)
3415 /* Produce designation and invocation codes at a place pointed by DST
3416 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3420 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3421 struct charset
*charset
;
3422 struct coding_system
*coding
;
3426 int multibytep
= coding
->dst_multibyte
;
3427 int produced_chars
= *p_nchars
;
3428 int reg
; /* graphic register number */
3429 int id
= CHARSET_ID (charset
);
3431 /* At first, check designations. */
3432 for (reg
= 0; reg
< 4; reg
++)
3433 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3438 /* CHARSET is not yet designated to any graphic registers. */
3439 /* At first check the requested designation. */
3440 reg
= CODING_ISO_REQUEST (coding
, id
);
3442 /* Since CHARSET requests no special designation, designate it
3443 to graphic register 0. */
3446 ENCODE_DESIGNATION (charset
, reg
, coding
);
3449 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3450 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3452 /* Since the graphic register REG is not invoked to any graphic
3453 planes, invoke it to graphic plane 0. */
3456 case 0: /* graphic register 0 */
3460 case 1: /* graphic register 1 */
3464 case 2: /* graphic register 2 */
3465 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3466 ENCODE_SINGLE_SHIFT_2
;
3468 ENCODE_LOCKING_SHIFT_2
;
3471 case 3: /* graphic register 3 */
3472 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3473 ENCODE_SINGLE_SHIFT_3
;
3475 ENCODE_LOCKING_SHIFT_3
;
3480 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating the
   direction of text.  */

/* Emit the control sequence introducer: the two bytes ESC '[' when
   CODING_ISO_FLAG_SEVEN_BITS is set in CODING's flags, the single
   byte ISO_CODE_CSI otherwise.  The flag is tested with `&' because
   the flags word also carries the other CODING_ISO_FLAG_XXX bits, so
   it is not expected to be equal to this one flag.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)		\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)


/* Emit CSI '2' ']', the start of right-to-left text.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is invoked without an argument list.  */
#define ENCODE_DIRECTION_R2L()				\
  do {							\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;			\
    EMIT_TWO_ASCII_BYTES ('2', ']');			\
  } while (0)


/* Emit CSI '0' ']', the end of the current direction.  */
#define ENCODE_DIRECTION_L2R()				\
  do {							\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;			\
    EMIT_TWO_ASCII_BYTES ('0', ']');			\
  } while (0)
3509 /* Produce codes for designation and invocation to reset the graphic
3510 planes and registers to initial state. */
3511 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3514 struct charset *charset; \
3516 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3518 for (reg = 0; reg < 4; reg++) \
3519 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3520 && (CODING_ISO_DESIGNATION (coding, reg) \
3521 != CODING_ISO_INITIAL (coding, reg))) \
3523 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3524 ENCODE_DESIGNATION (charset, reg, coding); \
3529 /* Produce designation sequences of charsets in the line started from
3530 SRC to a place pointed by DST, and return updated DST.
3532 If the current block ends before any end-of-line, we may fail to
3533 find all the necessary designations. */
3535 static unsigned char *
3536 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3537 struct coding_system
*coding
;
3538 int *charbuf
, *charbuf_end
;
3541 struct charset
*charset
;
3542 /* Table of charsets to be designated to each graphic register. */
3544 int c
, found
= 0, reg
;
3545 int produced_chars
= 0;
3546 int multibytep
= coding
->dst_multibyte
;
3548 Lisp_Object charset_list
;
3550 attrs
= CODING_ID_ATTRS (coding
->id
);
3551 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3552 if (EQ (charset_list
, Qiso_2022
))
3553 charset_list
= Viso_2022_charset_list
;
3555 for (reg
= 0; reg
< 4; reg
++)
3565 charset
= char_charset (c
, charset_list
, NULL
);
3566 id
= CHARSET_ID (charset
);
3567 reg
= CODING_ISO_REQUEST (coding
, id
);
3568 if (reg
>= 0 && r
[reg
] < 0)
3577 for (reg
= 0; reg
< 4; reg
++)
3579 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3580 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3586 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
/* Encode the characters held in CODING->charbuf (CODING->charbuf_used
   elements) as ISO-2022 bytes, stored from CODING->destination +
   CODING->produced.  Designation sequences are produced at the
   beginning of a line when CODING_ISO_FLAG_DESIGNATE_AT_BOL is set
   and CODING_ISO_BOL (CODING) is non-zero.  Before returning, the
   function stores CODING_RESULT_SUCCESS in CODING->result, saves the
   beginning-of-line state in CODING_ISO_BOL (CODING), and updates
   CODING->produced_char and CODING->produced.  */
3589 encode_coding_iso_2022 (coding
)
3590 struct coding_system
*coding
;
3592 int multibytep
= coding
->dst_multibyte
;
3593 int *charbuf
= coding
->charbuf
;
3594 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3595 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3596 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3599 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3600 && CODING_ISO_BOL (coding
));
3601 int produced_chars
= 0;
3602 Lisp_Object attrs
, eol_type
, charset_list
;
3603 int ascii_compatible
;
3605 int preferred_charset_id
= -1;
3607 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3608 setup_iso_safe_charsets (attrs
);
3609 /* Charset list may have been changed. */
/* NOTE(review): the statement below ends with a stray `\' line
   continuation.  It merely splices the next line and looks like a
   leftover that can be dropped -- confirm.  */
3610 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
); \
3611 coding
->safe_charsets
3612 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs
))->data
;
3614 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3616 while (charbuf
< charbuf_end
)
3618 ASSURE_DESTINATION (safe_room
);
3620 if (bol_designation
)
3622 unsigned char *dst_prev
= dst
;
3624 /* We have to produce designation sequences if any now. */
3625 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3626 bol_designation
= 0;
3627 /* We are sure that designation sequences are all ASCII bytes. */
3628 produced_chars
+= dst
- dst_prev
;
3635 /* Handle an annotation. */
3638 case CODING_ANNOTATE_COMPOSITION_MASK
:
3639 /* Not yet implemented. */
/* A charset annotation carries a charset id in CHARBUF[3].  The id
   is kept as the preferred charset only while the Fmemq test below
   succeeds; otherwise it is reset to -1 (no preference).  */
3641 case CODING_ANNOTATE_CHARSET_MASK
:
3642 preferred_charset_id
= charbuf
[3];
3643 if (preferred_charset_id
>= 0
3644 && NILP (Fmemq (make_number (preferred_charset_id
),
3646 preferred_charset_id
= -1;
3655 /* Now encode the character C. */
3656 if (c
< 0x20 || c
== 0x7F)
3659 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3661 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3662 ENCODE_RESET_PLANE_AND_REGISTER ();
3663 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3667 for (i
= 0; i
< 4; i
++)
3668 CODING_ISO_DESIGNATION (coding
, i
)
3669 = CODING_ISO_INITIAL (coding
, i
);
3672 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3674 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3675 ENCODE_RESET_PLANE_AND_REGISTER ();
3676 EMIT_ONE_ASCII_BYTE (c
);
3678 else if (ASCII_CHAR_P (c
))
3680 if (ascii_compatible
)
3681 EMIT_ONE_ASCII_BYTE (c
);
3684 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3685 ENCODE_ISO_CHARACTER (charset
, c
);
3688 else if (CHAR_BYTE8_P (c
))
3690 c
= CHAR_TO_BYTE8 (c
);
3695 struct charset
*charset
;
/* Use the charset named by the last charset annotation when it
   contains C; otherwise pick a charset from CHARSET_LIST.  */
3697 if (preferred_charset_id
>= 0)
3699 charset
= CHARSET_FROM_ID (preferred_charset_id
);
3700 if (! CHAR_CHARSET_P (c
, charset
))
3701 charset
= char_charset (c
, charset_list
, NULL
);
3704 charset
= char_charset (c
, charset_list
, NULL
);
/* Substitution path (presumably taken when no charset was found for
   C; the guarding test is not visible here -- confirm): in safe
   encoding mode C becomes CODING_INHIBIT_CHARACTER_SUBSTITUTION in
   the ASCII charset, otherwise C becomes CODING->default_char.  */
3707 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3709 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3710 charset
= CHARSET_FROM_ID (charset_ascii
);
3714 c
= coding
->default_char
;
3715 charset
= char_charset (c
, charset_list
, NULL
);
3718 ENCODE_ISO_CHARACTER (charset
, c
);
/* On the last block, reset the plane and register state if the
   coding system asks for a reset at end of line.  */
3722 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3723 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3725 ASSURE_DESTINATION (safe_room
);
3726 ENCODE_RESET_PLANE_AND_REGISTER ();
3728 coding
->result
= CODING_RESULT_SUCCESS
;
3729 CODING_ISO_BOL (coding
) = bol_designation
;
3730 coding
->produced_char
+= produced_chars
;
3731 coding
->produced
= dst
- coding
->destination
;
3736 /*** 8. SJIS and BIG5 handlers ***/
3738 /* Although SJIS and BIG5 are not ISO coding systems, they are used
3739 quite widely. So, for the moment, Emacs supports them in the bare
3740 C code. But, in the future, they may be supported only by CCL. */
3742 /* SJIS is a coding system encoding three character sets: ASCII, right
3743 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3744 as is. A character of charset katakana-jisx0201 is encoded by
3745 "position-code + 0x80". A character of charset japanese-jisx0208
3746 is encoded in two bytes, but the two position-codes are divided and
3747 shifted so that they fit in the ranges below.
3749 --- CODE RANGE of SJIS ---
3750 (character set) (range)
3751 ASCII 0x00 .. 0x7F
3752 KATAKANA-JISX0201 0xA0 .. 0xDF
3753 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3754 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3755 -------------------------------
3757 */
3759 /* BIG5 is a coding system encoding two character sets: ASCII and
3760 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3761 character set and is encoded in two bytes.
3763 --- CODE RANGE of BIG5 ---
3764 (character set) (range)
3765 ASCII 0x00 .. 0x7F
3766 Big5 (1st byte) 0xA1 .. 0xFE
3767 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3768 --------------------------
3770 */
3772 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3773 Check if a text is encoded in SJIS. If it is, return
3774 CATEGORY_MASK_SJIS, else return 0. */
/* The visible code reports its findings through DETECT_INFO:
   CATEGORY_MASK_SJIS is always added to DETECT_INFO->checked, and
   then to DETECT_INFO->rejected or DETECT_INFO->found.
   NOTE(review): the return statements are not visible here, so the
   return-value description above could not be checked.  */
3777 detect_coding_sjis (coding
, detect_info
)
3778 struct coding_system
*coding
;
3779 struct coding_detection_info
*detect_info
;
3781 unsigned char *src
= coding
->source
, *src_base
= src
;
3782 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3783 int multibytep
= coding
->src_multibyte
;
3784 int consumed_chars
= 0;
3789 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3790 /* A coding system of this category is always ASCII compatible. */
3791 src
+= coding
->head_ascii
;
/* 0x81..0x9F and 0xE0..0xEF are the first-byte ranges of a two-byte
   JISX0208 sequence; the test that follows checks the second byte
   against 0x40..0xFC excluding 0x7F.  */
3800 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3803 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3805 found
= CATEGORY_MASK_SJIS
;
/* 0xA0..0xDF is the single-byte KATAKANA-JISX0201 range.  */
3807 else if (c
>= 0xA0 && c
< 0xE0)
3808 found
= CATEGORY_MASK_SJIS
;
3812 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
/* `incomplete' presumably flags a source that ended in the middle of
   a sequence (its assignment is not visible -- confirm); that is
   treated as a rejection only when this is the last block.  */
3816 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3818 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3821 detect_info
->found
|= found
;
3825 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3826 Check if a text is encoded in BIG5. If it is, return
3827 CATEGORY_MASK_BIG5, else return 0. */
/* The visible code reports its findings through DETECT_INFO:
   CATEGORY_MASK_BIG5 is always added to DETECT_INFO->checked, and
   then to DETECT_INFO->rejected or DETECT_INFO->found.
   NOTE(review): the return statements are not visible here, so the
   return-value description above could not be checked.  */
3830 detect_coding_big5 (coding
, detect_info
)
3831 struct coding_system
*coding
;
3832 struct coding_detection_info
*detect_info
;
3834 unsigned char *src
= coding
->source
, *src_base
= src
;
3835 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3836 int multibytep
= coding
->src_multibyte
;
3837 int consumed_chars
= 0;
3842 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
3843 /* A coding system of this category is always ASCII compatible. */
3844 src
+= coding
->head_ascii
;
/* This test is true for bytes below 0x40 and for 0x7F..0xA0, i.e.
   for values outside the Big5 second-byte ranges 0x40..0x7E and
   0xA1..0xFE (values above 0xFE are not tested here).  */
3856 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3858 found
= CATEGORY_MASK_BIG5
;
3863 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
/* `incomplete' presumably flags a source that ended in the middle of
   a sequence (its assignment is not visible -- confirm); that is
   treated as a rejection only when this is the last block.  */
3867 if (incomplete
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3869 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3872 detect_info
->found
|= found
;
3876 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3877 Decode SJIS text. (This function takes no SJIS_P argument; BIG5
text is handled by decode_coding_big5.) */
/* Bytes are read from CODING->source + CODING->consumed and decoded
   characters are stored in CODING->charbuf, keeping
   MAX_ANNOTATION_LENGTH elements in reserve at its end.  Charset
   changes are recorded with ADD_CHARSET_DATA.  On exit
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used
   are updated.  */
3880 decode_coding_sjis (coding
)
3881 struct coding_system
*coding
;
3883 unsigned char *src
= coding
->source
+ coding
->consumed
;
3884 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3885 unsigned char *src_base
;
3886 int *charbuf
= coding
->charbuf
;
3887 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
3888 int consumed_chars
= 0, consumed_chars_base
;
3889 int multibytep
= coding
->src_multibyte
;
3890 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3891 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3892 int char_offset
= coding
->produced_char
;
3893 int last_offset
= char_offset
;
3894 int last_id
= charset_ascii
;
3896 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* The first three elements of VAL supply the charset ids for roman,
   kana and kanji, in that order.  (Where VAL is initialized is not
   visible here; presumably from CHARSET_LIST -- confirm.)  */
3899 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3900 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3901 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3908 consumed_chars_base
= consumed_chars
;
3910 if (charbuf
>= charbuf_end
)
3917 if (EQ (eol_type
, Qdos
))
3921 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
3922 goto no_more_source
;
3927 else if (EQ (eol_type
, Qmac
))
3932 struct charset
*charset
;
3935 charset
= charset_roman
;
3940 if (c
< 0xA0 || c
>= 0xE0)
3942 /* SJIS -> JISX0208 */
3944 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
3948 charset
= charset_kanji
;
3952 /* SJIS -> JISX0201-Kana */
3954 charset
= charset_kana
;
/* When the charset changes, close the annotation for the previous
   non-ASCII run and start tracking the new charset from the current
   character offset.  */
3957 if (charset
->id
!= charset_ascii
3958 && last_id
!= charset
->id
)
3960 if (last_id
!= charset_ascii
)
3961 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3962 last_id
= charset
->id
;
3963 last_offset
= char_offset
;
3965 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
/* Presumably the invalid-code path (its label is not visible --
   confirm): restore the character count and store the byte itself,
   as a raw 8-bit character when it is not ASCII.  */
3973 consumed_chars
= consumed_chars_base
;
3975 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3981 if (last_id
!= charset_ascii
)
3982 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3983 coding
->consumed_char
+= consumed_chars_base
;
3984 coding
->consumed
= src_base
- coding
->source
;
3985 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Decode BIG5 text.  Bytes are read from CODING->source +
   CODING->consumed and decoded characters are stored in
   CODING->charbuf, keeping MAX_ANNOTATION_LENGTH elements in reserve
   at its end.  Charset changes are recorded with ADD_CHARSET_DATA.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.  */
3989 decode_coding_big5 (coding
)
3990 struct coding_system
*coding
;
3992 unsigned char *src
= coding
->source
+ coding
->consumed
;
3993 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3994 unsigned char *src_base
;
3995 int *charbuf
= coding
->charbuf
;
3996 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
3997 int consumed_chars
= 0, consumed_chars_base
;
3998 int multibytep
= coding
->src_multibyte
;
3999 struct charset
*charset_roman
, *charset_big5
;
4000 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4001 int char_offset
= coding
->produced_char
;
4002 int last_offset
= char_offset
;
4003 int last_id
= charset_ascii
;
4005 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* The first two elements of VAL supply the charset ids for roman and
   Big5, in that order.  (Where VAL is initialized is not visible
   here; presumably from CHARSET_LIST -- confirm.)  */
4007 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4008 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4015 consumed_chars_base
= consumed_chars
;
4017 if (charbuf
>= charbuf_end
)
4024 if (EQ (eol_type
, Qdos
))
4028 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4029 goto no_more_source
;
4034 else if (EQ (eol_type
, Qmac
))
4039 struct charset
*charset
;
4041 charset
= charset_roman
;
/* A first byte must lie in 0xA1..0xFE and a second byte in
   0x40..0x7E or 0xA1..0xFE; the two tests below detect values
   outside those ranges.  */
4045 if (c
< 0xA1 || c
> 0xFE)
4048 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4051 charset
= charset_big5
;
/* When the charset changes, close the annotation for the previous
   non-ASCII run and start tracking the new charset from the current
   character offset.  */
4053 if (charset
->id
!= charset_ascii
4054 && last_id
!= charset
->id
)
4056 if (last_id
!= charset_ascii
)
4057 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4058 last_id
= charset
->id
;
4059 last_offset
= char_offset
;
4061 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
/* Presumably the invalid-code path (its label is not visible --
   confirm): restore the character count and store the byte itself,
   as a raw 8-bit character when it is not ASCII.  */
4070 consumed_chars
= consumed_chars_base
;
4072 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4078 if (last_id
!= charset_ascii
)
4079 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4080 coding
->consumed_char
+= consumed_chars_base
;
4081 coding
->consumed
= src_base
- coding
->source
;
4082 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4085 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4086 This function can encode charsets `ascii', `katakana-jisx0201',
4087 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4088 are sure that all these charsets are registered as official charset
4089 (i.e. do not have extended leading-codes). Characters of other
4090 charsets are produced without any encoding. If SJIS_P is 1, encode
4091 SJIS text, else encode BIG5 text. */
/* NOTE(review): the comment above looks stale.  This function takes
   only CODING (there is no SJIS_P argument) and the visible code
   handles the roman, kana and kanji charsets only; BIG5 has its own
   encode_coding_big5.
   The encoded bytes are stored from CODING->destination +
   CODING->produced; on exit CODING->result is CODING_RESULT_SUCCESS
   and CODING->produced_char / CODING->produced are updated.  */
4094 encode_coding_sjis (coding
)
4095 struct coding_system
*coding
;
4097 int multibytep
= coding
->dst_multibyte
;
4098 int *charbuf
= coding
->charbuf
;
4099 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4100 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4101 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4103 int produced_chars
= 0;
4104 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4105 int ascii_compatible
;
4106 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4109 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* The first three elements of VAL supply the charset ids for roman,
   kana and kanji, in that order.  (Where VAL is initialized is not
   visible here -- confirm.)  */
4111 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4112 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4113 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4115 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4117 while (charbuf
< charbuf_end
)
4119 ASSURE_DESTINATION (safe_room
);
4121 /* Now encode the character C. */
4122 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4123 EMIT_ONE_ASCII_BYTE (c
);
4124 else if (CHAR_BYTE8_P (c
))
4126 c
= CHAR_TO_BYTE8 (c
);
4132 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
/* Substitution path (presumably taken when no charset was found for
   C; the guarding test is not visible here -- confirm).  */
4136 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4138 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4139 charset
= CHARSET_FROM_ID (charset_ascii
);
4143 c
= coding
->default_char
;
4144 charset
= char_charset (c
, charset_list
, &code
);
4147 if (code
== CHARSET_INVALID_CODE (charset
))
/* Kanji codes are emitted as two bytes (high byte, then low byte of
   CODE), kana codes as one byte with bit 7 set, and anything else as
   one 7-bit byte.  */
4149 if (charset
== charset_kanji
)
4153 c1
= code
>> 8, c2
= code
& 0xFF;
4154 EMIT_TWO_BYTES (c1
, c2
);
4156 else if (charset
== charset_kana
)
4157 EMIT_ONE_BYTE (code
| 0x80);
4159 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4162 coding
->result
= CODING_RESULT_SUCCESS
;
4163 coding
->produced_char
+= produced_chars
;
4164 coding
->produced
= dst
- coding
->destination
;
/* Encode the characters held in CODING->charbuf as BIG5 bytes,
   stored from CODING->destination + CODING->produced.  On exit
   CODING->result is CODING_RESULT_SUCCESS and CODING->produced_char /
   CODING->produced are updated.  */
4169 encode_coding_big5 (coding
)
4170 struct coding_system
*coding
;
4172 int multibytep
= coding
->dst_multibyte
;
4173 int *charbuf
= coding
->charbuf
;
4174 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4175 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4176 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4178 int produced_chars
= 0;
4179 Lisp_Object attrs
, eol_type
, charset_list
, val
;
4180 int ascii_compatible
;
4181 struct charset
*charset_roman
, *charset_big5
;
4184 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* The first two elements of VAL supply the charset ids for roman and
   Big5, in that order.  (Where VAL is initialized is not visible
   here -- confirm.)  */
4186 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4187 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4188 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4190 while (charbuf
< charbuf_end
)
4192 ASSURE_DESTINATION (safe_room
);
4194 /* Now encode the character C. */
4195 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4196 EMIT_ONE_ASCII_BYTE (c
);
4197 else if (CHAR_BYTE8_P (c
))
4199 c
= CHAR_TO_BYTE8 (c
);
4205 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
/* Substitution path (presumably taken when no charset was found for
   C; the guarding test is not visible here -- confirm).  */
4209 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4211 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4212 charset
= CHARSET_FROM_ID (charset_ascii
);
4216 c
= coding
->default_char
;
4217 charset
= char_charset (c
, charset_list
, &code
);
4220 if (code
== CHARSET_INVALID_CODE (charset
))
/* Big5 codes are emitted as two bytes (high byte, then low byte of
   CODE); anything else is emitted as one 7-bit byte.  */
4222 if (charset
== charset_big5
)
4226 c1
= code
>> 8, c2
= code
& 0xFF;
4227 EMIT_TWO_BYTES (c1
, c2
);
4230 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4233 coding
->result
= CODING_RESULT_SUCCESS
;
4234 coding
->produced_char
+= produced_chars
;
4235 coding
->produced
= dst
- coding
->destination
;
4240 /*** 9. CCL handlers ***/
4242 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4243 Check if a text is encoded in a coding system of which
4244 encoder/decoder are written in CCL program. If it is, return
4245 CATEGORY_MASK_CCL, else return 0. */
/* The visible code reports its findings through DETECT_INFO:
   CATEGORY_MASK_CCL is always added to DETECT_INFO->checked, and then
   to DETECT_INFO->rejected or DETECT_INFO->found.
   NOTE(review): the return statements are not visible here, so the
   return-value description above could not be checked.  */
4248 detect_coding_ccl (coding
, detect_info
)
4249 struct coding_system
*coding
;
4250 struct coding_detection_info
*detect_info
;
4252 unsigned char *src
= coding
->source
, *src_base
= src
;
4253 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4254 int multibytep
= coding
->src_multibyte
;
4255 int consumed_chars
= 0;
/* NOTE(review): VALIDS and HEAD_ASCII are taken from the CODING
   passed by the caller, before CODING is rebound below to the
   coding_category_ccl entry of coding_categories.  Confirm that the
   table of valid bytes is meant to come from the caller's object.  */
4257 unsigned char *valids
= CODING_CCL_VALIDS (coding
);
4258 int head_ascii
= coding
->head_ascii
;
4261 detect_info
->checked
|= CATEGORY_MASK_CCL
;
4263 coding
= &coding_categories
[coding_category_ccl
];
4264 attrs
= CODING_ID_ATTRS (coding
->id
);
4265 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
/* A byte whose VALIDS entry is greater than 1 marks the text as
   found for this category.  */
4274 if ((valids
[c
] > 1))
4275 found
= CATEGORY_MASK_CCL
;
4277 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4281 detect_info
->found
|= found
;
/* Decode the source of CODING by running the CCL program
   CODING_CCL_DECODER (CODING).  The source is fed to ccl_driver in
   chunks of at most 1024 elements, and the driver's output goes
   straight into CODING->charbuf.  On exit CODING->result,
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used
   are updated.  */
4286 decode_coding_ccl (coding
)
4287 struct coding_system
*coding
;
4289 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4290 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4291 int *charbuf
= coding
->charbuf
;
4292 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4293 int consumed_chars
= 0;
4294 int multibytep
= coding
->src_multibyte
;
4295 struct ccl_program ccl
;
4296 int source_charbuf
[1024];
4297 int source_byteidx
[1024];
4298 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4300 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4301 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4303 while (src
< src_end
)
4305 const unsigned char *p
= src
;
4306 int *source
, *source_end
;
/* Fill SOURCE_CHARBUF with up to 1024 elements.  The first loop
   reads whole characters and remembers each one's byte offset from
   SRC in SOURCE_BYTEIDX; the second loop copies single bytes.  (The
   test that selects between them is not visible here.)  */
4310 while (i
< 1024 && p
< src_end
)
4312 source_byteidx
[i
] = p
- src
;
4313 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4316 while (i
< 1024 && p
< src_end
)
4317 source_charbuf
[i
++] = *p
++;
4319 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4322 source
= source_charbuf
;
4323 source_end
= source
+ i
;
4324 while (source
< source_end
)
4326 ccl_driver (&ccl
, source
, charbuf
,
4327 source_end
- source
, charbuf_end
- charbuf
,
4329 source
+= ccl
.consumed
;
4330 charbuf
+= ccl
.produced
;
4331 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
/* If the chunk was only partly consumed, advance SRC by the byte
   offset recorded for the first unconsumed element.  */
4334 if (source
< source_end
)
4335 src
+= source_byteidx
[source
- source_charbuf
];
4338 consumed_chars
+= source
- source_charbuf
;
/* NOTE(review): ccl.status is compared with CCL_STAT_* values
   everywhere else in this function, but the second operand below is
   CODING_RESULT_INSUFFICIENT_SRC, a CODING->result value.  This looks
   like a mix-up of the two constant families -- confirm which CCL
   status was intended.  */
4340 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4341 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4347 case CCL_STAT_SUSPEND_BY_SRC
:
4348 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4350 case CCL_STAT_SUSPEND_BY_DST
:
4353 case CCL_STAT_INVALID_CMD
:
4354 coding
->result
= CODING_RESULT_INTERRUPT
;
4357 coding
->result
= CODING_RESULT_SUCCESS
;
4360 coding
->consumed_char
+= consumed_chars
;
4361 coding
->consumed
= src
- coding
->source
;
4362 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters held in CODING->charbuf by running the CCL
   program CODING_CCL_ENCODER (CODING).  ccl_driver writes into the
   local DESTINATION_CHARBUF (1024 elements); the low 8 bits of each
   produced element are then stored at the destination.  On exit
   CODING->result, CODING->produced_char and CODING->produced are
   updated.  */
4366 encode_coding_ccl (coding
)
4367 struct coding_system
*coding
;
4369 struct ccl_program ccl
;
4370 int multibytep
= coding
->dst_multibyte
;
4371 int *charbuf
= coding
->charbuf
;
4372 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4373 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4374 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4375 unsigned char *adjusted_dst_end
= dst_end
- 1;
4376 int destination_charbuf
[1024];
4377 int i
, produced_chars
= 0;
4378 Lisp_Object attrs
, eol_type
, charset_list
;
4380 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4381 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4383 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4384 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4386 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
/* DST_BYTES is the room left in the destination.  The test against
   1024 presumably caps it at the size of DESTINATION_CHARBUF; the
   assignment itself is not visible here -- confirm.  */
4388 int dst_bytes
= dst_end
- dst
;
4389 if (dst_bytes
> 1024)
4392 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4393 charbuf_end
- charbuf
, dst_bytes
, charset_list
);
4394 charbuf
+= ccl
.consumed
;
/* Copy the produced elements to the destination, either through
   EMIT_ONE_BYTE or by storing the bytes directly and adding
   ccl.produced to PRODUCED_CHARS.  (The test that selects between the
   two loops is not visible here.)  */
4396 for (i
= 0; i
< ccl
.produced
; i
++)
4397 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4400 for (i
= 0; i
< ccl
.produced
; i
++)
4401 *dst
++ = destination_charbuf
[i
] & 0xFF;
4402 produced_chars
+= ccl
.produced
;
4408 case CCL_STAT_SUSPEND_BY_SRC
:
4409 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4411 case CCL_STAT_SUSPEND_BY_DST
:
4412 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
4415 case CCL_STAT_INVALID_CMD
:
4416 coding
->result
= CODING_RESULT_INTERRUPT
;
4419 coding
->result
= CODING_RESULT_SUCCESS
;
4423 coding
->produced_char
+= produced_chars
;
4424 coding
->produced
= dst
- coding
->destination
;
4430 /*** 10, 11. no-conversion handlers ***/
4432 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Raw text needs no conversion: the function only sets
   CODING->chars_at_source to 1, clears CODING->consumed_char and
   CODING->consumed, and reports CODING_RESULT_SUCCESS.  Nothing is
   read from the source or stored in CODING->charbuf here.  */
4435 decode_coding_raw_text (coding
)
4436 struct coding_system
*coding
;
4438 coding
->chars_at_source
= 1;
4439 coding
->consumed_char
= 0;
4440 coding
->consumed
= 0;
4441 coding
->result
= CODING_RESULT_SUCCESS
;
/* Store the elements of CODING->charbuf at CODING->destination +
   CODING->produced without charset conversion.  ASCII characters and
   raw 8-bit characters become single bytes; any other character is
   expanded with CHAR_STRING_ADVANCE.  The code has separate paths
   depending on CODING->src_multibyte.  On exit CODING->result is
   CODING_RESULT_SUCCESS and CODING->produced_char / CODING->produced
   are updated.  */
4445 encode_coding_raw_text (coding
)
4446 struct coding_system
*coding
;
4448 int multibytep
= coding
->dst_multibyte
;
4449 int *charbuf
= coding
->charbuf
;
4450 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4451 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4452 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4453 int produced_chars
= 0;
/* First variant: output goes through the EMIT_* macros.  (The test
   that selects this variant is not visible here; presumably it is
   MULTIBYTEP -- confirm.)  */
4458 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4460 if (coding
->src_multibyte
)
4461 while (charbuf
< charbuf_end
)
4463 ASSURE_DESTINATION (safe_room
);
4465 if (ASCII_CHAR_P (c
))
4466 EMIT_ONE_ASCII_BYTE (c
);
4467 else if (CHAR_BYTE8_P (c
))
4469 c
= CHAR_TO_BYTE8 (c
);
4474 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4476 CHAR_STRING_ADVANCE (c
, p1
);
4479 EMIT_ONE_BYTE (*p0
);
4485 while (charbuf
< charbuf_end
)
4487 ASSURE_DESTINATION (safe_room
);
/* Second variant: bytes are stored directly through DST.  */
4494 if (coding
->src_multibyte
)
4496 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4498 while (charbuf
< charbuf_end
)
4500 ASSURE_DESTINATION (safe_room
);
4502 if (ASCII_CHAR_P (c
))
4504 else if (CHAR_BYTE8_P (c
))
4505 *dst
++ = CHAR_TO_BYTE8 (c
);
4507 CHAR_STRING_ADVANCE (c
, dst
);
4513 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4514 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4515 *dst
++ = *charbuf
++;
/* NOTE(review): likely bug.  DST started at CODING->destination +
   CODING->produced, so the number of bytes stored by the loop above
   is DST minus that starting point.  Subtracting CODING->destination
   + CODING->dst_bytes (which equals DST_END) instead yields zero or a
   negative count, which is then added to CODING->produced_char.
   Probably `coding->produced' was intended -- confirm and fix.  */
4516 produced_chars
= dst
- (coding
->destination
+ coding
->dst_bytes
);
4519 coding
->result
= CODING_RESULT_SUCCESS
;
4520 coding
->produced_char
+= produced_chars
;
4521 coding
->produced
= dst
- coding
->destination
;
4525 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4526 Check if a text is encoded in a charset-based coding system. If it
4527 is, return 1, else return 0. */
/* The visible code reports its findings through DETECT_INFO:
   CATEGORY_MASK_CHARSET is always added to DETECT_INFO->checked, and
   then to DETECT_INFO->rejected or DETECT_INFO->found.
   NOTE(review): the return statements are not visible here, so the
   return-value description above could not be checked.  */
4530 detect_coding_charset (coding
, detect_info
)
4531 struct coding_system
*coding
;
4532 struct coding_detection_info
*detect_info
;
4534 unsigned char *src
= coding
->source
, *src_base
= src
;
4535 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4536 int multibytep
= coding
->src_multibyte
;
4537 int consumed_chars
= 0;
4538 Lisp_Object attrs
, valids
;
4541 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
/* From here on CODING refers to the coding_category_charset entry of
   coding_categories; ATTRS and VALIDS are taken from it, while SRC
   and SRC_END above were computed from the caller's object.  */
4543 coding
= &coding_categories
[coding_category_charset
];
4544 attrs
= CODING_ID_ATTRS (coding
->id
);
4545 valids
= AREF (attrs
, coding_attr_charset_valids
);
4547 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4548 src
+= coding
->head_ascii
;
/* VALIDS is indexed by byte value; a nil entry is what the test
   below looks for.  */
4555 if (NILP (AREF (valids
, c
)))
4558 found
= CATEGORY_MASK_CHARSET
;
4560 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4564 detect_info
->found
|= found
;
/* Decode text of a charset-based coding system.  Bytes are read from
   CODING->source + CODING->consumed and decoded characters are stored
   in CODING->charbuf, keeping MAX_ANNOTATION_LENGTH elements in
   reserve at its end.  The VALIDS vector, indexed by byte value,
   yields either a single charset id or a list of charset ids to try.
   Charset changes are recorded with ADD_CHARSET_DATA.  On exit
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used
   are updated.  */
4569 decode_coding_charset (coding
)
4570 struct coding_system
*coding
;
4572 unsigned char *src
= coding
->source
+ coding
->consumed
;
4573 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4574 unsigned char *src_base
;
4575 int *charbuf
= coding
->charbuf
;
4576 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4577 int consumed_chars
= 0, consumed_chars_base
;
4578 int multibytep
= coding
->src_multibyte
;
4579 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4580 int char_offset
= coding
->produced_char
;
4581 int last_offset
= char_offset
;
4582 int last_id
= charset_ascii
;
4584 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4585 valids
= AREF (attrs
, coding_attr_charset_valids
);
4592 consumed_chars_base
= consumed_chars
;
4594 if (charbuf
>= charbuf_end
)
4600 /* Here we assume that no charset maps '\r' to something
4602 if (EQ (eol_type
, Qdos
))
4606 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4607 goto no_more_source
;
4612 else if (EQ (eol_type
, Qmac
))
4618 struct charset
*charset
;
4623 val
= AREF (valids
, c
);
4628 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4629 dim
= CHARSET_DIMENSION (charset
);
4633 code
= (code
<< 8) | c
;
4636 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4641 /* VAL is a list of charset IDs. It is assured that the
4642 list is sorted by charset dimensions (smaller one
4646 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4647 dim
= CHARSET_DIMENSION (charset
);
4651 code
= (code
<< 8) | c
;
4654 CODING_DECODE_CHAR (coding
, src
, src_base
,
4655 src_end
, charset
, code
, c
);
4663 if (charset
->id
!= charset_ascii
4664 && last_id
!= charset
->id
)
4666 if (last_id
!= charset_ascii
)
4667 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4668 last_id
= charset
->id
;
4669 last_offset
= char_offset
;
4678 consumed_chars
= consumed_chars_base
;
4680 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4686 if (last_id
!= charset_ascii
)
4687 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4688 coding
->consumed_char
+= consumed_chars_base
;
4689 coding
->consumed
= src_base
- coding
->source
;
4690 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters held in CODING->charbuf for a charset-based
   coding system.  Each character is looked up in CHARSET_LIST with
   char_charset, and the resulting code is emitted as one to four
   bytes according to the dimension of the charset.  On exit
   CODING->result is CODING_RESULT_SUCCESS and CODING->produced_char /
   CODING->produced are updated.  */
4694 encode_coding_charset (coding
)
4695 struct coding_system
*coding
;
4697 int multibytep
= coding
->dst_multibyte
;
4698 int *charbuf
= coding
->charbuf
;
4699 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4700 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4701 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4702 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4703 int produced_chars
= 0;
4704 Lisp_Object attrs
, eol_type
, charset_list
;
4705 int ascii_compatible
;
4708 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4709 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4711 while (charbuf
< charbuf_end
)
4713 struct charset
*charset
;
4716 ASSURE_DESTINATION (safe_room
);
4718 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4719 EMIT_ONE_ASCII_BYTE (c
);
4720 else if (CHAR_BYTE8_P (c
))
4722 c
= CHAR_TO_BYTE8 (c
);
4727 charset
= char_charset (c
, charset_list
, &code
);
/* Emit CODE most significant byte first, using as many bytes as the
   dimension of CHARSET.  */
4730 if (CHARSET_DIMENSION (charset
) == 1)
4731 EMIT_ONE_BYTE (code
);
4732 else if (CHARSET_DIMENSION (charset
) == 2)
4733 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4734 else if (CHARSET_DIMENSION (charset
) == 3)
4735 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4737 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4738 (code
>> 8) & 0xFF, code
& 0xFF);
/* Substitution path (presumably taken when no charset was found for
   C; the guarding test is not visible here -- confirm).  */
4742 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4743 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4745 c
= coding
->default_char
;
4751 coding
->result
= CODING_RESULT_SUCCESS
;
4752 coding
->produced_char
+= produced_chars
;
4753 coding
->produced
= dst
- coding
->destination
;
4758 /*** 10. C library functions ***/
4760 /* Setup coding context CODING from information about CODING_SYSTEM.
4761 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4762 CODING_SYSTEM is invalid, signal an error. */
/* Besides the common fields (head_ascii, common_flags, safe
   charsets, default_char), the function installs the detector,
   decoder and encoder functions that match the `type' attribute of
   the coding system, together with any per-type state.  */
4765 setup_coding_system (coding_system
, coding
)
4766 Lisp_Object coding_system
;
4767 struct coding_system
*coding
;
4770 Lisp_Object eol_type
;
4771 Lisp_Object coding_type
;
4774 if (NILP (coding_system
))
4775 coding_system
= Qno_conversion
;
4777 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4779 attrs
= CODING_ID_ATTRS (coding
->id
);
4780 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
/* A vector EOL_TYPE causes CODING_REQUIRE_DETECTION_MASK to be set
   (presumably because the end-of-line format is not yet decided --
   confirm).  */
4783 coding
->head_ascii
= -1;
4784 coding
->common_flags
4785 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4787 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4788 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4789 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4790 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4792 coding_type
= CODING_ATTR_TYPE (attrs
);
4793 if (EQ (coding_type
, Qundecided
))
4795 coding
->detector
= NULL
;
4796 coding
->decoder
= decode_coding_raw_text
;
4797 coding
->encoder
= encode_coding_raw_text
;
4798 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4800 else if (EQ (coding_type
, Qiso_2022
))
4803 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4804 enum coding_category category
= XINT (CODING_ATTR_CATEGORY (attrs
));
4806 /* Invoke graphic register 0 to plane 0. */
4807 CODING_ISO_INVOCATION (coding
, 0) = 0;
4808 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4809 CODING_ISO_INVOCATION (coding
, 1)
4810 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4811 /* Setup the initial status of designation. */
4812 for (i
= 0; i
< 4; i
++)
4813 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4814 /* Not single shifting initially. */
4815 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4816 /* Beginning of buffer should also be regarded as bol. */
4817 CODING_ISO_BOL (coding
) = 1;
4818 coding
->detector
= detect_coding_iso_2022
;
4819 coding
->decoder
= decode_coding_iso_2022
;
4820 coding
->encoder
= encode_coding_iso_2022
;
4821 if (flags
& CODING_ISO_FLAG_SAFE
)
4822 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4823 coding
->common_flags
4824 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4825 | CODING_REQUIRE_FLUSHING_MASK
);
4826 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4827 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4828 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
4829 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
/* With full support, recompute the safe charsets and re-read the
   fields derived from them.  */
4830 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4832 setup_iso_safe_charsets (attrs
);
4833 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4834 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4835 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4837 CODING_ISO_FLAGS (coding
) = flags
;
4839 else if (EQ (coding_type
, Qcharset
))
4841 coding
->detector
= detect_coding_charset
;
4842 coding
->decoder
= decode_coding_charset
;
4843 coding
->encoder
= encode_coding_charset
;
4844 coding
->common_flags
4845 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4847 else if (EQ (coding_type
, Qutf_8
))
4849 coding
->detector
= detect_coding_utf_8
;
4850 coding
->decoder
= decode_coding_utf_8
;
4851 coding
->encoder
= encode_coding_utf_8
;
4852 coding
->common_flags
4853 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4855 else if (EQ (coding_type
, Qutf_16
))
/* BOM handling: a cons value selects BOM detection, t selects "with
   BOM", anything else "without BOM".  Endianness: nil selects big
   endian, anything else little endian.  */
4857 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4858 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4859 : EQ (val
, Qt
) ? utf_16_with_bom
4860 : utf_16_without_bom
);
4861 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4862 CODING_UTF_16_ENDIAN (coding
) = (NILP (val
) ? utf_16_big_endian
4863 : utf_16_little_endian
);
4864 CODING_UTF_16_SURROGATE (coding
) = 0;
4865 coding
->detector
= detect_coding_utf_16
;
4866 coding
->decoder
= decode_coding_utf_16
;
4867 coding
->encoder
= encode_coding_utf_16
;
4868 coding
->common_flags
4869 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4871 else if (EQ (coding_type
, Qccl
))
4873 coding
->detector
= detect_coding_ccl
;
4874 coding
->decoder
= decode_coding_ccl
;
4875 coding
->encoder
= encode_coding_ccl
;
4876 coding
->common_flags
4877 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4878 | CODING_REQUIRE_FLUSHING_MASK
);
4880 else if (EQ (coding_type
, Qemacs_mule
))
4882 coding
->detector
= detect_coding_emacs_mule
;
4883 coding
->decoder
= decode_coding_emacs_mule
;
4884 coding
->encoder
= encode_coding_emacs_mule
;
4885 coding
->common_flags
4886 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
/* When the `full' attribute is set and the charset list differs from
   Vemacs_mule_charset_list, build a fresh safe-charsets string sized
   for the largest id in Vemacs_mule_charset_list and store 0 at the
   index of every charset in that list.  */
4887 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4888 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4890 Lisp_Object tail
, safe_charsets
;
4891 int max_charset_id
= 0;
4893 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4895 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4896 max_charset_id
= XFASTINT (XCAR (tail
));
4897 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
4899 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4901 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
4902 coding
->max_charset_id
= max_charset_id
;
4903 coding
->safe_charsets
= (char *) XSTRING (safe_charsets
)->data
;
4906 else if (EQ (coding_type
, Qshift_jis
))
4908 coding
->detector
= detect_coding_sjis
;
4909 coding
->decoder
= decode_coding_sjis
;
4910 coding
->encoder
= encode_coding_sjis
;
4911 coding
->common_flags
4912 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4914 else if (EQ (coding_type
, Qbig5
))
4916 coding
->detector
= detect_coding_big5
;
4917 coding
->decoder
= decode_coding_big5
;
4918 coding
->encoder
= encode_coding_big5
;
4919 coding
->common_flags
4920 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4922 else /* EQ (coding_type, Qraw_text) */
4924 coding
->detector
= NULL
;
4925 coding
->decoder
= decode_coding_raw_text
;
4926 coding
->encoder
= encode_coding_raw_text
;
4927 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4933 /* Return raw-text or one of its subsidiaries that has the same
4934 eol_type as CODING-SYSTEM. */
/* CODING_SYSTEM itself is returned when its type is already
   raw-text.  Otherwise the eol-type element (index 2) of the raw-text
   spec is indexed 0, 1 or 2 for Qunix, Qdos, or anything else.
   NOTE(review): what happens when EOL_TYPE is a vector is not visible
   here.  */
4937 raw_text_coding_system (coding_system
)
4938 Lisp_Object coding_system
;
4940 Lisp_Object spec
, attrs
;
4941 Lisp_Object eol_type
, raw_text_eol_type
;
4943 spec
= CODING_SYSTEM_SPEC (coding_system
);
4944 attrs
= AREF (spec
, 0);
4946 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
4947 return coding_system
;
4949 eol_type
= AREF (spec
, 2);
4950 if (VECTORP (eol_type
))
4952 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
4953 raw_text_eol_type
= AREF (spec
, 2);
4954 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
4955 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
4956 : AREF (raw_text_eol_type
, 2));
4960 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4961 does, return the subsidiary that has the same eol-spec as
4962 PARENT. Otherwise, return CODING_SYSTEM. */
4965 coding_inherit_eol_type (coding_system
, parent
)
4966 Lisp_Object coding_system
, parent
;
4968 Lisp_Object spec
, attrs
, eol_type
;
4970 spec
= CODING_SYSTEM_SPEC (coding_system
);
4971 attrs
= AREF (spec
, 0);
4972 eol_type
= AREF (spec
, 2);
4973 if (VECTORP (eol_type
))
4975 Lisp_Object parent_spec
;
4976 Lisp_Object parent_eol_type
;
4979 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
4980 parent_eol_type
= AREF (parent_spec
, 2);
4981 if (EQ (parent_eol_type
, Qunix
))
4982 coding_system
= AREF (eol_type
, 0);
4983 else if (EQ (parent_eol_type
, Qdos
))
4984 coding_system
= AREF (eol_type
, 1);
4985 else if (EQ (parent_eol_type
, Qmac
))
4986 coding_system
= AREF (eol_type
, 2);
4988 return coding_system
;
4991 /* Emacs has a mechanism to automatically detect a coding system if it
4992 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4993 it's impossible to distinguish some coding systems accurately
4994 because they use the same range of codes. So, at first, coding
4995 systems are categorized into the following categories:
4997 o coding-category-emacs-mule
4999 The category for a coding system which has the same code range
5000 as Emacs' internal format. Assigned the coding-system (Lisp
5001 symbol) `emacs-mule' by default.
5003 o coding-category-sjis
5005 The category for a coding system which has the same code range
5006 as SJIS. Assigned the coding-system (Lisp
5007 symbol) `japanese-shift-jis' by default.
5009 o coding-category-iso-7
5011 The category for a coding system which has the same code range
5012 as ISO2022 of 7-bit environment. This doesn't use any locking
5013 shift and single shift functions. This can encode/decode all
5014 charsets. Assigned the coding-system (Lisp symbol)
5015 `iso-2022-7bit' by default.
5017 o coding-category-iso-7-tight
5019 Same as coding-category-iso-7 except that this can
5020 encode/decode only the specified charsets.
5022 o coding-category-iso-8-1
5024 The category for a coding system which has the same code range
5025 as ISO2022 of 8-bit environment and graphic plane 1 used only
5026 for DIMENSION1 charset. This doesn't use any locking shift
5027 and single shift functions. Assigned the coding-system (Lisp
5028 symbol) `iso-latin-1' by default.
5030 o coding-category-iso-8-2
5032 The category for a coding system which has the same code range
5033 as ISO2022 of 8-bit environment and graphic plane 1 used only
5034 for DIMENSION2 charset. This doesn't use any locking shift
5035 and single shift functions. Assigned the coding-system (Lisp
5036 symbol) `japanese-iso-8bit' by default.
5038 o coding-category-iso-7-else
5040 The category for a coding system which has the same code range
5041 as ISO2022 of 7-bit environment but uses locking shift or
5042 single shift functions. Assigned the coding-system (Lisp
5043 symbol) `iso-2022-7bit-lock' by default.
5045 o coding-category-iso-8-else
5047 The category for a coding system which has the same code range
5048 as ISO2022 of 8-bit environment but uses locking shift or
5049 single shift functions. Assigned the coding-system (Lisp
5050 symbol) `iso-2022-8bit-ss2' by default.
5052 o coding-category-big5
5054 The category for a coding system which has the same code range
5055 as BIG5. Assigned the coding-system (Lisp symbol)
5056 `cn-big5' by default.
5058 o coding-category-utf-8
5060 The category for a coding system which has the same code range
5061 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5062 symbol) `utf-8' by default.
5064 o coding-category-utf-16-be
5066 The category for a coding system in which a text has a
5067 Unicode signature (cf. Unicode Standard) in the order of BIG
5068 endian at the head. Assigned the coding-system (Lisp symbol)
5069 `utf-16-be' by default.
5071 o coding-category-utf-16-le
5073 The category for a coding system in which a text has a
5074 Unicode signature (cf. Unicode Standard) in the order of
5075 LITTLE endian at the head. Assigned the coding-system (Lisp
5076 symbol) `utf-16-le' by default.
5078 o coding-category-ccl
5080 The category for a coding system of which encoder/decoder is
5081 written in CCL programs. The default value is nil, i.e., no
5082 coding system is assigned.
5084 o coding-category-binary
5086 The category for a coding system not categorized in any of the
5087 above. Assigned the coding-system (Lisp symbol)
5088 `no-conversion' by default.
5090 Each of them is a Lisp symbol and the value is an actual
5091 `coding-system's (this is also a Lisp symbol) assigned by a user.
5092 What Emacs does actually is to detect a category of coding system.
5093 Then, it uses a `coding-system' assigned to it. If Emacs can't
5094 decide only one possible category, it selects a category of the
5095 highest priority. Priorities of categories are also specified by a
5096 user in a Lisp variable `coding-category-list'.
5100 #define EOL_SEEN_NONE 0
5101 #define EOL_SEEN_LF 1
5102 #define EOL_SEEN_CR 2
5103 #define EOL_SEEN_CRLF 4
5105 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5106 SOURCE is encoded. If CATEGORY is one of
5107 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5108 two-byte, else they are encoded by one-byte.
5110 Return one of EOL_SEEN_XXX. */
5112 #define MAX_EOL_CHECK_COUNT 3
5115 detect_eol (source
, src_bytes
, category
)
5116 unsigned char *source
;
5117 EMACS_INT src_bytes
;
5118 enum coding_category category
;
5120 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5123 int eol_seen
= EOL_SEEN_NONE
;
5125 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
5129 msb
= category
== (coding_category_utf_16_le
5130 | coding_category_utf_16_le_nosig
);
5133 while (src
+ 1 < src_end
)
5136 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5141 this_eol
= EOL_SEEN_LF
;
5142 else if (src
+ 3 >= src_end
5143 || src
[msb
+ 2] != 0
5144 || src
[lsb
+ 2] != '\n')
5145 this_eol
= EOL_SEEN_CR
;
5147 this_eol
= EOL_SEEN_CRLF
;
5149 if (eol_seen
== EOL_SEEN_NONE
)
5150 /* This is the first end-of-line. */
5151 eol_seen
= this_eol
;
5152 else if (eol_seen
!= this_eol
)
5154 /* The found type is different from what found before. */
5155 eol_seen
= EOL_SEEN_LF
;
5158 if (++total
== MAX_EOL_CHECK_COUNT
)
5166 while (src
< src_end
)
5169 if (c
== '\n' || c
== '\r')
5174 this_eol
= EOL_SEEN_LF
;
5175 else if (src
>= src_end
|| *src
!= '\n')
5176 this_eol
= EOL_SEEN_CR
;
5178 this_eol
= EOL_SEEN_CRLF
, src
++;
5180 if (eol_seen
== EOL_SEEN_NONE
)
5181 /* This is the first end-of-line. */
5182 eol_seen
= this_eol
;
5183 else if (eol_seen
!= this_eol
)
5185 /* The found type is different from what found before. */
5186 eol_seen
= EOL_SEEN_LF
;
5189 if (++total
== MAX_EOL_CHECK_COUNT
)
5199 adjust_coding_eol_type (coding
, eol_seen
)
5200 struct coding_system
*coding
;
5203 Lisp_Object eol_type
;
5205 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5206 if (eol_seen
& EOL_SEEN_LF
)
5207 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
5208 else if (eol_seen
& EOL_SEEN_CRLF
)
5209 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
5210 else if (eol_seen
& EOL_SEEN_CR
)
5211 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5214 /* Detect how a text specified in CODING is encoded. If a coding
5215 system is detected, update fields of CODING by the detected coding
5219 detect_coding (coding
)
5220 struct coding_system
*coding
;
5222 unsigned char *src
, *src_end
;
5223 Lisp_Object attrs
, coding_type
;
5225 coding
->consumed
= coding
->consumed_char
= 0;
5226 coding
->produced
= coding
->produced_char
= 0;
5227 coding_set_source (coding
);
5229 src_end
= coding
->source
+ coding
->src_bytes
;
5231 /* If we have not yet decided the text encoding type, detect it
5233 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
5237 for (src
= coding
->source
; src
< src_end
; src
++)
5240 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
5242 || c
== ISO_CODE_SO
)))
5245 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5247 if (coding
->head_ascii
< coding
->src_bytes
)
5249 struct coding_detection_info detect_info
;
5250 enum coding_category category
;
5251 struct coding_system
*this;
5253 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
5254 for (i
= 0; i
< coding_category_raw_text
; i
++)
5256 category
= coding_priorities
[i
];
5257 this = coding_categories
+ category
;
5260 /* No coding system of this category is defined. */
5261 detect_info
.rejected
|= (1 << category
);
5263 else if (category
>= coding_category_raw_text
)
5265 else if (detect_info
.checked
& (1 << category
))
5267 if (detect_info
.found
& (1 << category
))
5270 else if ((*(this->detector
)) (coding
, &detect_info
)
5271 && detect_info
.found
& (1 << category
))
5274 if (i
< coding_category_raw_text
)
5275 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5276 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5277 setup_coding_system (Qraw_text
, coding
);
5278 else if (detect_info
.rejected
)
5279 for (i
= 0; i
< coding_category_raw_text
; i
++)
5280 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5282 this = coding_categories
+ coding_priorities
[i
];
5283 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5289 attrs
= CODING_ID_ATTRS (coding
->id
);
5290 coding_type
= CODING_ATTR_TYPE (attrs
);
5292 /* If we have not yet decided the EOL type, detect it now. But, the
5293 detection is impossible for a CCL based coding system, in which
5294 case, we detect the EOL type after decoding. */
5295 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
))
5296 && ! EQ (coding_type
, Qccl
))
5298 int eol_seen
= detect_eol (coding
->source
, coding
->src_bytes
,
5299 XINT (CODING_ATTR_CATEGORY (attrs
)));
5301 if (eol_seen
!= EOL_SEEN_NONE
)
5302 adjust_coding_eol_type (coding
, eol_seen
);
5309 struct coding_system
*coding
;
5311 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
)))
5313 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5314 unsigned char *pend
= p
+ coding
->produced
;
5315 int eol_seen
= EOL_SEEN_NONE
;
5317 for (; p
< pend
; p
++)
5320 eol_seen
|= EOL_SEEN_LF
;
5321 else if (*p
== '\r')
5323 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5325 eol_seen
|= EOL_SEEN_CRLF
;
5329 eol_seen
|= EOL_SEEN_CR
;
5332 if (eol_seen
!= EOL_SEEN_NONE
)
5333 adjust_coding_eol_type (coding
, eol_seen
);
5336 if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qmac
))
5338 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5339 unsigned char *pend
= p
+ coding
->produced
;
5341 for (; p
< pend
; p
++)
5345 else if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qdos
))
5347 unsigned char *p
, *pbeg
, *pend
;
5348 Lisp_Object undo_list
;
5350 move_gap_both (coding
->dst_pos
+ coding
->produced_char
,
5351 coding
->dst_pos_byte
+ coding
->produced
);
5352 undo_list
= current_buffer
->undo_list
;
5353 current_buffer
->undo_list
= Qt
;
5354 del_range_2 (coding
->dst_pos
, coding
->dst_pos_byte
, GPT
, GPT_BYTE
, 0);
5355 current_buffer
->undo_list
= undo_list
;
5357 pend
= pbeg
+ coding
->produced
;
5359 for (p
= pend
- 1; p
>= pbeg
; p
--)
5362 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
- p
- 1);
5365 coding
->produced_char
-= coding
->produced
- (pend
- pbeg
);
5366 coding
->produced
= pend
- pbeg
;
5367 insert_from_gap (coding
->produced_char
, coding
->produced
);
5372 translate_chars (coding
, table
)
5373 struct coding_system
*coding
;
5376 int *charbuf
= coding
->charbuf
;
5377 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5380 if (coding
->chars_at_source
)
5383 while (charbuf
< charbuf_end
)
5389 *charbuf
++ = translate_char (table
, c
);
5394 produce_chars (coding
)
5395 struct coding_system
*coding
;
5397 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5398 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5400 int produced_chars
= 0;
5402 if (! coding
->chars_at_source
)
5404 /* Characters are in coding->charbuf. */
5405 int *buf
= coding
->charbuf
;
5406 int *buf_end
= buf
+ coding
->charbuf_used
;
5407 unsigned char *adjusted_dst_end
;
5409 if (BUFFERP (coding
->src_object
)
5410 && EQ (coding
->src_object
, coding
->dst_object
))
5411 dst_end
= coding
->source
+ coding
->consumed
;
5412 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5414 while (buf
< buf_end
)
5418 if (dst
>= adjusted_dst_end
)
5420 dst
= alloc_destination (coding
,
5421 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5423 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5424 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5428 if (coding
->dst_multibyte
5429 || ! CHAR_BYTE8_P (c
))
5430 CHAR_STRING_ADVANCE (c
, dst
);
5432 *dst
++ = CHAR_TO_BYTE8 (c
);
5436 /* This is an annotation datum. */
5442 unsigned char *src
= coding
->source
;
5443 unsigned char *src_end
= src
+ coding
->src_bytes
;
5444 Lisp_Object eol_type
;
5446 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5448 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5450 if (coding
->src_multibyte
)
5457 unsigned char *src_base
= src
;
5463 if (EQ (eol_type
, Qdos
))
5467 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
5468 goto no_more_source
;
5473 else if (EQ (eol_type
, Qmac
))
5478 coding
->consumed
= src
- coding
->source
;
5480 if (EQ (coding
->src_object
, coding
->dst_object
))
5484 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5486 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5487 coding_set_source (coding
);
5488 src
= coding
->source
+ coding
->consumed
;
5489 src_end
= coding
->source
+ coding
->src_bytes
;
5499 while (src
< src_end
)
5506 if (EQ (eol_type
, Qdos
))
5512 else if (EQ (eol_type
, Qmac
))
5515 if (dst
>= dst_end
- 1)
5517 coding
->consumed
= src
- coding
->source
;
5519 if (EQ (coding
->src_object
, coding
->dst_object
))
5521 if (dst
>= dst_end
- 1)
5523 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5525 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5526 coding_set_source (coding
);
5527 src
= coding
->source
+ coding
->consumed
;
5528 src_end
= coding
->source
+ coding
->src_bytes
;
5536 if (!EQ (coding
->src_object
, coding
->dst_object
))
5538 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5542 EMACS_INT offset
= src
- coding
->source
;
5544 dst
= alloc_destination (coding
, require
, dst
);
5545 coding_set_source (coding
);
5546 src
= coding
->source
+ offset
;
5547 src_end
= coding
->source
+ coding
->src_bytes
;
5550 produced_chars
= coding
->src_chars
;
5551 while (src
< src_end
)
5557 if (EQ (eol_type
, Qdos
))
5564 else if (EQ (eol_type
, Qmac
))
5570 coding
->consumed
= coding
->src_bytes
;
5571 coding
->consumed_char
= coding
->src_chars
;
5574 produced
= dst
- (coding
->destination
+ coding
->produced
);
5575 if (BUFFERP (coding
->dst_object
))
5576 insert_from_gap (produced_chars
, produced
);
5577 coding
->produced
+= produced
;
5578 coding
->produced_char
+= produced_chars
;
5579 return produced_chars
;
5582 /* Compose text in CODING->object according to the annotation data at
5583 CHARBUF. CHARBUF is an array:
5584 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5588 produce_composition (coding
, charbuf
)
5589 struct coding_system
*coding
;
5594 enum composition_method method
;
5595 Lisp_Object components
;
5598 from
= coding
->dst_pos
+ charbuf
[2];
5599 to
= coding
->dst_pos
+ charbuf
[3];
5600 method
= (enum composition_method
) (charbuf
[4]);
5602 if (method
== COMPOSITION_RELATIVE
)
5606 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5611 for (i
= 0; i
< len
; i
++)
5612 args
[i
] = make_number (charbuf
[i
]);
5613 components
= (method
== COMPOSITION_WITH_ALTCHARS
5614 ? Fstring (len
, args
) : Fvector (len
, args
));
5616 compose_text (from
, to
, components
, Qnil
, coding
->dst_object
);
5620 /* Put `charset' property on text in CODING->object according to
5621 the annotation data at CHARBUF. CHARBUF is an array:
5622 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5626 produce_charset (coding
, charbuf
)
5627 struct coding_system
*coding
;
5630 EMACS_INT from
= coding
->dst_pos
+ charbuf
[2];
5631 EMACS_INT to
= coding
->dst_pos
+ charbuf
[3];
5632 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[4]);
5634 Fput_text_property (make_number (from
), make_number (to
),
5635 Qcharset
, CHARSET_NAME (charset
),
5636 coding
->dst_object
);
/* Initial number of `int' elements requested for the work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate, with alloca, the character work area of CODING (a pointer
   to a coding system structure) and record its element count in
   CODING->charbuf_size.  The request starts at CHARBUF_SIZE elements
   and is halved while the allocation yields a null pointer and the
   size is still above 1024.

   If no area could be obtained, CODING->result is set to
   CODING_RESULT_INSUFFICIENT_MEM and the *calling function* returns
   that value, so this macro may only be used in functions returning
   int.  Because the memory comes from alloca, it belongs to the
   caller's stack frame.

   CODING is evaluated several times; it must have no side effects.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int work_area_size = CHARBUF_SIZE;                                  \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (work_area_size > 1024)                                       \
      {                                                                 \
        (coding)->charbuf                                               \
          = (int *) alloca (sizeof (int) * work_area_size);             \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        work_area_size >>= 1;                                           \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        (coding)->result = CODING_RESULT_INSUFFICIENT_MEM;              \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = work_area_size;                            \
  } while (0)
5664 produce_annotation (coding
)
5665 struct coding_system
*coding
;
5667 int *charbuf
= coding
->charbuf
;
5668 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5670 if (NILP (coding
->dst_object
))
5673 while (charbuf
< charbuf_end
)
5679 int len
= -*charbuf
;
5682 case CODING_ANNOTATE_COMPOSITION_MASK
:
5683 produce_composition (coding
, charbuf
);
5685 case CODING_ANNOTATE_CHARSET_MASK
:
5686 produce_charset (coding
, charbuf
);
5696 /* Decode the data at CODING->src_object into CODING->dst_object.
5697 CODING->src_object is a buffer, a string, or nil.
5698 CODING->dst_object is a buffer.
5700 If CODING->src_object is a buffer, it must be the current buffer.
5701 In this case, if CODING->src_pos is positive, it is a position of
5702 the source text in the buffer, otherwise, the source text is in the
5703 gap area of the buffer, and CODING->src_pos specifies the offset of
5704 the text from GPT (which must be the same as PT). If this is the
5705 same buffer as CODING->dst_object, CODING->src_pos must be
5708 If CODING->src_object is a string, CODING->src_pos is an index to
5711 If CODING->src_object is nil, CODING->source must already point to
5712 the non-relocatable memory area. In this case, CODING->src_pos is
5713 an offset from CODING->source.
5715 The decoded data is inserted at the current point of the buffer
5720 decode_coding (coding
)
5721 struct coding_system
*coding
;
5725 if (BUFFERP (coding
->src_object
)
5726 && coding
->src_pos
> 0
5727 && coding
->src_pos
< GPT
5728 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5729 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
5731 if (BUFFERP (coding
->dst_object
))
5733 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5734 set_buffer_internal (XBUFFER (coding
->dst_object
));
5736 move_gap_both (PT
, PT_BYTE
);
5739 coding
->consumed
= coding
->consumed_char
= 0;
5740 coding
->produced
= coding
->produced_char
= 0;
5741 coding
->chars_at_source
= 0;
5742 coding
->result
= CODING_RESULT_SUCCESS
;
5745 ALLOC_CONVERSION_WORK_AREA (coding
);
5747 attrs
= CODING_ID_ATTRS (coding
->id
);
5751 coding_set_source (coding
);
5752 coding
->annotated
= 0;
5753 (*(coding
->decoder
)) (coding
);
5754 if (!NILP (CODING_ATTR_DECODE_TBL (attrs
)))
5755 translate_chars (coding
, CODING_ATTR_DECODE_TBL (attrs
));
5756 else if (!NILP (Vstandard_translation_table_for_decode
))
5757 translate_chars (coding
, Vstandard_translation_table_for_decode
);
5758 coding_set_destination (coding
);
5759 produce_chars (coding
);
5760 if (coding
->annotated
)
5761 produce_annotation (coding
);
5763 while (coding
->consumed
< coding
->src_bytes
5764 && ! coding
->result
);
5766 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qccl
)
5767 && SYMBOLP (CODING_ID_EOL_TYPE (coding
->id
))
5768 && ! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5769 decode_eol (coding
);
5771 coding
->carryover_bytes
= 0;
5772 if (coding
->consumed
< coding
->src_bytes
)
5774 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5777 coding_set_source (coding
);
5778 coding_set_destination (coding
);
5779 src
= coding
->source
+ coding
->consumed
;
5781 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5783 /* Flush out unprocessed data as binary chars. We are sure
5784 that the number of data is less than the size of
5786 while (nbytes
-- > 0)
5790 coding
->charbuf
[coding
->charbuf_used
++] = (c
& 0x80 ? - c
: c
);
5792 produce_chars (coding
);
5796 /* Record unprocessed bytes in coding->carryover. We are
5797 sure that the number of data is less than the size of
5798 coding->carryover. */
5799 unsigned char *p
= coding
->carryover
;
5801 coding
->carryover_bytes
= nbytes
;
5802 while (nbytes
-- > 0)
5805 coding
->consumed
= coding
->src_bytes
;
5808 return coding
->result
;
5812 /* Extract an annotation datum from a composition starting at POS and
5813 ending before LIMIT of CODING->src_object (buffer or string), store
5814 the data in BUF, set *STOP to a starting position of the next
5815 composition (if any) or to LIMIT, and return the address of the
5816 next element of BUF.
5818 If such an annotation is not found, set *STOP to a starting
5819 position of a composition after POS (if any) or to LIMIT, and
5823 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
5824 EMACS_INT pos
, limit
;
5825 struct coding_system
*coding
;
5829 EMACS_INT start
, end
;
5832 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
5835 else if (start
> pos
)
5841 /* We found a composition. Store the corresponding
5842 annotation data in BUF. */
5844 enum composition_method method
= COMPOSITION_METHOD (prop
);
5845 int nchars
= COMPOSITION_LENGTH (prop
);
5847 ADD_COMPOSITION_DATA (buf
, 0, nchars
, method
);
5848 if (method
!= COMPOSITION_RELATIVE
)
5850 Lisp_Object components
;
5853 components
= COMPOSITION_COMPONENTS (prop
);
5854 if (VECTORP (components
))
5856 len
= XVECTOR (components
)->size
;
5857 for (i
= 0; i
< len
; i
++)
5858 *buf
++ = XINT (AREF (components
, i
));
5860 else if (STRINGP (components
))
5862 len
= XSTRING (components
)->size
;
5866 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
5870 else if (INTEGERP (components
))
5873 *buf
++ = XINT (components
);
5875 else if (CONSP (components
))
5877 for (len
= 0; CONSP (components
);
5878 len
++, components
= XCDR (components
))
5879 *buf
++ = XINT (XCAR (components
));
5887 if (find_composition (end
, limit
, &start
, &end
, &prop
,
5898 /* Extract an annotation datum from a text property `charset' at POS of
5899 CODING->src_object (buffer or string), store the data in BUF, set
5900 *STOP to the position where the value of `charset' property changes
5901 (limiting by LIMIT), and return the address of the next element of
5904 If the property value is nil, set *STOP to the position where the
5905 property value is non-nil (limiting by LIMIT), and return BUF. */
5908 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
5909 EMACS_INT pos
, limit
;
5910 struct coding_system
*coding
;
5914 Lisp_Object val
, next
;
5917 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
5918 if (! NILP (val
) && CHARSETP (val
))
5919 id
= XINT (CHARSET_SYMBOL_ID (val
));
5922 ADD_CHARSET_DATA (buf
, 0, 0, id
);
5923 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
5925 make_number (limit
));
5926 *stop
= XINT (next
);
5932 consume_chars (coding
)
5933 struct coding_system
*coding
;
5935 int *buf
= coding
->charbuf
;
5936 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
5937 const unsigned char *src
= coding
->source
+ coding
->consumed
;
5938 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
5939 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
5940 int multibytep
= coding
->src_multibyte
;
5941 Lisp_Object eol_type
;
5943 EMACS_INT stop
, stop_composition
, stop_charset
;
5946 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5947 if (VECTORP (eol_type
))
5950 /* Note: composition handling is not yet implemented. */
5951 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
5953 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
5954 stop
= stop_composition
= pos
;
5956 stop
= stop_composition
= end_pos
;
5957 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
5958 stop
= stop_charset
= pos
;
5960 stop_charset
= end_pos
;
5962 /* Compensate for CRLF and annotation. */
5963 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
5964 while (buf
< buf_end
)
5972 if (pos
== stop_composition
)
5973 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
5974 buf
, &stop_composition
);
5975 if (pos
== stop_charset
)
5976 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
5977 buf
, &stop_charset
);
5978 stop
= (stop_composition
< stop_charset
5979 ? stop_composition
: stop_charset
);
5985 c
= STRING_CHAR_ADVANCE (src
);
5986 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
5988 if (! EQ (eol_type
, Qunix
))
5992 if (EQ (eol_type
, Qdos
))
6002 coding
->consumed
= src
- coding
->source
;
6003 coding
->consumed_char
= pos
- coding
->src_pos
;
6004 coding
->charbuf_used
= buf
- coding
->charbuf
;
6005 coding
->chars_at_source
= 0;
6009 /* Encode the text at CODING->src_object into CODING->dst_object.
6010 CODING->src_object is a buffer or a string.
6011 CODING->dst_object is a buffer or nil.
6013 If CODING->src_object is a buffer, it must be the current buffer.
6014 In this case, if CODING->src_pos is positive, it is a position of
6015 the source text in the buffer, otherwise, the source text is in the
6016 gap area of the buffer, and coding->src_pos specifies the offset of
6017 the text from GPT (which must be the same as PT). If this is the
6018 same buffer as CODING->dst_object, CODING->src_pos must be
6019 negative and CODING should not have `pre-write-conversion'.
6021 If CODING->src_object is a string, CODING should not have
6022 `pre-write-conversion'.
6024 If CODING->dst_object is a buffer, the encoded data is inserted at
6025 the current point of that buffer.
6027 If CODING->dst_object is nil, the encoded data is placed at the
6028 memory area specified by CODING->destination. */
6031 encode_coding (coding
)
6032 struct coding_system
*coding
;
6036 attrs
= CODING_ID_ATTRS (coding
->id
);
6038 if (BUFFERP (coding
->dst_object
))
6040 set_buffer_internal (XBUFFER (coding
->dst_object
));
6041 coding
->dst_multibyte
6042 = ! NILP (current_buffer
->enable_multibyte_characters
);
6045 coding
->consumed
= coding
->consumed_char
= 0;
6046 coding
->produced
= coding
->produced_char
= 0;
6047 coding
->result
= CODING_RESULT_SUCCESS
;
6050 ALLOC_CONVERSION_WORK_AREA (coding
);
6053 coding_set_source (coding
);
6054 consume_chars (coding
);
6056 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs
)))
6057 translate_chars (coding
, CODING_ATTR_ENCODE_TBL (attrs
));
6058 else if (!NILP (Vstandard_translation_table_for_encode
))
6059 translate_chars (coding
, Vstandard_translation_table_for_encode
);
6061 coding_set_destination (coding
);
6062 (*(coding
->encoder
)) (coding
);
6063 } while (coding
->consumed_char
< coding
->src_chars
);
6065 if (BUFFERP (coding
->dst_object
))
6066 insert_from_gap (coding
->produced_char
, coding
->produced
);
6068 return (coding
->result
);
6073 /* List of currently used working buffers. */
6074 Lisp_Object Vcode_conversion_work_buf_list
;
6076 /* A working buffer used by the top level conversion. */
6077 Lisp_Object Vcode_conversion_reused_work_buf
;
6080 /* Return a working buffer that can be freely used by the following
6081 code conversion. MULTIBYTEP specifies the multibyteness of the
6085 make_conversion_work_buffer (multibytep
)
6088 struct buffer
*current
= current_buffer
;
6091 if (NILP (Vcode_conversion_work_buf_list
))
6093 if (NILP (Vcode_conversion_reused_work_buf
))
6094 Vcode_conversion_reused_work_buf
6095 = Fget_buffer_create (build_string (" *code-conversion-work*"));
6096 Vcode_conversion_work_buf_list
6097 = Fcons (Vcode_conversion_reused_work_buf
, Qnil
);
6101 int depth
= XINT (Flength (Vcode_conversion_work_buf_list
));
6104 sprintf (str
, " *code-conversion-work*<%d>", depth
);
6105 Vcode_conversion_work_buf_list
6106 = Fcons (Fget_buffer_create (build_string (str
)),
6107 Vcode_conversion_work_buf_list
);
6110 buf
= XCAR (Vcode_conversion_work_buf_list
);
6111 set_buffer_internal (XBUFFER (buf
));
6112 current_buffer
->undo_list
= Qt
;
6114 Fset_buffer_multibyte (multibytep
? Qt
: Qnil
, Qnil
);
6115 set_buffer_internal (current
);
6119 static struct coding_system
*saved_coding
;
6122 code_conversion_restore (info
)
6125 int depth
= XINT (Flength (Vcode_conversion_work_buf_list
));
6130 buf
= XCAR (Vcode_conversion_work_buf_list
);
6131 Vcode_conversion_work_buf_list
= XCDR (Vcode_conversion_work_buf_list
);
6132 if (depth
> 1 && !NILP (Fbuffer_live_p (buf
)))
6136 if (EQ (saved_coding
->dst_object
, Qt
)
6137 && saved_coding
->destination
)
6138 xfree (saved_coding
->destination
);
6140 return save_excursion_restore (info
);
6145 decode_coding_gap (coding
, chars
, bytes
)
6146 struct coding_system
*coding
;
6147 EMACS_INT chars
, bytes
;
6149 int count
= specpdl_ptr
- specpdl
;
6151 saved_coding
= coding
;
6152 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6154 coding
->src_object
= Fcurrent_buffer ();
6155 coding
->src_chars
= chars
;
6156 coding
->src_bytes
= bytes
;
6157 coding
->src_pos
= -chars
;
6158 coding
->src_pos_byte
= -bytes
;
6159 coding
->src_multibyte
= chars
< bytes
;
6160 coding
->dst_object
= coding
->src_object
;
6161 coding
->dst_pos
= PT
;
6162 coding
->dst_pos_byte
= PT_BYTE
;
6163 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
6164 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6166 if (CODING_REQUIRE_DETECTION (coding
))
6167 detect_coding (coding
);
6169 decode_coding (coding
);
6171 unbind_to (count
, Qnil
);
6172 return coding
->result
;
6176 encode_coding_gap (coding
, chars
, bytes
)
6177 struct coding_system
*coding
;
6178 EMACS_INT chars
, bytes
;
6180 int count
= specpdl_ptr
- specpdl
;
6183 saved_coding
= coding
;
6184 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6186 buffer
= Fcurrent_buffer ();
6187 coding
->src_object
= buffer
;
6188 coding
->src_chars
= chars
;
6189 coding
->src_bytes
= bytes
;
6190 coding
->src_pos
= -chars
;
6191 coding
->src_pos_byte
= -bytes
;
6192 coding
->src_multibyte
= chars
< bytes
;
6193 coding
->dst_object
= coding
->src_object
;
6194 coding
->dst_pos
= PT
;
6195 coding
->dst_pos_byte
= PT_BYTE
;
6197 encode_coding (coding
);
6199 unbind_to (count
, Qnil
);
6200 return coding
->result
;
6204 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6205 SRC_OBJECT into DST_OBJECT by coding context CODING.
6207 SRC_OBJECT is a buffer, a string, or Qnil.
6209 If it is a buffer, the text is at point of the buffer. FROM and TO
6210 are positions in the buffer.
6212 If it is a string, the text is at the beginning of the string.
6213 FROM and TO are indices to the string.
6215 If it is nil, the text is at coding->source. FROM and TO are
6216 indices to coding->source.
6218 DST_OBJECT is a buffer, Qt, or Qnil.
6220 If it is a buffer, the decoded text is inserted at point of the
6221 buffer. If the buffer is the same as SRC_OBJECT, the source text
6224 If it is Qt, a string is made from the decoded text, and
6225 set in CODING->dst_object.
6227 If it is Qnil, the decoded text is stored at CODING->destination.
6228 The caller must allocate CODING->dst_bytes bytes at
6229 CODING->destination by xmalloc. If the decoded text is longer than
6230 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6234 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6236 struct coding_system
*coding
;
6237 Lisp_Object src_object
;
6238 EMACS_INT from
, from_byte
, to
, to_byte
;
6239 Lisp_Object dst_object
;
6241 int count
= specpdl_ptr
- specpdl
;
6242 unsigned char *destination
;
6243 EMACS_INT dst_bytes
;
6244 EMACS_INT chars
= to
- from
;
6245 EMACS_INT bytes
= to_byte
- from_byte
;
6248 saved_coding
= coding
;
6249 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6251 if (NILP (dst_object
))
6253 destination
= coding
->destination
;
6254 dst_bytes
= coding
->dst_bytes
;
6257 coding
->src_object
= src_object
;
6258 coding
->src_chars
= chars
;
6259 coding
->src_bytes
= bytes
;
6260 coding
->src_multibyte
= chars
< bytes
;
6262 if (STRINGP (src_object
))
6264 coding
->src_pos
= from
;
6265 coding
->src_pos_byte
= from_byte
;
6267 else if (BUFFERP (src_object
))
6269 set_buffer_internal (XBUFFER (src_object
));
6271 move_gap_both (from
, from_byte
);
6272 if (EQ (src_object
, dst_object
))
6274 TEMP_SET_PT_BOTH (from
, from_byte
);
6275 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6276 coding
->src_pos
= -chars
;
6277 coding
->src_pos_byte
= -bytes
;
6281 coding
->src_pos
= from
;
6282 coding
->src_pos_byte
= from_byte
;
6286 if (CODING_REQUIRE_DETECTION (coding
))
6287 detect_coding (coding
);
6288 attrs
= CODING_ID_ATTRS (coding
->id
);
6290 if (EQ (dst_object
, Qt
)
6291 || (! NILP (CODING_ATTR_POST_READ (attrs
))
6292 && NILP (dst_object
)))
6294 coding
->dst_object
= make_conversion_work_buffer (1);
6295 coding
->dst_pos
= BEG
;
6296 coding
->dst_pos_byte
= BEG_BYTE
;
6297 coding
->dst_multibyte
= 1;
6299 else if (BUFFERP (dst_object
))
6301 coding
->dst_object
= dst_object
;
6302 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6303 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6304 coding
->dst_multibyte
6305 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6309 coding
->dst_object
= Qnil
;
6310 coding
->dst_multibyte
= 1;
6313 decode_coding (coding
);
6315 if (BUFFERP (coding
->dst_object
))
6316 set_buffer_internal (XBUFFER (coding
->dst_object
));
6318 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6320 struct gcpro gcpro1
, gcpro2
;
6321 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6324 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6325 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6326 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6327 make_number (coding
->produced_char
));
6330 coding
->produced_char
+= Z
- prev_Z
;
6331 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6334 if (EQ (dst_object
, Qt
))
6336 coding
->dst_object
= Fbuffer_string ();
6338 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6340 set_buffer_internal (XBUFFER (coding
->dst_object
));
6341 if (dst_bytes
< coding
->produced
)
6344 = (unsigned char *) xrealloc (destination
, coding
->produced
);
6347 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
6348 unbind_to (count
, Qnil
);
6351 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6352 move_gap_both (BEGV
, BEGV_BYTE
);
6353 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6354 coding
->destination
= destination
;
6358 unbind_to (count
, Qnil
);
6363 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6365 struct coding_system
*coding
;
6366 Lisp_Object src_object
;
6367 EMACS_INT from
, from_byte
, to
, to_byte
;
6368 Lisp_Object dst_object
;
6370 int count
= specpdl_ptr
- specpdl
;
6371 EMACS_INT chars
= to
- from
;
6372 EMACS_INT bytes
= to_byte
- from_byte
;
6375 saved_coding
= coding
;
6376 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
6378 coding
->src_object
= src_object
;
6379 coding
->src_chars
= chars
;
6380 coding
->src_bytes
= bytes
;
6381 coding
->src_multibyte
= chars
< bytes
;
6383 attrs
= CODING_ID_ATTRS (coding
->id
);
6385 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6387 coding
->src_object
= make_conversion_work_buffer (coding
->src_multibyte
);
6388 set_buffer_internal (XBUFFER (coding
->src_object
));
6389 if (STRINGP (src_object
))
6390 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6391 else if (BUFFERP (src_object
))
6392 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6394 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
6396 if (EQ (src_object
, dst_object
))
6398 set_buffer_internal (XBUFFER (src_object
));
6399 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6400 set_buffer_internal (XBUFFER (coding
->src_object
));
6403 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6404 make_number (BEG
), make_number (Z
));
6405 coding
->src_object
= Fcurrent_buffer ();
6407 move_gap_both (BEG
, BEG_BYTE
);
6408 coding
->src_chars
= Z
- BEG
;
6409 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6410 coding
->src_pos
= BEG
;
6411 coding
->src_pos_byte
= BEG_BYTE
;
6412 coding
->src_multibyte
= Z
< Z_BYTE
;
6414 else if (STRINGP (src_object
))
6416 coding
->src_pos
= from
;
6417 coding
->src_pos_byte
= from_byte
;
6419 else if (BUFFERP (src_object
))
6421 set_buffer_internal (XBUFFER (src_object
));
6422 if (EQ (src_object
, dst_object
))
6424 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
6425 coding
->src_pos
= 0;
6426 coding
->src_pos_byte
= 0;
6430 if (from
< GPT
&& to
>= GPT
)
6431 move_gap_both (from
, from_byte
);
6432 coding
->src_pos
= from
;
6433 coding
->src_pos_byte
= from_byte
;
6437 if (BUFFERP (dst_object
))
6439 coding
->dst_object
= dst_object
;
6440 if (EQ (src_object
, dst_object
))
6442 coding
->dst_pos
= from
;
6443 coding
->dst_pos_byte
= from_byte
;
6447 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6448 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6450 coding
->dst_multibyte
6451 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6453 else if (EQ (dst_object
, Qt
))
6455 coding
->dst_object
= Qnil
;
6456 coding
->dst_bytes
= coding
->src_chars
;
6457 if (coding
->dst_bytes
== 0)
6458 coding
->dst_bytes
= 1;
6459 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6460 coding
->dst_multibyte
= 0;
6464 coding
->dst_object
= Qnil
;
6465 coding
->dst_multibyte
= 0;
6468 encode_coding (coding
);
6470 if (EQ (dst_object
, Qt
))
6472 if (BUFFERP (coding
->dst_object
))
6473 coding
->dst_object
= Fbuffer_string ();
6477 = make_unibyte_string ((char *) coding
->destination
,
6479 xfree (coding
->destination
);
6483 unbind_to (count
, Qnil
);
/* Return CODING_ID_NAME of the coding system id stored in the
   coding_categories entry that is listed first in coding_priorities.  */
6488 preferred_coding_system ()
6490 int id
= coding_categories
[coding_priorities
[0]].id
;
6492 return CODING_ID_NAME (id
);
6497 /*** 8. Emacs Lisp library functions ***/
6499 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6500 doc
: /* Return t if OBJECT is nil or a coding-system.
6501 See the documentation of `define-coding-system' for information
6502 about coding-system objects. */)
6506 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
6509 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6510 Sread_non_nil_coding_system
, 1, 1, 0,
6511 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6518 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6519 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6521 while (XSTRING (val
)->size
== 0);
6522 return (Fintern (val
, Qnil
));
6525 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6526 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6527 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6528 (prompt
, default_coding_system
)
6529 Lisp_Object prompt
, default_coding_system
;
6532 if (SYMBOLP (default_coding_system
))
6533 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
6534 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6535 Qt
, Qnil
, Qcoding_system_history
,
6536 default_coding_system
, Qnil
);
6537 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
6540 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6542 doc
: /* Check validity of CODING-SYSTEM.
6543 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6545 Lisp_Object coding_system
;
6547 CHECK_SYMBOL (coding_system
);
6548 if (!NILP (Fcoding_system_p (coding_system
)))
6549 return coding_system
;
6551 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6555 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6556 HIGHEST is nonzero, return the coding system of the highest
6557 priority among the detected coding systems. Otherwise return a
6558 list of detected coding systems sorted by their priorities. If
6559 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6560 multibyte form but contains only ASCII and eight-bit chars.
6561 Otherwise, the bytes are raw bytes.
6563 CODING-SYSTEM controls the detection as below:
6565 If it is nil, detect both text-format and eol-format. If the
6566 text-format part of CODING-SYSTEM is already specified
6567 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6568 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6569 detect only text-format. */
6572 detect_coding_system (src
, src_bytes
, highest
, multibytep
, coding_system
)
6574 int src_bytes
, highest
;
6576 Lisp_Object coding_system
;
6578 unsigned char *src_end
= src
+ src_bytes
;
6579 int mask
= CATEGORY_MASK_ANY
;
6580 Lisp_Object attrs
, eol_type
;
6582 struct coding_system coding
;
6584 struct coding_detection_info detect_info
;
6586 if (NILP (coding_system
))
6587 coding_system
= Qundecided
;
6588 setup_coding_system (coding_system
, &coding
);
6589 attrs
= CODING_ID_ATTRS (coding
.id
);
6590 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6591 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
6593 coding
.source
= src
;
6594 coding
.src_bytes
= src_bytes
;
6595 coding
.src_multibyte
= multibytep
;
6596 coding
.consumed
= 0;
6597 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6599 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
6601 /* At first, detect text-format if necessary. */
6602 if (XINT (CODING_ATTR_CATEGORY (attrs
)) == coding_category_undecided
)
6604 enum coding_category category
;
6605 struct coding_system
*this;
6608 for (; src
< src_end
; src
++)
6612 || (c
< 0x20 && (c
== ISO_CODE_ESC
6614 || c
== ISO_CODE_SO
)))
6617 coding
.head_ascii
= src
- coding
.source
;
6620 for (i
= 0; i
< coding_category_raw_text
; i
++)
6622 category
= coding_priorities
[i
];
6623 this = coding_categories
+ category
;
6627 /* No coding system of this category is defined. */
6628 detect_info
.rejected
|= (1 << category
);
6630 else if (category
>= coding_category_raw_text
)
6632 else if (detect_info
.checked
& (1 << category
))
6635 && (detect_info
.found
& (1 << category
)))
6640 if ((*(this->detector
)) (&coding
, &detect_info
)
6642 && (detect_info
.found
& (1 << category
)))
6648 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
6650 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
6651 id
= coding_categories
[coding_category_raw_text
].id
;
6652 val
= Fcons (make_number (id
), Qnil
);
6654 else if (! detect_info
.rejected
&& ! detect_info
.found
)
6656 detect_info
.found
= CATEGORY_MASK_ANY
;
6657 id
= coding_categories
[coding_category_undecided
].id
;
6658 val
= Fcons (make_number (id
), Qnil
);
6662 if (detect_info
.found
)
6664 detect_info
.found
= 1 << category
;
6665 val
= Fcons (make_number (this->id
), Qnil
);
6668 for (i
= 0; i
< coding_category_raw_text
; i
++)
6669 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
6671 detect_info
.found
= 1 << coding_priorities
[i
];
6672 id
= coding_categories
[coding_priorities
[i
]].id
;
6673 val
= Fcons (make_number (id
), Qnil
);
6679 int mask
= detect_info
.rejected
| detect_info
.found
;
6683 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6685 category
= coding_priorities
[i
];
6686 if (! (mask
& (1 << category
)))
6688 found
|= 1 << category
;
6689 id
= coding_categories
[category
].id
;
6690 val
= Fcons (make_number (id
), val
);
6693 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6695 category
= coding_priorities
[i
];
6696 if (detect_info
.found
& (1 << category
))
6698 id
= coding_categories
[category
].id
;
6699 val
= Fcons (make_number (id
), val
);
6702 detect_info
.found
|= found
;
6707 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
6708 val
= Fcons (make_number (coding
.id
), Qnil
);
6711 /* Then, detect eol-format if necessary. */
6713 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
;
6716 if (VECTORP (eol_type
))
6718 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
6719 normal_eol
= detect_eol (coding
.source
, src_bytes
,
6720 coding_category_raw_text
);
6721 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
6722 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
6723 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
6724 coding_category_utf_16_be
);
6725 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
6726 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
6727 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
6728 coding_category_utf_16_le
);
6732 if (EQ (eol_type
, Qunix
))
6733 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
6734 else if (EQ (eol_type
, Qdos
))
6735 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
6737 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
6740 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
6742 enum coding_category category
;
6745 id
= XINT (XCAR (tail
));
6746 attrs
= CODING_ID_ATTRS (id
);
6747 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
6748 eol_type
= CODING_ID_EOL_TYPE (id
);
6749 if (VECTORP (eol_type
))
6751 if (category
== coding_category_utf_16_be
6752 || category
== coding_category_utf_16_be_nosig
)
6753 this_eol
= utf_16_be_eol
;
6754 else if (category
== coding_category_utf_16_le
6755 || category
== coding_category_utf_16_le_nosig
)
6756 this_eol
= utf_16_le_eol
;
6758 this_eol
= normal_eol
;
6760 if (this_eol
== EOL_SEEN_LF
)
6761 XSETCAR (tail
, AREF (eol_type
, 0));
6762 else if (this_eol
== EOL_SEEN_CRLF
)
6763 XSETCAR (tail
, AREF (eol_type
, 1));
6764 else if (this_eol
== EOL_SEEN_CR
)
6765 XSETCAR (tail
, AREF (eol_type
, 2));
6767 XSETCAR (tail
, CODING_ID_NAME (id
));
6770 XSETCAR (tail
, CODING_ID_NAME (id
));
6774 return (highest
? XCAR (val
) : val
);
6778 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
6780 doc
: /* Detect coding system of the text in the region between START and END.
6781 Return a list of possible coding systems ordered by priority.
6783 If only ASCII characters are found, it returns a list of single element
6784 `undecided' or its subsidiary coding system according to a detected
6787 If optional argument HIGHEST is non-nil, return the coding system of
6788 highest priority. */)
6789 (start
, end
, highest
)
6790 Lisp_Object start
, end
, highest
;
6793 int from_byte
, to_byte
;
6795 CHECK_NUMBER_COERCE_MARKER (start
);
6796 CHECK_NUMBER_COERCE_MARKER (end
);
6798 validate_region (&start
, &end
);
6799 from
= XINT (start
), to
= XINT (end
);
6800 from_byte
= CHAR_TO_BYTE (from
);
6801 to_byte
= CHAR_TO_BYTE (to
);
6803 if (from
< GPT
&& to
>= GPT
)
6804 move_gap_both (to
, to_byte
);
6806 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
6807 to_byte
- from_byte
,
6809 !NILP (current_buffer
6810 ->enable_multibyte_characters
),
6814 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
6816 doc
: /* Detect coding system of the text in STRING.
6817 Return a list of possible coding systems ordered by priority.
6819 If only ASCII characters are found, it returns a list of single element
6820 `undecided' or its subsidiary coding system according to a detected
6823 If optional argument HIGHEST is non-nil, return the coding system of
6824 highest priority. */)
6826 Lisp_Object string
, highest
;
6828 CHECK_STRING (string
);
6830 return detect_coding_system (XSTRING (string
)->data
,
6831 STRING_BYTES (XSTRING (string
)),
6833 STRING_MULTIBYTE (string
),
6839 char_encodable_p (c
, attrs
)
6844 struct charset
*charset
;
6846 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
6847 CONSP (tail
); tail
= XCDR (tail
))
6849 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
6850 if (CHAR_CHARSET_P (c
, charset
))
6853 return (! NILP (tail
));
6857 /* Return a list of coding systems that safely encode the text between
6858 START and END. If EXCLUDE is non-nil, it is a list of coding
6859 systems not to check. The returned list doesn't contain any such
6860 coding systems. In any case, if the text contains only ASCII or is
6861 unibyte, return t. */
6863 DEFUN ("find-coding-systems-region-internal",
6864 Ffind_coding_systems_region_internal
,
6865 Sfind_coding_systems_region_internal
, 2, 3, 0,
6866 doc
: /* Internal use only. */)
6867 (start
, end
, exclude
)
6868 Lisp_Object start
, end
, exclude
;
6870 Lisp_Object coding_attrs_list
, safe_codings
;
6871 EMACS_INT start_byte
, end_byte
;
6872 const unsigned char *p
, *pbeg
, *pend
;
6874 Lisp_Object tail
, elt
;
6876 if (STRINGP (start
))
6878 if (!STRING_MULTIBYTE (start
)
6879 || XSTRING (start
)->size
== STRING_BYTES (XSTRING (start
)))
6882 end_byte
= STRING_BYTES (XSTRING (start
));
6886 CHECK_NUMBER_COERCE_MARKER (start
);
6887 CHECK_NUMBER_COERCE_MARKER (end
);
6888 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6889 args_out_of_range (start
, end
);
6890 if (NILP (current_buffer
->enable_multibyte_characters
))
6892 start_byte
= CHAR_TO_BYTE (XINT (start
));
6893 end_byte
= CHAR_TO_BYTE (XINT (end
));
6894 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
6897 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
6899 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
6900 move_gap_both (XINT (start
), start_byte
);
6902 move_gap_both (XINT (end
), end_byte
);
6906 coding_attrs_list
= Qnil
;
6907 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6909 || NILP (Fmemq (XCAR (tail
), exclude
)))
6913 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
6914 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
6915 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
6916 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
6919 if (STRINGP (start
))
6920 p
= pbeg
= XSTRING (start
)->data
;
6922 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6923 pend
= p
+ (end_byte
- start_byte
);
6925 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
6926 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6930 if (ASCII_BYTE_P (*p
))
6934 c
= STRING_CHAR_ADVANCE (p
);
6936 charset_map_loaded
= 0;
6937 for (tail
= coding_attrs_list
; CONSP (tail
);)
6942 else if (char_encodable_p (c
, elt
))
6944 else if (CONSP (XCDR (tail
)))
6946 XSETCAR (tail
, XCAR (XCDR (tail
)));
6947 XSETCDR (tail
, XCDR (XCDR (tail
)));
6951 XSETCAR (tail
, Qnil
);
6955 if (charset_map_loaded
)
6957 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6959 if (STRINGP (start
))
6960 pbeg
= XSTRING (start
)->data
;
6962 pbeg
= BYTE_POS_ADDR (start_byte
);
6963 p
= pbeg
+ p_offset
;
6964 pend
= pbeg
+ pend_offset
;
6969 safe_codings
= Qnil
;
6970 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
6971 if (! NILP (XCAR (tail
)))
6972 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
6974 return safe_codings
;
6978 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
6979 Scheck_coding_systems_region
, 3, 3, 0,
6980 doc
: /* Check if the region is encodable by coding systems.
6982 START and END are buffer positions specifying the region.
6983 CODING-SYSTEM-LIST is a list of coding systems to check.
6985 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6986 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
6987 whole region, POS0, POS1, ... are buffer positions where non-encodable
6988 characters are found.
6990 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6993 START may be a string. In that case, check if the string is
6994 encodable, and the value contains indices to the string instead of
6995 buffer positions. END is ignored. */)
6996 (start
, end
, coding_system_list
)
6997 Lisp_Object start
, end
, coding_system_list
;
7000 EMACS_INT start_byte
, end_byte
;
7002 const unsigned char *p
, *pbeg
, *pend
;
7004 Lisp_Object tail
, elt
;
7006 if (STRINGP (start
))
/* NOTE(review): this guard uses `&&' and `!=', whereas the parallel
   guard in Ffind_coding_systems_region_internal uses `||' and `=='.
   For a string that is not multibyte the character count and byte
   count presumably never differ, which would make this branch
   unreachable -- confirm and align the two functions.  */
7008 if (!STRING_MULTIBYTE (start
)
7009 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
7012 end_byte
= STRING_BYTES (XSTRING (start
));
7017 CHECK_NUMBER_COERCE_MARKER (start
);
7018 CHECK_NUMBER_COERCE_MARKER (end
);
7019 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7020 args_out_of_range (start
, end
);
7021 if (NILP (current_buffer
->enable_multibyte_characters
))
7023 start_byte
= CHAR_TO_BYTE (XINT (start
));
7024 end_byte
= CHAR_TO_BYTE (XINT (end
));
7025 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* If the region straddles the gap, move the gap to the nearer end of
   the region so the bytes scanned below are contiguous in memory.  */
7028 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7030 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7031 move_gap_both (XINT (start
), start_byte
);
7033 move_gap_both (XINT (end
), end_byte
);
/* Build the working list: one (CODING-SYSTEM ATTRS ...) entry per
   element of CODING-SYSTEM-LIST.  */
7039 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7042 list
= Fcons (Fcons (elt
, Fcons (AREF (CODING_SYSTEM_SPEC (elt
), 0),
7047 if (STRINGP (start
))
7048 p
= pbeg
= XSTRING (start
)->data
;
7050 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7051 pend
= p
+ (end_byte
- start_byte
);
/* Skip the leading and trailing runs of ASCII bytes.  */
7053 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7054 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7058 if (ASCII_BYTE_P (*p
))
7062 c
= STRING_CHAR_ADVANCE (p
);
7064 charset_map_loaded
= 0;
/* Record POS on every entry whose coding system can't encode C.  */
7065 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
7067 elt
= XCDR (XCAR (tail
));
7068 if (! char_encodable_p (c
, XCAR (elt
)))
7069 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
/* charset_map_loaded was set while checking C, so the text may have
   been relocated; recompute the pointers from their saved offsets.  */
7071 if (charset_map_loaded
)
7073 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7075 if (STRINGP (start
))
7076 pbeg
= XSTRING (start
)->data
;
7078 pbeg
= BYTE_POS_ADDR (start_byte
);
7079 p
= pbeg
+ p_offset
;
7080 pend
= pbeg
+ pend_offset
;
/* Keep only the entries that collected at least one position, with
   the positions put back into ascending order.  */
7088 for (; CONSP (tail
); tail
= XCDR (tail
))
7091 if (CONSP (XCDR (XCDR (elt
))))
7092 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
7102 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7103 Lisp_Object start
, end
, coding_system
, dst_object
;
7104 int encodep
, norecord
;
7106 struct coding_system coding
;
7107 EMACS_INT from
, from_byte
, to
, to_byte
;
7108 Lisp_Object src_object
;
7110 CHECK_NUMBER_COERCE_MARKER (start
);
7111 CHECK_NUMBER_COERCE_MARKER (end
);
7112 if (NILP (coding_system
))
7113 coding_system
= Qno_conversion
;
7115 CHECK_CODING_SYSTEM (coding_system
);
7116 src_object
= Fcurrent_buffer ();
7117 if (NILP (dst_object
))
7118 dst_object
= src_object
;
7119 else if (! EQ (dst_object
, Qt
))
7120 CHECK_BUFFER (dst_object
);
7122 validate_region (&start
, &end
);
7123 from
= XFASTINT (start
);
7124 from_byte
= CHAR_TO_BYTE (from
);
7125 to
= XFASTINT (end
);
7126 to_byte
= CHAR_TO_BYTE (to
);
7128 setup_coding_system (coding_system
, &coding
);
7129 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7132 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7135 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7138 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7140 if (coding
.result
!= CODING_RESULT_SUCCESS
)
7141 error ("Code conversion error: %d", coding
.result
);
7143 return (BUFFERP (dst_object
)
7144 ? make_number (coding
.produced_char
)
7145 : coding
.dst_object
);
7149 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7150 3, 4, "r\nzCoding system: ",
7151 doc
: /* Decode the current region from the specified coding system.
7152 When called from a program, takes four arguments:
7153 START, END, CODING-SYSTEM, and DESTINATION.
7154 START and END are buffer positions.
7156 Optional 4th argument DESTINATION specifies where the decoded text goes.
7157 If nil, the region between START and END is replaced by the decoded text.
7158 If buffer, the decoded text is inserted in the buffer.
7159 If t, the decoded text is returned.
7161 This function sets `last-coding-system-used' to the precise coding system
7162 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7163 not fully specified.)
7164 Unless DESTINATION is t, it returns the length of the decoded text. */)
7165 (start
, end
, coding_system
, destination
)
7166 Lisp_Object start
, end
, coding_system
, destination
;
/* ENCODEP = 0 selects decoding; NORECORD = 0.  */
7168 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
7171 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7172 3, 4, "r\nzCoding system: ",
7173 doc
: /* Encode the current region by specified coding system.
7174 When called from a program, takes four arguments:
7175 START, END, CODING-SYSTEM, and DESTINATION. START and END are buffer positions.
7177 Optional 4th argument DESTINATION specifies where the encoded text goes.
7178 If nil, the region between START and END is replaced by the encoded text.
7179 If buffer, the encoded text is inserted in the buffer.
7180 If t, the encoded text is returned.
7182 This function sets `last-coding-system-used' to the precise coding system
7183 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7184 not fully specified.)
7185 Unless DESTINATION is t, it returns the length of the encoded text. */)
7186 (start
, end
, coding_system
, destination
)
7187 Lisp_Object start
, end
, coding_system
, destination
;
/* ENCODEP = 1 selects encoding; NORECORD = 0.  */
7189 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
7193 code_convert_string (string
, coding_system
, dst_object
,
7194 encodep
, nocopy
, norecord
)
7195 Lisp_Object string
, coding_system
, dst_object
;
7196 int encodep
, nocopy
, norecord
;
7198 struct coding_system coding
;
7199 EMACS_INT chars
, bytes
;
7201 CHECK_STRING (string
);
7202 if (NILP (coding_system
))
7205 Vlast_coding_system_used
= Qno_conversion
;
7206 if (NILP (dst_object
))
7207 return (nocopy
? Fcopy_sequence (string
) : string
);
7210 if (NILP (coding_system
))
7211 coding_system
= Qno_conversion
;
7213 CHECK_CODING_SYSTEM (coding_system
);
7214 if (NILP (dst_object
))
7216 else if (! EQ (dst_object
, Qt
))
7217 CHECK_BUFFER (dst_object
);
7219 setup_coding_system (coding_system
, &coding
);
7220 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7221 chars
= XSTRING (string
)->size
;
7222 bytes
= STRING_BYTES (XSTRING (string
));
7224 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7226 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7228 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7230 if (coding
.result
!= CODING_RESULT_SUCCESS
)
7231 error ("Code conversion error: %d", coding
.result
);
7233 return (BUFFERP (dst_object
)
7234 ? make_number (coding
.produced_char
)
7235 : coding
.dst_object
);
7239 /* Encode or decode STRING according to CODING_SYSTEM.
7240 Do not set Vlast_coding_system_used.
7242 This function is called only from macros DECODE_FILE and
7243 ENCODE_FILE, thus we ignore character composition. */
7246 code_convert_string_norecord (string
, coding_system
, encodep
)
7247 Lisp_Object string
, coding_system
;
7250 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
7254 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
7256 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7258 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7259 if the decoding operation is trivial.
7261 Optional fourth arg BUFFER non-nil means that the decoded text is
7262 inserted in BUFFER instead of returned as a string. In this case,
7263 the return value is the length of the decoded text.
7265 This function sets `last-coding-system-used' to the precise coding system
7266 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7267 not fully specified.) */)
7268 (string
, coding_system
, nocopy
, buffer
)
7269 Lisp_Object string
, coding_system
, nocopy
, buffer
;
/* ENCODEP = 0 selects decoding; NORECORD = 0.  */
7271 return code_convert_string (string
, coding_system
, buffer
,
7272 0, ! NILP (nocopy
), 0);
7275 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
7277 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
7279 Optional third arg NOCOPY non-nil means it is OK to return STRING
7280 itself if the encoding operation is trivial.
7282 Optional fourth arg BUFFER non-nil meant that the encoded text is
7283 inserted in BUFFER instead of returned as a string. In this case,
7284 the return value is BUFFER.
7286 This function sets `last-coding-system-used' to the precise coding system
7287 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7288 not fully specified.) */)
7289 (string
, coding_system
, nocopy
, buffer
)
7290 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7292 return code_convert_string (string
, coding_system
, buffer
,
7293 1, ! NILP (nocopy
), 1);
7297 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
7298 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
7299 Return the corresponding character. */)
7303 Lisp_Object spec
, attrs
, val
;
7304 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
7307 CHECK_NATNUM (code
);
7308 c
= XFASTINT (code
);
7309 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7310 attrs
= AREF (spec
, 0);
7312 if (ASCII_BYTE_P (c
)
7313 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7316 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7317 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7318 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7319 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7322 charset
= charset_roman
;
7323 else if (c
>= 0xA0 && c
< 0xDF)
7325 charset
= charset_kana
;
7330 int s1
= c
>> 8, s2
= c
& 0xFF;
7332 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
7333 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
7334 error ("Invalid code: %d", code
);
7336 charset
= charset_kanji
;
7338 c
= DECODE_CHAR (charset
, c
);
7340 error ("Invalid code: %d", code
);
7341 return make_number (c
);
7345 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
7346 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
7347 Return the corresponding code in SJIS. */)
7351 Lisp_Object spec
, attrs
, charset_list
;
7353 struct charset
*charset
;
7356 CHECK_CHARACTER (ch
);
7358 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7359 attrs
= AREF (spec
, 0);
7361 if (ASCII_CHAR_P (c
)
7362 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7365 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7366 charset
= char_charset (c
, charset_list
, &code
);
7367 if (code
== CHARSET_INVALID_CODE (charset
))
7368 error ("Can't encode by shift_jis encoding: %d", c
);
7371 return make_number (code
);
/* Lisp primitive `decode-big5-char' (exactly one argument).
   CODE must be a non-negative integer.  The spec of
   Vbig5_coding_system supplies a charset list whose first element is
   used as the "roman" charset and whose second element is used as the
   Big5 charset.  A two-byte code is validated byte by byte before
   DECODE_CHAR is applied; invalid codes signal an error.  The decoded
   character is returned as a Lisp integer.  */
7374 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
7375 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
7376 Return the corresponding character. */)
7380 Lisp_Object spec
, attrs
, val
;
7381 struct charset
*charset_roman
, *charset_big5
, *charset
;
7384 CHECK_NATNUM (code
);
7385 c
= XFASTINT (code
);
7386 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7387 attrs
= AREF (spec
, 0);
/* Special case: an ASCII byte under an ASCII-compatible coding
   system.  */
7389 if (ASCII_BYTE_P (c
)
7390 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7393 val
= CODING_ATTR_CHARSET_LIST (attrs
);
/* First list element: roman charset; second: Big5 charset.  */
7394 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7395 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7398 charset
= charset_roman
;
/* B1 is the lead byte and B2 the trail byte of the two-byte code.
   B2 must keep all eight low bits: the range test below accepts
   0x40..0x7E and 0xA1..0xFE, and a 7-bit mask would fold 0xA1..0xFE
   onto 0x21..0x7E, rejecting valid trail bytes 0xA1..0xBF and making
   the upper-range comparisons unreachable.  */
7401 int b1
= c
>> 8, b2
= c
& 0xFF;
7402 if (b1
< 0xA1 || b1
> 0xFE
7403 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
7404 error ("Invalid code: %d", code
);
7405 charset
= charset_big5
;
7407 c
= DECODE_CHAR (charset
, (unsigned )c
);
7409 error ("Invalid code: %d", code
);
7410 return make_number (c
);
/* Lisp primitive `encode-big5-char' (exactly one argument).
   The argument is validated with CHECK_CHARACTER.  The spec of
   Vbig5_coding_system is fetched and its attribute vector (slot 0)
   supplies the charset list that char_charset searches.  An error is
   signaled when the resulting code equals the charset's invalid code;
   otherwise CODE is returned as a Lisp integer.  */
7413 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
7414 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
7415 Return the corresponding character code in Big5. */)
7419 Lisp_Object spec
, attrs
, charset_list
;
7420 struct charset
*charset
;
7424 CHECK_CHARACTER (ch
);
7426 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7427 attrs
= AREF (spec
, 0);
/* Special case: an ASCII character under an ASCII-compatible coding
   system.  */
7428 if (ASCII_CHAR_P (c
)
7429 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7432 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
/* char_charset yields the charset and stores the code point in CODE.  */
7433 charset
= char_charset (c
, charset_list
, &code
);
7434 if (code
== CHARSET_INVALID_CODE (charset
))
7435 error ("Can't encode by Big5 encoding: %d", c
);
7437 return make_number (code
);
/* Lisp primitive `set-terminal-coding-system-internal' (one argument).
   CODING-SYSTEM must be a symbol; it is passed through
   Fcheck_coding_system and handed to setup_coding_system.  Afterwards
   terminal_coding is adjusted: safe-encoding mode is switched on,
   the composition-annotation flag is cleared, and the source is marked
   multibyte while the destination is marked unibyte.  */
7441 DEFUN ("set-terminal-coding-system-internal",
7442 Fset_terminal_coding_system_internal
,
7443 Sset_terminal_coding_system_internal
, 1, 1, 0,
7444 doc
: /* Internal use only. */)
7446 Lisp_Object coding_system
;
7448 CHECK_SYMBOL (coding_system
);
7449 setup_coding_system (Fcheck_coding_system (coding_system
),
7452 /* We had better not send unsafe characters to terminal. */
7453 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
7454 /* Character composition should be disabled. */
7455 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7456 terminal_coding
.src_multibyte
= 1;
7457 terminal_coding
.dst_multibyte
= 0;
/* Lisp primitive `set-safe-terminal-coding-system-internal' (one
   argument).  CODING-SYSTEM must be a symbol; after
   Fcheck_coding_system it is used to set up safe_terminal_coding.
   The composition-annotation flag is then cleared, the source is
   marked multibyte and the destination unibyte.  */
7461 DEFUN ("set-safe-terminal-coding-system-internal",
7462 Fset_safe_terminal_coding_system_internal
,
7463 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
7464 doc
: /* Internal use only. */)
7466 Lisp_Object coding_system
;
7468 CHECK_SYMBOL (coding_system
);
7469 setup_coding_system (Fcheck_coding_system (coding_system
),
7470 &safe_terminal_coding
);
7471 /* Character composition should be disabled. */
7472 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7473 safe_terminal_coding
.src_multibyte
= 1;
7474 safe_terminal_coding
.dst_multibyte
= 0;
/* Lisp primitive `terminal-coding-system' (no arguments).
   Returns the name recorded for terminal_coding.id.  */
7478 DEFUN ("terminal-coding-system",
7479 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
7480 doc
: /* Return coding system specified for terminal output. */)
7483 return CODING_ID_NAME (terminal_coding
.id
);
/* Lisp primitive `set-keyboard-coding-system-internal' (one argument).
   CODING-SYSTEM must be a symbol; it is passed through
   Fcheck_coding_system and handed to setup_coding_system.  The
   composition-annotation flag of keyboard_coding is then cleared.  */
7486 DEFUN ("set-keyboard-coding-system-internal",
7487 Fset_keyboard_coding_system_internal
,
7488 Sset_keyboard_coding_system_internal
, 1, 1, 0,
7489 doc
: /* Internal use only. */)
7491 Lisp_Object coding_system
;
7493 CHECK_SYMBOL (coding_system
);
7494 setup_coding_system (Fcheck_coding_system (coding_system
),
7496 /* Character composition should be disabled. */
7497 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* Lisp primitive `keyboard-coding-system' (no arguments).
   Returns the name recorded for keyboard_coding.id.  */
7501 DEFUN ("keyboard-coding-system",
7502 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
7503 doc
: /* Return coding system specified for decoding keyboard input. */)
7506 return CODING_ID_NAME (keyboard_coding
.id
);
/* Lisp primitive `find-operation-coding-system' (one or more
   arguments).  ARGS[0] is the operation symbol; its `target-idx'
   property selects which of the remaining arguments is the TARGET.
   The matching alist (file, network, or process) is then scanned for
   an entry whose key matches TARGET, and the associated value is
   turned into a (DECODING . ENCODING) pair.  Errors are signaled for
   a non-symbol operation, a missing or non-integer `target-idx', too
   few arguments, or a TARGET of the wrong type.  */
7510 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7511 Sfind_operation_coding_system
, 1, MANY
, 0,
7512 doc
: /* Choose a coding system for an operation based on the target name.
7513 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7514 DECODING-SYSTEM is the coding system to use for decoding
7515 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7516 for encoding (in case OPERATION does encoding).
7518 The first argument OPERATION specifies an I/O primitive:
7519 For file I/O, `insert-file-contents' or `write-region'.
7520 For process I/O, `call-process', `call-process-region', or `start-process'.
7521 For network I/O, `open-network-stream'.
7523 The remaining arguments should be the same arguments that were passed
7524 to the primitive. Depending on which primitive, one of those arguments
7525 is selected as the TARGET. For example, if OPERATION does file I/O,
7526 whichever argument specifies the file name is TARGET.
7528 TARGET has a meaning which depends on OPERATION:
7529 For file I/O, TARGET is a file name.
7530 For process I/O, TARGET is a process name.
7531 For network I/O, TARGET is a service name or a port number
7533 This function looks up what is specified for TARGET in
7534 `file-coding-system-alist', `process-coding-system-alist',
7535 or `network-coding-system-alist' depending on OPERATION.
7536 They may specify a coding system, a cons of coding systems,
7537 or a function symbol to call.
7538 In the last case, we call the function with one argument,
7539 which is a list of all the arguments given to this function.
7541 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7546 Lisp_Object operation
, target_idx
, target
, val
;
7547 register Lisp_Object chain
;
7550 error ("Too few arguments");
7551 operation
= args
[0];
7552 if (!SYMBOLP (operation
)
7553 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7554 error ("Invalid first argument");
/* TARGET is read from ARGS[target_idx + 1], so NARGS must be at least
   target_idx + 2; `<=' rejects the case NARGS == target_idx + 1,
   which would otherwise index one element past the end of ARGS.  */
7555 if (nargs
<= 1 + XINT (target_idx
))
7556 error ("Too few arguments for operation: %s",
7557 XSYMBOL (operation
)->name
->data
);
7558 target
= args
[XINT (target_idx
) + 1];
/* TARGET must be a string, except that `open-network-stream' also
   accepts an integer.  */
7559 if (!(STRINGP (target
)
7560 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7561 error ("Invalid %dth argument", XINT (target_idx
) + 1);
/* Pick the alist by operation: file operations use the file alist,
   `open-network-stream' the network alist, everything else the
   process alist.  */
7563 chain
= ((EQ (operation
, Qinsert_file_contents
)
7564 || EQ (operation
, Qwrite_region
))
7565 ? Vfile_coding_system_alist
7566 : (EQ (operation
, Qopen_network_stream
)
7567 ? Vnetwork_coding_system_alist
7568 : Vprocess_coding_system_alist
));
7572 for (; CONSP (chain
); chain
= XCDR (chain
))
/* An entry matches when its key is a regexp string matching a string
   TARGET, or is EQ to an integer TARGET.  */
7578 && ((STRINGP (target
)
7579 && STRINGP (XCAR (elt
))
7580 && fast_string_match (XCAR (elt
), target
) >= 0)
7581 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7584 /* Here, if VAL is both a valid coding system and a valid
7585 function symbol, we return VAL as a coding system. */
7588 if (! SYMBOLP (val
))
7590 if (! NILP (Fcoding_system_p (val
)))
7591 return Fcons (val
, val
);
7592 if (! NILP (Ffboundp (val
)))
/* Call the function with the full argument list as a single list.  */
7594 val
= call1 (val
, Flist (nargs
, args
));
7597 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
7598 return Fcons (val
, val
);
/* Lisp primitive `set-coding-system-priority' (any number of
   arguments).  Each argument must be a coding system.  The first
   argument seen for a given category marks that category in CHANGED
   and appends it to PRIORITIES; later arguments of the same category
   are ignored.  When the category slot already holds an id >= 0 that
   names a different coding system, the slot is set up again from the
   argument.  The rest of PRIORITIES is then filled from the existing
   coding_priorities order, skipping categories already placed, and
   the result is copied back over coding_priorities.  */
7606 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
7607 Sset_coding_system_priority
, 0, MANY
, 0,
7608 doc
: /* Assign higher priority to the coding systems given as arguments.
7609 usage: (set-coding-system-priority CODING-SYSTEM ...) */)
/* CHANGED[cat] is nonzero once category CAT has been given a slot in
   PRIORITIES.  */
7615 int changed
[coding_category_max
];
7616 enum coding_category priorities
[coding_category_max
];
7618 bzero (changed
, sizeof changed
);
/* I walks the arguments; J counts the categories placed so far.  */
7620 for (i
= j
= 0; i
< nargs
; i
++)
7622 enum coding_category category
;
7623 Lisp_Object spec
, attrs
;
7625 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
7626 attrs
= AREF (spec
, 0);
7627 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7628 if (changed
[category
])
7629 /* Ignore this coding system because a coding system of the
7630 same category already had a higher priority. */
7632 changed
[category
] = 1;
7633 priorities
[j
++] = category
;
7634 if (coding_categories
[category
].id
>= 0
7635 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
7636 setup_coding_system (args
[i
], &coding_categories
[category
]);
7639 /* Now we have decided top J priorities. Reflect the order of the
7640 original priorities to the remaining priorities. */
/* From here I indexes the next free slot of PRIORITIES and J scans
   the old coding_priorities array.  */
7642 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
7644 while (j
< coding_category_max
7645 && changed
[coding_priorities
[j
]])
7647 if (j
== coding_category_max
)
7649 priorities
[i
] = coding_priorities
[j
];
7652 bcopy (priorities
, coding_priorities
, sizeof priorities
);
/* Lisp primitive `coding-system-priority-list' (zero or one argument).
   Walks coding_priorities from index 0 upward, looking up the coding
   system id stored for each category.  With non-nil HIGHESTP the base
   name of the first one reached is returned at once; otherwise the
   base names are consed onto VAL and the list is reversed before
   being returned, so it ends up in priority order.  */
7656 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
7657 Scoding_system_priority_list
, 0, 1, 0,
7658 doc
: /* Return a list of coding systems ordered by their priorities.
7659 HIGHESTP non-nil means just return the highest priority one. */)
7661 Lisp_Object highestp
;
7666 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
7668 enum coding_category category
= coding_priorities
[i
];
7669 int id
= coding_categories
[category
].id
;
7674 attrs
= CODING_ID_ATTRS (id
);
7675 if (! NILP (highestp
))
7676 return CODING_ATTR_BASE_NAME (attrs
);
7677 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
/* VAL was built by prepending, so reverse it.  */
7679 return Fnreverse (val
);
/* Name suffixes appended to a base coding system name, indexed 0..2.  */
7682 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
/* Return a 3-element vector of symbols whose names are the name of
   the symbol BASE followed by "-unix", "-dos" and "-mac" respectively
   (see `suffixes').  The names are interned with `intern'.  */
7685 make_subsidiaries (base
)
7688 Lisp_Object subsidiaries
;
7689 int base_name_len
= STRING_BYTES (XSYMBOL (base
)->name
);
/* 6 extra bytes: room for the longest suffix ("-unix", 5 bytes) plus
   the terminating NUL that is copied along with it.  */
7690 char *buf
= (char *) alloca (base_name_len
+ 6);
7693 bcopy (XSYMBOL (base
)->name
->data
, buf
, base_name_len
);
7694 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
7695 for (i
= 0; i
< 3; i
++)
/* Overwrite the tail of BUF with the Ith suffix, NUL included.  */
7697 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
7698 ASET (subsidiaries
, i
, intern (buf
));
7700 return subsidiaries
;
7704 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
7705 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
7706 doc
: /* For internal use only.
7707 usage: (define-coding-system-internal ...) */)
7713 Lisp_Object spec_vec
; /* [ ATTRS ALIASES EOL_TYPE ] */
7714 Lisp_Object attrs
; /* Vector of attributes. */
7715 Lisp_Object eol_type
;
7716 Lisp_Object aliases
;
7717 Lisp_Object coding_type
, charset_list
, safe_charsets
;
7718 enum coding_category category
;
7719 Lisp_Object tail
, val
;
7720 int max_charset_id
= 0;
7723 if (nargs
< coding_arg_max
)
7726 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
7728 name
= args
[coding_arg_name
];
7729 CHECK_SYMBOL (name
);
7730 CODING_ATTR_BASE_NAME (attrs
) = name
;
7732 val
= args
[coding_arg_mnemonic
];
7733 if (! STRINGP (val
))
7734 CHECK_CHARACTER (val
);
7735 CODING_ATTR_MNEMONIC (attrs
) = val
;
7737 coding_type
= args
[coding_arg_coding_type
];
7738 CHECK_SYMBOL (coding_type
);
7739 CODING_ATTR_TYPE (attrs
) = coding_type
;
7741 charset_list
= args
[coding_arg_charset_list
];
7742 if (SYMBOLP (charset_list
))
7744 if (EQ (charset_list
, Qiso_2022
))
7746 if (! EQ (coding_type
, Qiso_2022
))
7747 error ("Invalid charset-list");
7748 charset_list
= Viso_2022_charset_list
;
7750 else if (EQ (charset_list
, Qemacs_mule
))
7752 if (! EQ (coding_type
, Qemacs_mule
))
7753 error ("Invalid charset-list");
7754 charset_list
= Vemacs_mule_charset_list
;
7756 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7757 if (max_charset_id
< XFASTINT (XCAR (tail
)))
7758 max_charset_id
= XFASTINT (XCAR (tail
));
7762 charset_list
= Fcopy_sequence (charset_list
);
7763 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
7765 struct charset
*charset
;
7768 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7769 if (EQ (coding_type
, Qiso_2022
)
7770 ? CHARSET_ISO_FINAL (charset
) < 0
7771 : EQ (coding_type
, Qemacs_mule
)
7772 ? CHARSET_EMACS_MULE_ID (charset
) < 0
7774 error ("Can't handle charset `%s'",
7775 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7777 XCAR (tail
) = make_number (charset
->id
);
7778 if (max_charset_id
< charset
->id
)
7779 max_charset_id
= charset
->id
;
7782 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
7784 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
7786 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7787 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
7788 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
7790 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
7792 val
= args
[coding_arg_decode_translation_table
];
7794 CHECK_CHAR_TABLE (val
);
7795 CODING_ATTR_DECODE_TBL (attrs
) = val
;
7797 val
= args
[coding_arg_encode_translation_table
];
7799 CHECK_CHAR_TABLE (val
);
7800 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
7802 val
= args
[coding_arg_post_read_conversion
];
7804 CODING_ATTR_POST_READ (attrs
) = val
;
7806 val
= args
[coding_arg_pre_write_conversion
];
7808 CODING_ATTR_PRE_WRITE (attrs
) = val
;
7810 val
= args
[coding_arg_default_char
];
7812 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
7815 CHECK_CHARACTER (val
);
7816 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
7819 val
= args
[coding_arg_plist
];
7821 CODING_ATTR_PLIST (attrs
) = val
;
7823 if (EQ (coding_type
, Qcharset
))
7825 /* Generate a lisp vector of 256 elements. Each element is nil,
7826 integer, or a list of charset IDs.
7828 If Nth element is nil, the byte code N is invalid in this
7831 If Nth element is a number NUM, N is the first byte of a
7832 charset whose ID is NUM.
7834 If Nth element is a list of charset IDs, N is the first byte
7835 of one of them. The list is sorted by dimensions of the
7836 charsets. A charset of smaller dimension comes first.
7838 val
= Fmake_vector (make_number (256), Qnil
);
7840 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7842 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
7843 int dim
= CHARSET_DIMENSION (charset
);
7844 int idx
= (dim
- 1) * 4;
7846 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
7847 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
7849 for (i
= charset
->code_space
[idx
];
7850 i
<= charset
->code_space
[idx
+ 1]; i
++)
7852 Lisp_Object tmp
, tmp2
;
7855 tmp
= AREF (val
, i
);
7858 else if (NUMBERP (tmp
))
7860 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
7862 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
7864 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
7868 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
7870 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
7875 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
7878 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
7879 XSETCAR (tmp2
, XCAR (tail
));
7885 ASET (attrs
, coding_attr_charset_valids
, val
);
7886 category
= coding_category_charset
;
7888 else if (EQ (coding_type
, Qccl
))
7892 if (nargs
< coding_arg_ccl_max
)
7895 val
= args
[coding_arg_ccl_decoder
];
7896 CHECK_CCL_PROGRAM (val
);
7898 val
= Fcopy_sequence (val
);
7899 ASET (attrs
, coding_attr_ccl_decoder
, val
);
7901 val
= args
[coding_arg_ccl_encoder
];
7902 CHECK_CCL_PROGRAM (val
);
7904 val
= Fcopy_sequence (val
);
7905 ASET (attrs
, coding_attr_ccl_encoder
, val
);
7907 val
= args
[coding_arg_ccl_valids
];
7908 valids
= Fmake_string (make_number (256), make_number (0));
7909 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
7916 from
= to
= XINT (val
);
7917 if (from
< 0 || from
> 255)
7918 args_out_of_range_3 (val
, make_number (0), make_number (255));
7923 CHECK_NUMBER (XCAR (val
));
7924 CHECK_NUMBER (XCDR (val
));
7925 from
= XINT (XCAR (val
));
7926 if (from
< 0 || from
> 255)
7927 args_out_of_range_3 (XCAR (val
),
7928 make_number (0), make_number (255));
7929 to
= XINT (XCDR (val
));
7930 if (to
< from
|| to
> 255)
7931 args_out_of_range_3 (XCDR (val
),
7932 XCAR (val
), make_number (255));
7934 for (i
= from
; i
<= to
; i
++)
7935 XSTRING (valids
)->data
[i
] = 1;
7937 ASET (attrs
, coding_attr_ccl_valids
, valids
);
7939 category
= coding_category_ccl
;
7941 else if (EQ (coding_type
, Qutf_16
))
7943 Lisp_Object bom
, endian
;
7945 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
7947 if (nargs
< coding_arg_utf16_max
)
7950 bom
= args
[coding_arg_utf16_bom
];
7951 if (! NILP (bom
) && ! EQ (bom
, Qt
))
7954 CHECK_CODING_SYSTEM (XCAR (bom
));
7955 CHECK_CODING_SYSTEM (XCDR (bom
));
7957 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
7959 endian
= args
[coding_arg_utf16_endian
];
7960 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
7962 category
= (CONSP (bom
)
7963 ? coding_category_utf_16_auto
7966 ? coding_category_utf_16_be_nosig
7967 : coding_category_utf_16_le_nosig
)
7969 ? coding_category_utf_16_be
7970 : coding_category_utf_16_le
));
7972 else if (EQ (coding_type
, Qiso_2022
))
7974 Lisp_Object initial
, reg_usage
, request
, flags
;
7977 if (nargs
< coding_arg_iso2022_max
)
7980 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
7981 CHECK_VECTOR (initial
);
7982 for (i
= 0; i
< 4; i
++)
7984 val
= Faref (initial
, make_number (i
));
7987 struct charset
*charset
;
7989 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7990 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
7991 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
7992 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
7995 ASET (initial
, i
, make_number (-1));
7998 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
7999 CHECK_CONS (reg_usage
);
8000 CHECK_NATNUM (XCAR (reg_usage
));
8001 CHECK_NATNUM (XCDR (reg_usage
));
8003 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8004 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8010 CHECK_CHARSET_GET_ID (XCAR (val
), id
);
8011 CHECK_NATNUM (XCDR (val
));
8012 if (XINT (XCDR (val
)) >= 4)
8013 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8014 XCAR (val
) = make_number (id
);
8017 flags
= args
[coding_arg_iso2022_flags
];
8018 CHECK_NATNUM (flags
);
8020 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8021 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8023 ASET (attrs
, coding_attr_iso_initial
, initial
);
8024 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8025 ASET (attrs
, coding_attr_iso_request
, request
);
8026 ASET (attrs
, coding_attr_iso_flags
, flags
);
8027 setup_iso_safe_charsets (attrs
);
8029 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8030 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8031 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8032 ? coding_category_iso_7_else
8033 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8034 ? coding_category_iso_7
8035 : coding_category_iso_7_tight
);
8038 int id
= XINT (AREF (initial
, 1));
8040 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8041 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8043 ? coding_category_iso_8_else
8044 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8045 ? coding_category_iso_8_1
8046 : coding_category_iso_8_2
);
8048 if (category
!= coding_category_iso_8_1
8049 && category
!= coding_category_iso_8_2
)
8050 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8052 else if (EQ (coding_type
, Qemacs_mule
))
8054 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8055 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8056 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8057 category
= coding_category_emacs_mule
;
8059 else if (EQ (coding_type
, Qshift_jis
))
8062 struct charset
*charset
;
8064 if (XINT (Flength (charset_list
)) != 3)
8065 error ("There should be just three charsets");
8067 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8068 if (CHARSET_DIMENSION (charset
) != 1)
8069 error ("Dimension of charset %s is not one",
8070 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8071 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8072 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8074 charset_list
= XCDR (charset_list
);
8075 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8076 if (CHARSET_DIMENSION (charset
) != 1)
8077 error ("Dimension of charset %s is not one",
8078 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8080 charset_list
= XCDR (charset_list
);
8081 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8082 if (CHARSET_DIMENSION (charset
) != 2)
8083 error ("Dimension of charset %s is not two",
8084 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8086 category
= coding_category_sjis
;
8087 Vsjis_coding_system
= name
;
8089 else if (EQ (coding_type
, Qbig5
))
8091 struct charset
*charset
;
8093 if (XINT (Flength (charset_list
)) != 2)
8094 error ("There should be just two charsets");
8096 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8097 if (CHARSET_DIMENSION (charset
) != 1)
8098 error ("Dimension of charset %s is not one",
8099 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8100 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8101 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8103 charset_list
= XCDR (charset_list
);
8104 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8105 if (CHARSET_DIMENSION (charset
) != 2)
8106 error ("Dimension of charset %s is not two",
8107 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
8109 category
= coding_category_big5
;
8110 Vbig5_coding_system
= name
;
8112 else if (EQ (coding_type
, Qraw_text
))
8114 category
= coding_category_raw_text
;
8115 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8117 else if (EQ (coding_type
, Qutf_8
))
8119 category
= coding_category_utf_8
;
8120 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8122 else if (EQ (coding_type
, Qundecided
))
8123 category
= coding_category_undecided
;
8125 error ("Invalid coding system type: %s",
8126 XSYMBOL (coding_type
)->name
->data
);
8128 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8130 eol_type
= args
[coding_arg_eol_type
];
8131 if (! NILP (eol_type
)
8132 && ! EQ (eol_type
, Qunix
)
8133 && ! EQ (eol_type
, Qdos
)
8134 && ! EQ (eol_type
, Qmac
))
8135 error ("Invalid eol-type");
8137 aliases
= Fcons (name
, Qnil
);
8139 if (NILP (eol_type
))
8141 eol_type
= make_subsidiaries (name
);
8142 for (i
= 0; i
< 3; i
++)
8144 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8146 this_name
= AREF (eol_type
, i
);
8147 this_aliases
= Fcons (this_name
, Qnil
);
8148 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
8149 this_spec
= Fmake_vector (make_number (3), attrs
);
8150 ASET (this_spec
, 1, this_aliases
);
8151 ASET (this_spec
, 2, this_eol_type
);
8152 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8153 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8154 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8155 Vcoding_system_alist
);
8159 spec_vec
= Fmake_vector (make_number (3), attrs
);
8160 ASET (spec_vec
, 1, aliases
);
8161 ASET (spec_vec
, 2, eol_type
);
8163 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
8164 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
8165 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
8166 Vcoding_system_alist
);
8169 int id
= coding_categories
[category
].id
;
8171 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
8172 setup_coding_system (name
, &coding_categories
[category
]);
8178 return Fsignal (Qwrong_number_of_arguments
,
8179 Fcons (intern ("define-coding-system-internal"),
8180 make_number (nargs
)));
/* Lisp primitive `define-coding-system-alias' (two arguments).
   ALIAS must be a symbol and CODING-SYSTEM a coding system.  ALIAS is
   appended to the end of the alias list held in slot 1 of the coding
   system's spec.  When slot 2 (the eol-type) is a vector, the three
   subsidiary names made from ALIAS are themselves defined as aliases
   of the corresponding elements of that vector.  Finally the spec is
   stored in Vcoding_system_hash_table under ALIAS, and ALIAS's name
   is pushed onto Vcoding_system_alist.  */
8183 /* Fixme: should this record the alias relationships for
8184 diagnostics? Should it update coding-system-list? */
8185 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
8186 Sdefine_coding_system_alias
, 2, 2, 0,
8187 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8188 (alias
, coding_system
)
8189 Lisp_Object alias
, coding_system
;
8191 Lisp_Object spec
, aliases
, eol_type
;
8193 CHECK_SYMBOL (alias
);
8194 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8195 aliases
= AREF (spec
, 1);
/* Advance to the last cons of the alias list, then append ALIAS.  */
8196 while (!NILP (XCDR (aliases
)))
8197 aliases
= XCDR (aliases
);
8198 XCDR (aliases
) = Fcons (alias
, Qnil
);
8200 eol_type
= AREF (spec
, 2);
8201 if (VECTORP (eol_type
))
8203 Lisp_Object subsidiaries
;
8206 subsidiaries
= make_subsidiaries (alias
);
8207 for (i
= 0; i
< 3; i
++)
8208 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
8209 AREF (eol_type
, i
));
/* NOTE(review): SPEC is written in place here and then registered for
   ALIAS below.  If CHECK_CODING_SYSTEM_GET_SPEC hands back the very
   vector stored for CODING-SYSTEM (not a copy), this also replaces
   the eol-type vector seen through CODING-SYSTEM -- confirm whether
   that is intended.  */
8211 ASET (spec
, 2, subsidiaries
);
8214 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
8215 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
8216 Vcoding_system_alist
);
/* Lisp primitive `coding-system-base'.
   A nil CODING-SYSTEM yields `no-conversion' directly; otherwise the
   base name stored in the attribute vector (slot 0 of the spec) is
   returned.  A non-coding-system argument is rejected by
   CHECK_CODING_SYSTEM_GET_SPEC.  */
8221 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
8223 doc
: /* Return the base of CODING-SYSTEM.
8224 Any alias or subsidiary coding system is not a base coding system. */)
8226 Lisp_Object coding_system
;
8228 Lisp_Object spec
, attrs
;
8230 if (NILP (coding_system
))
8231 return (Qno_conversion
);
8232 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8233 attrs
= AREF (spec
, 0);
8234 return CODING_ATTR_BASE_NAME (attrs
);
/* Lisp primitive `coding-system-plist'.
   A nil CODING-SYSTEM is treated as `no-conversion'.  Returns the
   property list stored in the attribute vector (slot 0 of the spec).
   NOTE(review): unlike the neighbouring primitives, the doc text here
   is a string literal rather than a comment -- confirm that the doc
   extraction tooling accepts this form.  */
8237 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
8239 doc
: "Return the property list of CODING-SYSTEM.")
8241 Lisp_Object coding_system
;
8243 Lisp_Object spec
, attrs
;
8245 if (NILP (coding_system
))
8246 coding_system
= Qno_conversion
;
8247 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8248 attrs
= AREF (spec
, 0);
8249 return CODING_ATTR_PLIST (attrs
);
/* Lisp primitive `coding-system-aliases'.
   A nil CODING-SYSTEM is treated as `no-conversion'.  Returns slot 1
   of the coding system's spec, i.e. the alias list itself (not a
   copy).  */
8253 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
8255 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
8257 Lisp_Object coding_system
;
8261 if (NILP (coding_system
))
8262 coding_system
= Qno_conversion
;
8263 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8264 return AREF (spec
, 1);
/* Lisp primitive `coding-system-eol-type' (one argument).
   A nil CODING-SYSTEM is treated as `no-conversion'.  Slot 2 of the
   spec holds the eol-type: when it is a vector a fresh copy is
   returned; otherwise the symbol is mapped to an integer (`unix' -> 0,
   `dos' -> 1, anything else -> 2).  */
8267 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
8268 Scoding_system_eol_type
, 1, 1, 0,
8269 doc
: /* Return eol-type of CODING-SYSTEM.
8270 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8272 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8273 and CR respectively.
8275 A vector value indicates that a format of end-of-line should be
8276 detected automatically. Nth element of the vector is the subsidiary
8277 coding system whose eol-type is N. */)
8279 Lisp_Object coding_system
;
8281 Lisp_Object spec
, eol_type
;
8284 if (NILP (coding_system
))
8285 coding_system
= Qno_conversion
;
8286 if (! CODING_SYSTEM_P (coding_system
))
8288 spec
= CODING_SYSTEM_SPEC (coding_system
);
8289 eol_type
= AREF (spec
, 2);
/* Hand out a copy so callers cannot modify the stored vector.  */
8290 if (VECTORP (eol_type
))
8291 return Fcopy_sequence (eol_type
);
8292 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
8293 return make_number (n
);
8299 /*** 9. Post-amble ***/
8306 for (i
= 0; i
< coding_category_max
; i
++)
8308 coding_categories
[i
].id
= -1;
8309 coding_priorities
[i
] = i
;
8312 /* ISO2022 specific initialize routine. */
8313 for (i
= 0; i
< 0x20; i
++)
8314 iso_code_class
[i
] = ISO_control_0
;
8315 for (i
= 0x21; i
< 0x7F; i
++)
8316 iso_code_class
[i
] = ISO_graphic_plane_0
;
8317 for (i
= 0x80; i
< 0xA0; i
++)
8318 iso_code_class
[i
] = ISO_control_1
;
8319 for (i
= 0xA1; i
< 0xFF; i
++)
8320 iso_code_class
[i
] = ISO_graphic_plane_1
;
8321 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
8322 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
8323 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
8324 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
8325 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
8326 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
8327 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
8328 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
8329 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
8330 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
8332 inhibit_pre_post_conversion
= 0;
8334 for (i
= 0; i
< 256; i
++)
8336 emacs_mule_bytes
[i
] = 1;
8338 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
8339 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
8340 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
8341 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
8349 staticpro (&Vcoding_system_hash_table
);
8350 Vcoding_system_hash_table
= Fmakehash (Qeq
);
8352 staticpro (&Vsjis_coding_system
);
8353 Vsjis_coding_system
= Qnil
;
8355 staticpro (&Vbig5_coding_system
);
8356 Vbig5_coding_system
= Qnil
;
8358 staticpro (&Vcode_conversion_work_buf_list
);
8359 Vcode_conversion_work_buf_list
= Qnil
;
8361 staticpro (&Vcode_conversion_reused_work_buf
);
8362 Vcode_conversion_reused_work_buf
= Qnil
;
8364 DEFSYM (Qcharset
, "charset");
8365 DEFSYM (Qtarget_idx
, "target-idx");
8366 DEFSYM (Qcoding_system_history
, "coding-system-history");
8367 Fset (Qcoding_system_history
, Qnil
);
8369 /* Target FILENAME is the first argument. */
8370 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
8371 /* Target FILENAME is the third argument. */
8372 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
8374 DEFSYM (Qcall_process
, "call-process");
8375 /* Target PROGRAM is the first argument. */
8376 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
8378 DEFSYM (Qcall_process_region
, "call-process-region");
8379 /* Target PROGRAM is the third argument. */
8380 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
8382 DEFSYM (Qstart_process
, "start-process");
8383 /* Target PROGRAM is the third argument. */
8384 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
8386 DEFSYM (Qopen_network_stream
, "open-network-stream");
8387 /* Target SERVICE is the fourth argument. */
8388 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
8390 DEFSYM (Qcoding_system
, "coding-system");
8391 DEFSYM (Qcoding_aliases
, "coding-aliases");
8393 DEFSYM (Qeol_type
, "eol-type");
8394 DEFSYM (Qunix
, "unix");
8395 DEFSYM (Qdos
, "dos");
8397 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
8398 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
8399 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
8400 DEFSYM (Qdefault_char
, "default-char");
8401 DEFSYM (Qundecided
, "undecided");
8402 DEFSYM (Qno_conversion
, "no-conversion");
8403 DEFSYM (Qraw_text
, "raw-text");
8405 DEFSYM (Qiso_2022
, "iso-2022");
8407 DEFSYM (Qutf_8
, "utf-8");
8409 DEFSYM (Qutf_16
, "utf-16");
8410 DEFSYM (Qsignature
, "signature");
8411 DEFSYM (Qendian
, "endian");
8412 DEFSYM (Qbig
, "big");
8413 DEFSYM (Qlittle
, "little");
8415 DEFSYM (Qshift_jis
, "shift-jis");
8416 DEFSYM (Qbig5
, "big5");
8418 DEFSYM (Qcoding_system_p
, "coding-system-p");
8420 DEFSYM (Qcoding_system_error
, "coding-system-error");
8421 Fput (Qcoding_system_error
, Qerror_conditions
,
8422 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
8423 Fput (Qcoding_system_error
, Qerror_message
,
8424 build_string ("Invalid coding system"));
8426 /* Intern this now in case it isn't already done.
8427 Setting this variable twice is harmless.
8428 But don't staticpro it here--that is done in alloc.c. */
8429 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
8431 DEFSYM (Qtranslation_table
, "translation-table");
8432 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
8433 DEFSYM (Qtranslation_table_id
, "translation-table-id");
8434 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
8435 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
8437 DEFSYM (Qvalid_codes
, "valid-codes");
8439 DEFSYM (Qemacs_mule
, "emacs-mule");
8441 Vcoding_category_table
8442 = Fmake_vector (make_number (coding_category_max
), Qnil
);
8443 staticpro (&Vcoding_category_table
);
8444 /* The following are targets of code detection. */
8445 ASET (Vcoding_category_table
, coding_category_iso_7
,
8446 intern ("coding-category-iso-7"));
8447 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
8448 intern ("coding-category-iso-7-tight"));
8449 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
8450 intern ("coding-category-iso-8-1"));
8451 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
8452 intern ("coding-category-iso-8-2"));
8453 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
8454 intern ("coding-category-iso-7-else"));
8455 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
8456 intern ("coding-category-iso-8-else"));
8457 ASET (Vcoding_category_table
, coding_category_utf_8
,
8458 intern ("coding-category-utf-8"));
8459 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
8460 intern ("coding-category-utf-16-be"));
8461 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
8462 intern ("coding-category-utf-16-le"));
8463 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
8464 intern ("coding-category-utf-16-be-nosig"));
8465 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
8466 intern ("coding-category-utf-16-le-nosig"));
8467 ASET (Vcoding_category_table
, coding_category_charset
,
8468 intern ("coding-category-charset"));
8469 ASET (Vcoding_category_table
, coding_category_sjis
,
8470 intern ("coding-category-sjis"));
8471 ASET (Vcoding_category_table
, coding_category_big5
,
8472 intern ("coding-category-big5"));
8473 ASET (Vcoding_category_table
, coding_category_ccl
,
8474 intern ("coding-category-ccl"));
8475 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
8476 intern ("coding-category-emacs-mule"));
8477 /* The following categories are NOT targets of code detection. */
8478 ASET (Vcoding_category_table
, coding_category_raw_text
,
8479 intern ("coding-category-raw-text"));
8480 ASET (Vcoding_category_table
, coding_category_undecided
,
8481 intern ("coding-category-undecided"));
8483 defsubr (&Scoding_system_p
);
8484 defsubr (&Sread_coding_system
);
8485 defsubr (&Sread_non_nil_coding_system
);
8486 defsubr (&Scheck_coding_system
);
8487 defsubr (&Sdetect_coding_region
);
8488 defsubr (&Sdetect_coding_string
);
8489 defsubr (&Sfind_coding_systems_region_internal
);
8490 defsubr (&Scheck_coding_systems_region
);
8491 defsubr (&Sdecode_coding_region
);
8492 defsubr (&Sencode_coding_region
);
8493 defsubr (&Sdecode_coding_string
);
8494 defsubr (&Sencode_coding_string
);
8495 defsubr (&Sdecode_sjis_char
);
8496 defsubr (&Sencode_sjis_char
);
8497 defsubr (&Sdecode_big5_char
);
8498 defsubr (&Sencode_big5_char
);
8499 defsubr (&Sset_terminal_coding_system_internal
);
8500 defsubr (&Sset_safe_terminal_coding_system_internal
);
8501 defsubr (&Sterminal_coding_system
);
8502 defsubr (&Sset_keyboard_coding_system_internal
);
8503 defsubr (&Skeyboard_coding_system
);
8504 defsubr (&Sfind_operation_coding_system
);
8505 defsubr (&Sset_coding_system_priority
);
8506 defsubr (&Sdefine_coding_system_internal
);
8507 defsubr (&Sdefine_coding_system_alias
);
8508 defsubr (&Scoding_system_base
);
8509 defsubr (&Scoding_system_plist
);
8510 defsubr (&Scoding_system_aliases
);
8511 defsubr (&Scoding_system_eol_type
);
8512 defsubr (&Scoding_system_priority_list
);
8514 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
8515 doc
: /* List of coding systems.
8517 Do not alter the value of this variable manually. This variable should be
8518 updated by the functions `define-coding-system' and
8519 `define-coding-system-alias'. */);
8520 Vcoding_system_list
= Qnil
;
8522 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
8523 doc
: /* Alist of coding system names.
8524 Each element is one element list of coding system name.
8525 This variable is given to `completing-read' as TABLE argument.
8527 Do not alter the value of this variable manually. This variable should be
8528 updated by the functions `make-coding-system' and
8529 `define-coding-system-alias'. */);
8530 Vcoding_system_alist
= Qnil
;
8532 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
8533 doc
: /* List of coding-categories (symbols) ordered by priority.
8535 On detecting a coding system, Emacs tries code detection algorithms
8536 associated with each coding-category one by one in this order. When
8537 one algorithm agrees with a byte sequence of source text, the coding
8538 system bound to the corresponding coding-category is selected. */);
8542 Vcoding_category_list
= Qnil
;
8543 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8544 Vcoding_category_list
8545 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
8546 Vcoding_category_list
);
8549 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
8550 doc
: /* Specify the coding system for read operations.
8551 It is useful to bind this variable with `let', but do not set it globally.
8552 If the value is a coding system, it is used for decoding on read operation.
8553 If not, an appropriate element is used from one of the coding system alists:
8554 There are three such tables, `file-coding-system-alist',
8555 `process-coding-system-alist', and `network-coding-system-alist'. */);
8556 Vcoding_system_for_read
= Qnil
;
8558 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
8559 doc
: /* Specify the coding system for write operations.
8560 Programs bind this variable with `let', but you should not set it globally.
8561 If the value is a coding system, it is used for encoding of output,
8562 when writing it to a file and when sending it to a file or subprocess.
8564 If this does not specify a coding system, an appropriate element
8565 is used from one of the coding system alists:
8566 There are three such tables, `file-coding-system-alist',
8567 `process-coding-system-alist', and `network-coding-system-alist'.
8568 For output to files, if the above procedure does not specify a coding system,
8569 the value of `buffer-file-coding-system' is used. */);
8570 Vcoding_system_for_write
= Qnil
;
8572 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
8574 Coding system used in the latest file or process I/O. */);
8575 Vlast_coding_system_used
= Qnil
;
8577 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
8579 *Non-nil means always inhibit code conversion of end-of-line format.
8580 See info node `Coding Systems' and info node `Text and Binary' concerning
8581 such conversion. */);
8582 inhibit_eol_conversion
= 0;
8584 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
8586 Non-nil means process buffer inherits coding system of process output.
8587 Bind it to t if the process output is to be treated as if it were a file
8588 read from some filesystem. */);
8589 inherit_process_coding_system
= 0;
8591 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
8593 Alist to decide a coding system to use for a file I/O operation.
8594 The format is ((PATTERN . VAL) ...),
8595 where PATTERN is a regular expression matching a file name,
8596 VAL is a coding system, a cons of coding systems, or a function symbol.
8597 If VAL is a coding system, it is used for both decoding and encoding
8599 If VAL is a cons of coding systems, the car part is used for decoding,
8600 and the cdr part is used for encoding.
8601 If VAL is a function symbol, the function must return a coding system
8602 or a cons of coding systems which are used as above. The function gets
8603 the arguments with which `find-operation-coding-system' was called.
8605 See also the function `find-operation-coding-system'
8606 and the variable `auto-coding-alist'. */);
8607 Vfile_coding_system_alist
= Qnil
;
8609 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
8611 Alist to decide a coding system to use for a process I/O operation.
8612 The format is ((PATTERN . VAL) ...),
8613 where PATTERN is a regular expression matching a program name,
8614 VAL is a coding system, a cons of coding systems, or a function symbol.
8615 If VAL is a coding system, it is used for both decoding what received
8616 from the program and encoding what sent to the program.
8617 If VAL is a cons of coding systems, the car part is used for decoding,
8618 and the cdr part is used for encoding.
8619 If VAL is a function symbol, the function must return a coding system
8620 or a cons of coding systems which are used as above.
8622 See also the function `find-operation-coding-system'. */);
8623 Vprocess_coding_system_alist
= Qnil
;
8625 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
8627 Alist to decide a coding system to use for a network I/O operation.
8628 The format is ((PATTERN . VAL) ...),
8629 where PATTERN is a regular expression matching a network service name
8630 or is a port number to connect to,
8631 VAL is a coding system, a cons of coding systems, or a function symbol.
8632 If VAL is a coding system, it is used for both decoding what received
8633 from the network stream and encoding what sent to the network stream.
8634 If VAL is a cons of coding systems, the car part is used for decoding,
8635 and the cdr part is used for encoding.
8636 If VAL is a function symbol, the function must return a coding system
8637 or a cons of coding systems which are used as above.
8639 See also the function `find-operation-coding-system'. */);
8640 Vnetwork_coding_system_alist
= Qnil
;
8642 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
8643 doc
: /* Coding system to use with system messages.
8644 Also used for decoding keyboard input on X Window system. */);
8645 Vlocale_coding_system
= Qnil
;
8647 /* The eol mnemonics are reset in startup.el system-dependently. */
8648 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
8650 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8651 eol_mnemonic_unix
= build_string (":");
8653 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
8655 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8656 eol_mnemonic_dos
= build_string ("\\");
8658 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
8660 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8661 eol_mnemonic_mac
= build_string ("/");
8663 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
8665 *String displayed in mode line when end-of-line format is not yet determined. */);
8666 eol_mnemonic_undecided
= build_string (":");
8668 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
8670 *Non-nil enables character translation while encoding and decoding. */);
8671 Venable_character_translation
= Qt
;
8673 DEFVAR_LISP ("standard-translation-table-for-decode",
8674 &Vstandard_translation_table_for_decode
,
8675 doc
: /* Table for translating characters while decoding. */);
8676 Vstandard_translation_table_for_decode
= Qnil
;
8678 DEFVAR_LISP ("standard-translation-table-for-encode",
8679 &Vstandard_translation_table_for_encode
,
8680 doc
: /* Table for translating characters while encoding. */);
8681 Vstandard_translation_table_for_encode
= Qnil
;
8683 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
8684 doc
: /* Alist of charsets vs revision numbers.
8685 While encoding, if a charset (car part of an element) is found,
8686 designate it with the escape sequence identifying revision (cdr part
8687 of the element). */);
8688 Vcharset_revision_table
= Qnil
;
8690 DEFVAR_LISP ("default-process-coding-system",
8691 &Vdefault_process_coding_system
,
8692 doc
: /* Cons of coding systems used for process I/O by default.
8693 The car part is used for decoding a process output,
8694 the cdr part is used for encoding a text to be sent to a process. */);
8695 Vdefault_process_coding_system
= Qnil
;
8697 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
8699 Table of extra Latin codes in the range 128..159 (inclusive).
8700 This is a vector of length 256.
8701 If Nth element is non-nil, the existence of code N in a file
8702 \(or output of subprocess) doesn't prevent it from being detected as
8703 a coding system of ISO 2022 variant which has a flag
8704 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8705 or reading output of a subprocess.
8706 Only 128th through 159th elements have a meaning. */);
8707 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
8709 DEFVAR_LISP ("select-safe-coding-system-function",
8710 &Vselect_safe_coding_system_function
,
8712 Function to call to select safe coding system for encoding a text.
8714 If set, this function is called to force a user to select a proper
8715 coding system which can encode the text in the case that a default
8716 coding system used in each operation can't encode the text.
8718 The default value is `select-safe-coding-system' (which see). */);
8719 Vselect_safe_coding_system_function
= Qnil
;
8721 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8722 &inhibit_iso_escape_detection
,
8724 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8726 By default, on reading a file, Emacs tries to detect how the text is
8727 encoded. This code detection is sensitive to escape sequences. If
8728 the sequence is valid as ISO2022, the code is determined as one of
8729 the ISO2022 encodings, and the file is decoded by the corresponding
8730 coding system (e.g. `iso-2022-7bit').
8732 However, there may be a case that you want to read escape sequences in
8733 a file as is. In such a case, you can set this variable to non-nil.
8734 Then, as the code detection ignores any escape sequences, no file is
8735 detected as encoded in some ISO2022 encoding. The result is that all
8736 escape sequences become visible in a buffer.
8738 The default value is nil, and it is strongly recommended not to change
8739 it. That is because many Emacs Lisp source files that contain
8740 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8741 in Emacs's distribution, and they won't be decoded correctly on
8742 reading if you suppress escape sequence detection.
8744 The other way to read escape sequences in a file without decoding is
8745 to explicitly specify some coding system that doesn't use ISO2022's
8746 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
8747 inhibit_iso_escape_detection
= 0;
8750 Lisp_Object args
[coding_arg_max
];
8751 Lisp_Object plist
[14];
8754 for (i
= 0; i
< coding_arg_max
; i
++)
8757 plist
[0] = intern (":name");
8758 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
8759 plist
[2] = intern (":mnemonic");
8760 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
8761 plist
[4] = intern (":coding-type");
8762 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
8763 plist
[6] = intern (":ascii-compatible-p");
8764 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
8765 plist
[8] = intern (":default-char");
8766 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
8767 plist
[10] = intern (":docstring");
8768 plist
[11] = build_string ("Do no conversion.\n\
8770 When you visit a file with this coding, the file is read into a\n\
8771 unibyte buffer as is, thus each byte of a file is treated as a\n\
8773 plist
[12] = intern (":eol-type");
8774 plist
[13] = args
[coding_arg_eol_type
] = Qunix
;
8775 args
[coding_arg_plist
] = Flist (14, plist
);
8776 Fdefine_coding_system_internal (coding_arg_max
, args
);
8779 setup_coding_system (Qno_conversion
, &keyboard_coding
);
8780 setup_coding_system (Qno_conversion
, &terminal_coding
);
8781 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
8785 emacs_strerror (error_number
)
8790 synchronize_system_messages_locale ();
8791 str
= strerror (error_number
);
8793 if (! NILP (Vlocale_coding_system
))
8795 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
8796 Vlocale_coding_system
,
8798 str
= (char *) XSTRING (dec
)->data
;