1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion
61 from coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
152 Below is the template of these functions. */
156 detect_coding_XXX (coding
, detect_info
)
157 struct coding_system
*coding
;
158 struct coding_detection_info
*detect_info
;
160 unsigned char *src
= coding
->source
;
161 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
162 int multibytep
= coding
->src_multibyte
;
163 int consumed_chars
= 0;
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
173 if (! __C_conforms_to_XXX___ (c
))
175 if (__C_strongly_suggests_XXX__ (c
))
176 found
= CATEGORY_MASK_XXX
;
178 /* The byte sequence is invalid for XXX. */
179 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
183 /* The source was exhausted successfully. */
184 detect_info
->found
|= found
;
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size.
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
201 Below is the template of these functions. */
205 decode_coding_XXXX (coding
)
206 struct coding_system
*coding
;
208 unsigned char *src
= coding
->source
+ coding
->consumed
;
209 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base
;
214 /* A buffer to produce decoded characters. */
215 int *charbuf
= coding
->charbuf
;
216 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
217 int multibytep
= coding
->src_multibyte
;
222 if (charbuf
>= charbuf_end
)
223 /* No more room to produce a decoded character. */
230 if (src_base
< src_end
231 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base
< src_end
&& charbuf
< charbuf_end
)
235 *charbuf
++ = *src_base
++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
239 /* Remember how many characters we produced. */
240 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of the not-yet-encoded source text.
260 Below is a template of these functions. */
263 encode_coding_XXX (coding
)
264 struct coding_system
*coding
;
266 int multibytep
= coding
->dst_multibyte
;
267 int *charbuf
= coding
->charbuf
;
268 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
269 unsigned char *dst
= coding
->destination
+ coding
->produced
;
270 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
271 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
272 int produced_chars
= 0;
274 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
277 /* Encode C into DST, and increment DST. */
279 label_no_more_destination
:
280 /* How many chars and bytes we produced. */
281 coding
->produced_char
+= produced_chars
;
282 coding
->produced
= dst
- coding
->destination
;
287 /*** 1. Preamble ***/
294 #include "character.h"
297 #include "composite.h"
301 Lisp_Object Vcoding_system_hash_table
;
303 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
304 Lisp_Object Qunix
, Qdos
;
305 extern Lisp_Object Qmac
; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system
;
307 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
308 Lisp_Object Qdefault_char
;
309 Lisp_Object Qno_conversion
, Qundecided
;
310 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
311 Lisp_Object Qbig
, Qlittle
;
312 Lisp_Object Qcoding_system_history
;
313 Lisp_Object Qvalid_codes
;
314 Lisp_Object QCcategory
, QCmnemonic
, QCdefalut_char
;
315 Lisp_Object QCdecode_translation_table
, QCencode_translation_table
;
316 Lisp_Object QCpost_read_conversion
, QCpre_write_conversion
;
318 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
319 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
320 Lisp_Object Qstart_process
, Qopen_network_stream
;
321 Lisp_Object Qtarget_idx
;
323 Lisp_Object Qinsufficient_source
, Qinconsistent_eol
, Qinvalid_source
;
324 Lisp_Object Qinterrupted
, Qinsufficient_memory
;
326 int coding_system_require_warning
;
328 Lisp_Object Vselect_safe_coding_system_function
;
330 /* Mnemonic string for each format of end-of-line. */
331 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
332 /* Mnemonic string to indicate format of end-of-line is not yet
334 Lisp_Object eol_mnemonic_undecided
;
338 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
340 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
342 /* Coding system emacs-mule and raw-text are for converting only
343 end-of-line format. */
344 Lisp_Object Qemacs_mule
, Qraw_text
;
345 Lisp_Object Qutf_8_emacs
;
347 /* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */
349 /* Coding-system for reading files and receiving data from process. */
350 Lisp_Object Vcoding_system_for_read
;
351 /* Coding-system for writing files and sending data to process. */
352 Lisp_Object Vcoding_system_for_write
;
353 /* Coding-system actually used in the latest I/O. */
354 Lisp_Object Vlast_coding_system_used
;
355 /* Set to non-nil when an error is detected during code conversion. */
356 Lisp_Object Vlast_code_conversion_error
;
357 /* A vector of length 256 which contains information about special
358 Latin codes (especially for dealing with Microsoft codes). */
359 Lisp_Object Vlatin_extra_code_table
;
361 /* Flag to inhibit code conversion of end-of-line format. */
362 int inhibit_eol_conversion
;
364 /* Flag to inhibit ISO2022 escape sequence detection. */
365 int inhibit_iso_escape_detection
;
367 /* Flag to make buffer-file-coding-system inherit from process-coding. */
368 int inherit_process_coding_system
;
370 /* Coding system to be used to encode text for terminal display. */
371 struct coding_system terminal_coding
;
373 /* Coding system to be used to encode text for terminal display when
374 terminal coding system is nil. */
375 struct coding_system safe_terminal_coding
;
377 /* Coding system of what is sent from terminal keyboard. */
378 struct coding_system keyboard_coding
;
380 Lisp_Object Vfile_coding_system_alist
;
381 Lisp_Object Vprocess_coding_system_alist
;
382 Lisp_Object Vnetwork_coding_system_alist
;
384 Lisp_Object Vlocale_coding_system
;
388 /* Flag to tell if we look up translation table on character code
390 Lisp_Object Venable_character_translation
;
391 /* Standard translation table to look up on decoding (reading). */
392 Lisp_Object Vstandard_translation_table_for_decode
;
393 /* Standard translation table to look up on encoding (writing). */
394 Lisp_Object Vstandard_translation_table_for_encode
;
396 Lisp_Object Qtranslation_table
;
397 Lisp_Object Qtranslation_table_id
;
398 Lisp_Object Qtranslation_table_for_decode
;
399 Lisp_Object Qtranslation_table_for_encode
;
401 /* Alist of charsets vs revision number. */
402 static Lisp_Object Vcharset_revision_table
;
404 /* Default coding systems used for process I/O. */
405 Lisp_Object Vdefault_process_coding_system
;
407 /* Char table for translating Quail and self-inserting input. */
408 Lisp_Object Vtranslation_table_for_input
;
410 /* Two special coding systems. */
411 Lisp_Object Vsjis_coding_system
;
412 Lisp_Object Vbig5_coding_system
;
414 static void record_conversion_result (struct coding_system
*coding
,
415 enum coding_result_code result
);
416 static int detect_coding_utf_8
P_ ((struct coding_system
*,
417 struct coding_detection_info
*info
));
418 static void decode_coding_utf_8
P_ ((struct coding_system
*));
419 static int encode_coding_utf_8
P_ ((struct coding_system
*));
421 static int detect_coding_utf_16
P_ ((struct coding_system
*,
422 struct coding_detection_info
*info
));
423 static void decode_coding_utf_16
P_ ((struct coding_system
*));
424 static int encode_coding_utf_16
P_ ((struct coding_system
*));
426 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
427 struct coding_detection_info
*info
));
428 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
429 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
431 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
432 struct coding_detection_info
*info
));
433 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
434 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
436 static int detect_coding_sjis
P_ ((struct coding_system
*,
437 struct coding_detection_info
*info
));
438 static void decode_coding_sjis
P_ ((struct coding_system
*));
439 static int encode_coding_sjis
P_ ((struct coding_system
*));
441 static int detect_coding_big5
P_ ((struct coding_system
*,
442 struct coding_detection_info
*info
));
443 static void decode_coding_big5
P_ ((struct coding_system
*));
444 static int encode_coding_big5
P_ ((struct coding_system
*));
446 static int detect_coding_ccl
P_ ((struct coding_system
*,
447 struct coding_detection_info
*info
));
448 static void decode_coding_ccl
P_ ((struct coding_system
*));
449 static int encode_coding_ccl
P_ ((struct coding_system
*));
451 static void decode_coding_raw_text
P_ ((struct coding_system
*));
452 static int encode_coding_raw_text
P_ ((struct coding_system
*));
455 /* ISO2022 section */
457 #define CODING_ISO_INITIAL(coding, reg) \
458 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
459 coding_attr_iso_initial), \
463 #define CODING_ISO_REQUEST(coding, charset_id) \
464 ((charset_id <= (coding)->max_charset_id \
465 ? (coding)->safe_charsets[charset_id] \
469 #define CODING_ISO_FLAGS(coding) \
470 ((coding)->spec.iso_2022.flags)
471 #define CODING_ISO_DESIGNATION(coding, reg) \
472 ((coding)->spec.iso_2022.current_designation[reg])
473 #define CODING_ISO_INVOCATION(coding, plane) \
474 ((coding)->spec.iso_2022.current_invocation[plane])
475 #define CODING_ISO_SINGLE_SHIFTING(coding) \
476 ((coding)->spec.iso_2022.single_shifting)
477 #define CODING_ISO_BOL(coding) \
478 ((coding)->spec.iso_2022.bol)
479 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
480 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
482 /* Control characters of ISO2022. */
483 /* code */ /* function */
484 #define ISO_CODE_LF 0x0A /* line-feed */
485 #define ISO_CODE_CR 0x0D /* carriage-return */
486 #define ISO_CODE_SO 0x0E /* shift-out */
487 #define ISO_CODE_SI 0x0F /* shift-in */
488 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
489 #define ISO_CODE_ESC 0x1B /* escape */
490 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
491 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
492 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
494 /* All code (1-byte) of ISO2022 is classified into one of the
496 enum iso_code_class_type
498 ISO_control_0
, /* Control codes in the range
499 0x00..0x1F and 0x7F, except for the
500 following 4 codes. */
501 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
502 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
503 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
504 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
505 ISO_control_1
, /* Control codes in the range
506 0x80..0x9F, except for the
507 following 3 codes. */
508 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
509 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
510 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
511 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
512 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
513 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
514 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
517 /** The macros CODING_ISO_FLAG_XXX define the flag bits of the
518 `iso-flags' attribute of an iso2022 coding system. */
520 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
521 instead of the correct short-form sequence (e.g. ESC $ A). */
522 #define CODING_ISO_FLAG_LONG_FORM 0x0001
524 /* If set, reset graphic planes and registers at end-of-line to the
526 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
528 /* If set, reset graphic planes and registers before any control
529 characters to the initial state. */
530 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
532 /* If set, encode by 7-bit environment. */
533 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
535 /* If set, use locking-shift function. */
536 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
538 /* If set, use single-shift function. Overrides
539 CODING_ISO_FLAG_LOCKING_SHIFT. */
540 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
542 /* If set, use designation escape sequence. */
543 #define CODING_ISO_FLAG_DESIGNATION 0x0040
545 /* If set, produce revision number sequence. */
546 #define CODING_ISO_FLAG_REVISION 0x0080
548 /* If set, produce ISO6429's direction specifying sequence. */
549 #define CODING_ISO_FLAG_DIRECTION 0x0100
551 /* If set, assume designation states are reset at beginning of line on
553 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
555 /* If set, designation sequence should be placed at beginning of line
557 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
559 /* If set, do not encode unsafe characters on output. */
560 #define CODING_ISO_FLAG_SAFE 0x0800
562 /* If set, extra latin codes (128..159) are accepted as a valid code
564 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
566 #define CODING_ISO_FLAG_COMPOSITION 0x2000
568 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
570 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
572 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
574 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
576 /* A character to be produced on output if encoding of the original
577 character is prohibited by CODING_ISO_FLAG_SAFE. */
578 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
582 #define CODING_UTF_16_BOM(coding) \
583 ((coding)->spec.utf_16.bom)
585 #define CODING_UTF_16_ENDIAN(coding) \
586 ((coding)->spec.utf_16.endian)
588 #define CODING_UTF_16_SURROGATE(coding) \
589 ((coding)->spec.utf_16.surrogate)
593 #define CODING_CCL_DECODER(coding) \
594 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
595 #define CODING_CCL_ENCODER(coding) \
596 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
597 #define CODING_CCL_VALIDS(coding) \
598 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
600 /* Index for each coding category in `coding_categories' */
604 coding_category_iso_7
,
605 coding_category_iso_7_tight
,
606 coding_category_iso_8_1
,
607 coding_category_iso_8_2
,
608 coding_category_iso_7_else
,
609 coding_category_iso_8_else
,
610 coding_category_utf_8
,
611 coding_category_utf_16_auto
,
612 coding_category_utf_16_be
,
613 coding_category_utf_16_le
,
614 coding_category_utf_16_be_nosig
,
615 coding_category_utf_16_le_nosig
,
616 coding_category_charset
,
617 coding_category_sjis
,
618 coding_category_big5
,
620 coding_category_emacs_mule
,
621 /* All above are targets of code detection. */
622 coding_category_raw_text
,
623 coding_category_undecided
,
627 /* Definitions of flag bits used in detect_coding_XXXX. */
628 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
629 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
630 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
631 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
632 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
633 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
634 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
635 #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
636 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
637 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
638 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
639 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
640 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
641 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
642 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
643 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
644 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
645 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
647 /* This value is returned if detect_coding_mask () finds nothing other
648 than ASCII characters. */
649 #define CATEGORY_MASK_ANY \
650 (CATEGORY_MASK_ISO_7 \
651 | CATEGORY_MASK_ISO_7_TIGHT \
652 | CATEGORY_MASK_ISO_8_1 \
653 | CATEGORY_MASK_ISO_8_2 \
654 | CATEGORY_MASK_ISO_7_ELSE \
655 | CATEGORY_MASK_ISO_8_ELSE \
656 | CATEGORY_MASK_UTF_8 \
657 | CATEGORY_MASK_UTF_16_BE \
658 | CATEGORY_MASK_UTF_16_LE \
659 | CATEGORY_MASK_UTF_16_BE_NOSIG \
660 | CATEGORY_MASK_UTF_16_LE_NOSIG \
661 | CATEGORY_MASK_CHARSET \
662 | CATEGORY_MASK_SJIS \
663 | CATEGORY_MASK_BIG5 \
664 | CATEGORY_MASK_CCL \
665 | CATEGORY_MASK_EMACS_MULE)
668 #define CATEGORY_MASK_ISO_7BIT \
669 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
671 #define CATEGORY_MASK_ISO_8BIT \
672 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
674 #define CATEGORY_MASK_ISO_ELSE \
675 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
677 #define CATEGORY_MASK_ISO_ESCAPE \
678 (CATEGORY_MASK_ISO_7 \
679 | CATEGORY_MASK_ISO_7_TIGHT \
680 | CATEGORY_MASK_ISO_7_ELSE \
681 | CATEGORY_MASK_ISO_8_ELSE)
683 #define CATEGORY_MASK_ISO \
684 ( CATEGORY_MASK_ISO_7BIT \
685 | CATEGORY_MASK_ISO_8BIT \
686 | CATEGORY_MASK_ISO_ELSE)
688 #define CATEGORY_MASK_UTF_16 \
689 (CATEGORY_MASK_UTF_16_BE \
690 | CATEGORY_MASK_UTF_16_LE \
691 | CATEGORY_MASK_UTF_16_BE_NOSIG \
692 | CATEGORY_MASK_UTF_16_LE_NOSIG)
695 /* List of symbols `coding-category-xxx' ordered by priority. This
696 variable is exposed to Emacs Lisp. */
697 static Lisp_Object Vcoding_category_list
;
699 /* Table of coding categories (Lisp symbols). This variable is for
701 static Lisp_Object Vcoding_category_table
;
703 /* Table of coding-categories ordered by priority. */
704 static enum coding_category coding_priorities
[coding_category_max
];
706 /* Nth element is a coding context for the coding system bound to the
707 Nth coding category. */
708 static struct coding_system coding_categories
[coding_category_max
];
710 /*** Commonly used macros and functions ***/
713 #define min(a, b) ((a) < (b) ? (a) : (b))
716 #define max(a, b) ((a) > (b) ? (a) : (b))
719 #define CODING_GET_INFO(coding, attrs, charset_list) \
721 (attrs) = CODING_ID_ATTRS ((coding)->id); \
722 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
726 /* Safely get one byte from the source text pointed by SRC which ends
727 at SRC_END, and set C to that byte. If there are not enough bytes
728 in the source, it jumps to `no_more_source'. If multibytep is
729 nonzero, and a multibyte character is found at SRC, set C to the
730 negative value of the character code. The caller should declare
731 and set these variables appropriately in advance:
732 src, src_end, multibytep */
734 #define ONE_MORE_BYTE(c) \
736 if (src == src_end) \
738 if (src_base < src) \
739 record_conversion_result \
740 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
741 goto no_more_source; \
744 if (multibytep && (c & 0x80)) \
746 if ((c & 0xFE) == 0xC0) \
747 c = ((c & 1) << 6) | *src++; \
750 c = - string_char (--src, &src, NULL); \
751 record_conversion_result \
752 (coding, CODING_RESULT_INVALID_SRC); \
759 #define ONE_MORE_BYTE_NO_CHECK(c) \
762 if (multibytep && (c & 0x80)) \
764 if ((c & 0xFE) == 0xC0) \
765 c = ((c & 1) << 6) | *src++; \
768 c = - string_char (--src, &src, NULL); \
769 record_conversion_result \
770 (coding, CODING_RESULT_INVALID_SRC); \
777 /* Store a byte C in the place pointed by DST and increment DST to the
778 next free point, and increment PRODUCED_CHARS. The caller should
779 assure that C is 0..127, and declare and set the variable `dst'
780 appropriately in advance.
784 #define EMIT_ONE_ASCII_BYTE(c) \
791 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
793 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
795 produced_chars += 2; \
796 *dst++ = (c1), *dst++ = (c2); \
800 /* Store a byte C in the place pointed by DST and increment DST to the
801 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
802 nonzero, store in an appropriate multibyte form. The caller should
803 declare and set the variables `dst' and `multibytep' appropriately
806 #define EMIT_ONE_BYTE(c) \
813 ch = BYTE8_TO_CHAR (ch); \
814 CHAR_STRING_ADVANCE (ch, dst); \
821 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
823 #define EMIT_TWO_BYTES(c1, c2) \
825 produced_chars += 2; \
832 ch = BYTE8_TO_CHAR (ch); \
833 CHAR_STRING_ADVANCE (ch, dst); \
836 ch = BYTE8_TO_CHAR (ch); \
837 CHAR_STRING_ADVANCE (ch, dst); \
847 #define EMIT_THREE_BYTES(c1, c2, c3) \
849 EMIT_ONE_BYTE (c1); \
850 EMIT_TWO_BYTES (c2, c3); \
854 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
856 EMIT_TWO_BYTES (c1, c2); \
857 EMIT_TWO_BYTES (c3, c4); \
/* Record RESULT as the outcome of the conversion described by CODING.
   RESULT is stored in CODING->result, and for each failure code
   listed below the matching error symbol is stored in the global
   Vlast_code_conversion_error.
   NOTE(review): no case for CODING_RESULT_SUCCESS is visible here, so
   a successful result presumably leaves Vlast_code_conversion_error
   untouched -- confirm against the full switch.  */
862 record_conversion_result (struct coding_system
*coding
,
863 enum coding_result_code result
)
865 coding
->result
= result
;
/* Map each failure code to the symbol published to Lisp.  */
868 case CODING_RESULT_INSUFFICIENT_SRC
:
869 Vlast_code_conversion_error
= Qinsufficient_source
;
871 case CODING_RESULT_INCONSISTENT_EOL
:
872 Vlast_code_conversion_error
= Qinconsistent_eol
;
874 case CODING_RESULT_INVALID_SRC
:
875 Vlast_code_conversion_error
= Qinvalid_source
;
877 case CODING_RESULT_INTERRUPT
:
878 Vlast_code_conversion_error
= Qinterrupted
;
880 case CODING_RESULT_INSUFFICIENT_MEM
:
881 Vlast_code_conversion_error
= Qinsufficient_memory
;
/* Decode CODE of CHARSET into C with DECODE_CHAR.  The flag
   charset_map_loaded is cleared beforehand; if it is set afterwards,
   CODING->source is recomputed with coding_set_source and the
   caller's source pointers are shifted by the distance the source
   moved (only the SRC_BASE adjustment is visible in this excerpt).
   NOTE(review): presumably loading a charset map can relocate the
   source text -- confirm.  */
886 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
888 charset_map_loaded = 0; \
889 c = DECODE_CHAR (charset, code); \
890 if (charset_map_loaded) \
892 const unsigned char *orig = coding->source; \
895 coding_set_source (coding); \
896 offset = coding->source - orig; \
898 src_base += offset; \
/* Make sure BYTES more bytes can be stored at DST.  When DST + BYTES
   would reach DST_END, the destination is enlarged through
   alloc_destination by the number of charbuf elements still pending
   (CHARBUF_END - CHARBUF) plus BYTES, and DST and DST_END are
   refreshed from the values it leaves in CODING.  The caller must
   have `coding', `dst', `dst_end', `charbuf' and `charbuf_end' in
   scope.  */
904 #define ASSURE_DESTINATION(bytes) \
906 if (dst + (bytes) >= dst_end) \
908 int more_bytes = charbuf_end - charbuf + (bytes); \
910 dst = alloc_destination (coding, more_bytes, dst); \
911 dst_end = coding->destination + coding->dst_bytes; \
/* Recompute CODING->source from CODING->src_object.
   If the source object is a buffer and CODING->src_pos is negative,
   the address is taken relative to the end of that buffer's gap
   (BUF_GAP_END_ADDR + src_pos_byte); the other buffer assignment uses
   BUF_BYTE_ADDRESS for src_pos_byte.  If the source object is a
   string, the address is SDATA of the string plus src_pos_byte.  */
918 coding_set_source (coding
)
919 struct coding_system
*coding
;
921 if (BUFFERP (coding
->src_object
))
923 struct buffer
*buf
= XBUFFER (coding
->src_object
);
925 if (coding
->src_pos
< 0)
926 coding
->source
= BUF_GAP_END_ADDR (buf
) + coding
->src_pos_byte
;
928 coding
->source
= BUF_BYTE_ADDRESS (buf
, coding
->src_pos_byte
);
930 else if (STRINGP (coding
->src_object
))
932 coding
->source
= SDATA (coding
->src_object
) + coding
->src_pos_byte
;
935 /* Otherwise, the source is C string and is never relocated
936 automatically. Thus we don't have to update anything. */
/* Recompute CODING->destination and CODING->dst_bytes when the
   destination object is a buffer.
   When CODING->src_pos is negative, the destination is addressed from
   BEG_ADDR and the available size is measured up to GAP_END_ADDR
   minus the not-yet-consumed source bytes (src_bytes - consumed).
   The other pair of assignments addresses the destination from the
   beginning of the destination buffer and measures the size up to
   the end of that buffer's gap.
   NOTE(review): the test is on src_pos, not a destination field --
   presumably a negative src_pos marks source text that sits after the
   gap of the same buffer; confirm against the callers.  */
941 coding_set_destination (coding
)
942 struct coding_system
*coding
;
944 if (BUFFERP (coding
->dst_object
))
946 if (coding
->src_pos
< 0)
948 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
949 coding
->dst_bytes
= (GAP_END_ADDR
950 - (coding
->src_bytes
- coding
->consumed
)
951 - coding
->destination
);
955 /* We are sure that coding->dst_pos_byte is before the gap
957 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
958 + coding
->dst_pos_byte
- 1);
959 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
960 - coding
->destination
);
964 /* Otherwise, the destination is C string and is never relocated
965 automatically. Thus we don't have to update anything. */
/* Enlarge the destination area of CODING by BYTES bytes:
   CODING->destination is reallocated with xrealloc to
   CODING->dst_bytes + BYTES bytes, and CODING->dst_bytes is increased
   by BYTES.  The destination pointer may change as a result.  */
971 coding_alloc_by_realloc (coding
, bytes
)
972 struct coding_system
*coding
;
975 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
976 coding
->dst_bytes
+ bytes
);
977 coding
->dst_bytes
+= bytes
;
/* Buffer-destination counterpart of coding_alloc_by_realloc.
   When the source and destination objects are the same buffer, the
   number of source bytes not yet consumed (src_bytes - consumed) is
   temporarily subtracted from GAP_SIZE and added to ZV, Z, ZV_BYTE
   and Z_BYTE, and the same amounts are restored afterwards.
   Otherwise the destination buffer is temporarily made current with
   set_buffer_internal and the previously current buffer is restored
   afterwards.
   NOTE(review): the call that actually enlarges the gap is not
   visible in this excerpt; presumably it sits between each
   adjust/restore pair -- confirm.  */
981 coding_alloc_by_making_gap (coding
, bytes
)
982 struct coding_system
*coding
;
985 if (BUFFERP (coding
->dst_object
)
986 && EQ (coding
->src_object
, coding
->dst_object
))
988 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
990 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
992 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
996 Lisp_Object this_buffer
;
998 this_buffer
= Fcurrent_buffer ();
999 set_buffer_internal (XBUFFER (coding
->dst_object
));
1001 set_buffer_internal (XBUFFER (this_buffer
));
/* Enlarge the destination of CODING by NBYTES bytes and return the
   position corresponding to DST in the (possibly moved) destination.
   The offset of DST from CODING->destination is remembered first.
   A buffer destination is grown with coding_alloc_by_making_gap; the
   other call uses coding_alloc_by_realloc.  The result code is then
   set to CODING_RESULT_SUCCESS, CODING->destination is refreshed with
   coding_set_destination, and DST is rebuilt from the new destination
   plus the saved offset.  */
1006 static unsigned char *
1007 alloc_destination (coding
, nbytes
, dst
)
1008 struct coding_system
*coding
;
/* Keep DST as an offset: the destination may be relocated below.  */
1012 EMACS_INT offset
= dst
- coding
->destination
;
1014 if (BUFFERP (coding
->dst_object
))
1015 coding_alloc_by_making_gap (coding
, nbytes
);
1017 coding_alloc_by_realloc (coding
, nbytes
);
1018 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1019 coding_set_destination (coding
);
1020 dst
= coding
->destination
+ offset
;
1024 /** Macros for annotations. */
1026 /* Maximum length of annotation data (sum of annotations for
1027 composition and charset). */
1028 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1030 /* An annotation data is stored in the array coding->charbuf in this
1032 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1033 LENGTH is the number of elements in the annotation.
1034 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1035 FROM and TO specify the range of text annotated. They are relative
1036 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1038 The format of the following elements depends on ANNOTATION_MASK.
1040 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1042 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1043 METHOD is one of enum composition_method.
1044 Optional COMPOSITION-COMPONENTS are characters and composition
1047 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1050 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
1052 *(buf)++ = -(len); \
1053 *(buf)++ = (mask); \
1054 *(buf)++ = (from); \
1056 coding->annotated = 1; \
1059 #define ADD_COMPOSITION_DATA(buf, from, to, method) \
1061 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1066 #define ADD_CHARSET_DATA(buf, from, to, id) \
1068 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1073 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1080 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1081 Check if a text is encoded in UTF-8. If it is, return 1, else
/* Byte classification predicates for UTF-8.  Each one tests the high
   bits of the byte value C.  */

/* C is below 0x80.  Note that a negative C also satisfies this test.  */
1084 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
/* The top two bits of C are 10 (bit pattern 10xxxxxx).  */
1085 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
/* The top three bits of C are 110 (bit pattern 110xxxxx).  */
1086 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* The top four bits of C are 1110 (bit pattern 1110xxxx).  */
1087 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* The top five bits of C are 11110 (bit pattern 11110xxx).  */
1088 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* The top six bits of C are 111110 (bit pattern 111110xx).  */
1089 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1092 detect_coding_utf_8 (coding
, detect_info
)
1093 struct coding_system
*coding
;
1094 struct coding_detection_info
*detect_info
;
1096 const unsigned char *src
= coding
->source
, *src_base
;
1097 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1098 int multibytep
= coding
->src_multibyte
;
1099 int consumed_chars
= 0;
1102 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1103 /* A coding system of this category is always ASCII compatible. */
1104 src
+= coding
->head_ascii
;
1108 int c
, c1
, c2
, c3
, c4
;
1112 if (c
< 0 || UTF_8_1_OCTET_P (c
))
1115 if (c1
< 0 || ! UTF_8_EXTRA_OCTET_P (c1
))
1117 if (UTF_8_2_OCTET_LEADING_P (c
))
1119 found
= CATEGORY_MASK_UTF_8
;
1123 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1125 if (UTF_8_3_OCTET_LEADING_P (c
))
1127 found
= CATEGORY_MASK_UTF_8
;
1131 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1133 if (UTF_8_4_OCTET_LEADING_P (c
))
1135 found
= CATEGORY_MASK_UTF_8
;
1139 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1141 if (UTF_8_5_OCTET_LEADING_P (c
))
1143 found
= CATEGORY_MASK_UTF_8
;
1148 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1152 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1154 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1157 detect_info
->found
|= found
;
1163 decode_coding_utf_8 (coding
)
1164 struct coding_system
*coding
;
1166 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1167 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1168 const unsigned char *src_base
;
1169 int *charbuf
= coding
->charbuf
;
1170 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1171 int consumed_chars
= 0, consumed_chars_base
;
1172 int multibytep
= coding
->src_multibyte
;
1173 Lisp_Object attr
, charset_list
;
1175 CODING_GET_INFO (coding
, attr
, charset_list
);
1179 int c
, c1
, c2
, c3
, c4
, c5
;
1182 consumed_chars_base
= consumed_chars
;
1184 if (charbuf
>= charbuf_end
)
1192 else if (UTF_8_1_OCTET_P(c1
))
1199 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1201 if (UTF_8_2_OCTET_LEADING_P (c1
))
1203 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1204 /* Reject overlong sequences here and below. Encoders
1205 producing them are incorrect, they can be misleading,
1206 and they mess up read/write invariance. */
1213 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1215 if (UTF_8_3_OCTET_LEADING_P (c1
))
1217 c
= (((c1
& 0xF) << 12)
1218 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1220 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1226 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1228 if (UTF_8_4_OCTET_LEADING_P (c1
))
1230 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1231 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1238 if (c5
< 0 || ! UTF_8_EXTRA_OCTET_P (c5
))
1240 if (UTF_8_5_OCTET_LEADING_P (c1
))
1242 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1243 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1245 if ((c
> MAX_CHAR
) || (c
< 0x200000))
1260 consumed_chars
= consumed_chars_base
;
1262 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1267 coding
->consumed_char
+= consumed_chars_base
;
1268 coding
->consumed
= src_base
- coding
->source
;
1269 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1274 encode_coding_utf_8 (coding
)
1275 struct coding_system
*coding
;
1277 int multibytep
= coding
->dst_multibyte
;
1278 int *charbuf
= coding
->charbuf
;
1279 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1280 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1281 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1282 int produced_chars
= 0;
1287 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1289 while (charbuf
< charbuf_end
)
1291 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1293 ASSURE_DESTINATION (safe_room
);
1295 if (CHAR_BYTE8_P (c
))
1297 c
= CHAR_TO_BYTE8 (c
);
1302 CHAR_STRING_ADVANCE (c
, pend
);
1303 for (p
= str
; p
< pend
; p
++)
1310 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1312 while (charbuf
< charbuf_end
)
1314 ASSURE_DESTINATION (safe_room
);
1316 dst
+= CHAR_STRING (c
, dst
);
1320 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1321 coding
->produced_char
+= produced_chars
;
1322 coding
->produced
= dst
- coding
->destination
;
1327 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1328 Check if a text is encoded in one of the UTF-16 based coding systems.
1329 If it is, return 1, else return 0. */
/* VAL, masked with 0xFC00, equals 0xD800, i.e. VAL is in the range
   0xD800..0xDBFF when it is a 16-bit code unit.  */
1331 #define UTF_16_HIGH_SURROGATE_P(val) \
1332 (((val) & 0xFC00) == 0xD800)
/* VAL, masked with 0xFC00, equals 0xDC00, i.e. VAL is in the range
   0xDC00..0xDFFF when it is a 16-bit code unit.  */
1334 #define UTF_16_LOW_SURROGATE_P(val) \
1335 (((val) & 0xFC00) == 0xDC00)
/* VAL is 0xFFFE, 0xFFFF, or satisfies UTF_16_LOW_SURROGATE_P.  */
1337 #define UTF_16_INVALID_P(val) \
1338 (((val) == 0xFFFE) \
1339 || ((val) == 0xFFFF) \
1340 || UTF_16_LOW_SURROGATE_P (val))
/* Detector for the UTF-16 family of coding categories.

   Marks CATEGORY_MASK_UTF_16 as checked in DETECT_INFO, then updates
   DETECT_INFO->found and DETECT_INFO->rejected from two byte values C1
   and C2 (they are read on lines not visible in this excerpt;
   presumably they are the first two source bytes -- confirm):

     C1 C2 == FF FE  -> UTF_16_LE and UTF_16_AUTO found;
                        UTF_16_BE and both *_NOSIG categories rejected.
     C1 C2 == FE FF  -> UTF_16_BE and UTF_16_AUTO found;
                        UTF_16_LE and both *_NOSIG categories rejected.
     otherwise, when both values are non-negative, a comparison of two
     counters built from up to 256 further byte pairs selects
     UTF_16_BE_NOSIG or UTF_16_LE_NOSIG, and UTF_16_BE and UTF_16_LE
     are rejected.  */
1344 detect_coding_utf_16 (coding
, detect_info
)
1345 struct coding_system
*coding
;
1346 struct coding_detection_info
*detect_info
;
1348 const unsigned char *src
= coding
->source
, *src_base
= src
;
1349 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1350 int multibytep
= coding
->src_multibyte
;
1351 int consumed_chars
= 0;
1354 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
/* On the last block, an odd coding->src_chars leads to rejecting
   the whole UTF-16 category mask.  */
1355 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1356 && (coding
->src_chars
& 1))
1358 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
/* Byte pair FF FE.  */
1364 if ((c1
== 0xFF) && (c2
== 0xFE))
1366 detect_info
->found
|= (CATEGORY_MASK_UTF_16_LE
1367 | CATEGORY_MASK_UTF_16_AUTO
);
1368 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_BE
1369 | CATEGORY_MASK_UTF_16_BE_NOSIG
1370 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
/* Byte pair FE FF.  */
1372 else if ((c1
== 0xFE) && (c2
== 0xFF))
1374 detect_info
->found
|= (CATEGORY_MASK_UTF_16_BE
1375 | CATEGORY_MASK_UTF_16_AUTO
);
1376 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_LE
1377 | CATEGORY_MASK_UTF_16_BE_NOSIG
1378 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
/* Neither pair matched: fall back to the counter heuristic.  */
1380 else if (c1
>= 0 && c2
>= 0)
1382 unsigned char b1
[256], b2
[256];
1383 int b1_variants
= 1, b2_variants
= 1;
1386 bzero (b1
, 256), bzero (b2
, 256);
/* Examine at most 256 further pairs, stopping at the end of the source.  */
1388 for (n
= 0; n
< 256 && src
< src_end
; n
++)
1393 if (c1
< 0 || c2
< 0)
/* NOTE(review): the post-increment below applies to the index (c1, c2),
   not to the table element, so b1[] and b2[] are only read here.  In
   the lines visible in this excerpt nothing stores into them after the
   bzero above, so both tests would always be true and both counters
   would grow by one per pair instead of counting distinct byte values.
   `b1[c1]++' / `b2[c2]++' looks like the intent -- confirm against the
   complete source before changing.  */
1395 if (! b1
[c1
++]) b1_variants
++;
1396 if (! b2
[c2
++]) b2_variants
++;
/* A smaller b1 counter selects BE_NOSIG; the LE_NOSIG assignment that
   follows is presumably the alternative branch (the line between the
   two is not visible in this excerpt).  */
1398 if (b1_variants
< b2_variants
)
1399 detect_info
->found
|= CATEGORY_MASK_UTF_16_BE_NOSIG
;
1401 detect_info
->found
|= CATEGORY_MASK_UTF_16_LE_NOSIG
;
1402 detect_info
->rejected
1403 |= (CATEGORY_MASK_UTF_16_BE
| CATEGORY_MASK_UTF_16_LE
);
1410 decode_coding_utf_16 (coding
)
1411 struct coding_system
*coding
;
1413 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1414 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1415 const unsigned char *src_base
;
1416 int *charbuf
= coding
->charbuf
;
1417 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1418 int consumed_chars
= 0, consumed_chars_base
;
1419 int multibytep
= coding
->src_multibyte
;
1420 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1421 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1422 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1423 Lisp_Object attr
, charset_list
;
1425 CODING_GET_INFO (coding
, attr
, charset_list
);
1427 if (bom
== utf_16_with_bom
)
1436 if (endian
== utf_16_big_endian
1437 ? c
!= 0xFEFF : c
!= 0xFFFE)
1439 /* The first two bytes are not BOM. Treat them as bytes
1440 for a normal character. */
1444 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1446 else if (bom
== utf_16_detect_bom
)
1448 /* We have already tried to detect BOM and failed in
1450 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1458 consumed_chars_base
= consumed_chars
;
1460 if (charbuf
+ 2 >= charbuf_end
)
1472 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
1476 c
= (endian
== utf_16_big_endian
1477 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1480 if (! UTF_16_LOW_SURROGATE_P (c
))
1482 if (endian
== utf_16_big_endian
)
1483 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1485 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1489 if (UTF_16_HIGH_SURROGATE_P (c
))
1490 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1496 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1497 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1498 *charbuf
++ = 0x10000 + c
;
1503 if (UTF_16_HIGH_SURROGATE_P (c
))
1504 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1511 coding
->consumed_char
+= consumed_chars_base
;
1512 coding
->consumed
= src_base
- coding
->source
;
1513 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1517 encode_coding_utf_16 (coding
)
1518 struct coding_system
*coding
;
1520 int multibytep
= coding
->dst_multibyte
;
1521 int *charbuf
= coding
->charbuf
;
1522 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1523 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1524 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1526 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1527 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1528 int produced_chars
= 0;
1529 Lisp_Object attrs
, charset_list
;
1532 CODING_GET_INFO (coding
, attrs
, charset_list
);
1534 if (bom
!= utf_16_without_bom
)
1536 ASSURE_DESTINATION (safe_room
);
1538 EMIT_TWO_BYTES (0xFE, 0xFF);
1540 EMIT_TWO_BYTES (0xFF, 0xFE);
1541 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1544 while (charbuf
< charbuf_end
)
1546 ASSURE_DESTINATION (safe_room
);
1548 if (c
>= MAX_UNICODE_CHAR
)
1549 c
= coding
->default_char
;
1554 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1556 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1563 c1
= (c
>> 10) + 0xD800;
1564 c2
= (c
& 0x3FF) + 0xDC00;
1566 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1568 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1571 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1572 coding
->produced
= dst
- coding
->destination
;
1573 coding
->produced_char
+= produced_chars
;
1578 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1580 /* Emacs' internal format for representation of multiple character
1581 sets is a kind of multi-byte encoding, i.e. characters are
1582 represented by variable-length sequences of one-byte codes.
1584 ASCII characters and control characters (e.g. `tab', `newline') are
1585 represented by one-byte sequences which are their ASCII codes, in
1586 the range 0x00 through 0x7F.
1588 8-bit characters of the range 0x80..0x9F are represented by
1589 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1592 8-bit characters of the range 0xA0..0xFF are represented by
1593 one-byte sequences which are their 8-bit code.
1595 The other characters are represented by a sequence of `base
1596 leading-code', optional `extended leading-code', and one or two
1597 `position-code's. The length of the sequence is determined by the
1598 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1599 whereas extended leading-code and position-code take the range 0xA0
1600 through 0xFF. See `charset.h' for more details about leading-code
1603 --- CODE RANGE of Emacs' internal format ---
1607 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1608 eight-bit-graphic 0xA0..0xFF
1609 ELSE 0x81..0x9D + [0xA0..0xFF]+
1610 ---------------------------------------------
1612 As this is the internal character representation, the format is
1613 usually not used externally (i.e. in a file or in a data sent to a
1614 process). But, it is possible to have a text externally in this
1615 format (i.e. by encoding by the coding system `emacs-mule').
1617 In that case, a sequence of one-byte codes has a slightly different
1620 At first, all characters in eight-bit-control are represented by
1621 one-byte sequences which are their 8-bit code.
1623 Next, character composition data are represented by the byte
1624 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1626 METHOD is 0xF2 plus one of the composition methods (enum
1627 composition_method),
1629 BYTES is 0xA0 plus a byte length of this composition data,
1631 CHARS is 0xA0 plus the number of characters composed by this
1634 COMPONENTs are characters of multibyte form or composition
1635 rules encoded by two bytes of ASCII codes.
1637 In addition, for backward compatibility, the following formats are
1638 also recognized as composition data on decoding.
1641 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1644 MSEQ is a multibyte form but in this special format:
1645 ASCII: 0xA0 ASCII_CODE+0x80,
1646 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1647 RULE is a one byte code of the range 0xA0..0xF0 that
1648 represents a composition rule.
1651 char emacs_mule_bytes
[256];
1654 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1655 struct coding_system
*coding
;
1656 const unsigned char *src
;
1657 int *nbytes
, *nchars
, *id
;
1659 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1660 const unsigned char *src_base
= src
;
1661 int multibytep
= coding
->src_multibyte
;
1662 struct charset
*charset
;
1665 int consumed_chars
= 0;
1671 charset
= emacs_mule_charset
[0];
1675 switch (emacs_mule_bytes
[c
])
1678 if (! (charset
= emacs_mule_charset
[c
]))
1687 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1688 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1691 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1700 if (! (charset
= emacs_mule_charset
[c
]))
1705 code
= (c
& 0x7F) << 8;
1715 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1720 code
= (c
& 0x7F) << 8;
1729 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1730 ? charset_ascii
: charset_eight_bit
);
1736 c
= DECODE_CHAR (charset
, code
);
1740 *nbytes
= src
- src_base
;
1741 *nchars
= consumed_chars
;
1754 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1755 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1759 detect_coding_emacs_mule (coding
, detect_info
)
1760 struct coding_system
*coding
;
1761 struct coding_detection_info
*detect_info
;
1763 const unsigned char *src
= coding
->source
, *src_base
;
1764 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1765 int multibytep
= coding
->src_multibyte
;
1766 int consumed_chars
= 0;
1770 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1771 /* A coding system of this category is always ASCII compatible. */
1772 src
+= coding
->head_ascii
;
1782 /* Perhaps the start of a composite character. We simply skip
1783 it because analyzing it is too heavy for detecting. But,
1784 at least, we check that the composite character
1785 consists of more than 4 bytes. */
1786 const unsigned char *src_base
;
1796 if (src
- src_base
<= 4)
1798 found
= CATEGORY_MASK_EMACS_MULE
;
1806 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1811 int more_bytes
= emacs_mule_bytes
[*src_base
] - 1;
1813 while (more_bytes
> 0)
1818 src
--; /* Unread the last byte. */
1823 if (more_bytes
!= 0)
1825 found
= CATEGORY_MASK_EMACS_MULE
;
1828 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1832 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1834 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1837 detect_info
->found
|= found
;
1842 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1844 /* Decode a character represented as a component of composition
1845 sequence of Emacs 20/21 style at SRC. Set C to that character and
1846 update SRC to the head of next character (or an encoded composition
1847 rule). If SRC doesn't point to a composition component, set C to -1.
1848 If SRC points to an invalid byte sequence, global exit by a return
1851 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1855 int nbytes, nchars; \
1857 if (src == src_end) \
1859 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1864 goto invalid_code; \
1868 consumed_chars += nchars; \
1873 /* Decode a composition rule represented as a component of composition
1874 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1875 and increment BUF. If SRC points to an invalid byte sequence, set C
1878 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1880 int c, gref, nref; \
1882 if (src >= src_end) \
1883 goto invalid_code; \
1884 ONE_MORE_BYTE_NO_CHECK (c); \
1886 if (c < 0 || c >= 81) \
1887 goto invalid_code; \
1889 gref = c / 9, nref = c % 9; \
1890 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1894 /* Decode a composition rule represented as a component of composition
1895 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1896 and increment BUF. If SRC points to an invalid byte sequence, set C
1899 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1903 if (src + 1>= src_end) \
1904 goto invalid_code; \
1905 ONE_MORE_BYTE_NO_CHECK (gref); \
1907 ONE_MORE_BYTE_NO_CHECK (nref); \
1909 if (gref < 0 || gref >= 81 \
1910 || nref < 0 || nref >= 81) \
1911 goto invalid_code; \
1912 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
/* Decode the header of an Emacs 21 style composition whose METHOD
   byte is C, and emit a composition annotation into CHARBUF.

   The method is C - 0xF2.  Two more bytes are read with ONE_MORE_BYTE:
   the first gives NBYTES (byte - 0xA0), the second NCHARS
   (byte - 0xA0).  A composition annotation covering FROM..TO, with
   TO = FROM + NCHARS, is added with ADD_COMPOSITION_DATA.  For any
   method other than COMPOSITION_RELATIVE, components are then decoded
   while consumed_chars is below consumed_chars_base + NBYTES:
   odd-numbered components are rules unless the method is
   COMPOSITION_WITH_ALTCHARS, the others are characters.  Finally the
   first element written at the old CHARBUF position is decreased by
   I, the component count.

   The expansion site must provide the labels and variables used here
   (invalid_code, src, charbuf, consumed_chars, consumed_chars_base,
   char_offset, coding).

   NOTE(review): FROM is computed from coding->produced, whereas
   DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION and
   DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION use coding->produced_char.
   `produced' is assigned a byte distance elsewhere in this file
   (dst - coding->destination), so this looks like a byte/character
   mix-up -- confirm against the complete source.  */
1916 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1918 /* Emacs 21 style format. The first three bytes at SRC are \
1919 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1920 the byte length of this composition information, CHARS is the \
1921 number of characters composed by this composition. */ \
1922 enum composition_method method = c - 0xF2; \
1923 int *charbuf_base = charbuf; \
1925 int consumed_chars_limit; \
1926 int nbytes, nchars; \
1928 ONE_MORE_BYTE (c); \
1930 goto invalid_code; \
1931 nbytes = c - 0xA0; \
1933 goto invalid_code; \
1934 ONE_MORE_BYTE (c); \
1936 goto invalid_code; \
1937 nchars = c - 0xA0; \
1938 from = coding->produced + char_offset; \
1939 to = from + nchars; \
1940 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1941 consumed_chars_limit = consumed_chars_base + nbytes; \
1942 if (method != COMPOSITION_RELATIVE) \
1945 while (consumed_chars < consumed_chars_limit) \
1947 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1948 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1950 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1953 if (consumed_chars < consumed_chars_limit) \
1954 goto invalid_code; \
1955 charbuf_base[0] -= i; \
1960 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1962 /* Emacs 20 style format for relative composition. */ \
1963 /* Store multibyte form of characters to be composed. */ \
1964 enum composition_method method = COMPOSITION_RELATIVE; \
1965 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1966 int *buf = components; \
1971 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1972 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1973 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1975 goto invalid_code; \
1976 from = coding->produced_char + char_offset; \
1978 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1979 for (j = 0; j < i; j++) \
1980 *charbuf++ = components[j]; \
/* Decode an Emacs 20 style rule-base composition (method
   COMPOSITION_WITH_RULE).

   Components are first collected into the local array COMPONENTS
   through the cursor BUF: one character, then up to
   MAX_COMPOSITION_COMPONENTS (rule, character) pairs.  The sequence is
   rejected via invalid_code when I < 1 or when an even number of
   elements was collected.  A composition annotation is then added and
   the collected elements are copied to CHARBUF: first all of
   components[0..I-1], then every second element starting at index 0.

   The expansion site must provide invalid_code, no_more_source,
   charbuf, charbuf_end, char_offset and coding.

   NOTE(review), all to be confirmed against the complete source:
   - The room check jumps to no_more_source when
     charbuf + i + (i / 2) + 1 is *below* charbuf_end, i.e. when there
     appears to be enough room; the comparison looks inverted.
   - ADD_COMPOSITION_DATA is given BUF (the cursor into the local
     COMPONENTS array), while DECODE_EMACS_MULE_21_COMPOSITION and
     DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION pass CHARBUF.  As written
     the annotation header is stored after the collected components
     rather than in the output buffer.
   - COMPONENTS holds MAX_COMPOSITION_COMPONENTS * 2 - 1 elements, but
     the visible loop could store 1 + 2 * MAX_COMPOSITION_COMPONENTS
     elements unless it is left early on lines not visible here.  */
1984 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1986 /* Emacs 20 style format for rule-base composition. */ \
1987 /* Store multibyte form of characters to be composed. */ \
1988 enum composition_method method = COMPOSITION_WITH_RULE; \
1989 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1990 int *buf = components; \
1994 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1995 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1997 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1998 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2000 if (i < 1 || (buf - components) % 2 == 0) \
2001 goto invalid_code; \
2002 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
2003 goto no_more_source; \
2004 from = coding->produced_char + char_offset; \
2006 ADD_COMPOSITION_DATA (buf, from, to, method); \
2007 for (j = 0; j < i; j++) \
2008 *charbuf++ = components[j]; \
2009 for (j = 0; j < i; j += 2) \
2010 *charbuf++ = components[j]; \
2015 decode_coding_emacs_mule (coding
)
2016 struct coding_system
*coding
;
2018 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2019 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2020 const unsigned char *src_base
;
2021 int *charbuf
= coding
->charbuf
;
2022 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
2023 int consumed_chars
= 0, consumed_chars_base
;
2024 int multibytep
= coding
->src_multibyte
;
2025 Lisp_Object attrs
, charset_list
;
2026 int char_offset
= coding
->produced_char
;
2027 int last_offset
= char_offset
;
2028 int last_id
= charset_ascii
;
2030 CODING_GET_INFO (coding
, attrs
, charset_list
);
2037 consumed_chars_base
= consumed_chars
;
2039 if (charbuf
>= charbuf_end
)
2058 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2059 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2060 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2062 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2064 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2068 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2074 consumed_chars
= consumed_chars_base
;
2075 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2084 if (last_id
!= charset_ascii
)
2085 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2087 last_offset
= char_offset
;
2091 consumed_chars
+= nchars
;
2098 consumed_chars
= consumed_chars_base
;
2100 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
2106 if (last_id
!= charset_ascii
)
2107 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2108 coding
->consumed_char
+= consumed_chars_base
;
2109 coding
->consumed
= src_base
- coding
->source
;
2110 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
2114 #define EMACS_MULE_LEADING_CODES(id, codes) \
2117 codes[0] = id, codes[1] = 0; \
2118 else if (id < 0xE0) \
2119 codes[0] = 0x9A, codes[1] = id; \
2120 else if (id < 0xF0) \
2121 codes[0] = 0x9B, codes[1] = id; \
2122 else if (id < 0xF5) \
2123 codes[0] = 0x9C, codes[1] = id; \
2125 codes[0] = 0x9D, codes[1] = id; \
2130 encode_coding_emacs_mule (coding
)
2131 struct coding_system
*coding
;
2133 int multibytep
= coding
->dst_multibyte
;
2134 int *charbuf
= coding
->charbuf
;
2135 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2136 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2137 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2139 int produced_chars
= 0;
2140 Lisp_Object attrs
, charset_list
;
2142 int preferred_charset_id
= -1;
2144 CODING_GET_INFO (coding
, attrs
, charset_list
);
2145 if (! EQ (charset_list
, Vemacs_mule_charset_list
))
2147 CODING_ATTR_CHARSET_LIST (attrs
)
2148 = charset_list
= Vemacs_mule_charset_list
;
2151 while (charbuf
< charbuf_end
)
2153 ASSURE_DESTINATION (safe_room
);
2158 /* Handle an annotation. */
2161 case CODING_ANNOTATE_COMPOSITION_MASK
:
2162 /* Not yet implemented. */
2164 case CODING_ANNOTATE_CHARSET_MASK
:
2165 preferred_charset_id
= charbuf
[3];
2166 if (preferred_charset_id
>= 0
2167 && NILP (Fmemq (make_number (preferred_charset_id
),
2169 preferred_charset_id
= -1;
2178 if (ASCII_CHAR_P (c
))
2179 EMIT_ONE_ASCII_BYTE (c
);
2180 else if (CHAR_BYTE8_P (c
))
2182 c
= CHAR_TO_BYTE8 (c
);
2187 struct charset
*charset
;
2191 unsigned char leading_codes
[2];
2193 if (preferred_charset_id
>= 0)
2195 charset
= CHARSET_FROM_ID (preferred_charset_id
);
2196 if (! CHAR_CHARSET_P (c
, charset
))
2197 charset
= char_charset (c
, charset_list
, NULL
);
2200 charset
= char_charset (c
, charset_list
, &code
);
2203 c
= coding
->default_char
;
2204 if (ASCII_CHAR_P (c
))
2206 EMIT_ONE_ASCII_BYTE (c
);
2209 charset
= char_charset (c
, charset_list
, &code
);
2211 dimension
= CHARSET_DIMENSION (charset
);
2212 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2213 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2214 EMIT_ONE_BYTE (leading_codes
[0]);
2215 if (leading_codes
[1])
2216 EMIT_ONE_BYTE (leading_codes
[1]);
2218 EMIT_ONE_BYTE (code
| 0x80);
2222 EMIT_ONE_BYTE (code
>> 8);
2223 EMIT_ONE_BYTE (code
& 0xFF);
2227 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
2228 coding
->produced_char
+= produced_chars
;
2229 coding
->produced
= dst
- coding
->destination
;
2234 /*** 7. ISO2022 handlers ***/
2236 /* The following note describes the coding system ISO2022 briefly.
2237 Since the intention of this note is to help understand the
2238 functions in this file, some parts are NOT ACCURATE or are OVERLY
2239 SIMPLIFIED. For thorough understanding, please refer to the
2240 original document of ISO2022. This is equivalent to the standard
2241 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2243 ISO2022 provides many mechanisms to encode several character sets
2244 in 7-bit and 8-bit environments. For 7-bit environments, all text
2245 is encoded using bytes less than 128. This may make the encoded
2246 text a little bit longer, but the text passes more easily through
2247 several types of gateway, some of which strip off the MSB (Most
2250 There are two kinds of character sets: control character sets and
2251 graphic character sets. The former contain control characters such
2252 as `newline' and `escape' to provide control functions (control
2253 functions are also provided by escape sequences). The latter
2254 contain graphic characters such as 'A' and '-'. Emacs recognizes
2255 two control character sets and many graphic character sets.
2257 Graphic character sets are classified into one of the following
2258 four classes, according to the number of bytes (DIMENSION) and
2259 number of characters in one dimension (CHARS) of the set:
2260 - DIMENSION1_CHARS94
2261 - DIMENSION1_CHARS96
2262 - DIMENSION2_CHARS94
2263 - DIMENSION2_CHARS96
2265 In addition, each character set is assigned an identification tag,
2266 unique for each set, called the "final character" (denoted as <F>
2267 hereafter). The <F> of each character set is decided by ECMA(*)
2268 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2269 (0x30..0x3F are for private use only).
2271 Note (*): ECMA = European Computer Manufacturers Association
2273 Here are examples of graphic character sets [NAME(<F>)]:
2274 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2275 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2276 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2277 o DIMENSION2_CHARS96 -- none for the moment
2279 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2280 C0 [0x00..0x1F] -- control character plane 0
2281 GL [0x20..0x7F] -- graphic character plane 0
2282 C1 [0x80..0x9F] -- control character plane 1
2283 GR [0xA0..0xFF] -- graphic character plane 1
2285 A control character set is directly designated and invoked to C0 or
2286 C1 by an escape sequence. The most common case is that:
2287 - ISO646's control character set is designated/invoked to C0, and
2288 - ISO6429's control character set is designated/invoked to C1,
2289 and usually these designations/invocations are omitted in encoded
2290 text. In a 7-bit environment, only C0 can be used, and a control
2291 character for C1 is encoded by an appropriate escape sequence to
2292 fit into the environment. All control characters for C1 are
2293 defined to have corresponding escape sequences.
2295 A graphic character set is at first designated to one of four
2296 graphic registers (G0 through G3), then these graphic registers are
2297 invoked to GL or GR. These designations and invocations can be
2298 done independently. The most common case is that G0 is invoked to
2299 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2300 these invocations and designations are omitted in encoded text.
2301 In a 7-bit environment, only GL can be used.
2303 When a graphic character set of CHARS94 is invoked to GL, codes
2304 0x20 and 0x7F of the GL area work as control characters SPACE and
2305 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2308 There are two ways of invocation: locking-shift and single-shift.
2309 With locking-shift, the invocation lasts until the next different
2310 invocation, whereas with single-shift, the invocation affects the
2311 following character only and doesn't affect the locking-shift
2312 state. Invocations are done by the following control characters or
2315 ----------------------------------------------------------------------
2316 abbrev function cntrl escape seq description
2317 ----------------------------------------------------------------------
2318 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2319 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2320 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2321 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2322 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2323 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2324 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2325 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2326 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2327 ----------------------------------------------------------------------
2328 (*) These are not used by any known coding system.
2330 Control characters for these functions are defined by macros
2331 ISO_CODE_XXX in `coding.h'.
2333 Designations are done by the following escape sequences:
2334 ----------------------------------------------------------------------
2335 escape sequence description
2336 ----------------------------------------------------------------------
2337 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2338 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2339 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2340 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2341 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2342 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2343 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2344 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2345 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2346 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2347 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2348 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2349 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2350 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2351 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2352 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2353 ----------------------------------------------------------------------
2355 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2356 of dimension 1, chars 94, and final character <F>, etc...
2358 Note (*): Although these designations are not allowed in ISO2022,
2359 Emacs accepts them on decoding, and produces them on encoding
2360 CHARS96 character sets in a coding system which is characterized as
2361 7-bit environment, non-locking-shift, and non-single-shift.
2363 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2364 '(' must be omitted. We refer to this as "short-form" hereafter.
2366 Now you may notice that there are a lot of ways of encoding the
2367 same multilingual text in ISO2022. Actually, there exist many
2368 coding systems such as Compound Text (used in X11's inter client
2369 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2370 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2371 localized platforms), and all of these are variants of ISO2022.
2373 In addition to the above, Emacs handles two more kinds of escape
2374 sequences: ISO6429's direction specification and Emacs' private
2375 sequence for specifying character composition.
2377 ISO6429's direction specification takes the following form:
2378 o CSI ']' -- end of the current direction
2379 o CSI '0' ']' -- end of the current direction
2380 o CSI '1' ']' -- start of left-to-right text
2381 o CSI '2' ']' -- start of right-to-left text
2382 The control character CSI (0x9B: control sequence introducer) is
2383 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2385 Character composition specification takes the following form:
2386 o ESC '0' -- start relative composition
2387 o ESC '1' -- end composition
2388 o ESC '2' -- start rule-base composition (*)
2389 o ESC '3' -- start relative composition with alternate chars (**)
2390 o ESC '4' -- start rule-base composition with alternate chars (**)
2391 Since these are not standard escape sequences of any ISO standard,
2392 the use of them with these meanings is restricted to Emacs only.
2394 (*) This form is used only in Emacs 20.7 and older versions,
2395 but newer versions can safely decode it.
2396 (**) This form is used only in Emacs 21.1 and newer versions,
2397 and older versions can't decode it.
2399 Here's a list of example usages of these composition escape
2400 sequences (categorized by `enum composition_method').
2402 COMPOSITION_RELATIVE:
2403 ESC 0 CHAR [ CHAR ] ESC 1
2404 COMPOSITION_WITH_RULE:
2405 ESC 2 CHAR [ RULE CHAR ] ESC 1
2406 COMPOSITION_WITH_ALTCHARS:
2407 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2408 COMPOSITION_WITH_RULE_ALTCHARS:
2409 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table with one entry per byte value (256 entries).  The ISO-2022
   decoder dispatches on iso_code_class[c1] to classify a source
   byte.  */
2411 enum iso_code_class_type iso_code_class
[256];
/* Return nonzero if the charset whose ID is ID may be used with CODING.

   CODING->safe_charsets is a byte table indexed by charset ID, and
   CODING->max_charset_id is the largest valid index into it.  An
   entry whose high bit is clear marks the charset as safe; an entry
   whose high bit is set marks it as unsafe.

   The entry is tested with a bit mask rather than `>= 0' because the
   table is accessed through a plain `char *' and the signedness of
   `char' is implementation-defined: where `char' is unsigned, a
   `>= 0' test is always true and every charset would look safe.

   ID is evaluated more than once, so it must not have side effects.  */
#define SAFE_CHARSET_P(coding, id)				\
  ((id) <= (coding)->max_charset_id				\
   && ((coding)->safe_charsets[(id)] & 0x80) == 0)
/* Nonzero when the entry of `coding_categories' indexed by CATEGORY
   has a non-negative initial designation for graphic register 1.  */
#define SHIFT_OUT_OK(category)						\
  (0 <= CODING_ISO_INITIAL (&coding_categories[(category)], 1))
2422 setup_iso_safe_charsets (attrs
)
2425 Lisp_Object charset_list
, safe_charsets
;
2426 Lisp_Object request
;
2427 Lisp_Object reg_usage
;
2430 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2433 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2434 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2435 && ! EQ (charset_list
, Viso_2022_charset_list
))
2437 CODING_ATTR_CHARSET_LIST (attrs
)
2438 = charset_list
= Viso_2022_charset_list
;
2439 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2442 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2446 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2448 int id
= XINT (XCAR (tail
));
2449 if (max_charset_id
< id
)
2450 max_charset_id
= id
;
2453 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2455 request
= AREF (attrs
, coding_attr_iso_request
);
2456 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2457 reg94
= XINT (XCAR (reg_usage
));
2458 reg96
= XINT (XCDR (reg_usage
));
2460 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2464 struct charset
*charset
;
2467 charset
= CHARSET_FROM_ID (XINT (id
));
2468 reg
= Fcdr (Fassq (id
, request
));
2470 SSET (safe_charsets
, XINT (id
), XINT (reg
));
2471 else if (charset
->iso_chars_96
)
2474 SSET (safe_charsets
, XINT (id
), reg96
);
2479 SSET (safe_charsets
, XINT (id
), reg94
);
2482 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2486 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2487 Check if a text is encoded in one of ISO-2022 based coding systems.
2488 If it is, return 1, else return 0. */
2491 detect_coding_iso_2022 (coding
, detect_info
)
2492 struct coding_system
*coding
;
2493 struct coding_detection_info
*detect_info
;
2495 const unsigned char *src
= coding
->source
, *src_base
= src
;
2496 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2497 int multibytep
= coding
->src_multibyte
;
2498 int single_shifting
= 0;
2501 int consumed_chars
= 0;
2506 detect_info
->checked
|= CATEGORY_MASK_ISO
;
2508 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2510 struct coding_system
*this = &(coding_categories
[i
]);
2511 Lisp_Object attrs
, val
;
2513 attrs
= CODING_ID_ATTRS (this->id
);
2514 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2515 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2516 setup_iso_safe_charsets (attrs
);
2517 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2518 this->max_charset_id
= SCHARS (val
) - 1;
2519 this->safe_charsets
= (char *) SDATA (val
);
2522 /* A coding system of this category is always ASCII compatible. */
2523 src
+= coding
->head_ascii
;
2525 while (rejected
!= CATEGORY_MASK_ISO
)
2532 if (inhibit_iso_escape_detection
)
2534 single_shifting
= 0;
2536 if (c
>= '(' && c
<= '/')
2538 /* Designation sequence for a charset of dimension 1. */
2540 if (c1
< ' ' || c1
>= 0x80
2541 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2542 /* Invalid designation sequence. Just ignore. */
2547 /* Designation sequence for a charset of dimension 2. */
2549 if (c
>= '@' && c
<= 'B')
2550 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2551 id
= iso_charset_table
[1][0][c
];
2552 else if (c
>= '(' && c
<= '/')
2555 if (c1
< ' ' || c1
>= 0x80
2556 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2557 /* Invalid designation sequence. Just ignore. */
2561 /* Invalid designation sequence. Just ignore it. */
2564 else if (c
== 'N' || c
== 'O')
2566 /* ESC <Fe> for SS2 or SS3. */
2567 single_shifting
= 1;
2568 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2571 else if (c
>= '0' && c
<= '4')
2573 /* ESC <Fp> for start/end composition. */
2574 found
|= CATEGORY_MASK_ISO
;
2579 /* Invalid escape sequence. Just ignore it. */
2583 /* We found a valid designation sequence for CHARSET. */
2584 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2585 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2587 found
|= CATEGORY_MASK_ISO_7
;
2589 rejected
|= CATEGORY_MASK_ISO_7
;
2590 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2592 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2594 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2595 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2597 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2599 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2600 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2602 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2604 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2609 /* Locking shift out/in. */
2610 if (inhibit_iso_escape_detection
)
2612 single_shifting
= 0;
2613 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2614 found
|= CATEGORY_MASK_ISO_ELSE
;
2618 /* Control sequence introducer. */
2619 single_shifting
= 0;
2620 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2621 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2622 goto check_extra_latin
;
2627 if (inhibit_iso_escape_detection
)
2629 single_shifting
= 0;
2630 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2631 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2632 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2633 found
|= CATEGORY_MASK_ISO_8_1
, single_shifting
= 1;
2634 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2635 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2636 found
|= CATEGORY_MASK_ISO_8_2
, single_shifting
= 1;
2637 if (single_shifting
)
2639 goto check_extra_latin
;
2646 single_shifting
= 0;
2651 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2652 found
|= CATEGORY_MASK_ISO_8_1
;
2653 /* Check the length of succeeding codes of the range
2654 0xA0..0xFF. If the byte length is even, we include
2655 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2656 only when we are not single shifting. */
2657 if (! single_shifting
2658 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2661 while (src
< src_end
)
2669 if (i
& 1 && src
< src_end
)
2670 rejected
|= CATEGORY_MASK_ISO_8_2
;
2672 found
|= CATEGORY_MASK_ISO_8_2
;
2677 single_shifting
= 0;
2678 if (! VECTORP (Vlatin_extra_code_table
)
2679 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2681 rejected
= CATEGORY_MASK_ISO
;
2684 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2685 & CODING_ISO_FLAG_LATIN_EXTRA
)
2686 found
|= CATEGORY_MASK_ISO_8_1
;
2688 rejected
|= CATEGORY_MASK_ISO_8_1
;
2689 rejected
|= CATEGORY_MASK_ISO_8_2
;
2692 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
2696 detect_info
->rejected
|= rejected
;
2697 detect_info
->found
|= (found
& ~rejected
);
2702 /* Set designation state into CODING. */
2703 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2707 if (final < '0' || final >= 128 \
2708 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2709 || !SAFE_CHARSET_P (coding, id)) \
2711 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2712 goto invalid_code; \
2714 prev = CODING_ISO_DESIGNATION (coding, reg); \
2715 if (id == charset_jisx0201_roman) \
2717 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2718 id = charset_ascii; \
2720 else if (id == charset_jisx0208_1978) \
2722 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2723 id = charset_jisx0208; \
2725 CODING_ISO_DESIGNATION (coding, reg) = id; \
2726 /* If there was an invalid designation to REG previously, and this \
2727 designation is ASCII to REG, we should keep this designation \
2729 if (prev == -2 && id == charset_ascii) \
2730 goto invalid_code; \
2734 #define MAYBE_FINISH_COMPOSITION() \
2737 if (composition_state == COMPOSING_NO) \
2739 /* It is assured that we have enough room for producing \
2740 characters stored in the table `components'. */ \
2741 if (charbuf + component_idx > charbuf_end) \
2742 goto no_more_source; \
2743 composition_state = COMPOSING_NO; \
2744 if (method == COMPOSITION_RELATIVE \
2745 || method == COMPOSITION_WITH_ALTCHARS) \
2747 for (i = 0; i < component_idx; i++) \
2748 *charbuf++ = components[i]; \
2749 char_offset += component_idx; \
2753 for (i = 0; i < component_idx; i += 2) \
2754 *charbuf++ = components[i]; \
2755 char_offset += (component_idx / 2) + 1; \
2760 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2761 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2762 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2763 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2764 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2767 #define DECODE_COMPOSITION_START(c1) \
2770 && composition_state == COMPOSING_COMPONENT_RULE) \
2772 component_len = component_idx; \
2773 composition_state = COMPOSING_CHAR; \
2777 const unsigned char *p; \
2779 MAYBE_FINISH_COMPOSITION (); \
2780 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2781 goto no_more_source; \
2782 for (p = src; p < src_end - 1; p++) \
2783 if (*p == ISO_CODE_ESC && p[1] == '1') \
2785 if (p == src_end - 1) \
2787 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2788 goto invalid_code; \
2789 goto no_more_source; \
2792 /* This is surely the start of a composition. */ \
2793 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2794 : c1 == '2' ? COMPOSITION_WITH_RULE \
2795 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2796 : COMPOSITION_WITH_RULE_ALTCHARS); \
2797 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2798 : COMPOSING_COMPONENT_CHAR); \
2799 component_idx = component_len = 0; \
2804 /* Handle composition end sequence ESC 1. */
2806 #define DECODE_COMPOSITION_END() \
2808 int nchars = (component_len > 0 ? component_idx - component_len \
2809 : method == COMPOSITION_RELATIVE ? component_idx \
2810 : (component_idx + 1) / 2); \
2812 int *saved_charbuf = charbuf; \
2813 int from = char_offset; \
2814 int to = from + nchars; \
2816 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
2817 if (method != COMPOSITION_RELATIVE) \
2819 if (component_len == 0) \
2820 for (i = 0; i < component_idx; i++) \
2821 *charbuf++ = components[i]; \
2823 for (i = 0; i < component_len; i++) \
2824 *charbuf++ = components[i]; \
2825 *saved_charbuf = saved_charbuf - charbuf; \
2827 if (method == COMPOSITION_WITH_RULE) \
2828 for (i = 0; i < component_idx; i += 2, char_offset++) \
2829 *charbuf++ = components[i]; \
2831 for (i = component_len; i < component_idx; i++, char_offset++) \
2832 *charbuf++ = components[i]; \
2833 coding->annotated = 1; \
2834 composition_state = COMPOSING_NO; \
2838 /* Decode a composition rule from the byte C1 (and maybe one more byte
2839 from SRC) and store one encoded composition rule in
2840 coding->cmp_data. */
2842 #define DECODE_COMPOSITION_RULE(c1) \
2845 if (c1 < 81) /* old format (before ver.21) */ \
2847 int gref = (c1) / 9; \
2848 int nref = (c1) % 9; \
2849 if (gref == 4) gref = 10; \
2850 if (nref == 4) nref = 10; \
2851 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2853 else if (c1 < 93) /* new format (after ver.21) */ \
2855 ONE_MORE_BYTE (c2); \
2856 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2863 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2866 decode_coding_iso_2022 (coding
)
2867 struct coding_system
*coding
;
2869 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2870 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2871 const unsigned char *src_base
;
2872 int *charbuf
= coding
->charbuf
;
2874 = charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2875 int consumed_chars
= 0, consumed_chars_base
;
2876 int multibytep
= coding
->src_multibyte
;
2877 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2878 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2879 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2880 struct charset
*charset
;
2882 /* For handling composition sequence. */
2883 #define COMPOSING_NO 0
2884 #define COMPOSING_CHAR 1
2885 #define COMPOSING_RULE 2
2886 #define COMPOSING_COMPONENT_CHAR 3
2887 #define COMPOSING_COMPONENT_RULE 4
2889 int composition_state
= COMPOSING_NO
;
2890 enum composition_method method
;
2891 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2894 Lisp_Object attrs
, charset_list
;
2895 int char_offset
= coding
->produced_char
;
2896 int last_offset
= char_offset
;
2897 int last_id
= charset_ascii
;
2899 CODING_GET_INFO (coding
, attrs
, charset_list
);
2900 setup_iso_safe_charsets (attrs
);
2907 consumed_chars_base
= consumed_chars
;
2909 if (charbuf
>= charbuf_end
)
2916 /* We produce at most one character. */
2917 switch (iso_code_class
[c1
])
2919 case ISO_0x20_or_0x7F
:
2920 if (composition_state
!= COMPOSING_NO
)
2922 if (composition_state
== COMPOSING_RULE
2923 || composition_state
== COMPOSING_COMPONENT_RULE
)
2925 DECODE_COMPOSITION_RULE (c1
);
2926 components
[component_idx
++] = c1
;
2927 composition_state
--;
2931 if (charset_id_0
< 0
2932 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2933 /* This is SPACE or DEL. */
2934 charset
= CHARSET_FROM_ID (charset_ascii
);
2936 charset
= CHARSET_FROM_ID (charset_id_0
);
2939 case ISO_graphic_plane_0
:
2940 if (composition_state
!= COMPOSING_NO
)
2942 if (composition_state
== COMPOSING_RULE
2943 || composition_state
== COMPOSING_COMPONENT_RULE
)
2945 DECODE_COMPOSITION_RULE (c1
);
2946 components
[component_idx
++] = c1
;
2947 composition_state
--;
2951 charset
= CHARSET_FROM_ID (charset_id_0
);
2954 case ISO_0xA0_or_0xFF
:
2955 if (charset_id_1
< 0
2956 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2957 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2959 /* This is a graphic character, we fall down ... */
2961 case ISO_graphic_plane_1
:
2962 if (charset_id_1
< 0)
2964 charset
= CHARSET_FROM_ID (charset_id_1
);
2968 MAYBE_FINISH_COMPOSITION ();
2969 charset
= CHARSET_FROM_ID (charset_ascii
);
2973 MAYBE_FINISH_COMPOSITION ();
2977 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2978 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2980 CODING_ISO_INVOCATION (coding
, 0) = 1;
2981 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2985 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2987 CODING_ISO_INVOCATION (coding
, 0) = 0;
2988 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2991 case ISO_single_shift_2_7
:
2992 case ISO_single_shift_2
:
2993 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2995 /* SS2 is handled as an escape sequence of ESC 'N' */
2997 goto label_escape_sequence
;
2999 case ISO_single_shift_3
:
3000 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
3002 /* SS3 is handled as an escape sequence of ESC 'O' */
3004 goto label_escape_sequence
;
3006 case ISO_control_sequence_introducer
:
3007 /* CSI is handled as an escape sequence of ESC '[' ... */
3009 goto label_escape_sequence
;
3013 label_escape_sequence
:
3014 /* Escape sequences handled here are invocation,
3015 designation, direction specification, and character
3016 composition specification. */
3019 case '&': /* revision of following character set */
3021 if (!(c1
>= '@' && c1
<= '~'))
3024 if (c1
!= ISO_CODE_ESC
)
3027 goto label_escape_sequence
;
3029 case '$': /* designation of 2-byte character set */
3030 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3033 if (c1
>= '@' && c1
<= 'B')
3034 { /* designation of JISX0208.1978, GB2312.1980,
3036 DECODE_DESIGNATION (0, 2, 0, c1
);
3038 else if (c1
>= 0x28 && c1
<= 0x2B)
3039 { /* designation of DIMENSION2_CHARS94 character set */
3041 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
3043 else if (c1
>= 0x2C && c1
<= 0x2F)
3044 { /* designation of DIMENSION2_CHARS96 character set */
3046 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
3050 /* We must update these variables now. */
3051 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3052 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3055 case 'n': /* invocation of locking-shift-2 */
3056 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3057 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3059 CODING_ISO_INVOCATION (coding
, 0) = 2;
3060 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3063 case 'o': /* invocation of locking-shift-3 */
3064 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3065 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3067 CODING_ISO_INVOCATION (coding
, 0) = 3;
3068 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3071 case 'N': /* invocation of single-shift-2 */
3072 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3073 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3075 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
3077 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3081 case 'O': /* invocation of single-shift-3 */
3082 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3083 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3085 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
3087 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3091 case '0': case '2': case '3': case '4': /* start composition */
3092 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3094 DECODE_COMPOSITION_START (c1
);
3097 case '1': /* end composition */
3098 if (composition_state
== COMPOSING_NO
)
3100 DECODE_COMPOSITION_END ();
3103 case '[': /* specification of direction */
3104 if (! CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
)
3106 /* For the moment, nested direction is not supported.
3107 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3108 left-to-right, and nonzero means right-to-left. */
3112 case ']': /* end of the current direction */
3113 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3115 case '0': /* end of the current direction */
3116 case '1': /* start of left-to-right direction */
3119 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3124 case '2': /* start of right-to-left direction */
3127 coding
->mode
|= CODING_MODE_DIRECTION
;
3141 /* CTEXT extended segment:
3142 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3143 We keep these bytes as is for the moment.
3144 They may be decoded by post-read-conversion. */
3148 ONE_MORE_BYTE (dim
);
3151 size
= ((M
- 128) * 128) + (L
- 128);
3152 if (charbuf
+ 8 + size
> charbuf_end
)
3154 *charbuf
++ = ISO_CODE_ESC
;
3158 *charbuf
++ = BYTE8_TO_CHAR (M
);
3159 *charbuf
++ = BYTE8_TO_CHAR (L
);
3163 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3168 /* XFree86 extension for embedding UTF-8 in CTEXT:
3169 ESC % G --UTF-8-BYTES-- ESC % @
3170 We keep these bytes as is for the moment.
3171 They may be decoded by post-read-conversion. */
3174 if (p
+ 6 > charbuf_end
)
3176 *p
++ = ISO_CODE_ESC
;
3179 while (p
< charbuf_end
)
3182 if (c1
== ISO_CODE_ESC
3183 && src
+ 1 < src_end
3187 *p
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3189 if (p
+ 3 > charbuf_end
)
3191 *p
++ = ISO_CODE_ESC
;
3202 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3204 if (c1
>= 0x28 && c1
<= 0x2B)
3205 { /* designation of DIMENSION1_CHARS94 character set */
3207 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
3209 else if (c1
>= 0x2C && c1
<= 0x2F)
3210 { /* designation of DIMENSION1_CHARS96 character set */
3212 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
3216 /* We must update these variables now. */
3217 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3218 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3223 if (charset
->id
!= charset_ascii
3224 && last_id
!= charset
->id
)
3226 if (last_id
!= charset_ascii
)
3227 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3228 last_id
= charset
->id
;
3229 last_offset
= char_offset
;
3232 /* Now we know CHARSET and 1st position code C1 of a character.
3233 Produce a decoded character while getting 2nd position code
3236 if (CHARSET_DIMENSION (charset
) > 1)
3239 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3240 /* C2 is not in a valid range. */
3242 c1
= (c1
<< 8) | (c2
& 0x7F);
3243 if (CHARSET_DIMENSION (charset
) > 2)
3246 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3247 /* C2 is not in a valid range. */
3249 c1
= (c1
<< 8) | (c2
& 0x7F);
3253 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3256 MAYBE_FINISH_COMPOSITION ();
3257 for (; src_base
< src
; src_base
++, char_offset
++)
3259 if (ASCII_BYTE_P (*src_base
))
3260 *charbuf
++ = *src_base
;
3262 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3265 else if (composition_state
== COMPOSING_NO
)
3272 components
[component_idx
++] = c
;
3273 if (method
== COMPOSITION_WITH_RULE
3274 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3275 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3276 composition_state
++;
3281 MAYBE_FINISH_COMPOSITION ();
3283 consumed_chars
= consumed_chars_base
;
3285 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3295 if (last_id
!= charset_ascii
)
3296 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3297 coding
->consumed_char
+= consumed_chars_base
;
3298 coding
->consumed
= src_base
- coding
->source
;
3299 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3303 /* ISO2022 encoding stuff. */
3306 It is not enough to say just "ISO2022" on encoding, we have to
3307 specify more details. In Emacs, each coding system of ISO2022
3308 variant has the following specifications:
3309 1. Initial designation to G0 thru G3.
3310 2. Allows short-form designation?
3311 3. ASCII should be designated to G0 before control characters?
3312 4. ASCII should be designated to G0 at end of line?
3313 5. 7-bit environment or 8-bit environment?
3314 6. Use locking-shift?
3315 7. Use Single-shift?
3316 And the following two are only for Japanese:
3317 8. Use ASCII in place of JIS0201-1976-Roman?
3318 9. Use JISX0208-1983 in place of JISX0208-1978?
3319 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3320 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3324 /* Produce codes (escape sequence) for designating CHARSET to graphic
3325 register REG at DST, and increment DST. If <final-char> of CHARSET is
3326 '@', 'A', or 'B' and the coding system CODING allows, produce
3327 designation sequence of short-form. */
3329 #define ENCODE_DESIGNATION(charset, reg, coding) \
3331 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3332 char *intermediate_char_94 = "()*+"; \
3333 char *intermediate_char_96 = ",-./"; \
3334 int revision = -1; \
3337 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3338 revision = CHARSET_ISO_REVISION (charset); \
3340 if (revision >= 0) \
3342 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3343 EMIT_ONE_BYTE ('@' + revision); \
3345 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3346 if (CHARSET_DIMENSION (charset) == 1) \
3348 if (! CHARSET_ISO_CHARS_96 (charset)) \
3349 c = intermediate_char_94[reg]; \
3351 c = intermediate_char_96[reg]; \
3352 EMIT_ONE_ASCII_BYTE (c); \
3356 EMIT_ONE_ASCII_BYTE ('$'); \
3357 if (! CHARSET_ISO_CHARS_96 (charset)) \
3359 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3361 || final_char < '@' || final_char > 'B') \
3362 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3365 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3367 EMIT_ONE_ASCII_BYTE (final_char); \
3369 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
/* Produce the code for the ISO2022 single-shift-2 function: the
   escape sequence ESC 'N' when CODING has CODING_ISO_FLAG_SEVEN_BITS
   set, otherwise the single control byte ISO_CODE_SS2.  CODING is
   then marked as being in single-shift state.

   Relies on the variable `coding' and on whatever the EMIT_* macros
   require being in scope at the point of use.  */
#define ENCODE_SINGLE_SHIFT_2						\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');			\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_SS2);					\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
/* Produce the code for the ISO2022 single-shift-3 function: the
   escape sequence ESC 'O' when CODING has CODING_ISO_FLAG_SEVEN_BITS
   set, otherwise the single control byte ISO_CODE_SS3.  CODING is
   then marked as being in single-shift state.

   Relies on the variable `coding' and on whatever the EMIT_* macros
   require being in scope at the point of use.  */
#define ENCODE_SINGLE_SHIFT_3						\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');			\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_SS3);					\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
3397 /* The following four macros produce codes (control character or
3398 escape sequence) for ISO2022 locking-shift functions (shift-in,
3399 shift-out, locking-shift-2, and locking-shift-3). */
/* Produce the shift-in control character ISO_CODE_SI and record in
   CODING that graphic register 0 is now invoked to graphic plane 0.
   Relies on `coding' and on whatever EMIT_ONE_ASCII_BYTE requires
   being in scope at the point of use.  */
#define ENCODE_SHIFT_IN					\
  do {							\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);			\
    CODING_ISO_INVOCATION (coding, 0) = 0;		\
  } while (0)
/* Produce the shift-out control character ISO_CODE_SO and record in
   CODING that graphic register 1 is now invoked to graphic plane 0.
   Relies on `coding' and on whatever EMIT_ONE_ASCII_BYTE requires
   being in scope at the point of use.  */
#define ENCODE_SHIFT_OUT				\
  do {							\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);			\
    CODING_ISO_INVOCATION (coding, 0) = 1;		\
  } while (0)
/* Produce the locking-shift-2 escape sequence ESC 'n' and record in
   CODING that graphic register 2 is now invoked to graphic plane 0.
   Relies on `coding' and on whatever EMIT_TWO_ASCII_BYTES requires
   being in scope at the point of use.  */
#define ENCODE_LOCKING_SHIFT_2				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');		\
    CODING_ISO_INVOCATION (coding, 0) = 2;		\
  } while (0)
/* Produce the locking-shift-3 escape sequence ESC 'o' and record in
   CODING that graphic register 3 is now invoked to graphic plane 0.

   The final byte must be 'o': ESC 'n' is locking-shift-2, and the
   decoder in this file invokes register 3 only on ESC 'o'.  Emitting
   'n' here would make a reader invoke G2 while the encoder believes
   G3 is invoked.

   Relies on `coding' and on whatever EMIT_TWO_ASCII_BYTES requires
   being in scope at the point of use.  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
3429 /* Produce codes for a DIMENSION1 character whose character set is
3430 CHARSET and whose position-code is C1. Designation and invocation
3431 sequences are also produced in advance if necessary. */
3433 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3435 int id = CHARSET_ID (charset); \
3437 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3438 && id == charset_ascii) \
3440 id = charset_jisx0201_roman; \
3441 charset = CHARSET_FROM_ID (id); \
3444 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3446 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3447 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3449 EMIT_ONE_BYTE (c1 | 0x80); \
3450 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3453 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3455 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3458 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3460 EMIT_ONE_BYTE (c1 | 0x80); \
3464 /* Since CHARSET is not yet invoked to any graphic planes, we \
3465 must invoke it, or, at first, designate it to some graphic \
3466 register. Then repeat the loop to actually produce the \
3468 dst = encode_invocation_designation (charset, coding, dst, \
3473 /* Produce codes for a DIMENSION2 character whose character set is
3474 CHARSET and whose position-codes are C1 and C2. Designation and
3475 invocation codes are also produced in advance if necessary. */
3477 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3479 int id = CHARSET_ID (charset); \
3481 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3482 && id == charset_jisx0208) \
3484 id = charset_jisx0208_1978; \
3485 charset = CHARSET_FROM_ID (id); \
3488 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3490 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3491 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3493 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3494 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3497 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3499 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3502 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3504 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3508 /* Since CHARSET is not yet invoked to any graphic planes, we \
3509 must invoke it, or, at first, designate it to some graphic \
3510 register. Then repeat the loop to actually produce the \
3512 dst = encode_invocation_designation (charset, coding, dst, \
/* Produce codes for character C of CHARSET.  The code point is
   obtained with ENCODE_CHAR; it is passed whole to
   ENCODE_ISO_CHARACTER_DIMENSION1 when CHARSET has dimension 1, and
   otherwise split into its high part (code >> 8) and low byte
   (code & 0xFF) for ENCODE_ISO_CHARACTER_DIMENSION2.

   CHARSET is evaluated more than once.  */
#define ENCODE_ISO_CHARACTER(charset, c)				\
  do {									\
    int code = ENCODE_CHAR ((charset), (c));				\
									\
    if (CHARSET_DIMENSION (charset) == 1)				\
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);		\
    else								\
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8,		\
				       code & 0xFF);			\
  } while (0)
3528 /* Produce designation and invocation codes at a place pointed by DST
3529 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3533 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3534 struct charset
*charset
;
3535 struct coding_system
*coding
;
3539 int multibytep
= coding
->dst_multibyte
;
3540 int produced_chars
= *p_nchars
;
3541 int reg
; /* graphic register number */
3542 int id
= CHARSET_ID (charset
);
3544 /* At first, check designations. */
3545 for (reg
= 0; reg
< 4; reg
++)
3546 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3551 /* CHARSET is not yet designated to any graphic registers. */
3552 /* At first check the requested designation. */
3553 reg
= CODING_ISO_REQUEST (coding
, id
);
3555 /* Since CHARSET requests no special designation, designate it
3556 to graphic register 0. */
3559 ENCODE_DESIGNATION (charset
, reg
, coding
);
3562 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3563 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3565 /* Since the graphic register REG is not invoked to any graphic
3566 planes, invoke it to graphic plane 0. */
3569 case 0: /* graphic register 0 */
3573 case 1: /* graphic register 1 */
3577 case 2: /* graphic register 2 */
3578 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3579 ENCODE_SINGLE_SHIFT_2
;
3581 ENCODE_LOCKING_SHIFT_2
;
3584 case 3: /* graphic register 3 */
3585 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3586 ENCODE_SINGLE_SHIFT_3
;
3588 ENCODE_LOCKING_SHIFT_3
;
3593 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating the
   direction of text.  They rely on the variable `coding' and on
   whatever the EMIT_* macros require being in scope at the point of
   use.  */

/* Produce the control sequence introducer: the escape sequence
   ESC '[' when CODING has CODING_ISO_FLAG_SEVEN_BITS set, otherwise
   the single byte ISO_CODE_CSI.

   The flag is tested with `&', as ENCODE_SINGLE_SHIFT_2 does.
   Comparing the whole flag word with `==' would select the 7-bit form
   only when no other flag bit happened to be set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');			\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)

/* Produce CSI '2' ']', which starts right-to-left text.  The
   introducer macro is object-like, so it is used without an argument
   list.  */
#define ENCODE_DIRECTION_R2L()						\
  do {									\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;					\
    EMIT_TWO_ASCII_BYTES ('2', ']');					\
  } while (0)

/* Produce CSI '0' ']', which ends the current direction.  */
#define ENCODE_DIRECTION_L2R()						\
  do {									\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;					\
    EMIT_TWO_ASCII_BYTES ('0', ']');					\
  } while (0)
3622 /* Produce codes for designation and invocation to reset the graphic
3623 planes and registers to initial state. */
3624 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3627 struct charset *charset; \
3629 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3631 for (reg = 0; reg < 4; reg++) \
3632 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3633 && (CODING_ISO_DESIGNATION (coding, reg) \
3634 != CODING_ISO_INITIAL (coding, reg))) \
3636 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3637 ENCODE_DESIGNATION (charset, reg, coding); \
3642 /* Produce designation sequences of charsets in the line started from
3643 SRC to a place pointed by DST, and return updated DST.
3645 If the current block ends before any end-of-line, we may fail to
3646 find all the necessary designations. */
/* CODING is the coding system in use; CHARBUF .. CHARBUF_END is the
   range of characters to scan and DST the output position.  The
   charset list is taken from the attributes of CODING, with
   Viso_2022_charset_list substituted when the attribute is the symbol
   Qiso_2022.  For each scanned character the charset is found with
   char_charset and mapped to the graphic register requested for it
   (CODING_ISO_REQUEST); a register is only considered while its entry
   in the table R is still negative.  At the end a designation is
   emitted for each register whose current designation differs from
   the entry recorded in R.  */
3648 static unsigned char *
3649 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3650 struct coding_system
*coding
;
3651 int *charbuf
, *charbuf_end
;
3654 struct charset
*charset
;
3655 /* Table of charsets to be designated to each graphic register. */
3657 int c
, found
= 0, reg
;
3658 int produced_chars
= 0;
3659 int multibytep
= coding
->dst_multibyte
;
3661 Lisp_Object charset_list
;
3663 attrs
= CODING_ID_ATTRS (coding
->id
);
3664 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
/* The symbol Qiso_2022 stands for the global ISO-2022 charset list. */
3665 if (EQ (charset_list
, Qiso_2022
))
3666 charset_list
= Viso_2022_charset_list
;
3668 for (reg
= 0; reg
< 4; reg
++)
3678 charset
= char_charset (c
, charset_list
, NULL
);
3679 id
= CHARSET_ID (charset
);
3680 reg
= CODING_ISO_REQUEST (coding
, id
);
/* Only registers that are requested (>= 0) and not yet filled in R. */
3681 if (reg
>= 0 && r
[reg
] < 0)
3690 for (reg
= 0; reg
< 4; reg
++)
3692 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3693 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3699 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
/* Encode the characters held in CODING->charbuf (charbuf_used entries)
   into CODING->destination, starting at offset CODING->produced.

   Per character, as far as the visible code shows:
   - at the beginning of a line, when designation at BOL is enabled,
     encode_designation_at_bol is called first and the bytes it wrote
     are counted as produced characters;
   - charset annotations set a preferred charset id, which is dropped
     again (-1) depending on an Fmemq test; composition annotations
     are not implemented;
   - control characters (< 0x20 or 0x7F) may reset planes and registers
     according to the RESET_AT_EOL / RESET_AT_CNTL flags and are then
     emitted as one ASCII byte;
   - ASCII characters are emitted directly when the coding system is
     ASCII compatible, otherwise through ENCODE_ISO_CHARACTER;
   - raw eight-bit characters are converted with CHAR_TO_BYTE8;
   - any other character is encoded with ENCODE_ISO_CHARACTER using the
     preferred charset if it contains the character, else a charset
     from the charset list.  The fallback to
     CODING_INHIBIT_CHARACTER_SUBSTITUTION (safe encoding) or to
     CODING->default_char presumably applies when no charset was
     found; the governing test is not visible here.

   On the last block with RESET_AT_EOL set, planes and registers are
   reset once more.  On exit the result is recorded as success, the
   BOL state is stored back and CODING->produced_char and
   CODING->produced are updated.  */
3702 encode_coding_iso_2022 (coding
)
3703 struct coding_system
*coding
;
3705 int multibytep
= coding
->dst_multibyte
;
3706 int *charbuf
= coding
->charbuf
;
3707 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3708 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3709 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3712 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3713 && CODING_ISO_BOL (coding
));
3714 int produced_chars
= 0;
3715 Lisp_Object attrs
, eol_type
, charset_list
;
3716 int ascii_compatible
;
3718 int preferred_charset_id
= -1;
3720 CODING_GET_INFO (coding
, attrs
, charset_list
);
3721 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
3722 if (VECTORP (eol_type
))
3725 setup_iso_safe_charsets (attrs
);
/* NOTE(review): the next statement ends with a stray backslash after
   the semicolon.  It merely splices the following line, but looks
   unintended.  */
3726 /* Charset list may have been changed. */
3727 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
); \
3728 coding
->safe_charsets
= (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs
));
3730 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3732 while (charbuf
< charbuf_end
)
3734 ASSURE_DESTINATION (safe_room
);
3736 if (bol_designation
)
3738 unsigned char *dst_prev
= dst
;
3740 /* We have to produce designation sequences if any now. */
3741 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3742 bol_designation
= 0;
3743 /* We are sure that designation sequences are all ASCII bytes. */
3744 produced_chars
+= dst
- dst_prev
;
3751 /* Handle an annotation. */
3754 case CODING_ANNOTATE_COMPOSITION_MASK
:
3755 /* Not yet implemented. */
3757 case CODING_ANNOTATE_CHARSET_MASK
:
3758 preferred_charset_id
= charbuf
[3];
3759 if (preferred_charset_id
>= 0
3760 && NILP (Fmemq (make_number (preferred_charset_id
),
3762 preferred_charset_id
= -1;
3771 /* Now encode the character C. */
3772 if (c
< 0x20 || c
== 0x7F)
3775 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3777 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3778 ENCODE_RESET_PLANE_AND_REGISTER ();
3779 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
/* Forget the current designations: every register goes back to its
   initial charset.  */
3783 for (i
= 0; i
< 4; i
++)
3784 CODING_ISO_DESIGNATION (coding
, i
)
3785 = CODING_ISO_INITIAL (coding
, i
);
3788 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3790 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3791 ENCODE_RESET_PLANE_AND_REGISTER ();
3792 EMIT_ONE_ASCII_BYTE (c
);
3794 else if (ASCII_CHAR_P (c
))
3796 if (ascii_compatible
)
3797 EMIT_ONE_ASCII_BYTE (c
);
3800 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3801 ENCODE_ISO_CHARACTER (charset
, c
);
3804 else if (CHAR_BYTE8_P (c
))
3806 c
= CHAR_TO_BYTE8 (c
);
3811 struct charset
*charset
;
3813 if (preferred_charset_id
>= 0)
3815 charset
= CHARSET_FROM_ID (preferred_charset_id
);
/* The preferred charset is only used if it contains C. */
3816 if (! CHAR_CHARSET_P (c
, charset
))
3817 charset
= char_charset (c
, charset_list
, NULL
);
3820 charset
= char_charset (c
, charset_list
, NULL
);
3823 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3825 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3826 charset
= CHARSET_FROM_ID (charset_ascii
);
3830 c
= coding
->default_char
;
3831 charset
= char_charset (c
, charset_list
, NULL
);
3834 ENCODE_ISO_CHARACTER (charset
, c
);
3838 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3839 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3841 ASSURE_DESTINATION (safe_room
);
3842 ENCODE_RESET_PLANE_AND_REGISTER ();
3844 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
3845 CODING_ISO_BOL (coding
) = bol_designation
;
3846 coding
->produced_char
+= produced_chars
;
3847 coding
->produced
= dst
- coding
->destination
;
3852 /*** 8. SJIS and BIG5 handlers ***/
3854 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3855 quite widely. So, for the moment, Emacs supports them in the bare
3856 C code. But, in the future, they may be supported only by CCL. */
3858 /* SJIS is a coding system encoding three character sets: ASCII, right
3859 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3860 as is. A character of charset katakana-jisx0201 is encoded by
3861 "position-code + 0x80". A character of charset japanese-jisx0208
3862 is encoded in two bytes, but the two position-codes are divided and
3863 shifted so that they fit in the ranges below.
3865 --- CODE RANGE of SJIS ---
3866 (character set) (range)
3868 KATAKANA-JISX0201 0xA0 .. 0xDF
3869 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3870 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3871 -------------------------------
3875 /* BIG5 is a coding system encoding two character sets: ASCII and
3876 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3877 character set and is encoded in two bytes.
3879 --- CODE RANGE of BIG5 ---
3880 (character set) (range)
3882 Big5 (1st byte) 0xA1 .. 0xFE
3883 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3884 --------------------------
3888 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3889 Check if a text is encoded in SJIS. If it is, return
3890 CATEGORY_MASK_SJIS, else return 0. */
/* What the visible code does: CATEGORY_MASK_SJIS is added to
   DETECT_INFO->checked, the leading ASCII part (CODING->head_ascii
   bytes) is skipped, and the remaining bytes are classified.  A byte
   in 0x81..0x9F or 0xE0..0xEF is a lead byte and is followed by a
   check of the next byte against the range 0x40..0xFC excluding 0x7F;
   a byte in 0xA0..0xDF stands alone.  Both cases set FOUND.  The
   category is added to DETECT_INFO->rejected on failure, including
   when the last block ends inside a sequence, and FOUND is merged
   into DETECT_INFO->found.
   NOTE(review): the return statements are not visible here, so the
   return values named in the comment above could not be confirmed.  */
3893 detect_coding_sjis (coding
, detect_info
)
3894 struct coding_system
*coding
;
3895 struct coding_detection_info
*detect_info
;
3897 const unsigned char *src
= coding
->source
, *src_base
;
3898 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3899 int multibytep
= coding
->src_multibyte
;
3900 int consumed_chars
= 0;
3904 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3905 /* A coding system of this category is always ASCII compatible. */
3906 src
+= coding
->head_ascii
;
/* Lead byte of a two-byte character. */
3914 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
/* Trail byte outside 0x40..0xFC, or equal to 0x7F. */
3917 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3919 found
= CATEGORY_MASK_SJIS
;
/* Single-byte range. */
3921 else if (c
>= 0xA0 && c
< 0xE0)
3922 found
= CATEGORY_MASK_SJIS
;
3926 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
/* Input ended inside a sequence on the last block. */
3930 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3932 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3935 detect_info
->found
|= found
;
3939 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3940 Check if a text is encoded in BIG5. If it is, return
3941 CATEGORY_MASK_BIG5, else return 0. */
/* What the visible code does: CATEGORY_MASK_BIG5 is added to
   DETECT_INFO->checked, the leading ASCII part (CODING->head_ascii
   bytes) is skipped, and a byte is tested against the ranges
   0x40..0x7E and 0xA1 and above before FOUND is set.  The category is
   added to DETECT_INFO->rejected on failure, including when the last
   block ends inside a sequence, and FOUND is merged into
   DETECT_INFO->found.
   NOTE(review): the return statements are not visible here, so the
   return values named in the comment above could not be confirmed.  */
3944 detect_coding_big5 (coding
, detect_info
)
3945 struct coding_system
*coding
;
3946 struct coding_detection_info
*detect_info
;
3948 const unsigned char *src
= coding
->source
, *src_base
;
3949 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3950 int multibytep
= coding
->src_multibyte
;
3951 int consumed_chars
= 0;
3955 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
3956 /* A coding system of this category is always ASCII compatible. */
3957 src
+= coding
->head_ascii
;
/* Byte below 0x40 or in the gap 0x7F..0xA0. */
3968 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3970 found
= CATEGORY_MASK_BIG5
;
3975 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
/* Input ended inside a sequence on the last block. */
3979 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3981 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3984 detect_info
->found
|= found
;
3988 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3989 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
/* NOTE(review): no SJIS_P parameter is visible; this function takes
   only CODING, and BIG5 has its own decode_coding_big5.

   Decode bytes from CODING->source, starting at offset
   CODING->consumed, into CODING->charbuf.  MAX_ANNOTATION_LENGTH
   entries are kept free at the end of the character buffer.  Three
   charsets (roman, kana, kanji) and an optional fourth (kanji2) are
   taken from consecutive elements of the list VAL.  Bytes 0xA1..0xDF
   select the kana charset; two-byte sequences select the kanji
   charset, or kanji2 when the lead byte is <= 0xFC and kanji2 exists;
   the trail byte is tested against 0x40..0xFC excluding 0x7F.
   Whenever the charset changes to a non-ASCII one, the preceding run
   is recorded with ADD_CHARSET_DATA.  On exit CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated.  */
3992 decode_coding_sjis (coding
)
3993 struct coding_system
*coding
;
3995 const unsigned char *src
= coding
->source
+ coding
->consumed
;
3996 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3997 const unsigned char *src_base
;
3998 int *charbuf
= coding
->charbuf
;
3999 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4000 int consumed_chars
= 0, consumed_chars_base
;
4001 int multibytep
= coding
->src_multibyte
;
4002 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4003 struct charset
*charset_kanji2
;
4004 Lisp_Object attrs
, charset_list
, val
;
4005 int char_offset
= coding
->produced_char
;
4006 int last_offset
= char_offset
;
4007 int last_id
= charset_ascii
;
4009 CODING_GET_INFO (coding
, attrs
, charset_list
);
4012 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4013 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4014 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
/* The fourth charset is optional. */
4015 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4020 struct charset
*charset
;
4023 consumed_chars_base
= consumed_chars
;
4025 if (charbuf
>= charbuf_end
)
4032 charset
= charset_roman
;
4033 else if (c
== 0x80 || c
== 0xA0)
4035 else if (c
>= 0xA1 && c
<= 0xDF)
4037 /* SJIS -> JISX0201-Kana */
4039 charset
= charset_kana
;
4043 /* SJIS -> JISX0208 */
4045 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4049 charset
= charset_kanji
;
4051 else if (c
<= 0xFC && charset_kanji2
)
4053 /* SJIS -> JISX0213-2 */
4055 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4059 charset
= charset_kanji2
;
/* Record the run of the previous charset when the charset changes. */
4063 if (charset
->id
!= charset_ascii
4064 && last_id
!= charset
->id
)
4066 if (last_id
!= charset_ascii
)
4067 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4068 last_id
= charset
->id
;
4069 last_offset
= char_offset
;
4071 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4078 consumed_chars
= consumed_chars_base
;
/* A negative C is stored negated; any other value goes through
   BYTE8_TO_CHAR.  */
4080 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4086 if (last_id
!= charset_ascii
)
4087 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4088 coding
->consumed_char
+= consumed_chars_base
;
4089 coding
->consumed
= src_base
- coding
->source
;
4090 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Decode BIG5 bytes from CODING->source, starting at offset
   CODING->consumed, into CODING->charbuf.  MAX_ANNOTATION_LENGTH
   entries are kept free at the end of the character buffer.  Two
   charsets (roman and big5) are taken from consecutive elements of
   the list VAL.  A lead byte is tested against 0xA1..0xFE and a trail
   byte against 0x40..0x7E and 0xA1..0xFE before the big5 charset is
   selected.  Whenever the charset changes to a non-ASCII one, the
   preceding run is recorded with ADD_CHARSET_DATA.  On exit
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used
   are updated.  */
4094 decode_coding_big5 (coding
)
4095 struct coding_system
*coding
;
4097 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4098 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4099 const unsigned char *src_base
;
4100 int *charbuf
= coding
->charbuf
;
4101 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4102 int consumed_chars
= 0, consumed_chars_base
;
4103 int multibytep
= coding
->src_multibyte
;
4104 struct charset
*charset_roman
, *charset_big5
;
4105 Lisp_Object attrs
, charset_list
, val
;
4106 int char_offset
= coding
->produced_char
;
4107 int last_offset
= char_offset
;
4108 int last_id
= charset_ascii
;
4110 CODING_GET_INFO (coding
, attrs
, charset_list
);
4112 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4113 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4118 struct charset
*charset
;
4121 consumed_chars_base
= consumed_chars
;
4123 if (charbuf
>= charbuf_end
)
4131 charset
= charset_roman
;
/* Lead byte outside 0xA1..0xFE. */
4135 if (c
< 0xA1 || c
> 0xFE)
/* Trail byte outside 0x40..0x7E and 0xA1..0xFE. */
4138 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4141 charset
= charset_big5
;
/* Record the run of the previous charset when the charset changes. */
4143 if (charset
->id
!= charset_ascii
4144 && last_id
!= charset
->id
)
4146 if (last_id
!= charset_ascii
)
4147 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4148 last_id
= charset
->id
;
4149 last_offset
= char_offset
;
4151 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4158 consumed_chars
= consumed_chars_base
;
/* A negative C is stored negated; any other value goes through
   BYTE8_TO_CHAR.  */
4160 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4166 if (last_id
!= charset_ascii
)
4167 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4168 coding
->consumed_char
+= consumed_chars_base
;
4169 coding
->consumed
= src_base
- coding
->source
;
4170 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4173 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4174 This function can encode charsets `ascii', `katakana-jisx0201',
4175 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4176 are sure that all these charsets are registered as official charset
4177 (i.e. do not have extended leading-codes). Characters of other
4178 charsets are produced without any encoding. If SJIS_P is 1, encode
4179 SJIS text, else encode BIG5 text. */
4182 encode_coding_sjis (coding
)
4183 struct coding_system
*coding
;
4185 int multibytep
= coding
->dst_multibyte
;
4186 int *charbuf
= coding
->charbuf
;
4187 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4188 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4189 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4191 int produced_chars
= 0;
4192 Lisp_Object attrs
, charset_list
, val
;
4193 int ascii_compatible
;
4194 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4195 struct charset
*charset_kanji2
;
4198 CODING_GET_INFO (coding
, attrs
, charset_list
);
4200 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4201 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4202 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4203 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4205 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4207 while (charbuf
< charbuf_end
)
4209 ASSURE_DESTINATION (safe_room
);
4211 /* Now encode the character C. */
4212 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4213 EMIT_ONE_ASCII_BYTE (c
);
4214 else if (CHAR_BYTE8_P (c
))
4216 c
= CHAR_TO_BYTE8 (c
);
4222 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4226 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4228 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4229 charset
= CHARSET_FROM_ID (charset_ascii
);
4233 c
= coding
->default_char
;
4234 charset
= char_charset (c
, charset_list
, &code
);
4237 if (code
== CHARSET_INVALID_CODE (charset
))
4239 if (charset
== charset_kanji
)
4243 c1
= code
>> 8, c2
= code
& 0xFF;
4244 EMIT_TWO_BYTES (c1
, c2
);
4246 else if (charset
== charset_kana
)
4247 EMIT_ONE_BYTE (code
| 0x80);
4248 else if (charset_kanji2
&& charset
== charset_kanji2
)
4253 if (c1
== 0x21 || (c1
>= 0x23 && c1
< 0x25)
4254 || (c1
>= 0x2C && c1
<= 0x2F) || c1
>= 0x6E)
4256 JIS_TO_SJIS2 (code
);
4257 c1
= code
>> 8, c2
= code
& 0xFF;
4258 EMIT_TWO_BYTES (c1
, c2
);
4261 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4264 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4267 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4268 coding
->produced_char
+= produced_chars
;
4269 coding
->produced
= dst
- coding
->destination
;
/* Encode the characters in CODING->charbuf as BIG5 text into
   CODING->destination, starting at offset CODING->produced.  The
   roman and big5 charsets are taken from consecutive elements of the
   list VAL.  ASCII characters are emitted as is when the coding
   system is ASCII compatible, raw eight-bit characters are converted
   with CHAR_TO_BYTE8, big5 codes are split into two bytes, and any
   other code is emitted as one byte masked with 0x7F.  The
   substitution by CODING_INHIBIT_CHARACTER_SUBSTITUTION (safe
   encoding) or CODING->default_char presumably applies when
   char_charset finds no charset; the governing test is not visible
   here.  On exit the result is recorded as success and
   CODING->produced_char and CODING->produced are updated.  */
4274 encode_coding_big5 (coding
)
4275 struct coding_system
*coding
;
4277 int multibytep
= coding
->dst_multibyte
;
4278 int *charbuf
= coding
->charbuf
;
4279 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4280 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4281 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4283 int produced_chars
= 0;
4284 Lisp_Object attrs
, charset_list
, val
;
4285 int ascii_compatible
;
4286 struct charset
*charset_roman
, *charset_big5
;
4289 CODING_GET_INFO (coding
, attrs
, charset_list
);
4291 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4292 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4293 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4295 while (charbuf
< charbuf_end
)
4297 ASSURE_DESTINATION (safe_room
);
4299 /* Now encode the character C. */
4300 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4301 EMIT_ONE_ASCII_BYTE (c
);
4302 else if (CHAR_BYTE8_P (c
))
4304 c
= CHAR_TO_BYTE8 (c
);
4310 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4314 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4316 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4317 charset
= CHARSET_FROM_ID (charset_ascii
);
4321 c
= coding
->default_char
;
4322 charset
= char_charset (c
, charset_list
, &code
);
4325 if (code
== CHARSET_INVALID_CODE (charset
))
4327 if (charset
== charset_big5
)
/* High byte first, then low byte. */
4331 c1
= code
>> 8, c2
= code
& 0xFF;
4332 EMIT_TWO_BYTES (c1
, c2
);
4335 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4338 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4339 coding
->produced_char
+= produced_chars
;
4340 coding
->produced
= dst
- coding
->destination
;
4345 /*** 9. CCL handlers ***/
4347 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4348 Check if a text is encoded in a coding system of which
4349 encoder/decoder are written in CCL program. If it is, return
4350 CATEGORY_MASK_CCL, else return 0. */
/* What the visible code does: CATEGORY_MASK_CCL is added to
   DETECT_INFO->checked, and CODING is then replaced by the entry of
   coding_categories for the CCL category, so the valid-byte table and
   the attributes come from that entry rather than from the argument.
   A byte that is negative or has a zero entry in VALIDS fails the
   test; an entry greater than 1 sets FOUND.  The category is added to
   DETECT_INFO->rejected on failure and FOUND is merged into
   DETECT_INFO->found.
   NOTE(review): the return statements are not visible here, so the
   return values named in the comment above could not be confirmed.  */
4353 detect_coding_ccl (coding
, detect_info
)
4354 struct coding_system
*coding
;
4355 struct coding_detection_info
*detect_info
;
4357 const unsigned char *src
= coding
->source
, *src_base
;
4358 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4359 int multibytep
= coding
->src_multibyte
;
4360 int consumed_chars
= 0;
4362 unsigned char *valids
;
/* Saved before CODING is redirected below. */
4363 int head_ascii
= coding
->head_ascii
;
4366 detect_info
->checked
|= CATEGORY_MASK_CCL
;
4368 coding
= &coding_categories
[coding_category_ccl
];
4369 valids
= CODING_CCL_VALIDS (coding
);
4370 attrs
= CODING_ID_ATTRS (coding
->id
);
4371 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4380 if (c
< 0 || ! valids
[c
])
4382 if ((valids
[c
] > 1))
4383 found
= CATEGORY_MASK_CCL
;
4385 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4389 detect_info
->found
|= found
;
/* Decode source bytes by running the CCL decoder program of CODING.
   The source is processed in chunks of at most 1024 elements: it is
   first copied into SOURCE_CHARBUF, either character by character
   with STRING_CHAR_ADVANCE (recording each character's byte offset in
   SOURCE_BYTEIDX) or byte by byte, and ccl_driver is then run on the
   chunk, writing into CODING->charbuf.  The inner loop repeats while
   input remains and stops when the program status is anything other
   than CCL_STAT_SUSPEND_BY_DST.  The final CCL status is translated
   into a conversion result, and CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated.  */
4394 decode_coding_ccl (coding
)
4395 struct coding_system
*coding
;
4397 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4398 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4399 int *charbuf
= coding
->charbuf
;
4400 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4401 int consumed_chars
= 0;
4402 int multibytep
= coding
->src_multibyte
;
4403 struct ccl_program ccl
;
4404 int source_charbuf
[1024];
4405 int source_byteidx
[1024];
4406 Lisp_Object attrs
, charset_list
;
4408 CODING_GET_INFO (coding
, attrs
, charset_list
);
4409 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4411 while (src
< src_end
)
4413 const unsigned char *p
= src
;
4414 int *source
, *source_end
;
/* Fill the chunk with characters and remember where each starts. */
4418 while (i
< 1024 && p
< src_end
)
4420 source_byteidx
[i
] = p
- src
;
4421 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
/* Fill the chunk with raw bytes. */
4424 while (i
< 1024 && p
< src_end
)
4425 source_charbuf
[i
++] = *p
++;
4427 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4430 source
= source_charbuf
;
4431 source_end
= source
+ i
;
4432 while (source
< source_end
)
4434 ccl_driver (&ccl
, source
, charbuf
,
4435 source_end
- source
, charbuf_end
- charbuf
,
4437 source
+= ccl
.consumed
;
4438 charbuf
+= ccl
.produced
;
4439 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
/* Part of the chunk is unread: advance SRC by the byte offset that was
   recorded for the first unread element.  */
4442 if (source
< source_end
)
4443 src
+= source_byteidx
[source
- source_charbuf
];
4446 consumed_chars
+= source
- source_charbuf
;
/* NOTE(review): ccl.status is compared with a CODING_RESULT_* constant
   below, although every other test of it uses CCL_STAT_* values;
   confirm that this mix is intended.  */
4448 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4449 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4455 case CCL_STAT_SUSPEND_BY_SRC
:
4456 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4458 case CCL_STAT_SUSPEND_BY_DST
:
4461 case CCL_STAT_INVALID_CMD
:
4462 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4465 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4468 coding
->consumed_char
+= consumed_chars
;
4469 coding
->consumed
= src
- coding
->source
;
4470 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf by running the CCL encoder
   program of CODING.  The program is told whether this is the last
   block and whether the destination is multibyte.  While characters
   remain and more than one byte of destination is left, ccl_driver is
   run with DESTINATION_CHARBUF (1024 entries) as its output; the
   available size passed to it is derived from the remaining
   destination space and tested against 1024.  Each produced value is
   masked to a byte and written either with EMIT_ONE_BYTE or directly
   to DST; in the direct case the produced-character count grows by
   the number of bytes.  The final CCL status is translated into a
   conversion result, and CODING->produced_char and CODING->produced
   are updated.  */
4474 encode_coding_ccl (coding
)
4475 struct coding_system
*coding
;
4477 struct ccl_program ccl
;
4478 int multibytep
= coding
->dst_multibyte
;
4479 int *charbuf
= coding
->charbuf
;
4480 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4481 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4482 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
/* One byte short of the real end; this is the loop's stop condition. */
4483 unsigned char *adjusted_dst_end
= dst_end
- 1;
4484 int destination_charbuf
[1024];
4485 int i
, produced_chars
= 0;
4486 Lisp_Object attrs
, charset_list
;
4488 CODING_GET_INFO (coding
, attrs
, charset_list
);
4489 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4491 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4492 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4494 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
4496 int dst_bytes
= dst_end
- dst
;
/* 1024 is the size of DESTINATION_CHARBUF. */
4497 if (dst_bytes
> 1024)
4500 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4501 charbuf_end
- charbuf
, dst_bytes
, charset_list
);
4502 charbuf
+= ccl
.consumed
;
4504 for (i
= 0; i
< ccl
.produced
; i
++)
4505 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4508 for (i
= 0; i
< ccl
.produced
; i
++)
4509 *dst
++ = destination_charbuf
[i
] & 0xFF;
4510 produced_chars
+= ccl
.produced
;
4516 case CCL_STAT_SUSPEND_BY_SRC
:
4517 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4519 case CCL_STAT_SUSPEND_BY_DST
:
4520 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_DST
);
4523 case CCL_STAT_INVALID_CMD
:
4524 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4527 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4531 coding
->produced_char
+= produced_chars
;
4532 coding
->produced
= dst
- coding
->destination
;
4538 /*** 10, 11. no-conversion handlers ***/
4540 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* No bytes are converted here.  The function sets
   CODING->chars_at_source, clears CODING->consumed_char and
   CODING->consumed, and records CODING_RESULT_SUCCESS.  Judging by
   its name, chars_at_source tells the caller to take the characters
   directly from the source -- TODO confirm against the callers.  */
4543 decode_coding_raw_text (coding
)
4544 struct coding_system
*coding
;
4546 coding
->chars_at_source
= 1;
4547 coding
->consumed_char
= 0;
4548 coding
->consumed
= 0;
4549 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4553 encode_coding_raw_text (coding
)
4554 struct coding_system
*coding
;
4556 int multibytep
= coding
->dst_multibyte
;
4557 int *charbuf
= coding
->charbuf
;
4558 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4559 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4560 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4561 int produced_chars
= 0;
4566 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4568 if (coding
->src_multibyte
)
4569 while (charbuf
< charbuf_end
)
4571 ASSURE_DESTINATION (safe_room
);
4573 if (ASCII_CHAR_P (c
))
4574 EMIT_ONE_ASCII_BYTE (c
);
4575 else if (CHAR_BYTE8_P (c
))
4577 c
= CHAR_TO_BYTE8 (c
);
4582 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4584 CHAR_STRING_ADVANCE (c
, p1
);
4587 EMIT_ONE_BYTE (*p0
);
4593 while (charbuf
< charbuf_end
)
4595 ASSURE_DESTINATION (safe_room
);
4602 if (coding
->src_multibyte
)
4604 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4606 while (charbuf
< charbuf_end
)
4608 ASSURE_DESTINATION (safe_room
);
4610 if (ASCII_CHAR_P (c
))
4612 else if (CHAR_BYTE8_P (c
))
4613 *dst
++ = CHAR_TO_BYTE8 (c
);
4615 CHAR_STRING_ADVANCE (c
, dst
);
4621 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4622 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4623 *dst
++ = *charbuf
++;
4624 produced_chars
= dst
- (coding
->destination
+ coding
->dst_bytes
);
4627 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4628 coding
->produced_char
+= produced_chars
;
4629 coding
->produced
= dst
- coding
->destination
;
4633 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4634 Check if a text is encoded in a charset-based coding system. If it
4635 is, return 1, else return 0. */
/* What the visible code does: CATEGORY_MASK_CHARSET is added to
   DETECT_INFO->checked, and CODING is then replaced by the entry of
   coding_categories for the charset category, so the attributes and
   the table VALIDS come from that entry.  The leading ASCII part is
   skipped only when that coding system is ASCII compatible.  Each
   byte is looked up in VALIDS, where a nil entry is tested for.  The
   category is added to DETECT_INFO->rejected on failure and FOUND is
   merged into DETECT_INFO->found.
   NOTE(review): the return statements are not visible here, so the
   return values named in the comment above could not be confirmed.  */
4638 detect_coding_charset (coding
, detect_info
)
4639 struct coding_system
*coding
;
4640 struct coding_detection_info
*detect_info
;
4642 const unsigned char *src
= coding
->source
, *src_base
;
4643 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4644 int multibytep
= coding
->src_multibyte
;
4645 int consumed_chars
= 0;
4646 Lisp_Object attrs
, valids
;
4649 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
4651 coding
= &coding_categories
[coding_category_charset
];
4652 attrs
= CODING_ID_ATTRS (coding
->id
);
4653 valids
= AREF (attrs
, coding_attr_charset_valids
);
4655 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4656 src
+= coding
->head_ascii
;
4666 if (NILP (AREF (valids
, c
)))
4669 found
= CATEGORY_MASK_CHARSET
;
4671 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4675 detect_info
->found
|= found
;
/* Decode bytes of a charset-based coding system from CODING->source,
   starting at offset CODING->consumed, into CODING->charbuf.
   MAX_ANNOTATION_LENGTH entries are kept free at the end of the
   character buffer.  The table VALIDS, taken from the attributes, is
   indexed by the byte read; its entry is used either as a single
   charset id or as a list of charset ids.  The charset's dimension is
   read and the code point is built by shifting in one byte at a time
   before CODING_DECODE_CHAR is applied.  Whenever the charset changes
   to a non-ASCII one, the preceding run is recorded with
   ADD_CHARSET_DATA.  On exit CODING->consumed_char, CODING->consumed
   and CODING->charbuf_used are updated.  */
4680 decode_coding_charset (coding
)
4681 struct coding_system
*coding
;
4683 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4684 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4685 const unsigned char *src_base
;
4686 int *charbuf
= coding
->charbuf
;
4687 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4688 int consumed_chars
= 0, consumed_chars_base
;
4689 int multibytep
= coding
->src_multibyte
;
4690 Lisp_Object attrs
, charset_list
, valids
;
4691 int char_offset
= coding
->produced_char
;
4692 int last_offset
= char_offset
;
4693 int last_id
= charset_ascii
;
4695 CODING_GET_INFO (coding
, attrs
, charset_list
);
4696 valids
= AREF (attrs
, coding_attr_charset_valids
);
4702 struct charset
*charset
;
4708 consumed_chars_base
= consumed_chars
;
4710 if (charbuf
>= charbuf_end
)
4718 val
= AREF (valids
, c
);
4723 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4724 dim
= CHARSET_DIMENSION (charset
);
/* Append the next byte to the code point. */
4728 code
= (code
<< 8) | c
;
4731 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4736 /* VAL is a list of charset IDs. It is assured that the
4737 list is sorted by charset dimensions (smaller one
4741 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4742 dim
= CHARSET_DIMENSION (charset
);
4746 code
= (code
<< 8) | c
;
4749 CODING_DECODE_CHAR (coding
, src
, src_base
,
4750 src_end
, charset
, code
, c
);
4758 if (charset
->id
!= charset_ascii
4759 && last_id
!= charset
->id
)
4761 if (last_id
!= charset_ascii
)
4762 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4763 last_id
= charset
->id
;
4764 last_offset
= char_offset
;
4773 consumed_chars
= consumed_chars_base
;
4775 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4781 if (last_id
!= charset_ascii
)
4782 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4783 coding
->consumed_char
+= consumed_chars_base
;
4784 coding
->consumed
= src_base
- coding
->source
;
4785 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf for a charset-based coding
   system into CODING->destination, starting at offset
   CODING->produced.  ASCII characters are emitted as is when the
   coding system is ASCII compatible and raw eight-bit characters are
   converted with CHAR_TO_BYTE8.  For any other character
   char_charset yields a charset and a code point, and the code point
   is emitted most significant byte first, using as many bytes (one
   to four) as the charset's dimension.  C is replaced by
   CODING_INHIBIT_CHARACTER_SUBSTITUTION (safe encoding) or
   CODING->default_char, presumably when no charset covers the
   character; the governing test is not visible here.  On exit the
   result is recorded as success and CODING->produced_char and
   CODING->produced are updated.  */
4789 encode_coding_charset (coding
)
4790 struct coding_system
*coding
;
4792 int multibytep
= coding
->dst_multibyte
;
4793 int *charbuf
= coding
->charbuf
;
4794 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4795 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4796 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4797 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4798 int produced_chars
= 0;
4799 Lisp_Object attrs
, charset_list
;
4800 int ascii_compatible
;
4803 CODING_GET_INFO (coding
, attrs
, charset_list
);
4804 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4806 while (charbuf
< charbuf_end
)
4808 struct charset
*charset
;
4811 ASSURE_DESTINATION (safe_room
);
4813 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4814 EMIT_ONE_ASCII_BYTE (c
);
4815 else if (CHAR_BYTE8_P (c
))
4817 c
= CHAR_TO_BYTE8 (c
);
4822 charset
= char_charset (c
, charset_list
, &code
);
/* Emit the code point with one byte per charset dimension. */
4825 if (CHARSET_DIMENSION (charset
) == 1)
4826 EMIT_ONE_BYTE (code
);
4827 else if (CHARSET_DIMENSION (charset
) == 2)
4828 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4829 else if (CHARSET_DIMENSION (charset
) == 3)
4830 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4832 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4833 (code
>> 8) & 0xFF, code
& 0xFF);
4837 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4838 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4840 c
= coding
->default_char
;
4846 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4847 coding
->produced_char
+= produced_chars
;
4848 coding
->produced
= dst
- coding
->destination
;
4853 /*** 10. C library functions ***/
4855 /* Setup coding context CODING from information about CODING_SYSTEM.
4856 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4857 CODING_SYSTEM is invalid, signal an error. */
4860 setup_coding_system (coding_system
, coding
)
4861 Lisp_Object coding_system
;
4862 struct coding_system
*coding
;
4865 Lisp_Object eol_type
;
4866 Lisp_Object coding_type
;
4869 if (NILP (coding_system
))
4870 coding_system
= Qno_conversion
;
4872 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4874 attrs
= CODING_ID_ATTRS (coding
->id
);
4875 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4878 coding
->head_ascii
= -1;
4879 coding
->common_flags
4880 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4881 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
4882 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
4883 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
4884 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
4885 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs
)))
4886 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4888 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4889 coding
->max_charset_id
= SCHARS (val
) - 1;
4890 coding
->safe_charsets
= (char *) SDATA (val
);
4891 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4893 coding_type
= CODING_ATTR_TYPE (attrs
);
4894 if (EQ (coding_type
, Qundecided
))
4896 coding
->detector
= NULL
;
4897 coding
->decoder
= decode_coding_raw_text
;
4898 coding
->encoder
= encode_coding_raw_text
;
4899 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4901 else if (EQ (coding_type
, Qiso_2022
))
4904 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4906 /* Invoke graphic register 0 to plane 0. */
4907 CODING_ISO_INVOCATION (coding
, 0) = 0;
4908 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4909 CODING_ISO_INVOCATION (coding
, 1)
4910 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4911 /* Setup the initial status of designation. */
4912 for (i
= 0; i
< 4; i
++)
4913 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4914 /* Not single shifting initially. */
4915 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4916 /* Beginning of buffer should also be regarded as bol. */
4917 CODING_ISO_BOL (coding
) = 1;
4918 coding
->detector
= detect_coding_iso_2022
;
4919 coding
->decoder
= decode_coding_iso_2022
;
4920 coding
->encoder
= encode_coding_iso_2022
;
4921 if (flags
& CODING_ISO_FLAG_SAFE
)
4922 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4923 coding
->common_flags
4924 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4925 | CODING_REQUIRE_FLUSHING_MASK
);
4926 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4927 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4928 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
4929 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
4930 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4932 setup_iso_safe_charsets (attrs
);
4933 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4934 coding
->max_charset_id
= SCHARS (val
) - 1;
4935 coding
->safe_charsets
= (char *) SDATA (val
);
4937 CODING_ISO_FLAGS (coding
) = flags
;
4939 else if (EQ (coding_type
, Qcharset
))
4941 coding
->detector
= detect_coding_charset
;
4942 coding
->decoder
= decode_coding_charset
;
4943 coding
->encoder
= encode_coding_charset
;
4944 coding
->common_flags
4945 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4947 else if (EQ (coding_type
, Qutf_8
))
4949 coding
->detector
= detect_coding_utf_8
;
4950 coding
->decoder
= decode_coding_utf_8
;
4951 coding
->encoder
= encode_coding_utf_8
;
4952 coding
->common_flags
4953 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4955 else if (EQ (coding_type
, Qutf_16
))
4957 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4958 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4959 : EQ (val
, Qt
) ? utf_16_with_bom
4960 : utf_16_without_bom
);
4961 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4962 CODING_UTF_16_ENDIAN (coding
) = (EQ (val
, Qbig
) ? utf_16_big_endian
4963 : utf_16_little_endian
);
4964 CODING_UTF_16_SURROGATE (coding
) = 0;
4965 coding
->detector
= detect_coding_utf_16
;
4966 coding
->decoder
= decode_coding_utf_16
;
4967 coding
->encoder
= encode_coding_utf_16
;
4968 coding
->common_flags
4969 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4970 if (CODING_UTF_16_BOM (coding
) == utf_16_detect_bom
)
4971 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4973 else if (EQ (coding_type
, Qccl
))
4975 coding
->detector
= detect_coding_ccl
;
4976 coding
->decoder
= decode_coding_ccl
;
4977 coding
->encoder
= encode_coding_ccl
;
4978 coding
->common_flags
4979 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4980 | CODING_REQUIRE_FLUSHING_MASK
);
4982 else if (EQ (coding_type
, Qemacs_mule
))
4984 coding
->detector
= detect_coding_emacs_mule
;
4985 coding
->decoder
= decode_coding_emacs_mule
;
4986 coding
->encoder
= encode_coding_emacs_mule
;
4987 coding
->common_flags
4988 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4989 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4990 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4992 Lisp_Object tail
, safe_charsets
;
4993 int max_charset_id
= 0;
4995 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4997 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4998 max_charset_id
= XFASTINT (XCAR (tail
));
4999 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
5001 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
5003 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
5004 coding
->max_charset_id
= max_charset_id
;
5005 coding
->safe_charsets
= (char *) SDATA (safe_charsets
);
5008 else if (EQ (coding_type
, Qshift_jis
))
5010 coding
->detector
= detect_coding_sjis
;
5011 coding
->decoder
= decode_coding_sjis
;
5012 coding
->encoder
= encode_coding_sjis
;
5013 coding
->common_flags
5014 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5016 else if (EQ (coding_type
, Qbig5
))
5018 coding
->detector
= detect_coding_big5
;
5019 coding
->decoder
= decode_coding_big5
;
5020 coding
->encoder
= encode_coding_big5
;
5021 coding
->common_flags
5022 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5024 else /* EQ (coding_type, Qraw_text) */
5026 coding
->detector
= NULL
;
5027 coding
->decoder
= decode_coding_raw_text
;
5028 coding
->encoder
= encode_coding_raw_text
;
5034 /* Return raw-text or one of its subsidiaries that has the same
5035 eol_type as CODING-SYSTEM. */
5038 raw_text_coding_system (coding_system
)
5039 Lisp_Object coding_system
;
5041 Lisp_Object spec
, attrs
;
5042 Lisp_Object eol_type
, raw_text_eol_type
;
5044 if (NILP (coding_system
))
5046 spec
= CODING_SYSTEM_SPEC (coding_system
);
5047 attrs
= AREF (spec
, 0);
5049 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
5050 return coding_system
;
5052 eol_type
= AREF (spec
, 2);
5053 if (VECTORP (eol_type
))
5055 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
5056 raw_text_eol_type
= AREF (spec
, 2);
5057 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
5058 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
5059 : AREF (raw_text_eol_type
, 2));
5063 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5064 does, return one of the subsidiary that has the same eol-spec as
5065 PARENT. Otherwise, return CODING_SYSTEM. */
5068 coding_inherit_eol_type (coding_system
, parent
)
5069 Lisp_Object coding_system
, parent
;
5071 Lisp_Object spec
, eol_type
;
5073 if (NILP (coding_system
))
5074 coding_system
= Qraw_text
;
5075 spec
= CODING_SYSTEM_SPEC (coding_system
);
5076 eol_type
= AREF (spec
, 2);
5077 if (VECTORP (eol_type
)
5080 Lisp_Object parent_spec
;
5081 Lisp_Object parent_eol_type
;
5084 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
5085 parent_eol_type
= AREF (parent_spec
, 2);
5086 if (EQ (parent_eol_type
, Qunix
))
5087 coding_system
= AREF (eol_type
, 0);
5088 else if (EQ (parent_eol_type
, Qdos
))
5089 coding_system
= AREF (eol_type
, 1);
5090 else if (EQ (parent_eol_type
, Qmac
))
5091 coding_system
= AREF (eol_type
, 2);
5093 return coding_system
;
5096 /* Emacs has a mechanism to automatically detect a coding system if it
5097 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5098 it's impossible to distinguish some coding systems accurately
5099 because they use the same range of codes. So, at first, coding
5100 systems are categorized into the following categories:
5102 o coding-category-emacs-mule
5104 The category for a coding system which has the same code range
5105 as Emacs' internal format. Assigned the coding-system (Lisp
5106 symbol) `emacs-mule' by default.
5108 o coding-category-sjis
5110 The category for a coding system which has the same code range
5111 as SJIS. Assigned the coding-system (Lisp
5112 symbol) `japanese-shift-jis' by default.
5114 o coding-category-iso-7
5116 The category for a coding system which has the same code range
5117 as ISO2022 of 7-bit environment. This doesn't use any locking
5118 shift and single shift functions. This can encode/decode all
5119 charsets. Assigned the coding-system (Lisp symbol)
5120 `iso-2022-7bit' by default.
5122 o coding-category-iso-7-tight
5124 Same as coding-category-iso-7 except that this can
5125 encode/decode only the specified charsets.
5127 o coding-category-iso-8-1
5129 The category for a coding system which has the same code range
5130 as ISO2022 of 8-bit environment and graphic plane 1 used only
5131 for DIMENSION1 charset. This doesn't use any locking shift
5132 and single shift functions. Assigned the coding-system (Lisp
5133 symbol) `iso-latin-1' by default.
5135 o coding-category-iso-8-2
5137 The category for a coding system which has the same code range
5138 as ISO2022 of 8-bit environment and graphic plane 1 used only
5139 for DIMENSION2 charset. This doesn't use any locking shift
5140 and single shift functions. Assigned the coding-system (Lisp
5141 symbol) `japanese-iso-8bit' by default.
5143 o coding-category-iso-7-else
5145 The category for a coding system which has the same code range
5146 as ISO2022 of 7-bit environment but uses locking shift or
5147 single shift functions. Assigned the coding-system (Lisp
5148 symbol) `iso-2022-7bit-lock' by default.
5150 o coding-category-iso-8-else
5152 The category for a coding system which has the same code range
5153 as ISO2022 of 8-bit environment but uses locking shift or
5154 single shift functions. Assigned the coding-system (Lisp
5155 symbol) `iso-2022-8bit-ss2' by default.
5157 o coding-category-big5
5159 The category for a coding system which has the same code range
5160 as BIG5. Assigned the coding-system (Lisp symbol)
5161 `cn-big5' by default.
5163 o coding-category-utf-8
5165 The category for a coding system which has the same code range
5166 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5167 symbol) `utf-8' by default.
5169 o coding-category-utf-16-be
5171 The category for a coding system in which a text has an
5172 Unicode signature (cf. Unicode Standard) in the order of BIG
5173 endian at the head. Assigned the coding-system (Lisp symbol)
5174 `utf-16-be' by default.
5176 o coding-category-utf-16-le
5178 The category for a coding system in which a text has an
5179 Unicode signature (cf. Unicode Standard) in the order of
5180 LITTLE endian at the head. Assigned the coding-system (Lisp
5181 symbol) `utf-16-le' by default.
5183 o coding-category-ccl
5185 The category for a coding system of which encoder/decoder is
5186 written in CCL programs. The default value is nil, i.e., no
5187 coding system is assigned.
5189 o coding-category-binary
5191 The category for a coding system not categorized in any of the
5192 above. Assigned the coding-system (Lisp symbol)
5193 `no-conversion' by default.
5195 Each of them is a Lisp symbol and the value is an actual
5196 `coding-system's (this is also a Lisp symbol) assigned by a user.
5197 What Emacs does actually is to detect a category of coding system.
5198 Then, it uses a `coding-system' assigned to it. If Emacs can't
5199 decide only one possible category, it selects a category of the
5200 highest priority. Priorities of categories are also specified by a
5201 user in a Lisp variable `coding-category-list'.
5205 #define EOL_SEEN_NONE 0
5206 #define EOL_SEEN_LF 1
5207 #define EOL_SEEN_CR 2
5208 #define EOL_SEEN_CRLF 4
5210 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5211 SOURCE is encoded. If CATEGORY is one of
5212 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5213 two-byte, else they are encoded by one-byte.
5215 Return one of EOL_SEEN_XXX. */
5217 #define MAX_EOL_CHECK_COUNT 3
5220 detect_eol (source
, src_bytes
, category
)
5221 unsigned char *source
;
5222 EMACS_INT src_bytes
;
5223 enum coding_category category
;
5225 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5228 int eol_seen
= EOL_SEEN_NONE
;
5230 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
5234 msb
= category
== (coding_category_utf_16_le
5235 | coding_category_utf_16_le_nosig
);
5238 while (src
+ 1 < src_end
)
5241 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5246 this_eol
= EOL_SEEN_LF
;
5247 else if (src
+ 3 >= src_end
5248 || src
[msb
+ 2] != 0
5249 || src
[lsb
+ 2] != '\n')
5250 this_eol
= EOL_SEEN_CR
;
5252 this_eol
= EOL_SEEN_CRLF
;
5254 if (eol_seen
== EOL_SEEN_NONE
)
5255 /* This is the first end-of-line. */
5256 eol_seen
= this_eol
;
5257 else if (eol_seen
!= this_eol
)
5259 /* The found type is different from what found before. */
5260 eol_seen
= EOL_SEEN_LF
;
5263 if (++total
== MAX_EOL_CHECK_COUNT
)
5271 while (src
< src_end
)
5274 if (c
== '\n' || c
== '\r')
5279 this_eol
= EOL_SEEN_LF
;
5280 else if (src
>= src_end
|| *src
!= '\n')
5281 this_eol
= EOL_SEEN_CR
;
5283 this_eol
= EOL_SEEN_CRLF
, src
++;
5285 if (eol_seen
== EOL_SEEN_NONE
)
5286 /* This is the first end-of-line. */
5287 eol_seen
= this_eol
;
5288 else if (eol_seen
!= this_eol
)
5290 /* The found type is different from what found before. */
5291 eol_seen
= EOL_SEEN_LF
;
5294 if (++total
== MAX_EOL_CHECK_COUNT
)
5304 adjust_coding_eol_type (coding
, eol_seen
)
5305 struct coding_system
*coding
;
5308 Lisp_Object eol_type
;
5310 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5311 if (eol_seen
& EOL_SEEN_LF
)
5313 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
5316 else if (eol_seen
& EOL_SEEN_CRLF
)
5318 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
5321 else if (eol_seen
& EOL_SEEN_CR
)
5323 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5329 /* Detect how a text specified in CODING is encoded. If a coding
5330 system is detected, update fields of CODING by the detected coding
   system.  */
/* Two cases are handled below.  When the type of CODING is
   `undecided', the registered categories are tried in priority order
   and CODING is set up anew from the winner.  When CODING belongs to
   the utf-16 auto category, only the byte order is detected.  */
5334 detect_coding (coding
)
5335 struct coding_system
*coding
;
5337 const unsigned char *src
, *src_end
;
5338 Lisp_Object attrs
, coding_type
;
/* Start from clean consumed/produced counters.  */
5340 coding
->consumed
= coding
->consumed_char
= 0;
5341 coding
->produced
= coding
->produced_char
= 0;
5342 coding_set_source (coding
);
5344 src_end
= coding
->source
+ coding
->src_bytes
;
5346 /* If we have not yet decided the text encoding type, detect it
   now.  */
5348 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
/* Walk over the source looking for a byte with the high bit set or
   one of the ISO control codes tested below.  */
5352 for (i
= 0, src
= coding
->source
; src
< src_end
; i
++, src
++)
5355 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
5357 || c
== ISO_CODE_SO
)))
/* Number of bytes passed over by the scan above, counted from the
   not-yet-consumed part of the source.  */
5360 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
/* Run the detectors only if something remains after that head.  */
5362 if (coding
->head_ascii
< coding
->src_bytes
)
5364 struct coding_detection_info detect_info
;
5365 enum coding_category category
;
5366 struct coding_system
*this;
5368 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
/* Try each category in the order given by coding_priorities.  */
5369 for (i
= 0; i
< coding_category_raw_text
; i
++)
5371 category
= coding_priorities
[i
];
5372 this = coding_categories
+ category
;
5375 /* No coding system of this category is defined. */
5376 detect_info
.rejected
|= (1 << category
);
5378 else if (category
>= coding_category_raw_text
)
5380 else if (detect_info
.checked
& (1 << category
))
5382 if (detect_info
.found
& (1 << category
))
/* Not checked yet: call this category's detector.  */
5385 else if ((*(this->detector
)) (coding
, &detect_info
)
5386 && detect_info
.found
& (1 << category
))
/* The auto category is narrowed to the byte order the detector
   reported.  */
5388 if (category
== coding_category_utf_16_auto
)
5390 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5391 category
= coding_category_utf_16_le
;
5393 category
= coding_category_utf_16_be
;
/* NOTE(review): presumably I stays below coding_category_raw_text
   only when the loop above was left early on a match -- the exits are
   not visible here, confirm.  */
5398 if (i
< coding_category_raw_text
)
5399 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
/* Every category was rejected: fall back to raw-text.  */
5400 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5401 setup_coding_system (Qraw_text
, coding
);
/* Some were rejected: look for a category, in priority order, that
   was not.  */
5402 else if (detect_info
.rejected
)
5403 for (i
= 0; i
< coding_category_raw_text
; i
++)
5404 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5406 this = coding_categories
+ coding_priorities
[i
];
5407 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
/* Second case: CODING is of the utf-16 auto category.  */
5412 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding
->id
)))
5413 == coding_category_utf_16_auto
)
5415 Lisp_Object coding_systems
;
5416 struct coding_detection_info detect_info
;
5419 = AREF (CODING_ID_ATTRS (coding
->id
), coding_attr_utf_16_bom
);
5420 detect_info
.found
= detect_info
.rejected
= 0;
/* When the bom attribute is a cons, its car is the coding system
   used for little endian and its cdr the one for big endian.  */
5421 if (CONSP (coding_systems
)
5422 && detect_coding_utf_16 (coding
, &detect_info
))
5424 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5425 setup_coding_system (XCAR (coding_systems
), coding
);
5426 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
5427 setup_coding_system (XCDR (coding_systems
), coding
);
/* NOTE(review): the header line of this definition is not visible in
   this excerpt; presumably this is decode_eol, which decode_coding
   calls -- confirm.  The body converts the end-of-line format of the
   CODING->produced bytes just written to the destination, detecting
   the format first when it is still undecided.  */
5435 struct coding_system
*coding
;
5437 Lisp_Object eol_type
;
5438 unsigned char *p
, *pbeg
, *pend
;
5440 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5441 if (EQ (eol_type
, Qunix
))
/* Locate the produced bytes: in plain memory when there is no
   destination object, else inside the buffer text.  */
5444 if (NILP (coding
->dst_object
))
5445 pbeg
= coding
->destination
;
5447 pbeg
= BYTE_POS_ADDR (coding
->dst_pos_byte
);
5448 pend
= pbeg
+ coding
->produced
;
/* A vector here means the eol format is not fixed yet: collect the
   kinds of end-of-line present in the produced bytes.  */
5450 if (VECTORP (eol_type
))
5452 int eol_seen
= EOL_SEEN_NONE
;
5454 for (p
= pbeg
; p
< pend
; p
++)
5457 eol_seen
|= EOL_SEEN_LF
;
5458 else if (*p
== '\r')
5460 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5462 eol_seen
|= EOL_SEEN_CRLF
;
5466 eol_seen
|= EOL_SEEN_CR
;
/* More than one kind was seen: treat the text as LF.  */
5469 if (eol_seen
!= EOL_SEEN_NONE
5470 && eol_seen
!= EOL_SEEN_LF
5471 && eol_seen
!= EOL_SEEN_CRLF
5472 && eol_seen
!= EOL_SEEN_CR
)
5473 eol_seen
= EOL_SEEN_LF
;
5474 if (eol_seen
!= EOL_SEEN_NONE
)
5475 eol_type
= adjust_coding_eol_type (coding
, eol_seen
);
/* Mac format: the loop below walks every produced byte.  */
5478 if (EQ (eol_type
, Qmac
))
5480 for (p
= pbeg
; p
< pend
; p
++)
/* Dos format: bytes are removed, scanning backward from the end.  */
5484 else if (EQ (eol_type
, Qdos
))
/* Plain memory destination: close the hole by moving the tail down
   one byte and shrinking PEND.  */
5488 if (NILP (coding
->dst_object
))
5490 for (p
= pend
- 2; p
>= pbeg
; p
--)
5493 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
-- - p
- 1);
/* Buffer destination: delete the byte through the buffer primitive.  */
5499 for (p
= pend
- 2; p
>= pbeg
; p
--)
5502 int pos_byte
= coding
->dst_pos_byte
+ (p
- pbeg
);
5503 int pos
= BYTE_TO_CHAR (pos_byte
);
5505 del_range_2 (pos
, pos_byte
, pos
+ 1, pos_byte
+ 1, 0);
/* Both counters shrink by N.  NOTE(review): N is presumably the
   number of bytes removed above -- its updates are not visible here.  */
5509 coding
->produced
-= n
;
5510 coding
->produced_char
-= n
;
5515 /* Return a translation table (or list of them) from coding system
5516 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5517 decoding (ENCODEP is zero). */
5520 get_translation_table (attrs
, encodep
)
5522 Lisp_Object standard
, translation_table
;
5525 translation_table
= CODING_ATTR_ENCODE_TBL (attrs
),
5526 standard
= Vstandard_translation_table_for_encode
;
5528 translation_table
= CODING_ATTR_DECODE_TBL (attrs
),
5529 standard
= Vstandard_translation_table_for_decode
;
5530 if (NILP (translation_table
))
5532 if (SYMBOLP (translation_table
))
5533 translation_table
= Fget (translation_table
, Qtranslation_table
);
5534 else if (CONSP (translation_table
))
5538 translation_table
= Fcopy_sequence (translation_table
);
5539 for (val
= translation_table
; CONSP (val
); val
= XCDR (val
))
5540 if (SYMBOLP (XCAR (val
)))
5541 XSETCAR (val
, Fget (XCAR (val
), Qtranslation_table
));
5543 if (! NILP (standard
))
5545 if (CONSP (translation_table
))
5546 translation_table
= nconc2 (translation_table
, Fcons (standard
, Qnil
));
5548 translation_table
= Fcons (translation_table
, Fcons (standard
, Qnil
));
5550 return translation_table
;
5555 translate_chars (coding
, table
)
5556 struct coding_system
*coding
;
5559 int *charbuf
= coding
->charbuf
;
5560 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5563 if (coding
->chars_at_source
)
5566 while (charbuf
< charbuf_end
)
5572 *charbuf
++ = translate_char (table
, c
);
/* Write the converted characters of CODING to its destination,
   starting at CODING->destination + CODING->produced.  The characters
   come either from CODING->charbuf or directly from the source bytes,
   depending on CODING->chars_at_source.  CODING->produced and
   CODING->produced_char are advanced, and the number of characters
   produced is returned.  */
5577 produce_chars (coding
)
5578 struct coding_system
*coding
;
5580 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5581 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5583 int produced_chars
= 0;
5585 if (! coding
->chars_at_source
)
5587 /* Characters are in coding->charbuf. */
5588 int *buf
= coding
->charbuf
;
5589 int *buf_end
= buf
+ coding
->charbuf_used
;
5590 unsigned char *adjusted_dst_end
;
/* Converting a buffer into itself: the writable area ends where the
   consumed part of the source ends.  */
5592 if (BUFFERP (coding
->src_object
)
5593 && EQ (coding
->src_object
, coding
->dst_object
))
5594 dst_end
= ((unsigned char *) coding
->source
) + coding
->consumed
;
/* Keep room for one multibyte sequence before the real end.  */
5595 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5597 while (buf
< buf_end
)
/* Too close to the end: enlarge the destination and recompute both
   end pointers from the possibly moved destination.  */
5601 if (dst
>= adjusted_dst_end
)
5603 dst
= alloc_destination (coding
,
5604 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5606 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5607 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
/* Store C in multibyte form, except that a byte8 character going to
   a unibyte destination is stored as its raw byte.  */
5611 if (coding
->dst_multibyte
5612 || ! CHAR_BYTE8_P (c
))
5613 CHAR_STRING_ADVANCE (c
, dst
);
5615 *dst
++ = CHAR_TO_BYTE8 (c
);
5619 /* This is an annotation datum. (-C) is the length of
   it.  */
/* NOTE(review): the declarations below presumably open the branch
   taken when the characters are still at the source -- the `else'
   line is not visible here.  */
5626 const unsigned char *src
= coding
->source
;
5627 const unsigned char *src_end
= src
+ coding
->src_bytes
;
5628 Lisp_Object eol_type
;
5630 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
/* Source and destination differ in multibyteness.  */
5632 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5634 if (coding
->src_multibyte
)
5641 const unsigned char *src_base
= src
;
5647 if (EQ (eol_type
, Qdos
))
5651 record_conversion_result
5652 (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
5653 goto no_more_source
;
5658 else if (EQ (eol_type
, Qmac
))
/* Record the progress, then make room.  When converting in place the
   source position itself bounds the destination.  After
   alloc_destination the source pointers are recomputed.  */
5663 coding
->consumed
= src
- coding
->source
;
5665 if (EQ (coding
->src_object
, coding
->dst_object
))
5666 dst_end
= (unsigned char *) src
;
5669 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5671 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5672 coding_set_source (coding
);
5673 src
= coding
->source
+ coding
->consumed
;
5674 src_end
= coding
->source
+ coding
->src_bytes
;
5684 while (src
< src_end
)
5691 if (EQ (eol_type
, Qdos
))
5697 else if (EQ (eol_type
, Qmac
))
/* Fewer than two free bytes left at the destination.  */
5700 if (dst
>= dst_end
- 1)
5702 coding
->consumed
= src
- coding
->source
;
5704 if (EQ (coding
->src_object
, coding
->dst_object
))
5705 dst_end
= (unsigned char *) src
;
5706 if (dst
>= dst_end
- 1)
5708 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5710 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5711 coding_set_source (coding
);
5712 src
= coding
->source
+ coding
->consumed
;
5713 src_end
= coding
->source
+ coding
->src_bytes
;
/* Distinct source and destination objects: REQUIRE is how many more
   bytes the source has than the destination can hold.  */
5721 if (!EQ (coding
->src_object
, coding
->dst_object
))
5723 int require
= coding
->src_bytes
- coding
->dst_bytes
;
/* Remember the offset so SRC can be restored after the destination
   has been enlarged.  */
5727 EMACS_INT offset
= src
- coding
->source
;
5729 dst
= alloc_destination (coding
, require
, dst
);
5730 coding_set_source (coding
);
5731 src
= coding
->source
+ offset
;
5732 src_end
= coding
->source
+ coding
->src_bytes
;
5735 produced_chars
= coding
->src_chars
;
5736 while (src
< src_end
)
5742 if (EQ (eol_type
, Qdos
))
5749 else if (EQ (eol_type
, Qmac
))
/* The whole source has been used.  */
5755 coding
->consumed
= coding
->src_bytes
;
5756 coding
->consumed_char
= coding
->src_chars
;
/* Bytes written by this call; for a buffer destination they are
   handed to insert_from_gap.  */
5759 produced
= dst
- (coding
->destination
+ coding
->produced
);
5760 if (BUFFERP (coding
->dst_object
))
5761 insert_from_gap (produced_chars
, produced
);
5762 coding
->produced
+= produced
;
5763 coding
->produced_char
+= produced_chars
;
5764 return produced_chars
;
5767 /* Compose text in CODING->object according to the annotation data at
5768 CHARBUF. CHARBUF is an array:
5769 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
   FROM and TO are offsets relative to CODING->dst_pos.  */
5773 produce_composition (coding
, charbuf
)
5774 struct coding_system
*coding
;
5779 enum composition_method method
;
5780 Lisp_Object components
;
/* Turn the relative offsets into positions in the destination.  */
5783 from
= coding
->dst_pos
+ charbuf
[2];
5784 to
= coding
->dst_pos
+ charbuf
[3];
5785 method
= (enum composition_method
) (charbuf
[4]);
5787 if (method
== COMPOSITION_RELATIVE
)
/* NOTE(review): ARGS has room for MAX_COMPOSITION_COMPONENTS * 2 - 1
   elements; no check of LEN against that bound is visible here --
   confirm that the annotation writer guarantees it.  */
5791 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5796 for (i
= 0; i
< len
; i
++)
5797 args
[i
] = make_number (charbuf
[i
]);
/* The components become a string for COMPOSITION_WITH_ALTCHARS and a
   vector for the other methods reaching this point.  */
5798 components
= (method
== COMPOSITION_WITH_ALTCHARS
5799 ? Fstring (len
, args
) : Fvector (len
, args
));
5801 compose_text (from
, to
, components
, Qnil
, coding
->dst_object
);
5805 /* Put `charset' property on text in CODING->object according to
5806 the annotation data at CHARBUF. CHARBUF is an array:
5807 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5811 produce_charset (coding
, charbuf
)
5812 struct coding_system
*coding
;
5815 EMACS_INT from
= coding
->dst_pos
+ charbuf
[2];
5816 EMACS_INT to
= coding
->dst_pos
+ charbuf
[3];
5817 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[4]);
5819 Fput_text_property (make_number (from
), make_number (to
),
5820 Qcharset
, CHARSET_NAME (charset
),
5821 coding
->dst_object
);
/* Number of ints first requested for the conversion work area.  */
5825 #define CHARBUF_SIZE 0x4000
/* Give CODING a work area (CODING->charbuf) taken from the stack with
   alloca, starting from CHARBUF_SIZE ints, and record its size in
   CODING->charbuf_size.  If no area could be obtained, the macro
   records CODING_RESULT_INSUFFICIENT_MEM and returns CODING->result
   from the function that uses it, so it may only be used in functions
   returning that kind of value.
   NOTE(review): alloca normally gives no failure indication, so the
   retry loop presumably never takes effect -- confirm.  */
5827 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5829 int size = CHARBUF_SIZE;; \
5831 coding->charbuf = NULL; \
5832 while (size > 1024) \
5834 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5835 if (coding->charbuf) \
5839 if (! coding->charbuf) \
5841 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5842 return coding->result; \
5844 coding->charbuf_size = size; \
/* Walk CODING->charbuf and apply each annotation found there to the
   destination: composition annotations go to produce_composition and
   charset annotations to produce_charset.  */
5849 produce_annotation (coding
)
5850 struct coding_system
*coding
;
5852 int *charbuf
= coding
->charbuf
;
5853 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
/* NOTE(review): presumably nothing is done without a destination
   object -- the statement guarded by this test is not visible here.  */
5855 if (NILP (coding
->dst_object
))
5858 while (charbuf
< charbuf_end
)
/* An annotation starts with its negated length.  */
5864 int len
= -*charbuf
;
5867 case CODING_ANNOTATE_COMPOSITION_MASK
:
5868 produce_composition (coding
, charbuf
);
5870 case CODING_ANNOTATE_CHARSET_MASK
:
5871 produce_charset (coding
, charbuf
);
5881 /* Decode the data at CODING->src_object into CODING->dst_object.
5882 CODING->src_object is a buffer, a string, or nil.
5883 CODING->dst_object is a buffer.
5885 If CODING->src_object is a buffer, it must be the current buffer.
5886 In this case, if CODING->src_pos is positive, it is a position of
5887 the source text in the buffer, otherwise, the source text is in the
5888 gap area of the buffer, and CODING->src_pos specifies the offset of
5889 the text from GPT (which must be the same as PT). If this is the
5890 same buffer as CODING->dst_object, CODING->src_pos must be
   negative.
5893 If CODING->src_object is a string, CODING->src_pos in an index to
   that string.
5896 If CODING->src_object is nil, CODING->source must already point to
5897 the non-relocatable memory area. In this case, CODING->src_pos is
5898 an offset from CODING->source.
5900 The decoded data is inserted at the current point of the buffer
   CODING->dst_object.

   The value returned is CODING->result.  */
5905 decode_coding (coding
)
5906 struct coding_system
*coding
;
5909 Lisp_Object undo_list
;
5910 Lisp_Object translation_table
;
/* If the gap lies strictly inside the source text of a buffer, move
   it to the start of that text.  */
5912 if (BUFFERP (coding
->src_object
)
5913 && coding
->src_pos
> 0
5914 && coding
->src_pos
< GPT
5915 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5916 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
/* Make the destination buffer current, put the gap at point, and
   switch undo recording off (undo_list = Qt) after saving the old
   list; it is restored near the end of this function.  */
5919 if (BUFFERP (coding
->dst_object
))
5921 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5922 set_buffer_internal (XBUFFER (coding
->dst_object
));
5924 move_gap_both (PT
, PT_BYTE
);
5925 undo_list
= current_buffer
->undo_list
;
5926 current_buffer
->undo_list
= Qt
;
5929 coding
->consumed
= coding
->consumed_char
= 0;
5930 coding
->produced
= coding
->produced_char
= 0;
5931 coding
->chars_at_source
= 0;
5932 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
5935 ALLOC_CONVERSION_WORK_AREA (coding
);
5937 attrs
= CODING_ID_ATTRS (coding
->id
);
5938 translation_table
= get_translation_table (attrs
, 0);
/* One round of the conversion loop: decode a chunk, translate it if
   a table is given, write it to the destination, then apply the
   annotations the decoder recorded.  */
5942 coding_set_source (coding
);
5943 coding
->annotated
= 0;
5944 (*(coding
->decoder
)) (coding
);
5945 if (!NILP (translation_table
))
5946 translate_chars (coding
, translation_table
);
5947 coding_set_destination (coding
);
5948 produce_chars (coding
);
5949 if (coding
->annotated
)
5950 produce_annotation (coding
);
/* Repeat while source bytes remain and no result code is set.  */
5952 while (coding
->consumed
< coding
->src_bytes
5953 && ! coding
->result
);
/* Handle source bytes the decoder left unprocessed.  */
5955 coding
->carryover_bytes
= 0;
5956 if (coding
->consumed
< coding
->src_bytes
)
5958 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5959 const unsigned char *src
;
5961 coding_set_source (coding
);
5962 coding_set_destination (coding
);
5963 src
= coding
->source
+ coding
->consumed
;
5965 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5967 /* Flush out unprocessed data as binary chars. We are sure
5968 that the number of data is less than the size of
   coding->charbuf.  */
5970 coding
->charbuf_used
= 0;
5971 while (nbytes
-- > 0)
/* A byte with the high bit set is stored negated.  */
5975 coding
->charbuf
[coding
->charbuf_used
++] = (c
& 0x80 ? - c
: c
);
5977 produce_chars (coding
);
5981 /* Record unprocessed bytes in coding->carryover. We are
5982 sure that the number of data is less than the size of
5983 coding->carryover. */
5984 unsigned char *p
= coding
->carryover
;
5986 coding
->carryover_bytes
= nbytes
;
5987 while (nbytes
-- > 0)
5990 coding
->consumed
= coding
->src_bytes
;
/* Restore undo recording and record the whole insertion at once.  */
5993 if (BUFFERP (coding
->dst_object
))
5995 current_buffer
->undo_list
= undo_list
;
5996 record_insert (coding
->dst_pos
, coding
->produced_char
);
/* Anything but a fixed unix eol type needs eol post-processing.  */
5998 if (! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5999 decode_eol (coding
);
6000 return coding
->result
;
6004 /* Extract an annotation datum from a composition starting at POS and
6005 ending before LIMIT of CODING->src_object (buffer or string), store
6006 the data in BUF, set *STOP to a starting position of the next
6007 composition (if any) or to LIMIT, and return the address of the
6008 next element of BUF.
6010 If such an annotation is not found, set *STOP to a starting
6011 position of a composition after POS (if any) or to LIMIT, and
   return BUF.  */
6015 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
6016 EMACS_INT pos
, limit
;
6017 struct coding_system
*coding
;
6021 EMACS_INT start
, end
;
/* Look for a composition between POS and LIMIT.  */
6024 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
/* A composition exists but begins after POS.  */
6027 else if (start
> pos
)
6033 /* We found a composition. Store the corresponding
6034 annotation data in BUF. */
6036 enum composition_method method
= COMPOSITION_METHOD (prop
);
6037 int nchars
= COMPOSITION_LENGTH (prop
);
6039 ADD_COMPOSITION_DATA (buf
, 0, nchars
, method
);
/* Methods other than the relative one also carry components, which
   may be a vector, a string, a single integer, or a list.  */
6040 if (method
!= COMPOSITION_RELATIVE
)
6042 Lisp_Object components
;
6045 components
= COMPOSITION_COMPONENTS (prop
);
6046 if (VECTORP (components
))
6048 len
= XVECTOR (components
)->size
;
6049 for (i
= 0; i
< len
; i
++)
6050 *buf
++ = XINT (AREF (components
, i
));
6052 else if (STRINGP (components
))
6054 len
= SCHARS (components
);
6058 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
6062 else if (INTEGERP (components
))
6065 *buf
++ = XINT (components
);
6067 else if (CONSP (components
))
6069 for (len
= 0; CONSP (components
);
6070 len
++, components
= XCDR (components
))
6071 *buf
++ = XINT (XCAR (components
));
/* Search again from the end of this composition.  */
6079 if (find_composition (end
, limit
, &start
, &end
, &prop
,
6090 /* Extract an annotation datum from a text property `charset' at POS of
6091 CODING->src_object (buffer of string), store the data in BUF, set
6092 *STOP to the position where the value of `charset' property changes
6093 (limiting by LIMIT), and return the address of the next element of
   BUF.
6096 If the property value is nil, set *STOP to the position where the
6097 property value is non-nil (limiting by LIMIT), and return BUF. */
6100 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
6101 EMACS_INT pos
, limit
;
6102 struct coding_system
*coding
;
6106 Lisp_Object val
, next
;
/* Read the `charset' property at POS; a non-nil value naming a
   charset is converted to that charset's ID.  */
6109 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
6110 if (! NILP (val
) && CHARSETP (val
))
6111 id
= XINT (CHARSET_SYMBOL_ID (val
));
6114 ADD_CHARSET_DATA (buf
, 0, 0, id
);
/* The next change of the property, bounded by LIMIT, becomes *STOP.  */
6115 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
6117 make_number (limit
));
6118 *stop
= XINT (next
);
/* Read characters from the source of CODING into CODING->charbuf,
   together with composition and charset annotations when CODING asks
   for them.  On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used reflect what was read, and
   CODING->chars_at_source is cleared.  */
6124 consume_chars (coding
)
6125 struct coding_system
*coding
;
6127 int *buf
= coding
->charbuf
;
6128 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
6129 const unsigned char *src
= coding
->source
+ coding
->consumed
;
6130 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
6131 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
6132 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
6133 int multibytep
= coding
->src_multibyte
;
6134 Lisp_Object eol_type
;
6136 EMACS_INT stop
, stop_composition
, stop_charset
;
6138 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
6139 if (VECTORP (eol_type
))
6142 /* Note: composition handling is not yet implemented. */
6143 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* Without a source object there is no annotation to look for, so
   every stop position is the end of the text.  */
6145 if (NILP (coding
->src_object
))
6146 stop
= stop_composition
= stop_charset
= end_pos
;
/* Otherwise each kind of annotation that is wanted is checked
   starting at POS; a kind that is not wanted never stops the scan.  */
6149 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
6150 stop
= stop_composition
= pos
;
6152 stop
= stop_composition
= end_pos
;
6153 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
6154 stop
= stop_charset
= pos
;
6156 stop_charset
= end_pos
;
6159 /* Compensate for CRLF and conversion. */
6160 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
6161 while (buf
< buf_end
)
/* At a stop position: collect the annotations that start here and
   compute the next stop as the nearer of the two.  */
6167 if (pos
== stop_composition
)
6168 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
6169 buf
, &stop_composition
);
6170 if (pos
== stop_charset
)
6171 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
6172 buf
, &stop_charset
);
6173 stop
= (stop_composition
< stop_charset
6174 ? stop_composition
: stop_charset
);
6181 if (! CODING_FOR_UNIBYTE (coding
)
6182 && (bytes
= MULTIBYTE_LENGTH (src
, src_end
)) > 0)
6183 c
= STRING_CHAR_ADVANCE (src
), pos
+= bytes
;
6188 c
= STRING_CHAR_ADVANCE (src
), pos
++;
6189 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
6191 if (! EQ (eol_type
, Qunix
))
6195 if (EQ (eol_type
, Qdos
))
/* Publish the progress made by this call.  */
6204 coding
->consumed
= src
- coding
->source
;
6205 coding
->consumed_char
= pos
- coding
->src_pos
;
6206 coding
->charbuf_used
= buf
- coding
->charbuf
;
6207 coding
->chars_at_source
= 0;
6211 /* Encode the text at CODING->src_object into CODING->dst_object.
6212 CODING->src_object is a buffer or a string.
6213 CODING->dst_object is a buffer or nil.
6215 If CODING->src_object is a buffer, it must be the current buffer.
6216 In this case, if CODING->src_pos is positive, it is a position of
6217 the source text in the buffer, otherwise, the source text is in the
6218 gap area of the buffer, and coding->src_pos specifies the offset of
6219 the text from GPT (which must be the same as PT). If this is the
6220 same buffer as CODING->dst_object, CODING->src_pos must be
6221 negative and CODING should not have `pre-write-conversion'.
6223 If CODING->src_object is a string, CODING should not have
6224 `pre-write-conversion'.
6226 If CODING->dst_object is a buffer, the encoded data is inserted at
6227 the current point of that buffer.
6229 If CODING->dst_object is nil, the encoded data is placed at the
6230 memory area specified by CODING->destination. */
/* The value returned is CODING->result.  */
6233 encode_coding (coding
)
6234 struct coding_system
*coding
;
6237 Lisp_Object translation_table
;
6239 attrs
= CODING_ID_ATTRS (coding
->id
);
6240 translation_table
= get_translation_table (attrs
, 1);
/* With a buffer destination, make it current and take the
   multibyteness of the output from that buffer.  */
6242 if (BUFFERP (coding
->dst_object
))
6244 set_buffer_internal (XBUFFER (coding
->dst_object
));
6245 coding
->dst_multibyte
6246 = ! NILP (current_buffer
->enable_multibyte_characters
);
6249 coding
->consumed
= coding
->consumed_char
= 0;
6250 coding
->produced
= coding
->produced_char
= 0;
6251 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
6254 ALLOC_CONVERSION_WORK_AREA (coding
);
/* One round of the conversion loop: read characters into the work
   area, translate them if a table is given, then run the encoder.  */
6257 coding_set_source (coding
);
6258 consume_chars (coding
);
6260 if (!NILP (translation_table
))
6261 translate_chars (coding
, translation_table
);
6263 coding_set_destination (coding
);
6264 (*(coding
->encoder
)) (coding
);
/* Repeat until every source character has been consumed.  */
6265 } while (coding
->consumed_char
< coding
->src_chars
);
/* For a buffer destination, hand the produced text to
   insert_from_gap.  */
6267 if (BUFFERP (coding
->dst_object
))
6268 insert_from_gap (coding
->produced_char
, coding
->produced
);
6270 return (coding
->result
);
6274 /* Name (or base name) of work buffer for code conversion. */
6275 static Lisp_Object Vcode_conversion_workbuf_name
;
6277 /* A working buffer used by the top level conversion. Once it is
6278 created, it is never destroyed. It has the name
6279 Vcode_conversion_workbuf_name. The other working buffers are
6280 destroyed after the use is finished, and their names are modified
6281 versions of Vcode_conversion_workbuf_name. */
6282 static Lisp_Object Vcode_conversion_reused_workbuf
;
6284 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
   make_conversion_work_buffer post-increments this on every request,
   and code_conversion_restore resets it to 0 when the reused buffer is
   released, so the value may exceed 1 while conversions are nested. */
6285 static int reused_workbuf_in_use
;
6288 /* Return a working buffer for code conversion. MULTIBYTE specifies the
6289 multibyteness of the returned buffer. */
6292 make_conversion_work_buffer (multibyte
)
6294 Lisp_Object name
, workbuf
;
6295 struct buffer
*current
;
/* Post-increment: the test sees the value from before this call, so a
   nonzero result means the shared work buffer has already been handed
   out.  In that case a fresh buffer with a generated name is made.  */
6297 if (reused_workbuf_in_use
++)
6299 name
= Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name
, Qnil
);
6300 workbuf
= Fget_buffer_create (name
);
/* Otherwise (presumably the else branch -- its keyword is not visible
   here) use the buffer with the base name, and remember it as the
   reusable work buffer the first time it is created.  */
6304 name
= Vcode_conversion_workbuf_name
;
6305 workbuf
= Fget_buffer_create (name
);
6306 if (NILP (Vcode_conversion_reused_workbuf
))
6307 Vcode_conversion_reused_workbuf
= workbuf
;
/* Temporarily make the work buffer current to set its undo_list to Qt
   and its multibyteness from MULTIBYTE, then switch back to the buffer
   that was current on entry.  */
6309 current
= current_buffer
;
6310 set_buffer_internal (XBUFFER (workbuf
));
6312 current_buffer
->undo_list
= Qt
;
6313 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
6314 set_buffer_internal (current
);
/* Unwind handler registered by code_conversion_save.  ARG is a cons
   whose car is the buffer that was current when the conversion started
   and whose cdr is the work buffer, or nil when none was made.  */
6320 code_conversion_restore (arg
)
6323 Lisp_Object current
, workbuf
;
6325 current
= XCAR (arg
);
6326 workbuf
= XCDR (arg
);
6327 if (! NILP (workbuf
))
/* The shared work buffer is only marked free again; any other work
   buffer is killed if it is still live.  */
6329 if (EQ (workbuf
, Vcode_conversion_reused_workbuf
))
6330 reused_workbuf_in_use
= 0;
6331 else if (! NILP (Fbuffer_live_p (workbuf
)))
6332 Fkill_buffer (workbuf
);
/* Make the originally current buffer current again.  */
6334 set_buffer_internal (XBUFFER (current
));
/* Arrange for the current buffer to be restored, and any work buffer to
   be released, when the specpdl is unwound.  MULTIBYTE is forwarded to
   make_conversion_work_buffer.  Callers in this file use the return
   value as the work buffer (presumably WORKBUF is returned -- the
   return statement is not visible here).  */
6339 code_conversion_save (with_work_buf
, multibyte
)
6340 int with_work_buf
, multibyte
;
6342 Lisp_Object workbuf
= Qnil
;
/* NOTE(review): the guard on the next statement is not visible here;
   presumably it tests WITH_WORK_BUF, leaving WORKBUF nil otherwise.  */
6345 workbuf
= make_conversion_work_buffer (multibyte
);
/* The unwind argument is the pair (current-buffer . WORKBUF) that
   code_conversion_restore takes apart.  */
6346 record_unwind_protect (code_conversion_restore
,
6347 Fcons (Fcurrent_buffer (), workbuf
));
/* Decode CHARS characters (BYTES bytes) of the current buffer with
   CODING, using the current buffer as both source and destination.
   The source position is given as the negated counts, and the decoded
   text goes to point.  Returns CODING->result.  */
6352 decode_coding_gap (coding
, chars
, bytes
)
6353 struct coding_system
*coding
;
6354 EMACS_INT chars
, bytes
;
/* Remember the specpdl depth so the unwind entry made by
   code_conversion_save can be popped by unbind_to below.  */
6356 int count
= specpdl_ptr
- specpdl
;
6359 code_conversion_save (0, 0);
6361 coding
->src_object
= Fcurrent_buffer ();
6362 coding
->src_chars
= chars
;
6363 coding
->src_bytes
= bytes
;
6364 coding
->src_pos
= -chars
;
6365 coding
->src_pos_byte
= -bytes
;
/* More bytes than characters is taken to mean multibyte source.  */
6366 coding
->src_multibyte
= chars
< bytes
;
6367 coding
->dst_object
= coding
->src_object
;
6368 coding
->dst_pos
= PT
;
6369 coding
->dst_pos_byte
= PT_BYTE
;
6370 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
/* The whole text is supplied in one call, so flag it as the last
   block.  */
6371 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6373 if (CODING_REQUIRE_DETECTION (coding
))
6374 detect_coding (coding
);
6376 decode_coding (coding
);
/* If the coding system has a post-read function, call it with point at
   the start of the decoded text and the number of produced characters,
   then add whatever change it made to the buffer size (Z, Z_BYTE) to
   the produced counts.  */
6378 attrs
= CODING_ID_ATTRS (coding
->id
);
6379 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6381 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6384 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6385 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6386 make_number (coding
->produced_char
));
6388 coding
->produced_char
+= Z
- prev_Z
;
6389 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6392 unbind_to (count
, Qnil
);
6393 return coding
->result
;
/* Encode CHARS characters (BYTES bytes) of the current buffer with
   CODING, using the current buffer as both source and destination.
   The source position is given as the negated counts, and the encoded
   text goes to point.  Returns CODING->result.  */
6397 encode_coding_gap (coding
, chars
, bytes
)
6398 struct coding_system
*coding
;
6399 EMACS_INT chars
, bytes
;
/* Remember the specpdl depth so the unwind entry made by
   code_conversion_save can be popped by unbind_to below.  */
6401 int count
= specpdl_ptr
- specpdl
;
6403 code_conversion_save (0, 0);
6405 coding
->src_object
= Fcurrent_buffer ();
6406 coding
->src_chars
= chars
;
6407 coding
->src_bytes
= bytes
;
6408 coding
->src_pos
= -chars
;
6409 coding
->src_pos_byte
= -bytes
;
/* More bytes than characters is taken to mean multibyte source.  */
6410 coding
->src_multibyte
= chars
< bytes
;
6411 coding
->dst_object
= coding
->src_object
;
6412 coding
->dst_pos
= PT
;
6413 coding
->dst_pos_byte
= PT_BYTE
;
/* dst_multibyte is not set here; encode_coding derives it from the
   destination buffer.  */
6415 encode_coding (coding
);
6417 unbind_to (count
, Qnil
);
6418 return coding
->result
;
6422 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6423 SRC_OBJECT into DST_OBJECT by coding context CODING.
6425 SRC_OBJECT is a buffer, a string, or Qnil.
6427 If it is a buffer, the text is at point of the buffer. FROM and TO
6428 are positions in the buffer.
6430 If it is a string, the text is at the beginning of the string.
6431 FROM and TO are indices to the string.
6433 If it is nil, the text is at coding->source. FROM and TO are
6434 indices to coding->source.
6436 DST_OBJECT is a buffer, Qt, or Qnil.
6438 If it is a buffer, the decoded text is inserted at point of the
6439 buffer. If the buffer is the same as SRC_OBJECT, the source text
is deleted and replaced by the decoded text.
6442 If it is Qt, a string is made from the decoded text, and
6443 set in CODING->dst_object.
6445 If it is Qnil, the decoded text is stored at CODING->destination.
6446 The caller must allocate CODING->dst_bytes bytes at
6447 CODING->destination by xmalloc. If the decoded text is longer than
6448 CODING->dst_bytes, CODING->destination is relocated by xrealloc. */
6452 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6454 struct coding_system
*coding
;
6455 Lisp_Object src_object
;
6456 EMACS_INT from
, from_byte
, to
, to_byte
;
6457 Lisp_Object dst_object
;
6459 int count
= specpdl_ptr
- specpdl
;
6460 unsigned char *destination
;
6461 EMACS_INT dst_bytes
;
6462 EMACS_INT chars
= to
- from
;
6463 EMACS_INT bytes
= to_byte
- from_byte
;
/* saved_pt stays -1 unless source and destination are the same buffer;
   it is set below only in that case.  */
6466 int saved_pt
= -1, saved_pt_byte
;
6468 buffer
= Fcurrent_buffer ();
/* For a raw-memory destination, remember the caller's area and its
   size; the result is copied into it after decoding.  */
6470 if (NILP (dst_object
))
6472 destination
= coding
->destination
;
6473 dst_bytes
= coding
->dst_bytes
;
6476 coding
->src_object
= src_object
;
6477 coding
->src_chars
= chars
;
6478 coding
->src_bytes
= bytes
;
6479 coding
->src_multibyte
= chars
< bytes
;
/* Describe where the source text starts, depending on the kind of
   SRC_OBJECT.  */
6481 if (STRINGP (src_object
))
6483 coding
->src_pos
= from
;
6484 coding
->src_pos_byte
= from_byte
;
6486 else if (BUFFERP (src_object
))
6488 set_buffer_internal (XBUFFER (src_object
));
6490 move_gap_both (from
, from_byte
);
/* Same buffer for source and destination: save point, delete the
   source range, and describe the source by the negated counts.  */
6491 if (EQ (src_object
, dst_object
))
6493 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6494 TEMP_SET_PT_BOTH (from
, from_byte
);
6495 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6496 coding
->src_pos
= -chars
;
6497 coding
->src_pos_byte
= -bytes
;
6501 coding
->src_pos
= from
;
6502 coding
->src_pos_byte
= from_byte
;
6506 if (CODING_REQUIRE_DETECTION (coding
))
6507 detect_coding (coding
);
6508 attrs
= CODING_ID_ATTRS (coding
->id
);
/* Choose the destination.  A multibyte work buffer is used when a
   string result is requested, or when a post-read function has to run
   on text destined for raw memory.  */
6510 if (EQ (dst_object
, Qt
)
6511 || (! NILP (CODING_ATTR_POST_READ (attrs
))
6512 && NILP (dst_object
)))
6514 coding
->dst_object
= code_conversion_save (1, 1);
6515 coding
->dst_pos
= BEG
;
6516 coding
->dst_pos_byte
= BEG_BYTE
;
6517 coding
->dst_multibyte
= 1;
/* A buffer destination receives the text at that buffer's point.  */
6519 else if (BUFFERP (dst_object
))
6521 code_conversion_save (0, 0);
6522 coding
->dst_object
= dst_object
;
6523 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6524 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6525 coding
->dst_multibyte
6526 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
/* Remaining case: no destination object.  */
6530 code_conversion_save (0, 0);
6531 coding
->dst_object
= Qnil
;
6532 coding
->dst_multibyte
= 1;
6535 decode_coding (coding
);
6537 if (BUFFERP (coding
->dst_object
))
6538 set_buffer_internal (XBUFFER (coding
->dst_object
));
/* Run the post-read function, if any, with point at the start of the
   decoded text, then add its change to the buffer size to the produced
   counts.  The source and destination objects are GC-protected across
   the Lisp call.  */
6540 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6542 struct gcpro gcpro1
, gcpro2
;
6543 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6546 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6547 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6548 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6549 make_number (coding
->produced_char
));
6552 coding
->produced_char
+= Z
- prev_Z
;
6553 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
/* String result: take the text of the current buffer.  */
6556 if (EQ (dst_object
, Qt
))
6558 coding
->dst_object
= Fbuffer_string ();
/* Raw-memory result that went through a work buffer: grow the
   caller's area when it is too small, then copy the buffer text into
   it.  */
6560 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6562 set_buffer_internal (XBUFFER (coding
->dst_object
));
6563 if (dst_bytes
< coding
->produced
)
6566 = (unsigned char *) xrealloc (destination
, coding
->produced
);
6569 record_conversion_result (coding
,
6570 CODING_RESULT_INSUFFICIENT_DST
);
6571 unbind_to (count
, Qnil
);
/* Move the gap to the beginning when it lies inside the produced text
   (presumably so that the bytes copied below are contiguous).  */
6574 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6575 move_gap_both (BEGV
, BEGV_BYTE
);
6576 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6577 coding
->destination
= destination
;
6583 /* This is the case of:
6584 (BUFFERP (src_object) && EQ (src_object, dst_object))
6585 As we have moved PT while replacing the original buffer
6586 contents, we must recover it now. */
6587 set_buffer_internal (XBUFFER (src_object
));
/* Point before the region is unchanged; point inside it moves to FROM;
   point after it shifts by the change in text size.  */
6588 if (saved_pt
< from
)
6589 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6590 else if (saved_pt
< from
+ chars
)
6591 TEMP_SET_PT_BOTH (from
, from_byte
);
6592 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6593 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6594 saved_pt_byte
+ (coding
->produced
- bytes
));
/* Final alternative (the multibyte test above failed): both positions
   shift by the byte delta.  */
6596 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6597 saved_pt_byte
+ (coding
->produced
- bytes
));
6600 unbind_to (count
, coding
->dst_object
);
/* Encode the text in the range FROM/FROM_BYTE to TO/TO_BYTE of
   SRC_OBJECT with CODING.  As far as the visible statements show:
   SRC_OBJECT may be a string or a buffer (a third case reads from
   CODING->source); DST_OBJECT may be a buffer, Qt (the result is made
   into a unibyte string stored in CODING->dst_object), or something
   else, in which case CODING->dst_object is set to Qnil.  */
6605 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6607 struct coding_system
*coding
;
6608 Lisp_Object src_object
;
6609 EMACS_INT from
, from_byte
, to
, to_byte
;
6610 Lisp_Object dst_object
;
6612 int count
= specpdl_ptr
- specpdl
;
6613 EMACS_INT chars
= to
- from
;
6614 EMACS_INT bytes
= to_byte
- from_byte
;
/* saved_pt stays -1 unless source and destination are the same buffer;
   it is set below only in that case.  */
6617 int saved_pt
= -1, saved_pt_byte
;
6619 buffer
= Fcurrent_buffer ();
6621 coding
->src_object
= src_object
;
6622 coding
->src_chars
= chars
;
6623 coding
->src_bytes
= bytes
;
6624 coding
->src_multibyte
= chars
< bytes
;
6626 attrs
= CODING_ID_ATTRS (coding
->id
);
/* With a pre-write function, copy the source text into a work buffer,
   let the function operate on the whole of that buffer, and then
   encode from whatever buffer is current afterwards.  */
6628 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6630 coding
->src_object
= code_conversion_save (1, coding
->src_multibyte
);
6631 set_buffer_internal (XBUFFER (coding
->src_object
));
6632 if (STRINGP (src_object
))
6633 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6634 else if (BUFFERP (src_object
))
6635 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6637 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
/* Encoding in place: the original text has been copied out, so delete
   it from the source buffer now and return to the work buffer.  */
6639 if (EQ (src_object
, dst_object
))
6641 set_buffer_internal (XBUFFER (src_object
));
6642 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6643 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6644 set_buffer_internal (XBUFFER (coding
->src_object
));
6647 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6648 make_number (BEG
), make_number (Z
));
/* The source is re-read from the current buffer after the call, so a
   pre-write function that switches buffers is followed.  */
6649 coding
->src_object
= Fcurrent_buffer ();
6651 move_gap_both (BEG
, BEG_BYTE
);
6652 coding
->src_chars
= Z
- BEG
;
6653 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6654 coding
->src_pos
= BEG
;
6655 coding
->src_pos_byte
= BEG_BYTE
;
6656 coding
->src_multibyte
= Z
< Z_BYTE
;
/* No pre-write function, string source.  */
6658 else if (STRINGP (src_object
))
6660 code_conversion_save (0, 0);
6661 coding
->src_pos
= from
;
6662 coding
->src_pos_byte
= from_byte
;
/* No pre-write function, buffer source.  */
6664 else if (BUFFERP (src_object
))
6666 code_conversion_save (0, 0);
6667 set_buffer_internal (XBUFFER (src_object
));
/* In place: the value of del_range_1 becomes the source object, read
   from position 0.  */
6668 if (EQ (src_object
, dst_object
))
6670 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6671 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
6672 coding
->src_pos
= 0;
6673 coding
->src_pos_byte
= 0;
/* Otherwise, when the gap lies inside the range, move it to FROM.  */
6677 if (from
< GPT
&& to
>= GPT
)
6678 move_gap_both (from
, from_byte
);
6679 coding
->src_pos
= from
;
6680 coding
->src_pos_byte
= from_byte
;
6684 code_conversion_save (0, 0);
/* Set up the destination.  */
6686 if (BUFFERP (dst_object
))
6688 coding
->dst_object
= dst_object
;
6689 if (EQ (src_object
, dst_object
))
6691 coding
->dst_pos
= from
;
6692 coding
->dst_pos_byte
= from_byte
;
6696 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6697 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6699 coding
->dst_multibyte
6700 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
/* String result: start with a malloc'ed area of one byte per source
   character, and at least one byte.  */
6702 else if (EQ (dst_object
, Qt
))
6704 coding
->dst_object
= Qnil
;
6705 coding
->dst_bytes
= coding
->src_chars
;
6706 if (coding
->dst_bytes
== 0)
6707 coding
->dst_bytes
= 1;
6708 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6709 coding
->dst_multibyte
= 0;
6713 coding
->dst_object
= Qnil
;
6714 coding
->dst_multibyte
= 0;
6717 encode_coding (coding
);
/* String result: build it from the destination buffer or from the
   malloc'ed area, freeing that area afterwards.  */
6719 if (EQ (dst_object
, Qt
))
6721 if (BUFFERP (coding
->dst_object
))
6722 coding
->dst_object
= Fbuffer_string ();
6726 = make_unibyte_string ((char *) coding
->destination
,
6728 xfree (coding
->destination
);
6734 /* This is the case of:
6735 (BUFFERP (src_object) && EQ (src_object, dst_object))
6736 As we have moved PT while replacing the original buffer
6737 contents, we must recover it now. */
6738 set_buffer_internal (XBUFFER (src_object
));
/* Point before the region is unchanged; point inside it moves to FROM;
   point after it shifts by the change in text size.  */
6739 if (saved_pt
< from
)
6740 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6741 else if (saved_pt
< from
+ chars
)
6742 TEMP_SET_PT_BOTH (from
, from_byte
);
6743 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6744 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6745 saved_pt_byte
+ (coding
->produced
- bytes
));
/* Final alternative (the multibyte test above failed): both positions
   shift by the byte delta.  */
6747 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6748 saved_pt_byte
+ (coding
->produced
- bytes
));
6751 unbind_to (count
, Qnil
);
/* Return the name of the coding system registered for the category
   that comes first in coding_priorities.  */
6756 preferred_coding_system ()
6758 int id
= coding_categories
[coding_priorities
[0]].id
;
6760 return CODING_ID_NAME (id
);
6765 /*** 11. Emacs Lisp library functions ***/
6767 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6768 doc
: /* Return t if OBJECT is nil or a coding-system.
6769 See the documentation of `define-coding-system' for information
6770 about coding-system objects. */)
/* nil is accepted as well as anything that satisfies
   CODING_SYSTEM_P.  */
6774 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
6777 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6778 Sread_non_nil_coding_system
, 1, 1, 0,
6779 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
/* Read a name with completion over Vcoding_system_alist, recording it
   in the coding-system history.  */
6786 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6787 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
/* An empty answer is not accepted: the read is repeated while VAL has
   zero characters (the loop opener is not visible here).  */
6789 while (SCHARS (val
) == 0);
6790 return (Fintern (val
, Qnil
));
6793 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6794 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6795 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6796 (prompt
, default_coding_system
)
6797 Lisp_Object prompt
, default_coding_system
;
/* A symbol default is replaced by its name string, which is the form
   passed to Fcompleting_read as the default below.  */
6800 if (SYMBOLP (default_coding_system
))
6801 XSETSTRING (default_coding_system
, XPNTR (SYMBOL_NAME (default_coding_system
)));
6802 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6803 Qt
, Qnil
, Qcoding_system_history
,
6804 default_coding_system
, Qnil
);
/* An empty result yields nil; anything else is interned as a symbol.  */
6805 return (SCHARS (val
) == 0 ? Qnil
: Fintern (val
, Qnil
));
6808 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6810 doc
: /* Check validity of CODING-SYSTEM.
6811 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6813 Lisp_Object coding_system
;
/* The argument must be a symbol; validity is whatever coding-system-p
   accepts, which includes nil.  */
6815 CHECK_SYMBOL (coding_system
);
6816 if (!NILP (Fcoding_system_p (coding_system
)))
6817 return coding_system
;
/* Invalid: signal coding-system-error with the offending symbol as the
   error data.  */
6819 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6823 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6824 HIGHEST is nonzero, return the coding system of the highest
6825 priority among the detected coding systems. Otherwise return a
6826 list of detected coding systems sorted by their priorities. If
6827 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6828 multibyte form but contains only ASCII and eight-bit chars.
6829 Otherwise, the bytes are raw bytes.
6831 CODING-SYSTEM controls the detection as below:
6833 If it is nil, detect both text-format and eol-format. If the
6834 text-format part of CODING-SYSTEM is already specified
6835 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6836 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6837 detect only text-format. */
/* Outline of the visible implementation: set up a local coding_system
   for CODING_SYSTEM (Qundecided when nil), build in VAL a list of
   candidate coding-system ids according to the base category, then
   replace each id in VAL by a coding-system name, using the detected
   or fixed end-of-line format where applicable.  */
6840 detect_coding_system (src
, src_chars
, src_bytes
, highest
, multibytep
,
6842 const unsigned char *src
;
6843 int src_chars
, src_bytes
, highest
;
6845 Lisp_Object coding_system
;
6847 const unsigned char *src_end
= src
+ src_bytes
;
6848 Lisp_Object attrs
, eol_type
;
6850 struct coding_system coding
;
6852 struct coding_detection_info detect_info
;
6853 enum coding_category base_category
;
6855 if (NILP (coding_system
))
6856 coding_system
= Qundecided
;
6857 setup_coding_system (coding_system
, &coding
);
6858 attrs
= CODING_ID_ATTRS (coding
.id
);
6859 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
/* From here on CODING_SYSTEM holds the base name.  */
6860 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
6862 coding
.source
= src
;
6863 coding
.src_chars
= src_chars
;
6864 coding
.src_bytes
= src_bytes
;
6865 coding
.src_multibyte
= multibytep
;
6866 coding
.consumed
= 0;
6867 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6869 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
6871 /* At first, detect text-format if necessary. */
6872 base_category
= XINT (CODING_ATTR_CATEGORY (attrs
));
6873 if (base_category
== coding_category_undecided
)
6875 enum coding_category category
;
6876 struct coding_system
*this;
6879 /* Skip all ASCII bytes except for a few ISO2022 controls. */
6880 for (i
= 0; src
< src_end
; i
++, src
++)
6883 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
6885 || c
== ISO_CODE_SO
)))
/* Record how many leading bytes the scan above passed over.  */
6888 coding
.head_ascii
= src
- coding
.source
;
/* Visit the categories ahead of raw-text in priority order.  */
6891 for (i
= 0; i
< coding_category_raw_text
; i
++)
6893 category
= coding_priorities
[i
];
6894 this = coding_categories
+ category
;
6898 /* No coding system of this category is defined. */
6899 detect_info
.rejected
|= (1 << category
);
6901 else if (category
>= coding_category_raw_text
)
6903 else if (detect_info
.checked
& (1 << category
))
6906 && (detect_info
.found
& (1 << category
)))
/* Run this category's detector on the source.  */
6911 if ((*(this->detector
)) (&coding
, &detect_info
)
6913 && (detect_info
.found
& (1 << category
)))
/* A utf-16-auto hit is narrowed to the little- or big-endian category
   according to the found mask.  */
6915 if (category
== coding_category_utf_16_auto
)
6917 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
6918 category
= coding_category_utf_16_le
;
6920 category
= coding_category_utf_16_be
;
/* Every category was rejected: the only candidate is raw-text.  */
6927 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
6929 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
6930 id
= coding_categories
[coding_category_raw_text
].id
;
6931 val
= Fcons (make_number (id
), Qnil
);
/* Nothing rejected and nothing found: the only candidate is
   undecided.  */
6933 else if (! detect_info
.rejected
&& ! detect_info
.found
)
6935 detect_info
.found
= CATEGORY_MASK_ANY
;
6936 id
= coding_categories
[coding_category_undecided
].id
;
6937 val
= Fcons (make_number (id
), Qnil
);
/* Single-result case (presumably guarded by HIGHEST -- the guard is
   not visible here): use the found category if there is one, otherwise
   the first category in priority order that was not rejected.  */
6941 if (detect_info
.found
)
6943 detect_info
.found
= 1 << category
;
6944 val
= Fcons (make_number (this->id
), Qnil
);
6947 for (i
= 0; i
< coding_category_raw_text
; i
++)
6948 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
6950 detect_info
.found
= 1 << coding_priorities
[i
];
6951 id
= coding_categories
[coding_priorities
[i
]].id
;
6952 val
= Fcons (make_number (id
), Qnil
);
/* List case.  Both loops below walk the priorities from lowest to
   highest and push onto the front of VAL.  The first pushes the
   categories that were neither rejected nor found, the second the
   found ones, so the found categories end up first and each group is
   in priority order.  */
6958 int mask
= detect_info
.rejected
| detect_info
.found
;
6962 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6964 category
= coding_priorities
[i
];
6965 if (! (mask
& (1 << category
)))
6967 found
|= 1 << category
;
6968 id
= coding_categories
[category
].id
;
6969 val
= Fcons (make_number (id
), val
);
6972 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6974 category
= coding_priorities
[i
];
6975 if (detect_info
.found
& (1 << category
))
6977 id
= coding_categories
[category
].id
;
6978 val
= Fcons (make_number (id
), val
);
6981 detect_info
.found
|= found
;
/* Base category utf-16-auto: run only the UTF-16 detector and choose
   one of the four UTF-16 categories from its found/rejected masks.  */
6984 else if (base_category
== coding_category_utf_16_auto
)
6986 if (detect_coding_utf_16 (&coding
, &detect_info
))
6988 enum coding_category category
;
6989 struct coding_system
*this;
6991 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
6992 this = coding_categories
+ coding_category_utf_16_le
;
6993 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
6994 this = coding_categories
+ coding_category_utf_16_be
;
6995 else if (detect_info
.rejected
& CATEGORY_MASK_UTF_16_LE_NOSIG
)
6996 this = coding_categories
+ coding_category_utf_16_be_nosig
;
6998 this = coding_categories
+ coding_category_utf_16_le_nosig
;
6999 val
= Fcons (make_number (this->id
), Qnil
);
/* Any other base category: the text format is already fixed, so the
   given coding system is the only candidate.  */
7004 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
7005 val
= Fcons (make_number (coding
.id
), Qnil
);
7008 /* Then, detect eol-format if necessary. */
/* NOTE(review): utf_16_le_eol has no initializer, unlike its two
   siblings.  It is assigned below only when the found mask has a
   little-endian UTF-16 bit or when the eol type is fixed, but it is
   read for any candidate whose category is utf-16-le or
   utf-16-le-nosig.  Confirm it cannot be read uninitialized.  */
7010 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
;
/* A vector eol_type means the eol format is still undecided, so detect
   it separately for the non-UTF-16 and the two UTF-16 byte orders.  */
7013 if (VECTORP (eol_type
))
7015 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
7016 normal_eol
= detect_eol (coding
.source
, src_bytes
,
7017 coding_category_raw_text
);
7018 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
7019 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
7020 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
7021 coding_category_utf_16_be
);
7022 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
7023 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
7024 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
7025 coding_category_utf_16_le
);
/* Otherwise the eol format is given; use it for all three.  */
7029 if (EQ (eol_type
, Qunix
))
7030 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
7031 else if (EQ (eol_type
, Qdos
))
7032 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
7034 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
/* Replace each id in VAL by a coding-system name.  When the
   candidate's eol type is a vector, elements 0, 1 and 2 are used for
   LF, CRLF and CR respectively; otherwise, or when no single eol
   format was seen, the candidate's own name is used.  */
7037 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
7039 enum coding_category category
;
7042 id
= XINT (XCAR (tail
));
7043 attrs
= CODING_ID_ATTRS (id
);
7044 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7045 eol_type
= CODING_ID_EOL_TYPE (id
);
7046 if (VECTORP (eol_type
))
7048 if (category
== coding_category_utf_16_be
7049 || category
== coding_category_utf_16_be_nosig
)
7050 this_eol
= utf_16_be_eol
;
7051 else if (category
== coding_category_utf_16_le
7052 || category
== coding_category_utf_16_le_nosig
)
7053 this_eol
= utf_16_le_eol
;
7055 this_eol
= normal_eol
;
7057 if (this_eol
== EOL_SEEN_LF
)
7058 XSETCAR (tail
, AREF (eol_type
, 0));
7059 else if (this_eol
== EOL_SEEN_CRLF
)
7060 XSETCAR (tail
, AREF (eol_type
, 1));
7061 else if (this_eol
== EOL_SEEN_CR
)
7062 XSETCAR (tail
, AREF (eol_type
, 2));
7064 XSETCAR (tail
, CODING_ID_NAME (id
));
7067 XSETCAR (tail
, CODING_ID_NAME (id
));
/* With HIGHEST, only the first candidate is returned.  */
7071 return (highest
? XCAR (val
) : val
);
7075 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
7077 doc
: /* Detect coding system of the text in the region between START and END.
7078 Return a list of possible coding systems ordered by priority.
7080 If only ASCII characters are found, it returns a list of single element
7081 `undecided' or its subsidiary coding system according to a detected
7084 If optional argument HIGHEST is non-nil, return the coding system of
7085 highest priority. */)
7086 (start
, end
, highest
)
7087 Lisp_Object start
, end
, highest
;
7090 int from_byte
, to_byte
;
7092 CHECK_NUMBER_COERCE_MARKER (start
);
7093 CHECK_NUMBER_COERCE_MARKER (end
);
/* Validate the region and convert its character positions to byte
   positions.  */
7095 validate_region (&start
, &end
);
7096 from
= XINT (start
), to
= XINT (end
);
7097 from_byte
= CHAR_TO_BYTE (from
);
7098 to_byte
= CHAR_TO_BYTE (to
);
/* When the gap lies inside the region, move it to the end of the
   region (presumably so the bytes handed to the detector are
   contiguous).  */
7100 if (from
< GPT
&& to
>= GPT
)
7101 move_gap_both (to
, to_byte
);
/* Hand the region's bytes, its character and byte counts, and the
   buffer's multibyteness to the detector.  */
7103 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
7104 to
- from
, to_byte
- from_byte
,
7106 !NILP (current_buffer
7107 ->enable_multibyte_characters
),
7111 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
7113 doc
: /* Detect coding system of the text in STRING.
7114 Return a list of possible coding systems ordered by priority.
7116 If only ASCII characters are found, it returns a list of single element
7117 `undecided' or its subsidiary coding system according to a detected
7120 If optional argument HIGHEST is non-nil, return the coding system of
7121 highest priority. */)
7123 Lisp_Object string
, highest
;
7125 CHECK_STRING (string
);
/* Hand the string's data, its character and byte counts, the HIGHEST
   flag and the string's multibyteness to the detector.  */
7127 return detect_coding_system (SDATA (string
),
7128 SCHARS (string
), SBYTES (string
),
7129 !NILP (highest
), STRING_MULTIBYTE (string
),
/* Return nonzero if character C, after applying the translation table
   stored in ATTRS (if any), belongs to one of the charsets in the
   charset list of ATTRS.  */
7135 char_encodable_p (c
, attrs
)
7140 struct charset
*charset
;
7141 Lisp_Object translation_table
;
7143 translation_table
= CODING_ATTR_TRANS_TBL (attrs
);
7144 if (! NILP (translation_table
))
7145 c
= translate_char (translation_table
, c
);
/* Walk the charset list looking for a charset that contains C.  */
7146 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
7147 CONSP (tail
); tail
= XCDR (tail
))
7149 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7150 if (CHAR_CHARSET_P (c
, charset
))
/* TAIL is non-nil here only if the walk stopped before the end of the
   list (presumably on a match -- the loop exit is not visible).  */
7153 return (! NILP (tail
));
7157 /* Return a list of coding systems that safely encode the text between
7158 START and END. If EXCLUDE is non-nil, it is a list of coding
7159 systems not to check. The returned list doesn't contain any such
7160 coding systems. In any case, if the text contains only ASCII or is
7161 unibyte, return t. */
7163 DEFUN ("find-coding-systems-region-internal",
7164 Ffind_coding_systems_region_internal
,
7165 Sfind_coding_systems_region_internal
, 2, 3, 0,
7166 doc
: /* Internal use only. */)
7167 (start
, end
, exclude
)
7168 Lisp_Object start
, end
, exclude
;
7170 Lisp_Object coding_attrs_list
, safe_codings
;
7171 EMACS_INT start_byte
, end_byte
;
7172 const unsigned char *p
, *pbeg
, *pend
;
7174 Lisp_Object tail
, elt
;
/* START may be a string, in which case the whole string is checked.  A
   unibyte string, or one with as many characters as bytes, takes the
   shortcut below (its action is not visible here).  */
7176 if (STRINGP (start
))
7178 if (!STRING_MULTIBYTE (start
)
7179 || SCHARS (start
) == SBYTES (start
))
7182 end_byte
= SBYTES (start
);
/* Otherwise START and END are positions in the current buffer.  */
7186 CHECK_NUMBER_COERCE_MARKER (start
);
7187 CHECK_NUMBER_COERCE_MARKER (end
);
7188 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7189 args_out_of_range (start
, end
);
7190 if (NILP (current_buffer
->enable_multibyte_characters
))
7192 start_byte
= CHAR_TO_BYTE (XINT (start
));
7193 end_byte
= CHAR_TO_BYTE (XINT (end
));
/* As many characters as bytes in the region.  */
7194 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* When the gap lies strictly inside the region, move it to the nearer
   end of the region.  */
7197 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7199 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7200 move_gap_both (XINT (start
), start_byte
);
7202 move_gap_both (XINT (end
), end_byte
);
/* Collect the attribute vectors of the candidate coding systems: base
   coding systems only, not of type undecided, and subject to EXCLUDE.
   Each one's translation-table slot is set to the result of
   get_translation_table.  */
7206 coding_attrs_list
= Qnil
;
7207 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7209 || NILP (Fmemq (XCAR (tail
), exclude
)))
7213 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
7214 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
7215 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
7217 ASET (attrs
, coding_attr_trans_tbl
,
7218 get_translation_table (attrs
, 1));
7219 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
7223 if (STRINGP (start
))
7224 p
= pbeg
= SDATA (start
);
7226 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7227 pend
= p
+ (end_byte
- start_byte
);
/* Trim ASCII bytes from both ends of the range.  */
7229 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
7230 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7234 if (ASCII_BYTE_P (*p
))
7238 c
= STRING_CHAR_ADVANCE (p
);
/* Test character C against every remaining candidate.  A candidate
   that cannot encode C is dropped by copying the next cell over the
   current one, or, for the last cell, by setting its car to nil.  */
7240 charset_map_loaded
= 0;
7241 for (tail
= coding_attrs_list
; CONSP (tail
);)
7246 else if (char_encodable_p (c
, elt
))
7248 else if (CONSP (XCDR (tail
)))
7250 XSETCAR (tail
, XCAR (XCDR (tail
)));
7251 XSETCDR (tail
, XCDR (XCDR (tail
)));
7255 XSETCAR (tail
, Qnil
);
/* If charset_map_loaded became nonzero during the checks, recompute
   the pointers from their saved offsets (presumably because the text
   may have been relocated -- confirm).  */
7259 if (charset_map_loaded
)
7261 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7263 if (STRINGP (start
))
7264 pbeg
= SDATA (start
);
7266 pbeg
= BYTE_POS_ADDR (start_byte
);
7267 p
= pbeg
+ p_offset
;
7268 pend
= pbeg
+ pend_offset
;
/* Return the base names of the candidates that were not dropped.  */
7273 safe_codings
= Qnil
;
7274 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
7275 if (! NILP (XCAR (tail
)))
7276 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
7278 return safe_codings
;
7282 DEFUN ("unencodable-char-position", Funencodable_char_position
,
7283 Sunencodable_char_position
, 3, 5, 0,
7285 Return position of first un-encodable character in a region.
7286 START and END specfiy the region and CODING-SYSTEM specifies the
7287 encoding to check. Return nil if CODING-SYSTEM does encode the region.
7289 If optional 4th argument COUNT is non-nil, it specifies at most how
7290 many un-encodable characters to search. In this case, the value is a
7293 If optional 5th argument STRING is non-nil, it is a string to search
7294 for un-encodable characters. In that case, START and END are indexes
7296 (start
, end
, coding_system
, count
, string
)
7297 Lisp_Object start
, end
, coding_system
, count
, string
;
7300 struct coding_system coding
;
7301 Lisp_Object attrs
, charset_list
, translation_table
;
7302 Lisp_Object positions
;
7304 const unsigned char *p
, *stop
, *pend
;
7305 int ascii_compatible
;
/* Validate CODING-SYSTEM and gather what the scan needs: whether the
   coding system is ASCII compatible, its charset list, and its
   translation table.  A raw-text coding system is special-cased (the
   action is not visible here).  */
7307 setup_coding_system (Fcheck_coding_system (coding_system
), &coding
);
7308 attrs
= CODING_ID_ATTRS (coding
.id
);
7309 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
7311 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
7312 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7313 translation_table
= get_translation_table (attrs
, 1);
/* Buffer case: START and END are positions in the current buffer.  */
7317 validate_region (&start
, &end
);
7318 from
= XINT (start
);
/* A unibyte buffer, or an ASCII-compatible coding system over a region
   with as many characters as bytes.  */
7320 if (NILP (current_buffer
->enable_multibyte_characters
)
7321 || (ascii_compatible
7322 && (to
- from
) == (CHAR_TO_BYTE (to
) - (CHAR_TO_BYTE (from
)))))
7324 p
= CHAR_POS_ADDR (from
);
7325 pend
= CHAR_POS_ADDR (to
);
7326 if (from
< GPT
&& to
>= GPT
)
/* String case: START and END are indices into STRING.  */
7333 CHECK_STRING (string
);
7334 CHECK_NATNUM (start
);
7336 from
= XINT (start
);
7339 || to
> SCHARS (string
))
7340 args_out_of_range_3 (string
, start
, end
);
7341 if (! STRING_MULTIBYTE (string
))
7343 p
= SDATA (string
) + string_char_to_byte (string
, from
);
7344 stop
= pend
= SDATA (string
) + string_char_to_byte (string
, to
);
7345 if (ascii_compatible
&& (to
- from
) == (pend
- p
))
7353 CHECK_NATNUM (count
);
/* For an ASCII-compatible coding system, runs of ASCII bytes are
   scanned bytewise without decoding characters.  */
7362 if (ascii_compatible
)
7363 while (p
< stop
&& ASCII_BYTE_P (*p
))
/* Decode one character; its position is collected when it is not
   ASCII under an ASCII-compatible coding system and char_charset finds
   no charset in the list for its translation.  */
7373 c
= STRING_CHAR_ADVANCE (p
);
7374 if (! (ASCII_CHAR_P (c
) && ascii_compatible
)
7375 && ! char_charset (translate_char (translation_table
, c
),
7376 charset_list
, NULL
))
7378 positions
= Fcons (make_number (from
), positions
);
/* Positions were pushed onto the front of the list, so reverse it when
   a list is wanted; without COUNT return only its first element.  */
7387 return (NILP (count
) ? Fcar (positions
) : Fnreverse (positions
));
7391 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
7392 Scheck_coding_systems_region
, 3, 3, 0,
7393 doc
: /* Check if the region is encodable by coding systems.
7395 START and END are buffer positions specifying the region.
7396 CODING-SYSTEM-LIST is a list of coding systems to check.
7398 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7399 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
7400 whole region, POS0, POS1, ... are buffer positions where non-encodable
7401 characters are found.
7403 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7406 START may be a string. In that case, check if the string is
7407 encodable, and the value contains indices to the string instead of
7408 buffer positions. END is ignored. */)
7409 (start
, end
, coding_system_list
)
7410 Lisp_Object start
, end
, coding_system_list
;
7413 EMACS_INT start_byte
, end_byte
;
7415 const unsigned char *p
, *pbeg
, *pend
;
7417 Lisp_Object tail
, elt
, attrs
;
7419 if (STRINGP (start
))
/* NOTE(review): this shortcut test uses `&&' and `!=', whereas the
   corresponding test in find-coding-systems-region-internal uses `||'
   and `=='.  If a unibyte string always has SCHARS equal to SBYTES
   (presumably so), this condition can never hold -- confirm which form
   was intended.  */
7421 if (!STRING_MULTIBYTE (start
)
7422 && SCHARS (start
) != SBYTES (start
))
7425 end_byte
= SBYTES (start
);
/* Otherwise START and END are positions in the current buffer.  */
7430 CHECK_NUMBER_COERCE_MARKER (start
);
7431 CHECK_NUMBER_COERCE_MARKER (end
);
7432 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7433 args_out_of_range (start
, end
);
7434 if (NILP (current_buffer
->enable_multibyte_characters
))
7436 start_byte
= CHAR_TO_BYTE (XINT (start
));
7437 end_byte
= CHAR_TO_BYTE (XINT (end
));
/* As many characters as bytes in the region.  */
7438 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* When the gap lies strictly inside the region, move it to the nearer
   end of the region.  */
7441 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7443 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7444 move_gap_both (XINT (start
), start_byte
);
7446 move_gap_both (XINT (end
), end_byte
);
/* Build a work list with one entry (CODING-SYSTEM ATTRS) per requested
   coding system; unencodable positions are pushed after ATTRS during
   the scan below.  */
7452 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7455 attrs
= AREF (CODING_SYSTEM_SPEC (elt
), 0);
7456 ASET (attrs
, coding_attr_trans_tbl
, get_translation_table (attrs
, 1));
7457 list
= Fcons (Fcons (elt
, Fcons (attrs
, Qnil
)), list
);
7460 if (STRINGP (start
))
7461 p
= pbeg
= SDATA (start
);
7463 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7464 pend
= p
+ (end_byte
- start_byte
);
/* Trim ASCII bytes from both ends, keeping POS in step with P at the
   front.  */
7466 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7467 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7471 if (ASCII_BYTE_P (*p
))
7475 c
= STRING_CHAR_ADVANCE (p
);
/* Record POS in the entry of every coding system that cannot encode
   character C.  */
7477 charset_map_loaded
= 0;
7478 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
7480 elt
= XCDR (XCAR (tail
));
7481 if (! char_encodable_p (c
, XCAR (elt
)))
7482 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
/* If charset_map_loaded became nonzero during the checks, recompute
   the pointers from their saved offsets (presumably because the text
   may have been relocated -- confirm).  */
7484 if (charset_map_loaded
)
7486 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7488 if (STRINGP (start
))
7489 pbeg
= SDATA (start
);
7491 pbeg
= BYTE_POS_ADDR (start_byte
);
7492 p
= pbeg
+ p_offset
;
7493 pend
= pbeg
+ pend_offset
;
/* Build the result from the entries that collected at least one
   position: each becomes (CODING-SYSTEM POS...), with the positions
   reversed from the order in which they were pushed.  */
7501 for (; CONSP (tail
); tail
= XCDR (tail
))
7504 if (CONSP (XCDR (XCDR (elt
))))
7505 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
7514 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7515 Lisp_Object start
, end
, coding_system
, dst_object
;
7516 int encodep
, norecord
;
7518 struct coding_system coding
;
7519 EMACS_INT from
, from_byte
, to
, to_byte
;
7520 Lisp_Object src_object
;
7522 CHECK_NUMBER_COERCE_MARKER (start
);
7523 CHECK_NUMBER_COERCE_MARKER (end
);
7524 if (NILP (coding_system
))
7525 coding_system
= Qno_conversion
;
7527 CHECK_CODING_SYSTEM (coding_system
);
7528 src_object
= Fcurrent_buffer ();
7529 if (NILP (dst_object
))
7530 dst_object
= src_object
;
7531 else if (! EQ (dst_object
, Qt
))
7532 CHECK_BUFFER (dst_object
);
7534 validate_region (&start
, &end
);
7535 from
= XFASTINT (start
);
7536 from_byte
= CHAR_TO_BYTE (from
);
7537 to
= XFASTINT (end
);
7538 to_byte
= CHAR_TO_BYTE (to
);
7540 setup_coding_system (coding_system
, &coding
);
7541 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7544 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7547 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7550 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7552 return (BUFFERP (dst_object
)
7553 ? make_number (coding
.produced_char
)
7554 : coding
.dst_object
);
7558 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7559 3, 4, "r\nzCoding system: ",
7560 doc
: /* Decode the current region from the specified coding system.
7561 When called from a program, takes four arguments:
7562 START, END, CODING-SYSTEM, and DESTINATION.
7563 START and END are buffer positions.
7565 Optional 4th argument DESTINATION specifies where the decoded text goes.
7566 If nil, the region between START and END is replaced by the decoded text.
7567 If a buffer, the decoded text is inserted in that buffer.
7568 If t, the decoded text is returned.
7570 This function sets `last-coding-system-used' to the precise coding system
7571 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7572 not fully specified.)
7573 It returns the length of the decoded text. */)
7574 (start
, end
, coding_system
, destination
)
7575 Lisp_Object start
, end
, coding_system
, destination
;
7577 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
7580 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7581 3, 4, "r\nzCoding system: ",
7582 doc
: /* Encode the current region by specified coding system.
7583 When called from a program, takes four arguments:
7584 START, END, CODING-SYSTEM, and DESTINATION. START and END are buffer positions.
7586 Optional 4th argument DESTINATION specifies where the encoded text goes.
7587 If nil, the region between START and END is replaced by the encoded text.
7588 If a buffer, the encoded text is inserted in that buffer.
7589 If t, the encoded text is returned.
7591 This function sets `last-coding-system-used' to the precise coding system
7592 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7593 not fully specified.)
7594 It returns the length of the encoded text. */)
7595 (start
, end
, coding_system
, destination
)
7596 Lisp_Object start
, end
, coding_system
, destination
;
7598 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
7602 code_convert_string (string
, coding_system
, dst_object
,
7603 encodep
, nocopy
, norecord
)
7604 Lisp_Object string
, coding_system
, dst_object
;
7605 int encodep
, nocopy
, norecord
;
7607 struct coding_system coding
;
7608 EMACS_INT chars
, bytes
;
7610 CHECK_STRING (string
);
7611 if (NILP (coding_system
))
7614 Vlast_coding_system_used
= Qno_conversion
;
7615 if (NILP (dst_object
))
7616 return (nocopy
? Fcopy_sequence (string
) : string
);
7619 if (NILP (coding_system
))
7620 coding_system
= Qno_conversion
;
7622 CHECK_CODING_SYSTEM (coding_system
);
7623 if (NILP (dst_object
))
7625 else if (! EQ (dst_object
, Qt
))
7626 CHECK_BUFFER (dst_object
);
7628 setup_coding_system (coding_system
, &coding
);
7629 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7630 chars
= SCHARS (string
);
7631 bytes
= SBYTES (string
);
7633 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7635 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7637 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7639 return (BUFFERP (dst_object
)
7640 ? make_number (coding
.produced_char
)
7641 : coding
.dst_object
);
7645 /* Encode or decode STRING according to CODING_SYSTEM.
7646 Do not set Vlast_coding_system_used.
7648 This function is called only from macros DECODE_FILE and
7649 ENCODE_FILE, thus we ignore character composition. */
7652 code_convert_string_norecord (string
, coding_system
, encodep
)
7653 Lisp_Object string
, coding_system
;
7656 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
7660 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
7662 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7664 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7665 if the decoding operation is trivial.
7667 Optional fourth arg BUFFER non-nil means that the decoded text is
7668 inserted in BUFFER instead of returned as a string. In this case,
7669 the return value is BUFFER.
7671 This function sets `last-coding-system-used' to the precise coding system
7672 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7673 not fully specified.) */)
7674 (string
, coding_system
, nocopy
, buffer
)
7675 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7677 return code_convert_string (string
, coding_system
, buffer
,
7678 0, ! NILP (nocopy
), 0);
7681 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
7683 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
7685 Optional third arg NOCOPY non-nil means it is OK to return STRING
7686 itself if the encoding operation is trivial.
7688 Optional fourth arg BUFFER non-nil means that the encoded text is
7689 inserted in BUFFER instead of returned as a string. In this case,
7690 the return value is BUFFER.
7692 This function sets `last-coding-system-used' to the precise coding system
7693 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7694 not fully specified.) */)
7695 (string
, coding_system
, nocopy
, buffer
)
7696 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7698 return code_convert_string (string
, coding_system
, buffer
,
7699 1, ! NILP (nocopy
), 1);
7703 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
7704 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
7705 Return the corresponding character. */)
7709 Lisp_Object spec
, attrs
, val
;
7710 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
7713 CHECK_NATNUM (code
);
7714 c
= XFASTINT (code
);
7715 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7716 attrs
= AREF (spec
, 0);
7718 if (ASCII_BYTE_P (c
)
7719 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7722 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7723 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7724 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7725 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7728 charset
= charset_roman
;
7729 else if (c
>= 0xA0 && c
< 0xDF)
7731 charset
= charset_kana
;
7736 int s1
= c
>> 8, s2
= c
& 0xFF;
7738 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
7739 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
7740 error ("Invalid code: %d", code
);
7742 charset
= charset_kanji
;
7744 c
= DECODE_CHAR (charset
, c
);
7746 error ("Invalid code: %d", code
);
7747 return make_number (c
);
7751 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
7752 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
7753 Return the corresponding code in SJIS. */)
7757 Lisp_Object spec
, attrs
, charset_list
;
7759 struct charset
*charset
;
7762 CHECK_CHARACTER (ch
);
7764 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7765 attrs
= AREF (spec
, 0);
7767 if (ASCII_CHAR_P (c
)
7768 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7771 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7772 charset
= char_charset (c
, charset_list
, &code
);
7773 if (code
== CHARSET_INVALID_CODE (charset
))
7774 error ("Can't encode by shift_jis encoding: %d", c
);
7777 return make_number (code
);
7780 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
7781 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
7782 Return the corresponding character. */)
7786 Lisp_Object spec
, attrs
, val
;
7787 struct charset
*charset_roman
, *charset_big5
, *charset
;
7790 CHECK_NATNUM (code
);
7791 c
= XFASTINT (code
);
7792 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7793 attrs
= AREF (spec
, 0);
7795 if (ASCII_BYTE_P (c
)
7796 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7799 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7800 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7801 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7804 charset
= charset_roman
;
7807 int b1
= c
>> 8, b2
= c
& 0x7F;
7808 if (b1
< 0xA1 || b1
> 0xFE
7809 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
7810 error ("Invalid code: %d", code
);
7811 charset
= charset_big5
;
7813 c
= DECODE_CHAR (charset
, (unsigned )c
);
7815 error ("Invalid code: %d", code
);
7816 return make_number (c
);
7819 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
7820 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
7821 Return the corresponding character code in Big5. */)
7825 Lisp_Object spec
, attrs
, charset_list
;
7826 struct charset
*charset
;
7830 CHECK_CHARACTER (ch
);
7832 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7833 attrs
= AREF (spec
, 0);
7834 if (ASCII_CHAR_P (c
)
7835 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7838 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7839 charset
= char_charset (c
, charset_list
, &code
);
7840 if (code
== CHARSET_INVALID_CODE (charset
))
7841 error ("Can't encode by Big5 encoding: %d", c
);
7843 return make_number (code
);
7847 DEFUN ("set-terminal-coding-system-internal",
7848 Fset_terminal_coding_system_internal
,
7849 Sset_terminal_coding_system_internal
, 1, 1, 0,
7850 doc
: /* Internal use only. */)
7852 Lisp_Object coding_system
;
7854 CHECK_SYMBOL (coding_system
);
7855 setup_coding_system (Fcheck_coding_system (coding_system
),
7858 /* We had better not send unsafe characters to terminal. */
7859 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
7860 /* Character composition should be disabled. */
7861 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7862 terminal_coding
.src_multibyte
= 1;
7863 terminal_coding
.dst_multibyte
= 0;
7867 DEFUN ("set-safe-terminal-coding-system-internal",
7868 Fset_safe_terminal_coding_system_internal
,
7869 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
7870 doc
: /* Internal use only. */)
7872 Lisp_Object coding_system
;
7874 CHECK_SYMBOL (coding_system
);
7875 setup_coding_system (Fcheck_coding_system (coding_system
),
7876 &safe_terminal_coding
);
7877 /* Character composition should be disabled. */
7878 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7879 safe_terminal_coding
.src_multibyte
= 1;
7880 safe_terminal_coding
.dst_multibyte
= 0;
7884 DEFUN ("terminal-coding-system",
7885 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
7886 doc
: /* Return coding system specified for terminal output. */)
7889 return CODING_ID_NAME (terminal_coding
.id
);
7892 DEFUN ("set-keyboard-coding-system-internal",
7893 Fset_keyboard_coding_system_internal
,
7894 Sset_keyboard_coding_system_internal
, 1, 1, 0,
7895 doc
: /* Internal use only. */)
7897 Lisp_Object coding_system
;
7899 CHECK_SYMBOL (coding_system
);
7900 setup_coding_system (Fcheck_coding_system (coding_system
),
7902 /* Character composition should be disabled. */
7903 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7907 DEFUN ("keyboard-coding-system",
7908 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
7909 doc
: /* Return coding system specified for decoding keyboard input. */)
7912 return CODING_ID_NAME (keyboard_coding
.id
);
7916 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7917 Sfind_operation_coding_system
, 1, MANY
, 0,
7918 doc
: /* Choose a coding system for an operation based on the target name.
7919 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7920 DECODING-SYSTEM is the coding system to use for decoding
7921 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7922 for encoding (in case OPERATION does encoding).
7924 The first argument OPERATION specifies an I/O primitive:
7925 For file I/O, `insert-file-contents' or `write-region'.
7926 For process I/O, `call-process', `call-process-region', or `start-process'.
7927 For network I/O, `open-network-stream'.
7929 The remaining arguments should be the same arguments that were passed
7930 to the primitive. Depending on which primitive, one of those arguments
7931 is selected as the TARGET. For example, if OPERATION does file I/O,
7932 whichever argument specifies the file name is TARGET.
7934 TARGET has a meaning which depends on OPERATION:
7935 For file I/O, TARGET is a file name.
7936 For process I/O, TARGET is a process name.
7937 For network I/O, TARGET is a service name or a port number.
7939 This function looks up what is specified for TARGET in
7940 `file-coding-system-alist', `process-coding-system-alist',
7941 or `network-coding-system-alist' depending on OPERATION.
7942 They may specify a coding system, a cons of coding systems,
7943 or a function symbol to call.
7944 In the last case, we call the function with one argument,
7945 which is a list of all the arguments given to this function.
7947 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7952 Lisp_Object operation
, target_idx
, target
, val
;
7953 register Lisp_Object chain
;
7956 error ("Too few arguments");
7957 operation
= args
[0];
7958 if (!SYMBOLP (operation
)
7959 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7960 error ("Invalid first arguement");
7961 if (nargs
< 1 + XINT (target_idx
))
7962 error ("Too few arguments for operation: %s",
7963 SDATA (SYMBOL_NAME (operation
)));
7964 target
= args
[XINT (target_idx
) + 1];
7965 if (!(STRINGP (target
)
7966 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7967 error ("Invalid %dth argument", XINT (target_idx
) + 1);
7969 chain
= ((EQ (operation
, Qinsert_file_contents
)
7970 || EQ (operation
, Qwrite_region
))
7971 ? Vfile_coding_system_alist
7972 : (EQ (operation
, Qopen_network_stream
)
7973 ? Vnetwork_coding_system_alist
7974 : Vprocess_coding_system_alist
));
7978 for (; CONSP (chain
); chain
= XCDR (chain
))
7984 && ((STRINGP (target
)
7985 && STRINGP (XCAR (elt
))
7986 && fast_string_match (XCAR (elt
), target
) >= 0)
7987 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7990 /* Here, if VAL is both a valid coding system and a valid
7991 function symbol, we return VAL as a coding system. */
7994 if (! SYMBOLP (val
))
7996 if (! NILP (Fcoding_system_p (val
)))
7997 return Fcons (val
, val
);
7998 if (! NILP (Ffboundp (val
)))
8000 val
= call1 (val
, Flist (nargs
, args
));
8003 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
8004 return Fcons (val
, val
);
8012 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
8013 Sset_coding_system_priority
, 0, MANY
, 0,
8014 doc
: /* Assign higher priority to the coding systems given as arguments.
8015 If multiple coding systems belong to the same category,
8016 all but the first one are ignored.
8018 usage: (set-coding-system-priority ...) */)
8024 int changed
[coding_category_max
];
8025 enum coding_category priorities
[coding_category_max
];
8027 bzero (changed
, sizeof changed
);
8029 for (i
= j
= 0; i
< nargs
; i
++)
8031 enum coding_category category
;
8032 Lisp_Object spec
, attrs
;
8034 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
8035 attrs
= AREF (spec
, 0);
8036 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
8037 if (changed
[category
])
8038 /* Ignore this coding system because a coding system of the
8039 same category already had a higher priority. */
8041 changed
[category
] = 1;
8042 priorities
[j
++] = category
;
8043 if (coding_categories
[category
].id
>= 0
8044 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
8045 setup_coding_system (args
[i
], &coding_categories
[category
]);
8046 Fset (AREF (Vcoding_category_table
, category
), args
[i
]);
8049 /* Now we have decided top J priorities. Reflect the order of the
8050 original priorities to the remaining priorities. */
8052 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
8054 while (j
< coding_category_max
8055 && changed
[coding_priorities
[j
]])
8057 if (j
== coding_category_max
)
8059 priorities
[i
] = coding_priorities
[j
];
8062 bcopy (priorities
, coding_priorities
, sizeof priorities
);
8064 /* Update `coding-category-list'. */
8065 Vcoding_category_list
= Qnil
;
8066 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8067 Vcoding_category_list
8068 = Fcons (AREF (Vcoding_category_table
, priorities
[i
]),
8069 Vcoding_category_list
);
8074 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
8075 Scoding_system_priority_list
, 0, 1, 0,
8076 doc
: /* Return a list of coding systems ordered by their priorities.
8077 HIGHESTP non-nil means just return the highest priority one. */)
8079 Lisp_Object highestp
;
8084 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
8086 enum coding_category category
= coding_priorities
[i
];
8087 int id
= coding_categories
[category
].id
;
8092 attrs
= CODING_ID_ATTRS (id
);
8093 if (! NILP (highestp
))
8094 return CODING_ATTR_BASE_NAME (attrs
);
8095 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
8097 return Fnreverse (val
);
/* Name suffixes, in the order unix, dos, mac, that make_subsidiaries
   appends to a base coding-system name before interning the result as
   one of the three subsidiary symbols.  The longest suffix ("-unix")
   plus its terminating NUL occupies 6 bytes, which is the extra room
   make_subsidiaries reserves beyond the base name length.  */
8100 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
8103 make_subsidiaries (base
)
8106 Lisp_Object subsidiaries
;
8107 int base_name_len
= SBYTES (SYMBOL_NAME (base
));
8108 char *buf
= (char *) alloca (base_name_len
+ 6);
8111 bcopy (SDATA (SYMBOL_NAME (base
)), buf
, base_name_len
);
8112 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
8113 for (i
= 0; i
< 3; i
++)
8115 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
8116 ASET (subsidiaries
, i
, intern (buf
));
8118 return subsidiaries
;
8122 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
8123 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
8124 doc
: /* For internal use only.
8125 usage: (define-coding-system-internal ...) */)
8131 Lisp_Object spec_vec
; /* [ ATTRS ALIASES EOL_TYPE ] */
8132 Lisp_Object attrs
; /* Vector of attributes. */
8133 Lisp_Object eol_type
;
8134 Lisp_Object aliases
;
8135 Lisp_Object coding_type
, charset_list
, safe_charsets
;
8136 enum coding_category category
;
8137 Lisp_Object tail
, val
;
8138 int max_charset_id
= 0;
8141 if (nargs
< coding_arg_max
)
8144 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
8146 name
= args
[coding_arg_name
];
8147 CHECK_SYMBOL (name
);
8148 CODING_ATTR_BASE_NAME (attrs
) = name
;
8150 val
= args
[coding_arg_mnemonic
];
8151 if (! STRINGP (val
))
8152 CHECK_CHARACTER (val
);
8153 CODING_ATTR_MNEMONIC (attrs
) = val
;
8155 coding_type
= args
[coding_arg_coding_type
];
8156 CHECK_SYMBOL (coding_type
);
8157 CODING_ATTR_TYPE (attrs
) = coding_type
;
8159 charset_list
= args
[coding_arg_charset_list
];
8160 if (SYMBOLP (charset_list
))
8162 if (EQ (charset_list
, Qiso_2022
))
8164 if (! EQ (coding_type
, Qiso_2022
))
8165 error ("Invalid charset-list");
8166 charset_list
= Viso_2022_charset_list
;
8168 else if (EQ (charset_list
, Qemacs_mule
))
8170 if (! EQ (coding_type
, Qemacs_mule
))
8171 error ("Invalid charset-list");
8172 charset_list
= Vemacs_mule_charset_list
;
8174 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8175 if (max_charset_id
< XFASTINT (XCAR (tail
)))
8176 max_charset_id
= XFASTINT (XCAR (tail
));
8180 charset_list
= Fcopy_sequence (charset_list
);
8181 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
8183 struct charset
*charset
;
8186 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8187 if (EQ (coding_type
, Qiso_2022
)
8188 ? CHARSET_ISO_FINAL (charset
) < 0
8189 : EQ (coding_type
, Qemacs_mule
)
8190 ? CHARSET_EMACS_MULE_ID (charset
) < 0
8192 error ("Can't handle charset `%s'",
8193 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8195 XSETCAR (tail
, make_number (charset
->id
));
8196 if (max_charset_id
< charset
->id
)
8197 max_charset_id
= charset
->id
;
8200 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
8202 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
8204 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8205 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
8206 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
8208 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
8210 val
= args
[coding_arg_decode_translation_table
];
8211 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8213 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8215 val
= args
[coding_arg_encode_translation_table
];
8216 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8218 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8220 val
= args
[coding_arg_post_read_conversion
];
8222 CODING_ATTR_POST_READ (attrs
) = val
;
8224 val
= args
[coding_arg_pre_write_conversion
];
8226 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8228 val
= args
[coding_arg_default_char
];
8230 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
8233 CHECK_CHARACTER (val
);
8234 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8237 val
= args
[coding_arg_for_unibyte
];
8238 CODING_ATTR_FOR_UNIBYTE (attrs
) = NILP (val
) ? Qnil
: Qt
;
8240 val
= args
[coding_arg_plist
];
8242 CODING_ATTR_PLIST (attrs
) = val
;
8244 if (EQ (coding_type
, Qcharset
))
8246 /* Generate a lisp vector of 256 elements. Each element is nil,
8247 integer, or a list of charset IDs.
8249 If Nth element is nil, the byte code N is invalid in this
8252 If Nth element is a number NUM, N is the first byte of a
8253 charset whose ID is NUM.
8255 If Nth element is a list of charset IDs, N is the first byte
8256 of one of them. The list is sorted by dimensions of the
8257 charsets. A charset of smaller dimension comes first. */
8258 val
= Fmake_vector (make_number (256), Qnil
);
8260 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8262 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
8263 int dim
= CHARSET_DIMENSION (charset
);
8264 int idx
= (dim
- 1) * 4;
8266 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8267 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8269 for (i
= charset
->code_space
[idx
];
8270 i
<= charset
->code_space
[idx
+ 1]; i
++)
8272 Lisp_Object tmp
, tmp2
;
8275 tmp
= AREF (val
, i
);
8278 else if (NUMBERP (tmp
))
8280 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
8282 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
8284 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
8288 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
8290 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
8295 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
8298 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
8299 XSETCAR (tmp2
, XCAR (tail
));
8305 ASET (attrs
, coding_attr_charset_valids
, val
);
8306 category
= coding_category_charset
;
8308 else if (EQ (coding_type
, Qccl
))
8312 if (nargs
< coding_arg_ccl_max
)
8315 val
= args
[coding_arg_ccl_decoder
];
8316 CHECK_CCL_PROGRAM (val
);
8318 val
= Fcopy_sequence (val
);
8319 ASET (attrs
, coding_attr_ccl_decoder
, val
);
8321 val
= args
[coding_arg_ccl_encoder
];
8322 CHECK_CCL_PROGRAM (val
);
8324 val
= Fcopy_sequence (val
);
8325 ASET (attrs
, coding_attr_ccl_encoder
, val
);
8327 val
= args
[coding_arg_ccl_valids
];
8328 valids
= Fmake_string (make_number (256), make_number (0));
8329 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
8336 from
= to
= XINT (val
);
8337 if (from
< 0 || from
> 255)
8338 args_out_of_range_3 (val
, make_number (0), make_number (255));
8343 CHECK_NATNUM_CAR (val
);
8344 CHECK_NATNUM_CDR (val
);
8345 from
= XINT (XCAR (val
));
8347 args_out_of_range_3 (XCAR (val
),
8348 make_number (0), make_number (255));
8349 to
= XINT (XCDR (val
));
8350 if (to
< from
|| to
> 255)
8351 args_out_of_range_3 (XCDR (val
),
8352 XCAR (val
), make_number (255));
8354 for (i
= from
; i
<= to
; i
++)
8355 SSET (valids
, i
, 1);
8357 ASET (attrs
, coding_attr_ccl_valids
, valids
);
8359 category
= coding_category_ccl
;
8361 else if (EQ (coding_type
, Qutf_16
))
8363 Lisp_Object bom
, endian
;
8365 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8367 if (nargs
< coding_arg_utf16_max
)
8370 bom
= args
[coding_arg_utf16_bom
];
8371 if (! NILP (bom
) && ! EQ (bom
, Qt
))
8375 CHECK_CODING_SYSTEM (val
);
8377 CHECK_CODING_SYSTEM (val
);
8379 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
8381 endian
= args
[coding_arg_utf16_endian
];
8382 CHECK_SYMBOL (endian
);
8385 else if (! EQ (endian
, Qbig
) && ! EQ (endian
, Qlittle
))
8386 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian
)));
8387 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
8389 category
= (CONSP (bom
)
8390 ? coding_category_utf_16_auto
8392 ? (EQ (endian
, Qbig
)
8393 ? coding_category_utf_16_be_nosig
8394 : coding_category_utf_16_le_nosig
)
8395 : (EQ (endian
, Qbig
)
8396 ? coding_category_utf_16_be
8397 : coding_category_utf_16_le
));
8399 else if (EQ (coding_type
, Qiso_2022
))
8401 Lisp_Object initial
, reg_usage
, request
, flags
;
8404 if (nargs
< coding_arg_iso2022_max
)
8407 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
8408 CHECK_VECTOR (initial
);
8409 for (i
= 0; i
< 4; i
++)
8411 val
= Faref (initial
, make_number (i
));
8414 struct charset
*charset
;
8416 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8417 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
8418 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
8419 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8422 ASET (initial
, i
, make_number (-1));
8425 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
8426 CHECK_CONS (reg_usage
);
8427 CHECK_NUMBER_CAR (reg_usage
);
8428 CHECK_NUMBER_CDR (reg_usage
);
8430 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8431 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8439 CHECK_CHARSET_GET_ID (tmp
, id
);
8440 CHECK_NATNUM_CDR (val
);
8441 if (XINT (XCDR (val
)) >= 4)
8442 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8443 XSETCAR (val
, make_number (id
));
8446 flags
= args
[coding_arg_iso2022_flags
];
8447 CHECK_NATNUM (flags
);
8449 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8450 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8452 ASET (attrs
, coding_attr_iso_initial
, initial
);
8453 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8454 ASET (attrs
, coding_attr_iso_request
, request
);
8455 ASET (attrs
, coding_attr_iso_flags
, flags
);
8456 setup_iso_safe_charsets (attrs
);
8458 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8459 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8460 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8461 ? coding_category_iso_7_else
8462 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8463 ? coding_category_iso_7
8464 : coding_category_iso_7_tight
);
8467 int id
= XINT (AREF (initial
, 1));
8469 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8470 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8472 ? coding_category_iso_8_else
8473 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8474 ? coding_category_iso_8_1
8475 : coding_category_iso_8_2
);
8477 if (category
!= coding_category_iso_8_1
8478 && category
!= coding_category_iso_8_2
)
8479 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8481 else if (EQ (coding_type
, Qemacs_mule
))
8483 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8484 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8485 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8486 category
= coding_category_emacs_mule
;
8488 else if (EQ (coding_type
, Qshift_jis
))
8491 struct charset
*charset
;
8493 if (XINT (Flength (charset_list
)) != 3
8494 && XINT (Flength (charset_list
)) != 4)
8495 error ("There should be three or four charsets");
8497 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8498 if (CHARSET_DIMENSION (charset
) != 1)
8499 error ("Dimension of charset %s is not one",
8500 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8501 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8502 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8504 charset_list
= XCDR (charset_list
);
8505 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8506 if (CHARSET_DIMENSION (charset
) != 1)
8507 error ("Dimension of charset %s is not one",
8508 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8510 charset_list
= XCDR (charset_list
);
8511 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8512 if (CHARSET_DIMENSION (charset
) != 2)
8513 error ("Dimension of charset %s is not two",
8514 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8516 charset_list
= XCDR (charset_list
);
8517 if (! NILP (charset_list
))
8519 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8520 if (CHARSET_DIMENSION (charset
) != 2)
8521 error ("Dimension of charset %s is not two",
8522 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8525 category
= coding_category_sjis
;
8526 Vsjis_coding_system
= name
;
8528 else if (EQ (coding_type
, Qbig5
))
8530 struct charset
*charset
;
8532 if (XINT (Flength (charset_list
)) != 2)
8533 error ("There should be just two charsets");
8535 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8536 if (CHARSET_DIMENSION (charset
) != 1)
8537 error ("Dimension of charset %s is not one",
8538 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8539 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8540 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8542 charset_list
= XCDR (charset_list
);
8543 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8544 if (CHARSET_DIMENSION (charset
) != 2)
8545 error ("Dimension of charset %s is not two",
8546 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8548 category
= coding_category_big5
;
8549 Vbig5_coding_system
= name
;
8551 else if (EQ (coding_type
, Qraw_text
))
8553 category
= coding_category_raw_text
;
8554 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8556 else if (EQ (coding_type
, Qutf_8
))
8558 category
= coding_category_utf_8
;
8559 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8561 else if (EQ (coding_type
, Qundecided
))
8562 category
= coding_category_undecided
;
8564 error ("Invalid coding system type: %s",
8565 SDATA (SYMBOL_NAME (coding_type
)));
8567 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8568 CODING_ATTR_PLIST (attrs
)
8569 = Fcons (QCcategory
, Fcons (AREF (Vcoding_category_table
, category
),
8570 CODING_ATTR_PLIST (attrs
)));
8572 eol_type
= args
[coding_arg_eol_type
];
8573 if (! NILP (eol_type
)
8574 && ! EQ (eol_type
, Qunix
)
8575 && ! EQ (eol_type
, Qdos
)
8576 && ! EQ (eol_type
, Qmac
))
8577 error ("Invalid eol-type");
8579 aliases
= Fcons (name
, Qnil
);
8581 if (NILP (eol_type
))
8583 eol_type
= make_subsidiaries (name
);
8584 for (i
= 0; i
< 3; i
++)
8586 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8588 this_name
= AREF (eol_type
, i
);
8589 this_aliases
= Fcons (this_name
, Qnil
);
8590 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
8591 this_spec
= Fmake_vector (make_number (3), attrs
);
8592 ASET (this_spec
, 1, this_aliases
);
8593 ASET (this_spec
, 2, this_eol_type
);
8594 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8595 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8596 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8597 Vcoding_system_alist
);
8601 spec_vec
= Fmake_vector (make_number (3), attrs
);
8602 ASET (spec_vec
, 1, aliases
);
8603 ASET (spec_vec
, 2, eol_type
);
8605 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
8606 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
8607 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
8608 Vcoding_system_alist
);
8611 int id
= coding_categories
[category
].id
;
8613 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
8614 setup_coding_system (name
, &coding_categories
[category
]);
8620 return Fsignal (Qwrong_number_of_arguments
,
8621 Fcons (intern ("define-coding-system-internal"),
8622 make_number (nargs
)));
8626 DEFUN ("coding-system-put", Fcoding_system_put
, Scoding_system_put
,
8628 doc
: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
8629 (coding_system
, prop
, val
)
8630 Lisp_Object coding_system
, prop
, val
;
8632 Lisp_Object spec
, attrs
, plist
;
8634 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8635 attrs
= AREF (spec
, 0);
8636 if (EQ (prop
, QCmnemonic
))
8638 if (! STRINGP (val
))
8639 CHECK_CHARACTER (val
);
8640 CODING_ATTR_MNEMONIC (attrs
) = val
;
8642 else if (EQ (prop
, QCdefalut_char
))
8645 val
= make_number (' ');
8647 CHECK_CHARACTER (val
);
8648 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8650 else if (EQ (prop
, QCdecode_translation_table
))
8652 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8654 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8656 else if (EQ (prop
, QCencode_translation_table
))
8658 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8660 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8662 else if (EQ (prop
, QCpost_read_conversion
))
8665 CODING_ATTR_POST_READ (attrs
) = val
;
8667 else if (EQ (prop
, QCpre_write_conversion
))
8670 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8673 CODING_ATTR_PLIST (attrs
)
8674 = Fplist_put (CODING_ATTR_PLIST (attrs
), prop
, val
);
8679 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
8680 Sdefine_coding_system_alias
, 2, 2, 0,
8681 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8682 (alias
, coding_system
)
8683 Lisp_Object alias
, coding_system
;
8685 Lisp_Object spec
, aliases
, eol_type
;
8687 CHECK_SYMBOL (alias
);
8688 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8689 aliases
= AREF (spec
, 1);
8690 /* ALISES should be a list of length more than zero, and the first
8691 element is a base coding system. Append ALIAS at the tail of the
8693 while (!NILP (XCDR (aliases
)))
8694 aliases
= XCDR (aliases
);
8695 XSETCDR (aliases
, Fcons (alias
, Qnil
));
8697 eol_type
= AREF (spec
, 2);
8698 if (VECTORP (eol_type
))
8700 Lisp_Object subsidiaries
;
8703 subsidiaries
= make_subsidiaries (alias
);
8704 for (i
= 0; i
< 3; i
++)
8705 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
8706 AREF (eol_type
, i
));
8709 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
8710 Vcoding_system_list
= Fcons (alias
, Vcoding_system_list
);
8711 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
8712 Vcoding_system_alist
);
8717 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
8719 doc
: /* Return the base of CODING-SYSTEM.
8720 Any alias or subsidiary coding system is not a base coding system. */)
8722 Lisp_Object coding_system
;
8724 Lisp_Object spec
, attrs
;
8726 if (NILP (coding_system
))
8727 return (Qno_conversion
);
8728 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8729 attrs
= AREF (spec
, 0);
8730 return CODING_ATTR_BASE_NAME (attrs
);
8733 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
8735 doc
: "Return the property list of CODING-SYSTEM.")
8737 Lisp_Object coding_system
;
8739 Lisp_Object spec
, attrs
;
8741 if (NILP (coding_system
))
8742 coding_system
= Qno_conversion
;
8743 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8744 attrs
= AREF (spec
, 0);
8745 return CODING_ATTR_PLIST (attrs
);
8749 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
8751 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
8753 Lisp_Object coding_system
;
8757 if (NILP (coding_system
))
8758 coding_system
= Qno_conversion
;
8759 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8760 return AREF (spec
, 1);
8763 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
8764 Scoding_system_eol_type
, 1, 1, 0,
8765 doc
: /* Return eol-type of CODING-SYSTEM.
8766 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8768 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8769 and CR respectively.
8771 A vector value indicates that a format of end-of-line should be
8772 detected automatically. Nth element of the vector is the subsidiary
8773 coding system whose eol-type is N. */)
8775 Lisp_Object coding_system
;
8777 Lisp_Object spec
, eol_type
;
8780 if (NILP (coding_system
))
8781 coding_system
= Qno_conversion
;
8782 if (! CODING_SYSTEM_P (coding_system
))
8784 spec
= CODING_SYSTEM_SPEC (coding_system
);
8785 eol_type
= AREF (spec
, 2);
8786 if (VECTORP (eol_type
))
8787 return Fcopy_sequence (eol_type
);
8788 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
8789 return make_number (n
);
8795 /*** 9. Post-amble ***/
8802 for (i
= 0; i
< coding_category_max
; i
++)
8804 coding_categories
[i
].id
= -1;
8805 coding_priorities
[i
] = i
;
8808 /* ISO2022 specific initialize routine. */
8809 for (i
= 0; i
< 0x20; i
++)
8810 iso_code_class
[i
] = ISO_control_0
;
8811 for (i
= 0x21; i
< 0x7F; i
++)
8812 iso_code_class
[i
] = ISO_graphic_plane_0
;
8813 for (i
= 0x80; i
< 0xA0; i
++)
8814 iso_code_class
[i
] = ISO_control_1
;
8815 for (i
= 0xA1; i
< 0xFF; i
++)
8816 iso_code_class
[i
] = ISO_graphic_plane_1
;
8817 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
8818 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
8819 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
8820 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
8821 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
8822 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
8823 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
8824 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
8825 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
8827 for (i
= 0; i
< 256; i
++)
8829 emacs_mule_bytes
[i
] = 1;
8831 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
8832 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
8833 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
8834 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
8842 staticpro (&Vcoding_system_hash_table
);
8844 Lisp_Object args
[2];
8847 Vcoding_system_hash_table
= Fmake_hash_table (2, args
);
8850 staticpro (&Vsjis_coding_system
);
8851 Vsjis_coding_system
= Qnil
;
8853 staticpro (&Vbig5_coding_system
);
8854 Vbig5_coding_system
= Qnil
;
8856 staticpro (&Vcode_conversion_reused_workbuf
);
8857 Vcode_conversion_reused_workbuf
= Qnil
;
8859 staticpro (&Vcode_conversion_workbuf_name
);
8860 Vcode_conversion_workbuf_name
= build_string (" *code-conversion-work*");
8862 reused_workbuf_in_use
= 0;
8864 DEFSYM (Qcharset
, "charset");
8865 DEFSYM (Qtarget_idx
, "target-idx");
8866 DEFSYM (Qcoding_system_history
, "coding-system-history");
8867 Fset (Qcoding_system_history
, Qnil
);
8869 /* Target FILENAME is the first argument. */
8870 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
8871 /* Target FILENAME is the third argument. */
8872 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
8874 DEFSYM (Qcall_process
, "call-process");
8875 /* Target PROGRAM is the first argument. */
8876 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
8878 DEFSYM (Qcall_process_region
, "call-process-region");
8879 /* Target PROGRAM is the third argument. */
8880 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
8882 DEFSYM (Qstart_process
, "start-process");
8883 /* Target PROGRAM is the third argument. */
8884 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
8886 DEFSYM (Qopen_network_stream
, "open-network-stream");
8887 /* Target SERVICE is the fourth argument. */
8888 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
8890 DEFSYM (Qcoding_system
, "coding-system");
8891 DEFSYM (Qcoding_aliases
, "coding-aliases");
8893 DEFSYM (Qeol_type
, "eol-type");
8894 DEFSYM (Qunix
, "unix");
8895 DEFSYM (Qdos
, "dos");
8897 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
8898 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
8899 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
8900 DEFSYM (Qdefault_char
, "default-char");
8901 DEFSYM (Qundecided
, "undecided");
8902 DEFSYM (Qno_conversion
, "no-conversion");
8903 DEFSYM (Qraw_text
, "raw-text");
8905 DEFSYM (Qiso_2022
, "iso-2022");
8907 DEFSYM (Qutf_8
, "utf-8");
8908 DEFSYM (Qutf_8_emacs
, "utf-8-emacs");
8910 DEFSYM (Qutf_16
, "utf-16");
8911 DEFSYM (Qbig
, "big");
8912 DEFSYM (Qlittle
, "little");
8914 DEFSYM (Qshift_jis
, "shift-jis");
8915 DEFSYM (Qbig5
, "big5");
8917 DEFSYM (Qcoding_system_p
, "coding-system-p");
8919 DEFSYM (Qcoding_system_error
, "coding-system-error");
8920 Fput (Qcoding_system_error
, Qerror_conditions
,
8921 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
8922 Fput (Qcoding_system_error
, Qerror_message
,
8923 build_string ("Invalid coding system"));
8925 /* Intern this now in case it isn't already done.
8926 Setting this variable twice is harmless.
8927 But don't staticpro it here--that is done in alloc.c. */
8928 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
8930 DEFSYM (Qtranslation_table
, "translation-table");
8931 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
8932 DEFSYM (Qtranslation_table_id
, "translation-table-id");
8933 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
8934 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
8936 DEFSYM (Qvalid_codes
, "valid-codes");
8938 DEFSYM (Qemacs_mule
, "emacs-mule");
8940 DEFSYM (QCcategory
, ":category");
8941 DEFSYM (QCmnemonic
, ":mnemonic");
8942 DEFSYM (QCdefalut_char
, ":default-char");
8943 DEFSYM (QCdecode_translation_table
, ":decode-translation-table");
8944 DEFSYM (QCencode_translation_table
, ":encode-translation-table");
8945 DEFSYM (QCpost_read_conversion
, ":post-read-conversion");
8946 DEFSYM (QCpre_write_conversion
, ":pre-write-conversion");
8948 Vcoding_category_table
8949 = Fmake_vector (make_number (coding_category_max
), Qnil
);
8950 staticpro (&Vcoding_category_table
);
8951 /* Followings are target of code detection. */
8952 ASET (Vcoding_category_table
, coding_category_iso_7
,
8953 intern ("coding-category-iso-7"));
8954 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
8955 intern ("coding-category-iso-7-tight"));
8956 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
8957 intern ("coding-category-iso-8-1"));
8958 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
8959 intern ("coding-category-iso-8-2"));
8960 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
8961 intern ("coding-category-iso-7-else"));
8962 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
8963 intern ("coding-category-iso-8-else"));
8964 ASET (Vcoding_category_table
, coding_category_utf_8
,
8965 intern ("coding-category-utf-8"));
8966 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
8967 intern ("coding-category-utf-16-be"));
8968 ASET (Vcoding_category_table
, coding_category_utf_16_auto
,
8969 intern ("coding-category-utf-16-auto"));
8970 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
8971 intern ("coding-category-utf-16-le"));
8972 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
8973 intern ("coding-category-utf-16-be-nosig"));
8974 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
8975 intern ("coding-category-utf-16-le-nosig"));
8976 ASET (Vcoding_category_table
, coding_category_charset
,
8977 intern ("coding-category-charset"));
8978 ASET (Vcoding_category_table
, coding_category_sjis
,
8979 intern ("coding-category-sjis"));
8980 ASET (Vcoding_category_table
, coding_category_big5
,
8981 intern ("coding-category-big5"));
8982 ASET (Vcoding_category_table
, coding_category_ccl
,
8983 intern ("coding-category-ccl"));
8984 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
8985 intern ("coding-category-emacs-mule"));
8986 /* Followings are NOT target of code detection. */
8987 ASET (Vcoding_category_table
, coding_category_raw_text
,
8988 intern ("coding-category-raw-text"));
8989 ASET (Vcoding_category_table
, coding_category_undecided
,
8990 intern ("coding-category-undecided"));
8992 DEFSYM (Qinsufficient_source
, "insufficient-source");
8993 DEFSYM (Qinconsistent_eol
, "inconsistent-eol");
8994 DEFSYM (Qinvalid_source
, "invalid-source");
8995 DEFSYM (Qinterrupted
, "interrupted");
8996 DEFSYM (Qinsufficient_memory
, "insufficient-memory");
8998 defsubr (&Scoding_system_p
);
8999 defsubr (&Sread_coding_system
);
9000 defsubr (&Sread_non_nil_coding_system
);
9001 defsubr (&Scheck_coding_system
);
9002 defsubr (&Sdetect_coding_region
);
9003 defsubr (&Sdetect_coding_string
);
9004 defsubr (&Sfind_coding_systems_region_internal
);
9005 defsubr (&Sunencodable_char_position
);
9006 defsubr (&Scheck_coding_systems_region
);
9007 defsubr (&Sdecode_coding_region
);
9008 defsubr (&Sencode_coding_region
);
9009 defsubr (&Sdecode_coding_string
);
9010 defsubr (&Sencode_coding_string
);
9011 defsubr (&Sdecode_sjis_char
);
9012 defsubr (&Sencode_sjis_char
);
9013 defsubr (&Sdecode_big5_char
);
9014 defsubr (&Sencode_big5_char
);
9015 defsubr (&Sset_terminal_coding_system_internal
);
9016 defsubr (&Sset_safe_terminal_coding_system_internal
);
9017 defsubr (&Sterminal_coding_system
);
9018 defsubr (&Sset_keyboard_coding_system_internal
);
9019 defsubr (&Skeyboard_coding_system
);
9020 defsubr (&Sfind_operation_coding_system
);
9021 defsubr (&Sset_coding_system_priority
);
9022 defsubr (&Sdefine_coding_system_internal
);
9023 defsubr (&Sdefine_coding_system_alias
);
9024 defsubr (&Scoding_system_put
);
9025 defsubr (&Scoding_system_base
);
9026 defsubr (&Scoding_system_plist
);
9027 defsubr (&Scoding_system_aliases
);
9028 defsubr (&Scoding_system_eol_type
);
9029 defsubr (&Scoding_system_priority_list
);
9031 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
9032 doc
: /* List of coding systems.
9034 Do not alter the value of this variable manually. This variable should be
9035 updated by the functions `define-coding-system' and
9036 `define-coding-system-alias'. */);
9037 Vcoding_system_list
= Qnil
;
9039 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
9040 doc
: /* Alist of coding system names.
9041 Each element is one element list of coding system name.
9042 This variable is given to `completing-read' as TABLE argument.
9044 Do not alter the value of this variable manually. This variable should be
9045 updated by the functions `make-coding-system' and
9046 `define-coding-system-alias'. */);
9047 Vcoding_system_alist
= Qnil
;
9049 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
9050 doc
: /* List of coding-categories (symbols) ordered by priority.
9052 On detecting a coding system, Emacs tries code detection algorithms
9053 associated with each coding-category one by one in this order. When
9054 one algorithm agrees with a byte sequence of source text, the coding
9055 system bound to the corresponding coding-category is selected. */);
9059 Vcoding_category_list
= Qnil
;
9060 for (i
= coding_category_max
- 1; i
>= 0; i
--)
9061 Vcoding_category_list
9062 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
9063 Vcoding_category_list
);
9066 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
9067 doc
: /* Specify the coding system for read operations.
9068 It is useful to bind this variable with `let', but do not set it globally.
9069 If the value is a coding system, it is used for decoding on read operation.
9070 If not, an appropriate element is used from one of the coding system alists:
9071 There are three such tables, `file-coding-system-alist',
9072 `process-coding-system-alist', and `network-coding-system-alist'. */);
9073 Vcoding_system_for_read
= Qnil
;
9075 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
9076 doc
: /* Specify the coding system for write operations.
9077 Programs bind this variable with `let', but you should not set it globally.
9078 If the value is a coding system, it is used for encoding of output,
9079 when writing it to a file and when sending it to a file or subprocess.
9081 If this does not specify a coding system, an appropriate element
9082 is used from one of the coding system alists:
9083 There are three such tables, `file-coding-system-alist',
9084 `process-coding-system-alist', and `network-coding-system-alist'.
9085 For output to files, if the above procedure does not specify a coding system,
9086 the value of `buffer-file-coding-system' is used. */);
9087 Vcoding_system_for_write
= Qnil
;
9089 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
9091 Coding system used in the latest file or process I/O. */);
9092 Vlast_coding_system_used
= Qnil
;
9094 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error
,
9096 Error status of the last code conversion.
9098 When an error was detected in the last code conversion, this variable
9099 is set to one of the following symbols.
9100 `insufficient-source'
9104 `insufficient-memory'
9105 When no error was detected, the value doesn't change. So, to check
9106 the error status of a code conversion by this variable, you must
9107 explicitly set this variable to nil before performing code
9109 Vlast_code_conversion_error
= Qnil
;
9111 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
9113 *Non-nil means always inhibit code conversion of end-of-line format.
9114 See info node `Coding Systems' and info node `Text and Binary' concerning
9115 such conversion. */);
9116 inhibit_eol_conversion
= 0;
9118 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
9120 Non-nil means process buffer inherits coding system of process output.
9121 Bind it to t if the process output is to be treated as if it were a file
9122 read from some filesystem. */);
9123 inherit_process_coding_system
= 0;
9125 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
9127 Alist to decide a coding system to use for a file I/O operation.
9128 The format is ((PATTERN . VAL) ...),
9129 where PATTERN is a regular expression matching a file name,
9130 VAL is a coding system, a cons of coding systems, or a function symbol.
9131 If VAL is a coding system, it is used for both decoding and encoding
9133 If VAL is a cons of coding systems, the car part is used for decoding,
9134 and the cdr part is used for encoding.
9135 If VAL is a function symbol, the function must return a coding system
9136 or a cons of coding systems which are used as above. The function gets
9137 the arguments with which `find-operation-coding-systems' was called.
9139 See also the function `find-operation-coding-system'
9140 and the variable `auto-coding-alist'. */);
9141 Vfile_coding_system_alist
= Qnil
;
9143 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
9145 Alist to decide a coding system to use for a process I/O operation.
9146 The format is ((PATTERN . VAL) ...),
9147 where PATTERN is a regular expression matching a program name,
9148 VAL is a coding system, a cons of coding systems, or a function symbol.
9149 If VAL is a coding system, it is used for both decoding what received
9150 from the program and encoding what sent to the program.
9151 If VAL is a cons of coding systems, the car part is used for decoding,
9152 and the cdr part is used for encoding.
9153 If VAL is a function symbol, the function must return a coding system
9154 or a cons of coding systems which are used as above.
9156 See also the function `find-operation-coding-system'. */);
9157 Vprocess_coding_system_alist
= Qnil
;
9159 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
9161 Alist to decide a coding system to use for a network I/O operation.
9162 The format is ((PATTERN . VAL) ...),
9163 where PATTERN is a regular expression matching a network service name
9164 or is a port number to connect to,
9165 VAL is a coding system, a cons of coding systems, or a function symbol.
9166 If VAL is a coding system, it is used for both decoding what received
9167 from the network stream and encoding what sent to the network stream.
9168 If VAL is a cons of coding systems, the car part is used for decoding,
9169 and the cdr part is used for encoding.
9170 If VAL is a function symbol, the function must return a coding system
9171 or a cons of coding systems which are used as above.
9173 See also the function `find-operation-coding-system'. */);
9174 Vnetwork_coding_system_alist
= Qnil
;
9176 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
9177 doc
: /* Coding system to use with system messages.
9178 Also used for decoding keyboard input on X Window system. */);
9179 Vlocale_coding_system
= Qnil
;
9181 /* The eol mnemonics are reset in startup.el system-dependently. */
9182 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
9184 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
9185 eol_mnemonic_unix
= build_string (":");
9187 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
9189 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
9190 eol_mnemonic_dos
= build_string ("\\");
9192 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
9194 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
9195 eol_mnemonic_mac
= build_string ("/");
9197 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
9199 *String displayed in mode line when end-of-line format is not yet determined. */);
9200 eol_mnemonic_undecided
= build_string (":");
9202 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
9204 *Non-nil enables character translation while encoding and decoding. */);
9205 Venable_character_translation
= Qt
;
9207 DEFVAR_LISP ("standard-translation-table-for-decode",
9208 &Vstandard_translation_table_for_decode
,
9209 doc
: /* Table for translating characters while decoding. */);
9210 Vstandard_translation_table_for_decode
= Qnil
;
9212 DEFVAR_LISP ("standard-translation-table-for-encode",
9213 &Vstandard_translation_table_for_encode
,
9214 doc
: /* Table for translating characters while encoding. */);
9215 Vstandard_translation_table_for_encode
= Qnil
;
9217 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
9218 doc
: /* Alist of charsets vs revision numbers.
9219 While encoding, if a charset (car part of an element) is found,
9220 designate it with the escape sequence identifying revision (cdr part
9221 of the element). */);
9222 Vcharset_revision_table
= Qnil
;
9224 DEFVAR_LISP ("default-process-coding-system",
9225 &Vdefault_process_coding_system
,
9226 doc
: /* Cons of coding systems used for process I/O by default.
9227 The car part is used for decoding a process output,
9228 the cdr part is used for encoding a text to be sent to a process. */);
9229 Vdefault_process_coding_system
= Qnil
;
9231 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
9233 Table of extra Latin codes in the range 128..159 (inclusive).
9234 This is a vector of length 256.
9235 If Nth element is non-nil, the existence of code N in a file
9236 \(or output of subprocess) doesn't prevent it to be detected as
9237 a coding system of ISO 2022 variant which has a flag
9238 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9239 or reading output of a subprocess.
9240 Only 128th through 159th elements has a meaning. */);
9241 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
9243 DEFVAR_LISP ("select-safe-coding-system-function",
9244 &Vselect_safe_coding_system_function
,
9246 Function to call to select safe coding system for encoding a text.
9248 If set, this function is called to force a user to select a proper
9249 coding system which can encode the text in the case that a default
9250 coding system used in each operation can't encode the text.
9252 The default value is `select-safe-coding-system' (which see). */);
9253 Vselect_safe_coding_system_function
= Qnil
;
9255 DEFVAR_BOOL ("coding-system-require-warning",
9256 &coding_system_require_warning
,
9257 doc
: /* Internal use only.
9258 If non-nil, on writing a file, `select-safe-coding-system-function' is
9259 called even if `coding-system-for-write' is non-nil. The command
9260 `universal-coding-system-argument' binds this variable to t temporarily. */);
9261 coding_system_require_warning
= 0;
9264 DEFVAR_BOOL ("inhibit-iso-escape-detection",
9265 &inhibit_iso_escape_detection
,
9267 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9269 By default, on reading a file, Emacs tries to detect how the text is
9270 encoded. This code detection is sensitive to escape sequences. If
9271 the sequence is valid as ISO2022, the code is determined as one of
9272 the ISO2022 encodings, and the file is decoded by the corresponding
9273 coding system (e.g. `iso-2022-7bit').
9275 However, there may be a case that you want to read escape sequences in
9276 a file as is. In such a case, you can set this variable to non-nil.
9277 Then, as the code detection ignores any escape sequences, no file is
9278 detected as encoded in some ISO2022 encoding. The result is that all
9279 escape sequences become visible in a buffer.
9281 The default value is nil, and it is strongly recommended not to change
9282 it. That is because many Emacs Lisp source files that contain
9283 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9284 in Emacs's distribution, and they won't be decoded correctly on
9285 reading if you suppress escape sequence detection.
9287 The other way to read escape sequences in a file without decoding is
9288 to explicitly specify some coding system that doesn't use ISO2022's
9289 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
9290 inhibit_iso_escape_detection
= 0;
9292 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input
,
9293 doc
: /* Char table for translating self-inserting characters.
9294 This is applied to the result of input methods, not their input. See also
9295 `keyboard-translate-table'. */);
9296 Vtranslation_table_for_input
= Qnil
;
9299 Lisp_Object args
[coding_arg_max
];
9300 Lisp_Object plist
[16];
9303 for (i
= 0; i
< coding_arg_max
; i
++)
9306 plist
[0] = intern (":name");
9307 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
9308 plist
[2] = intern (":mnemonic");
9309 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
9310 plist
[4] = intern (":coding-type");
9311 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
9312 plist
[6] = intern (":ascii-compatible-p");
9313 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
9314 plist
[8] = intern (":default-char");
9315 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
9316 plist
[10] = intern (":for-unibyte");
9317 plist
[11] = args
[coding_arg_for_unibyte
] = Qt
;
9318 plist
[12] = intern (":docstring");
9319 plist
[13] = build_string ("Do no conversion.\n\
9321 When you visit a file with this coding, the file is read into a\n\
9322 unibyte buffer as is, thus each byte of a file is treated as a\n\
9324 plist
[14] = intern (":eol-type");
9325 plist
[15] = args
[coding_arg_eol_type
] = Qunix
;
9326 args
[coding_arg_plist
] = Flist (16, plist
);
9327 Fdefine_coding_system_internal (coding_arg_max
, args
);
9330 setup_coding_system (Qno_conversion
, &keyboard_coding
);
9331 setup_coding_system (Qno_conversion
, &terminal_coding
);
9332 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
9337 for (i
= 0; i
< coding_category_max
; i
++)
9338 Fset (AREF (Vcoding_category_table
, i
), Qno_conversion
);
9343 emacs_strerror (error_number
)
9348 synchronize_system_messages_locale ();
9349 str
= strerror (error_number
);
9351 if (! NILP (Vlocale_coding_system
))
9353 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
9354 Vlocale_coding_system
,
9356 str
= (char *) SDATA (dec
);