1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion
61 from coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
152 Below is the template of these functions. */
156 detect_coding_XXX (coding
, detect_info
)
157 struct coding_system
*coding
;
158 struct coding_detection_info
*detect_info
;
160 unsigned char *src
= coding
->source
;
161 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
162 int multibytep
= coding
->src_multibyte
;
163 int consumed_chars
= 0;
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
173 if (! __C_conforms_to_XXX___ (c
))
175 if (! __C_strongly_suggests_XXX__ (c
))
176 found
= CATEGORY_MASK_XXX
;
178 /* The byte sequence is invalid for XXX. */
179 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
183 /* The source was exhausted successfully. */
184 detect_info
->found
|= found
;
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size.
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
201 Below is the template of these functions. */
205 decode_coding_XXXX (coding
)
206 struct coding_system
*coding
;
208 unsigned char *src
= coding
->source
+ coding
->consumed
;
209 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base
;
214 /* A buffer to produce decoded characters. */
215 int *charbuf
= coding
->charbuf
;
216 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
217 int multibytep
= coding
->src_multibyte
;
222 if (charbuf
< charbuf_end
)
223 /* No more room to produce a decoded character. */
230 if (src_base
< src_end
231 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base
< src_end
&& charbuf
< charbuf_end
)
235 *charbuf
++ = *src_base
++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
239 /* Remember how many characters we produced. */
240 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
256 DST_BYTES zero means that the source area and the destination area
257 overlap, which means that we can produce encoded text only until it
258 reaches the head of the not-yet-encoded source text.
260 Below is a template of these functions. */
/* Template only: XXX and _MAX_BYTES_PRODUCED_IN_LOOP_ are placeholders
   to be replaced when writing a concrete encoder.  */
263 encode_coding_XXX (coding
)
264 struct coding_system
*coding
;
266 int multibytep
= coding
->dst_multibyte
;
267 int *charbuf
= coding
->charbuf
;
/* CHARBUF is already a plain int pointer into CODING->charbuf, so the
   end of the used part is CHARBUF + CODING->charbuf_used (CHARBUF has
   no `charbuf' member to dereference).  */
268 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
/* Resume writing after the bytes already produced.  */
269 unsigned char *dst
= coding
->destination
+ coding
->produced
;
270 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
/* Stop early enough that one more loop iteration cannot write past
   DST_END.  */
271 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
272 int produced_chars
= 0;
274 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
277 /* Encode C into DST, and increment DST. */
279 label_no_more_destination
:
280 /* How many chars and bytes we produced. */
281 coding
->produced_char
+= produced_chars
;
282 coding
->produced
= dst
- coding
->destination
;
287 /*** 1. Preamble ***/
294 #include "character.h"
297 #include "composite.h"
301 Lisp_Object Vcoding_system_hash_table
;
303 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
304 Lisp_Object Qunix
, Qdos
;
305 extern Lisp_Object Qmac
; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system
;
307 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
308 Lisp_Object Qdefault_char
;
309 Lisp_Object Qno_conversion
, Qundecided
;
310 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
311 Lisp_Object Qbig
, Qlittle
;
312 Lisp_Object Qcoding_system_history
;
313 Lisp_Object Qvalid_codes
;
314 Lisp_Object QCcategory
, QCmnemonic
, QCdefalut_char
;
315 Lisp_Object QCdecode_translation_table
, QCencode_translation_table
;
316 Lisp_Object QCpost_read_conversion
, QCpre_write_conversion
;
318 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
319 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
320 Lisp_Object Qstart_process
, Qopen_network_stream
;
321 Lisp_Object Qtarget_idx
;
323 Lisp_Object Qinsufficient_source
, Qinconsistent_eol
, Qinvalid_source
;
324 Lisp_Object Qinterrupted
, Qinsufficient_memory
;
326 int coding_system_require_warning
;
328 Lisp_Object Vselect_safe_coding_system_function
;
330 /* Mnemonic string for each format of end-of-line. */
331 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
332 /* Mnemonic string to indicate format of end-of-line is not yet
334 Lisp_Object eol_mnemonic_undecided
;
338 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
340 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
342 /* Coding system emacs-mule and raw-text are for converting only
343 end-of-line format. */
344 Lisp_Object Qemacs_mule
, Qraw_text
;
345 Lisp_Object Qutf_8_emacs
;
347 /* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */
349 /* Coding-system for reading files and receiving data from process. */
350 Lisp_Object Vcoding_system_for_read
;
351 /* Coding-system for writing files and sending data to process. */
352 Lisp_Object Vcoding_system_for_write
;
353 /* Coding-system actually used in the latest I/O. */
354 Lisp_Object Vlast_coding_system_used
;
355 /* Set to non-nil when an error is detected during code conversion. */
356 Lisp_Object Vlast_code_conversion_error
;
357 /* A vector of length 256 which contains information about special
358 Latin codes (especially for dealing with Microsoft codes). */
359 Lisp_Object Vlatin_extra_code_table
;
361 /* Flag to inhibit code conversion of end-of-line format. */
362 int inhibit_eol_conversion
;
364 /* Flag to inhibit ISO2022 escape sequence detection. */
365 int inhibit_iso_escape_detection
;
367 /* Flag to make buffer-file-coding-system inherit from process-coding. */
368 int inherit_process_coding_system
;
370 /* Coding system to be used to encode text for terminal display. */
371 struct coding_system terminal_coding
;
373 /* Coding system to be used to encode text for terminal display when
374 terminal coding system is nil. */
375 struct coding_system safe_terminal_coding
;
377 /* Coding system of what is sent from terminal keyboard. */
378 struct coding_system keyboard_coding
;
380 Lisp_Object Vfile_coding_system_alist
;
381 Lisp_Object Vprocess_coding_system_alist
;
382 Lisp_Object Vnetwork_coding_system_alist
;
384 Lisp_Object Vlocale_coding_system
;
388 /* Flag to tell if we look up translation table on character code
390 Lisp_Object Venable_character_translation
;
391 /* Standard translation table to look up on decoding (reading). */
392 Lisp_Object Vstandard_translation_table_for_decode
;
393 /* Standard translation table to look up on encoding (writing). */
394 Lisp_Object Vstandard_translation_table_for_encode
;
396 Lisp_Object Qtranslation_table
;
397 Lisp_Object Qtranslation_table_id
;
398 Lisp_Object Qtranslation_table_for_decode
;
399 Lisp_Object Qtranslation_table_for_encode
;
401 /* Alist of charsets vs revision number. */
402 static Lisp_Object Vcharset_revision_table
;
404 /* Default coding systems used for process I/O. */
405 Lisp_Object Vdefault_process_coding_system
;
407 /* Char table for translating Quail and self-inserting input. */
408 Lisp_Object Vtranslation_table_for_input
;
410 /* Two special coding systems. */
411 Lisp_Object Vsjis_coding_system
;
412 Lisp_Object Vbig5_coding_system
;
414 static void record_conversion_result (struct coding_system
*coding
,
415 enum coding_result_code result
);
416 static int detect_coding_utf_8
P_ ((struct coding_system
*,
417 struct coding_detection_info
*info
));
418 static void decode_coding_utf_8
P_ ((struct coding_system
*));
419 static int encode_coding_utf_8
P_ ((struct coding_system
*));
421 static int detect_coding_utf_16
P_ ((struct coding_system
*,
422 struct coding_detection_info
*info
));
423 static void decode_coding_utf_16
P_ ((struct coding_system
*));
424 static int encode_coding_utf_16
P_ ((struct coding_system
*));
426 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
427 struct coding_detection_info
*info
));
428 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
429 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
431 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
432 struct coding_detection_info
*info
));
433 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
434 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
436 static int detect_coding_sjis
P_ ((struct coding_system
*,
437 struct coding_detection_info
*info
));
438 static void decode_coding_sjis
P_ ((struct coding_system
*));
439 static int encode_coding_sjis
P_ ((struct coding_system
*));
441 static int detect_coding_big5
P_ ((struct coding_system
*,
442 struct coding_detection_info
*info
));
443 static void decode_coding_big5
P_ ((struct coding_system
*));
444 static int encode_coding_big5
P_ ((struct coding_system
*));
446 static int detect_coding_ccl
P_ ((struct coding_system
*,
447 struct coding_detection_info
*info
));
448 static void decode_coding_ccl
P_ ((struct coding_system
*));
449 static int encode_coding_ccl
P_ ((struct coding_system
*));
451 static void decode_coding_raw_text
P_ ((struct coding_system
*));
452 static int encode_coding_raw_text
P_ ((struct coding_system
*));
455 /* ISO2022 section */
457 #define CODING_ISO_INITIAL(coding, reg) \
458 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
459 coding_attr_iso_initial), \
463 #define CODING_ISO_REQUEST(coding, charset_id) \
464 ((charset_id <= (coding)->max_charset_id \
465 ? (coding)->safe_charsets[charset_id] \
469 #define CODING_ISO_FLAGS(coding) \
470 ((coding)->spec.iso_2022.flags)
471 #define CODING_ISO_DESIGNATION(coding, reg) \
472 ((coding)->spec.iso_2022.current_designation[reg])
473 #define CODING_ISO_INVOCATION(coding, plane) \
474 ((coding)->spec.iso_2022.current_invocation[plane])
475 #define CODING_ISO_SINGLE_SHIFTING(coding) \
476 ((coding)->spec.iso_2022.single_shifting)
477 #define CODING_ISO_BOL(coding) \
478 ((coding)->spec.iso_2022.bol)
479 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
480 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
482 /* Control characters of ISO2022. */
483 /* code */ /* function */
484 #define ISO_CODE_LF 0x0A /* line-feed */
485 #define ISO_CODE_CR 0x0D /* carriage-return */
486 #define ISO_CODE_SO 0x0E /* shift-out */
487 #define ISO_CODE_SI 0x0F /* shift-in */
488 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
489 #define ISO_CODE_ESC 0x1B /* escape */
490 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
491 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
492 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
494 /* All code (1-byte) of ISO2022 is classified into one of the
496 enum iso_code_class_type
498 ISO_control_0
, /* Control codes in the range
499 0x00..0x1F and 0x7F, except for the
500 following 4 codes. */
501 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
502 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
503 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
504 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
505 ISO_control_1
, /* Control codes in the range
506 0x80..0x9F, except for the
507 following 3 codes. */
508 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
509 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
510 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
511 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
512 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
513 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
514 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
517 /** Each CODING_ISO_FLAG_XXX macro defines a flag bit of the
518 `iso-flags' attribute of an iso2022 coding system. */
520 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
521 instead of the correct short-form sequence (e.g. ESC $ A). */
522 #define CODING_ISO_FLAG_LONG_FORM 0x0001
524 /* If set, reset graphic planes and registers at end-of-line to the
526 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
528 /* If set, reset graphic planes and registers before any control
529 characters to the initial state. */
530 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
532 /* If set, encode by 7-bit environment. */
533 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
535 /* If set, use locking-shift function. */
536 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
538 /* If set, use single-shift function. Overwrite
539 CODING_ISO_FLAG_LOCKING_SHIFT. */
540 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
542 /* If set, use designation escape sequence. */
543 #define CODING_ISO_FLAG_DESIGNATION 0x0040
545 /* If set, produce revision number sequence. */
546 #define CODING_ISO_FLAG_REVISION 0x0080
548 /* If set, produce ISO6429's direction specifying sequence. */
549 #define CODING_ISO_FLAG_DIRECTION 0x0100
551 /* If set, assume designation states are reset at beginning of line on
553 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
555 /* If set, designation sequence should be placed at beginning of line
557 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
559 /* If set, do not encode unsafe characters on output. */
560 #define CODING_ISO_FLAG_SAFE 0x0800
562 /* If set, extra latin codes (128..159) are accepted as a valid code
564 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
566 #define CODING_ISO_FLAG_COMPOSITION 0x2000
568 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
570 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
572 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
574 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
576 /* A character to be produced on output if encoding of the original
577 character is prohibited by CODING_ISO_FLAG_SAFE. */
578 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
582 #define CODING_UTF_16_BOM(coding) \
583 ((coding)->spec.utf_16.bom)
585 #define CODING_UTF_16_ENDIAN(coding) \
586 ((coding)->spec.utf_16.endian)
588 #define CODING_UTF_16_SURROGATE(coding) \
589 ((coding)->spec.utf_16.surrogate)
593 #define CODING_CCL_DECODER(coding) \
594 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
595 #define CODING_CCL_ENCODER(coding) \
596 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
597 #define CODING_CCL_VALIDS(coding) \
598 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
600 /* Index for each coding category in `coding_categories' */
604 coding_category_iso_7
,
605 coding_category_iso_7_tight
,
606 coding_category_iso_8_1
,
607 coding_category_iso_8_2
,
608 coding_category_iso_7_else
,
609 coding_category_iso_8_else
,
610 coding_category_utf_8
,
611 coding_category_utf_16_auto
,
612 coding_category_utf_16_be
,
613 coding_category_utf_16_le
,
614 coding_category_utf_16_be_nosig
,
615 coding_category_utf_16_le_nosig
,
616 coding_category_charset
,
617 coding_category_sjis
,
618 coding_category_big5
,
620 coding_category_emacs_mule
,
621 /* All above are targets of code detection. */
622 coding_category_raw_text
,
623 coding_category_undecided
,
627 /* Definitions of flag bits used in detect_coding_XXXX. */
628 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
629 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
630 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
631 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
632 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
633 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
634 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
635 #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
636 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
637 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
638 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
639 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
640 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
641 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
642 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
643 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
644 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
645 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
647 /* This value is returned if detect_coding_mask () finds nothing other
648 than ASCII characters. */
/* NOTE(review): this union does not include CATEGORY_MASK_UTF_16_AUTO,
   although coding_category_utf_16_auto is listed among the detection
   targets -- confirm that the omission is intentional.  */
649 #define CATEGORY_MASK_ANY \
650 (CATEGORY_MASK_ISO_7 \
651 | CATEGORY_MASK_ISO_7_TIGHT \
652 | CATEGORY_MASK_ISO_8_1 \
653 | CATEGORY_MASK_ISO_8_2 \
654 | CATEGORY_MASK_ISO_7_ELSE \
655 | CATEGORY_MASK_ISO_8_ELSE \
656 | CATEGORY_MASK_UTF_8 \
657 | CATEGORY_MASK_UTF_16_BE \
658 | CATEGORY_MASK_UTF_16_LE \
659 | CATEGORY_MASK_UTF_16_BE_NOSIG \
660 | CATEGORY_MASK_UTF_16_LE_NOSIG \
661 | CATEGORY_MASK_CHARSET \
662 | CATEGORY_MASK_SJIS \
663 | CATEGORY_MASK_BIG5 \
664 | CATEGORY_MASK_CCL \
665 | CATEGORY_MASK_EMACS_MULE)
668 #define CATEGORY_MASK_ISO_7BIT \
669 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
671 #define CATEGORY_MASK_ISO_8BIT \
672 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
674 #define CATEGORY_MASK_ISO_ELSE \
675 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
677 #define CATEGORY_MASK_ISO_ESCAPE \
678 (CATEGORY_MASK_ISO_7 \
679 | CATEGORY_MASK_ISO_7_TIGHT \
680 | CATEGORY_MASK_ISO_7_ELSE \
681 | CATEGORY_MASK_ISO_8_ELSE)
683 #define CATEGORY_MASK_ISO \
684 ( CATEGORY_MASK_ISO_7BIT \
685 | CATEGORY_MASK_ISO_8BIT \
686 | CATEGORY_MASK_ISO_ELSE)
/* Union of the BE and LE UTF-16 category masks, with and without the
   NOSIG suffix.  NOTE(review): CATEGORY_MASK_UTF_16_AUTO is not part
   of this union -- confirm that the omission is intentional.  */
688 #define CATEGORY_MASK_UTF_16 \
689 (CATEGORY_MASK_UTF_16_BE \
690 | CATEGORY_MASK_UTF_16_LE \
691 | CATEGORY_MASK_UTF_16_BE_NOSIG \
692 | CATEGORY_MASK_UTF_16_LE_NOSIG)
695 /* List of symbols `coding-category-xxx' ordered by priority. This
696 variable is exposed to Emacs Lisp. */
697 static Lisp_Object Vcoding_category_list
;
699 /* Table of coding categories (Lisp symbols). This variable is for
701 static Lisp_Object Vcoding_category_table
;
703 /* Table of coding-categories ordered by priority. */
704 static enum coding_category coding_priorities
[coding_category_max
];
706 /* Nth element is a coding context for the coding system bound to the
707 Nth coding category. */
708 static struct coding_system coding_categories
[coding_category_max
];
710 /*** Commonly used macros and functions ***/
713 #define min(a, b) ((a) < (b) ? (a) : (b))
716 #define max(a, b) ((a) > (b) ? (a) : (b))
719 #define CODING_GET_INFO(coding, attrs, charset_list) \
721 (attrs) = CODING_ID_ATTRS ((coding)->id); \
722 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
726 /* Safely get one byte from the source text pointed by SRC which ends
727 at SRC_END, and set C to that byte. If there are not enough bytes
728 in the source, it jumps to `no_more_source'. If multibytep is
729 nonzero, and a multibyte character is found at SRC, set C to the
730 negative value of the character code. The caller should declare
731 and set these variables appropriately in advance:
732 src, src_end, multibytep */
734 #define ONE_MORE_BYTE(c) \
736 if (src == src_end) \
738 if (src_base < src) \
739 record_conversion_result \
740 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
741 goto no_more_source; \
744 if (multibytep && (c & 0x80)) \
746 if ((c & 0xFE) == 0xC0) \
747 c = ((c & 1) << 6) | *src++; \
750 c = - string_char (--src, &src, NULL); \
751 record_conversion_result \
752 (coding, CODING_RESULT_INVALID_SRC); \
759 #define ONE_MORE_BYTE_NO_CHECK(c) \
762 if (multibytep && (c & 0x80)) \
764 if ((c & 0xFE) == 0xC0) \
765 c = ((c & 1) << 6) | *src++; \
768 c = - string_char (--src, &src, NULL); \
769 record_conversion_result \
770 (coding, CODING_RESULT_INVALID_SRC); \
777 /* Store a byte C in the place pointed by DST and increment DST to the
778 next free point, and increment PRODUCED_CHARS. The caller should
779 assure that C is 0..127, and declare and set the variable `dst'
780 appropriately in advance.
784 #define EMIT_ONE_ASCII_BYTE(c) \
791 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
793 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
795 produced_chars += 2; \
796 *dst++ = (c1), *dst++ = (c2); \
800 /* Store a byte C in the place pointed by DST and increment DST to the
801 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
802 nonzero, store in an appropriate multibyte form. The caller should
803 declare and set the variables `dst' and `multibytep' appropriately
806 #define EMIT_ONE_BYTE(c) \
813 ch = BYTE8_TO_CHAR (ch); \
814 CHAR_STRING_ADVANCE (ch, dst); \
821 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
823 #define EMIT_TWO_BYTES(c1, c2) \
825 produced_chars += 2; \
832 ch = BYTE8_TO_CHAR (ch); \
833 CHAR_STRING_ADVANCE (ch, dst); \
836 ch = BYTE8_TO_CHAR (ch); \
837 CHAR_STRING_ADVANCE (ch, dst); \
847 #define EMIT_THREE_BYTES(c1, c2, c3) \
849 EMIT_ONE_BYTE (c1); \
850 EMIT_TWO_BYTES (c2, c3); \
854 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
856 EMIT_TWO_BYTES (c1, c2); \
857 EMIT_TWO_BYTES (c3, c4); \
862 record_conversion_result (struct coding_system
*coding
,
863 enum coding_result_code result
)
865 coding
->result
= result
;
868 case CODING_RESULT_INSUFFICIENT_SRC
:
869 Vlast_code_conversion_error
= Qinsufficient_source
;
871 case CODING_RESULT_INCONSISTENT_EOL
:
872 Vlast_code_conversion_error
= Qinconsistent_eol
;
874 case CODING_RESULT_INVALID_SRC
:
875 Vlast_code_conversion_error
= Qinvalid_source
;
877 case CODING_RESULT_INTERRUPT
:
878 Vlast_code_conversion_error
= Qinterrupted
;
880 case CODING_RESULT_INSUFFICIENT_MEM
:
881 Vlast_code_conversion_error
= Qinsufficient_memory
;
886 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
888 charset_map_loaded = 0; \
889 c = DECODE_CHAR (charset, code); \
890 if (charset_map_loaded) \
892 const unsigned char *orig = coding->source; \
895 coding_set_source (coding); \
896 offset = coding->source - orig; \
898 src_base += offset; \
904 #define ASSURE_DESTINATION(bytes) \
906 if (dst + (bytes) >= dst_end) \
908 int more_bytes = charbuf_end - charbuf + (bytes); \
910 dst = alloc_destination (coding, more_bytes, dst); \
911 dst_end = coding->destination + coding->dst_bytes; \
918 coding_set_source (coding
)
919 struct coding_system
*coding
;
921 if (BUFFERP (coding
->src_object
))
923 struct buffer
*buf
= XBUFFER (coding
->src_object
);
925 if (coding
->src_pos
< 0)
926 coding
->source
= BUF_GAP_END_ADDR (buf
) + coding
->src_pos_byte
;
928 coding
->source
= BUF_BYTE_ADDRESS (buf
, coding
->src_pos_byte
);
930 else if (STRINGP (coding
->src_object
))
932 coding
->source
= SDATA (coding
->src_object
) + coding
->src_pos_byte
;
935 /* Otherwise, the source is C string and is never relocated
936 automatically. Thus we don't have to update anything. */
941 coding_set_destination (coding
)
942 struct coding_system
*coding
;
944 if (BUFFERP (coding
->dst_object
))
946 if (coding
->src_pos
< 0)
948 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
949 coding
->dst_bytes
= (GAP_END_ADDR
950 - (coding
->src_bytes
- coding
->consumed
)
951 - coding
->destination
);
955 /* We are sure that coding->dst_pos_byte is before the gap
957 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
958 + coding
->dst_pos_byte
- 1);
959 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
960 - coding
->destination
);
964 /* Otherwise, the destination is C string and is never relocated
965 automatically. Thus we don't have to update anything. */
971 coding_alloc_by_realloc (coding
, bytes
)
972 struct coding_system
*coding
;
975 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
976 coding
->dst_bytes
+ bytes
);
977 coding
->dst_bytes
+= bytes
;
981 coding_alloc_by_making_gap (coding
, bytes
)
982 struct coding_system
*coding
;
985 if (BUFFERP (coding
->dst_object
)
986 && EQ (coding
->src_object
, coding
->dst_object
))
988 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
990 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
992 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
996 Lisp_Object this_buffer
;
998 this_buffer
= Fcurrent_buffer ();
999 set_buffer_internal (XBUFFER (coding
->dst_object
));
1001 set_buffer_internal (XBUFFER (this_buffer
));
1006 static unsigned char *
1007 alloc_destination (coding
, nbytes
, dst
)
1008 struct coding_system
*coding
;
1012 EMACS_INT offset
= dst
- coding
->destination
;
1014 if (BUFFERP (coding
->dst_object
))
1015 coding_alloc_by_making_gap (coding
, nbytes
);
1017 coding_alloc_by_realloc (coding
, nbytes
);
1018 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1019 coding_set_destination (coding
);
1020 dst
= coding
->destination
+ offset
;
1024 /** Macros for annotations. */
1026 /* Maximum length of annotation data (sum of annotations for
1027 composition and charset). */
1028 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1030 /* An annotation data is stored in the array coding->charbuf in this
1032 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1033 LENGTH is the number of elements in the annotation.
1034 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1035 FROM and TO specify the range of text annotated. They are relative
1036 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1038 The format of the following elements depend on ANNOTATION_MASK.
1040 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1042 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1043 METHOD is one of enum composition_method.
1044 Optional COMPOSITION-COMPONENTS are characters and composition
1047 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1050 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
1052 *(buf)++ = -(len); \
1053 *(buf)++ = (mask); \
1054 *(buf)++ = (from); \
1056 coding->annotated = 1; \
1059 #define ADD_COMPOSITION_DATA(buf, from, to, method) \
1061 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1066 #define ADD_CHARSET_DATA(buf, from, to, id) \
1068 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1073 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1080 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1081 Check if a text is encoded in UTF-8. If it is, return 1, else
/* Byte classifiers for UTF-8 sequences.  Each one tests the high bits
   of the byte value C.  UTF_8_1_OCTET_P is a plain `<' comparison, so
   it is also true for a negative C; callers that use a negative value
   as a marker must test for that first.  */
/* C is below 0x80, i.e. a one-octet sequence.  */
1084 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
/* C has the bit pattern 10xxxxxx (a non-leading octet).  */
1085 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
/* C has the bit pattern 110xxxxx (leads a two-octet sequence).  */
1086 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* C has the bit pattern 1110xxxx (leads a three-octet sequence).  */
1087 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* C has the bit pattern 11110xxx (leads a four-octet sequence).  */
1088 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* C has the bit pattern 111110xx (leads a five-octet sequence).  */
1089 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1092 detect_coding_utf_8 (coding
, detect_info
)
1093 struct coding_system
*coding
;
1094 struct coding_detection_info
*detect_info
;
1096 const unsigned char *src
= coding
->source
, *src_base
;
1097 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1098 int multibytep
= coding
->src_multibyte
;
1099 int consumed_chars
= 0;
1102 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1103 /* A coding system of this category is always ASCII compatible. */
1104 src
+= coding
->head_ascii
;
1108 int c
, c1
, c2
, c3
, c4
;
1112 if (c
< 0 || UTF_8_1_OCTET_P (c
))
1115 if (c1
< 0 || ! UTF_8_EXTRA_OCTET_P (c1
))
1117 if (UTF_8_2_OCTET_LEADING_P (c
))
1119 found
= CATEGORY_MASK_UTF_8
;
1123 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1125 if (UTF_8_3_OCTET_LEADING_P (c
))
1127 found
= CATEGORY_MASK_UTF_8
;
1131 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1133 if (UTF_8_4_OCTET_LEADING_P (c
))
1135 found
= CATEGORY_MASK_UTF_8
;
1139 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1141 if (UTF_8_5_OCTET_LEADING_P (c
))
1143 found
= CATEGORY_MASK_UTF_8
;
1148 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1152 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1154 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1157 detect_info
->found
|= found
;
1163 decode_coding_utf_8 (coding
)
1164 struct coding_system
*coding
;
1166 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1167 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1168 const unsigned char *src_base
;
1169 int *charbuf
= coding
->charbuf
;
1170 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1171 int consumed_chars
= 0, consumed_chars_base
;
1172 int multibytep
= coding
->src_multibyte
;
1173 Lisp_Object attr
, charset_list
;
1175 CODING_GET_INFO (coding
, attr
, charset_list
);
1179 int c
, c1
, c2
, c3
, c4
, c5
;
1182 consumed_chars_base
= consumed_chars
;
1184 if (charbuf
>= charbuf_end
)
1192 else if (UTF_8_1_OCTET_P(c1
))
1199 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1201 if (UTF_8_2_OCTET_LEADING_P (c1
))
1203 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1204 /* Reject overlong sequences here and below. Encoders
1205 producing them are incorrect, they can be misleading,
1206 and they mess up read/write invariance. */
1213 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1215 if (UTF_8_3_OCTET_LEADING_P (c1
))
1217 c
= (((c1
& 0xF) << 12)
1218 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1220 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1226 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1228 if (UTF_8_4_OCTET_LEADING_P (c1
))
1230 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1231 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1238 if (c5
< 0 || ! UTF_8_EXTRA_OCTET_P (c5
))
1240 if (UTF_8_5_OCTET_LEADING_P (c1
))
1242 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1243 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1245 if ((c
> MAX_CHAR
) || (c
< 0x200000))
1260 consumed_chars
= consumed_chars_base
;
1262 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1267 coding
->consumed_char
+= consumed_chars_base
;
1268 coding
->consumed
= src_base
- coding
->source
;
1269 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1274 encode_coding_utf_8 (coding
)
1275 struct coding_system
*coding
;
1277 int multibytep
= coding
->dst_multibyte
;
1278 int *charbuf
= coding
->charbuf
;
1279 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1280 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1281 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1282 int produced_chars
= 0;
1287 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1289 while (charbuf
< charbuf_end
)
1291 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1293 ASSURE_DESTINATION (safe_room
);
1295 if (CHAR_BYTE8_P (c
))
1297 c
= CHAR_TO_BYTE8 (c
);
1302 CHAR_STRING_ADVANCE (c
, pend
);
1303 for (p
= str
; p
< pend
; p
++)
1310 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1312 while (charbuf
< charbuf_end
)
1314 ASSURE_DESTINATION (safe_room
);
1316 dst
+= CHAR_STRING (c
, dst
);
1320 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1321 coding
->produced_char
+= produced_chars
;
1322 coding
->produced
= dst
- coding
->destination
;
1327 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1328 Check if a text is encoded in one of UTF-16 based coding systems.
1329 If it is, return 1, else return 0. */
/* True if (VAL & 0xFC00) equals 0xD800; for a 16-bit VAL this is the
   range 0xD800..0xDBFF.  */
1331 #define UTF_16_HIGH_SURROGATE_P(val) \
1332 (((val) & 0xFC00) == 0xD800)
/* True if (VAL & 0xFC00) equals 0xDC00; for a 16-bit VAL this is the
   range 0xDC00..0xDFFF.  */
1334 #define UTF_16_LOW_SURROGATE_P(val) \
1335 (((val) & 0xFC00) == 0xDC00)
/* True if VAL is 0xFFFE, 0xFFFF, or satisfies UTF_16_LOW_SURROGATE_P.
   VAL is evaluated up to three times, so it must have no side
   effects.  */
1337 #define UTF_16_INVALID_P(val) \
1338 (((val) == 0xFFFE) \
1339 || ((val) == 0xFFFF) \
1340 || UTF_16_LOW_SURROGATE_P (val))
1344 detect_coding_utf_16 (coding
, detect_info
)
1345 struct coding_system
*coding
;
1346 struct coding_detection_info
*detect_info
;
1348 const unsigned char *src
= coding
->source
, *src_base
= src
;
1349 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1350 int multibytep
= coding
->src_multibyte
;
1351 int consumed_chars
= 0;
1354 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
1355 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1356 && (coding
->src_chars
& 1))
1358 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
1364 if ((c1
== 0xFF) && (c2
== 0xFE))
1366 detect_info
->found
|= (CATEGORY_MASK_UTF_16_LE
1367 | CATEGORY_MASK_UTF_16_AUTO
);
1368 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_BE
1369 | CATEGORY_MASK_UTF_16_BE_NOSIG
1370 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1372 else if ((c1
== 0xFE) && (c2
== 0xFF))
1374 detect_info
->found
|= (CATEGORY_MASK_UTF_16_BE
1375 | CATEGORY_MASK_UTF_16_AUTO
);
1376 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_LE
1377 | CATEGORY_MASK_UTF_16_BE_NOSIG
1378 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1380 else if (c1
>= 0 && c2
>= 0)
1382 unsigned char b1
[256], b2
[256];
1383 int b1_variants
= 1, b2_variants
= 1;
1386 bzero (b1
, 256), bzero (b2
, 256);
1388 for (n
= 0; n
< 256 && src
< src_end
; n
++)
1393 if (c1
< 0 || c2
< 0)
1395 if (! b1
[c1
++]) b1_variants
++;
1396 if (! b2
[c2
++]) b2_variants
++;
1398 if (b1_variants
< b2_variants
)
1399 detect_info
->found
|= CATEGORY_MASK_UTF_16_BE_NOSIG
;
1401 detect_info
->found
|= CATEGORY_MASK_UTF_16_LE_NOSIG
;
1402 detect_info
->rejected
1403 |= (CATEGORY_MASK_UTF_16_BE
| CATEGORY_MASK_UTF_16_LE
);
1410 decode_coding_utf_16 (coding
)
1411 struct coding_system
*coding
;
1413 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1414 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1415 const unsigned char *src_base
;
1416 int *charbuf
= coding
->charbuf
;
1417 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1418 int consumed_chars
= 0, consumed_chars_base
;
1419 int multibytep
= coding
->src_multibyte
;
1420 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1421 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1422 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1423 Lisp_Object attr
, charset_list
;
1425 CODING_GET_INFO (coding
, attr
, charset_list
);
1427 if (bom
== utf_16_with_bom
)
1436 if (endian
== utf_16_big_endian
1437 ? c
!= 0xFEFF : c
!= 0xFFFE)
1439 /* The first two bytes are not BOM. Treat them as bytes
1440 for a normal character. */
1444 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1446 else if (bom
== utf_16_detect_bom
)
1448 /* We have already tried to detect BOM and failed in
1450 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1458 consumed_chars_base
= consumed_chars
;
1460 if (charbuf
+ 2 >= charbuf_end
)
1472 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
1476 c
= (endian
== utf_16_big_endian
1477 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1480 if (! UTF_16_LOW_SURROGATE_P (c
))
1482 if (endian
== utf_16_big_endian
)
1483 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1485 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1489 if (UTF_16_HIGH_SURROGATE_P (c
))
1490 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1496 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1497 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1503 if (UTF_16_HIGH_SURROGATE_P (c
))
1504 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1511 coding
->consumed_char
+= consumed_chars_base
;
1512 coding
->consumed
= src_base
- coding
->source
;
1513 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1517 encode_coding_utf_16 (coding
)
1518 struct coding_system
*coding
;
1520 int multibytep
= coding
->dst_multibyte
;
1521 int *charbuf
= coding
->charbuf
;
1522 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1523 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1524 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1526 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1527 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1528 int produced_chars
= 0;
1529 Lisp_Object attrs
, charset_list
;
1532 CODING_GET_INFO (coding
, attrs
, charset_list
);
1534 if (bom
!= utf_16_without_bom
)
1536 ASSURE_DESTINATION (safe_room
);
1538 EMIT_TWO_BYTES (0xFE, 0xFF);
1540 EMIT_TWO_BYTES (0xFF, 0xFE);
1541 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1544 while (charbuf
< charbuf_end
)
1546 ASSURE_DESTINATION (safe_room
);
1548 if (c
>= MAX_UNICODE_CHAR
)
1549 c
= coding
->default_char
;
1554 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1556 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1563 c1
= (c
>> 10) + 0xD800;
1564 c2
= (c
& 0x3FF) + 0xDC00;
1566 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1568 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1571 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1572 coding
->produced
= dst
- coding
->destination
;
1573 coding
->produced_char
+= produced_chars
;
1578 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1580 /* Emacs' internal format for representation of multiple character
1581 sets is a kind of multi-byte encoding, i.e. characters are
1582 represented by variable-length sequences of one-byte codes.
1584 ASCII characters and control characters (e.g. `tab', `newline') are
1585 represented by one-byte sequences which are their ASCII codes, in
1586 the range 0x00 through 0x7F.
1588 8-bit characters of the range 0x80..0x9F are represented by
1589 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1592 8-bit characters of the range 0xA0..0xFF are represented by
1593 one-byte sequences which are their 8-bit code.
1595 The other characters are represented by a sequence of `base
1596 leading-code', optional `extended leading-code', and one or two
1597 `position-code's. The length of the sequence is determined by the
1598 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1599 whereas extended leading-code and position-code take the range 0xA0
1600 through 0xFF. See `charset.h' for more details about leading-code
1603 --- CODE RANGE of Emacs' internal format ---
1607 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1608 eight-bit-graphic 0xA0..0xFF
1609 ELSE 0x81..0x9D + [0xA0..0xFF]+
1610 ---------------------------------------------
1612 As this is the internal character representation, the format is
1613 usually not used externally (i.e. in a file or in a data sent to a
1614 process). But, it is possible to have a text externally in this
1615 format (i.e. by encoding by the coding system `emacs-mule').
1617 In that case, a sequence of one-byte codes has a slightly different
1620 At first, all characters in eight-bit-control are represented by
1621 one-byte sequences which are their 8-bit code.
1623 Next, character composition data are represented by the byte
1624 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1626 METHOD is 0xF2 plus one of composition method (enum
1627 composition_method),
1629 BYTES is 0xA0 plus a byte length of this composition data,
1631 CHARS is 0xA0 plus a number of characters composed by this
1634 COMPONENTs are characters of multibyte form or composition
1635 rules encoded as two bytes of ASCII codes.
1637 In addition, for backward compatibility, the following formats are
1638 also recognized as composition data on decoding.
1641 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1644 MSEQ is a multibyte form but in one of these special formats:
1645 ASCII: 0xA0 ASCII_CODE+0x80,
1646 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1647 RULE is a one byte code of the range 0xA0..0xF0 that
1648 represents a composition rule.
/* Table with one entry per possible byte value (256 entries).
   NOTE(review): judging from its uses in this file (it is switched on
   after reading a leading byte, and compared against the number of
   bytes consumed for a sequence), each entry appears to be the byte
   length of the emacs-mule sequence introduced by that byte --
   confirm against the code that fills the table in.  */
1651 char emacs_mule_bytes
[256];
1654 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1655 struct coding_system
*coding
;
1656 const unsigned char *src
;
1657 int *nbytes
, *nchars
, *id
;
1659 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1660 const unsigned char *src_base
= src
;
1661 int multibytep
= coding
->src_multibyte
;
1662 struct charset
*charset
;
1665 int consumed_chars
= 0;
1671 charset
= emacs_mule_charset
[0];
1675 switch (emacs_mule_bytes
[c
])
1678 if (! (charset
= emacs_mule_charset
[c
]))
1687 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1688 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1691 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1700 if (! (charset
= emacs_mule_charset
[c
]))
1705 code
= (c
& 0x7F) << 8;
1715 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1720 code
= (c
& 0x7F) << 8;
1729 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1730 ? charset_ascii
: charset_eight_bit
);
1736 c
= DECODE_CHAR (charset
, code
);
1740 *nbytes
= src
- src_base
;
1741 *nchars
= consumed_chars
;
1754 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1755 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1759 detect_coding_emacs_mule (coding
, detect_info
)
1760 struct coding_system
*coding
;
1761 struct coding_detection_info
*detect_info
;
1763 const unsigned char *src
= coding
->source
, *src_base
;
1764 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1765 int multibytep
= coding
->src_multibyte
;
1766 int consumed_chars
= 0;
1770 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1771 /* A coding system of this category is always ASCII compatible. */
1772 src
+= coding
->head_ascii
;
1782 /* Perhaps the start of composite character. We simply skip
1783 it because analyzing it is too heavy for detecting. But,
1784 at least, we check that the composite character
1785 consists of more than 4 bytes. */
1786 const unsigned char *src_base
;
1796 if (src
- src_base
<= 4)
1798 found
= CATEGORY_MASK_EMACS_MULE
;
1806 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1811 const unsigned char *src_base
= src
- 1;
1818 if (src
- src_base
!= emacs_mule_bytes
[*src_base
])
1820 found
= CATEGORY_MASK_EMACS_MULE
;
1823 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1827 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1829 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1832 detect_info
->found
|= found
;
1837 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1839 /* Decode a character represented as a component of composition
1840 sequence of Emacs 20/21 style at SRC. Set C to that character and
1841 update SRC to the head of next character (or an encoded composition
1842 rule). If SRC doesn't point to a composition component, set C to -1.
1843 If SRC points to an invalid byte sequence, global exit by a return
1846 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1850 int nbytes, nchars; \
1852 if (src == src_end) \
1854 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1859 goto invalid_code; \
1863 consumed_chars += nchars; \
1868 /* Decode a composition rule represented as a component of composition
1869 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1870 and increment BUF. If SRC points to an invalid byte sequence, set C
1873 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1875 int c, gref, nref; \
1877 if (src >= src_end) \
1878 goto invalid_code; \
1879 ONE_MORE_BYTE_NO_CHECK (c); \
1881 if (c < 0 || c >= 81) \
1882 goto invalid_code; \
1884 gref = c / 9, nref = c % 9; \
1885 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1889 /* Decode a composition rule represented as a component of composition
1890 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1891 and increment BUF. If SRC points to an invalid byte sequence, set C
1894 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1898 if (src + 1>= src_end) \
1899 goto invalid_code; \
1900 ONE_MORE_BYTE_NO_CHECK (gref); \
1902 ONE_MORE_BYTE_NO_CHECK (nref); \
1904 if (gref < 0 || gref >= 81 \
1905 || nref < 0 || nref >= 81) \
1906 goto invalid_code; \
1907 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1911 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1913 /* Emacs 21 style format. The first three bytes at SRC are \
1914 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1915 the byte length of this composition information, CHARS is the \
1916 number of characters composed by this composition. */ \
1917 enum composition_method method = c - 0xF2; \
1918 int *charbuf_base = charbuf; \
1920 int consumed_chars_limit; \
1921 int nbytes, nchars; \
1923 ONE_MORE_BYTE (c); \
1925 goto invalid_code; \
1926 nbytes = c - 0xA0; \
1928 goto invalid_code; \
1929 ONE_MORE_BYTE (c); \
1931 goto invalid_code; \
1932 nchars = c - 0xA0; \
1933 from = coding->produced + char_offset; \
1934 to = from + nchars; \
1935 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1936 consumed_chars_limit = consumed_chars_base + nbytes; \
1937 if (method != COMPOSITION_RELATIVE) \
1940 while (consumed_chars < consumed_chars_limit) \
1942 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1943 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1945 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1948 if (consumed_chars < consumed_chars_limit) \
1949 goto invalid_code; \
1950 charbuf_base[0] -= i; \
1955 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1957 /* Emacs 20 style format for relative composition. */ \
1958 /* Store multibyte form of characters to be composed. */ \
1959 enum composition_method method = COMPOSITION_RELATIVE; \
1960 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1961 int *buf = components; \
1966 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1967 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1968 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1970 goto invalid_code; \
1971 from = coding->produced_char + char_offset; \
1973 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1974 for (j = 0; j < i; j++) \
1975 *charbuf++ = components[j]; \
1979 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1981 /* Emacs 20 style format for rule-base composition. */ \
1982 /* Store multibyte form of characters to be composed. */ \
1983 enum composition_method method = COMPOSITION_WITH_RULE; \
1984 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1985 int *buf = components; \
1989 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1990 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1992 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1993 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1995 if (i < 1 || (buf - components) % 2 == 0) \
1996 goto invalid_code; \
1997 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1998 goto no_more_source; \
1999 from = coding->produced_char + char_offset; \
2001 ADD_COMPOSITION_DATA (buf, from, to, method); \
2002 for (j = 0; j < i; j++) \
2003 *charbuf++ = components[j]; \
2004 for (j = 0; j < i; j += 2) \
2005 *charbuf++ = components[j]; \
2010 decode_coding_emacs_mule (coding
)
2011 struct coding_system
*coding
;
2013 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2014 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2015 const unsigned char *src_base
;
2016 int *charbuf
= coding
->charbuf
;
2017 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
2018 int consumed_chars
= 0, consumed_chars_base
;
2019 int multibytep
= coding
->src_multibyte
;
2020 Lisp_Object attrs
, charset_list
;
2021 int char_offset
= coding
->produced_char
;
2022 int last_offset
= char_offset
;
2023 int last_id
= charset_ascii
;
2025 CODING_GET_INFO (coding
, attrs
, charset_list
);
2032 consumed_chars_base
= consumed_chars
;
2034 if (charbuf
>= charbuf_end
)
2053 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2054 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2055 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2057 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2059 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2063 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2069 consumed_chars
= consumed_chars_base
;
2070 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2079 if (last_id
!= charset_ascii
)
2080 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2082 last_offset
= char_offset
;
2086 consumed_chars
+= nchars
;
2093 consumed_chars
= consumed_chars_base
;
2095 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
2101 if (last_id
!= charset_ascii
)
2102 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2103 coding
->consumed_char
+= consumed_chars_base
;
2104 coding
->consumed
= src_base
- coding
->source
;
2105 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
2109 #define EMACS_MULE_LEADING_CODES(id, codes) \
2112 codes[0] = id, codes[1] = 0; \
2113 else if (id < 0xE0) \
2114 codes[0] = 0x9A, codes[1] = id; \
2115 else if (id < 0xF0) \
2116 codes[0] = 0x9B, codes[1] = id; \
2117 else if (id < 0xF5) \
2118 codes[0] = 0x9C, codes[1] = id; \
2120 codes[0] = 0x9D, codes[1] = id; \
2125 encode_coding_emacs_mule (coding
)
2126 struct coding_system
*coding
;
2128 int multibytep
= coding
->dst_multibyte
;
2129 int *charbuf
= coding
->charbuf
;
2130 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2131 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2132 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2134 int produced_chars
= 0;
2135 Lisp_Object attrs
, charset_list
;
2137 int preferred_charset_id
= -1;
2139 CODING_GET_INFO (coding
, attrs
, charset_list
);
2140 if (! EQ (charset_list
, Vemacs_mule_charset_list
))
2142 CODING_ATTR_CHARSET_LIST (attrs
)
2143 = charset_list
= Vemacs_mule_charset_list
;
2146 while (charbuf
< charbuf_end
)
2148 ASSURE_DESTINATION (safe_room
);
2153 /* Handle an annotation. */
2156 case CODING_ANNOTATE_COMPOSITION_MASK
:
2157 /* Not yet implemented. */
2159 case CODING_ANNOTATE_CHARSET_MASK
:
2160 preferred_charset_id
= charbuf
[3];
2161 if (preferred_charset_id
>= 0
2162 && NILP (Fmemq (make_number (preferred_charset_id
),
2164 preferred_charset_id
= -1;
2173 if (ASCII_CHAR_P (c
))
2174 EMIT_ONE_ASCII_BYTE (c
);
2175 else if (CHAR_BYTE8_P (c
))
2177 c
= CHAR_TO_BYTE8 (c
);
2182 struct charset
*charset
;
2186 unsigned char leading_codes
[2];
2188 if (preferred_charset_id
>= 0)
2190 charset
= CHARSET_FROM_ID (preferred_charset_id
);
2191 if (! CHAR_CHARSET_P (c
, charset
))
2192 charset
= char_charset (c
, charset_list
, NULL
);
2195 charset
= char_charset (c
, charset_list
, &code
);
2198 c
= coding
->default_char
;
2199 if (ASCII_CHAR_P (c
))
2201 EMIT_ONE_ASCII_BYTE (c
);
2204 charset
= char_charset (c
, charset_list
, &code
);
2206 dimension
= CHARSET_DIMENSION (charset
);
2207 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2208 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2209 EMIT_ONE_BYTE (leading_codes
[0]);
2210 if (leading_codes
[1])
2211 EMIT_ONE_BYTE (leading_codes
[1]);
2213 EMIT_ONE_BYTE (code
| 0x80);
2217 EMIT_ONE_BYTE (code
>> 8);
2218 EMIT_ONE_BYTE (code
& 0xFF);
2222 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
2223 coding
->produced_char
+= produced_chars
;
2224 coding
->produced
= dst
- coding
->destination
;
2229 /*** 7. ISO2022 handlers ***/
2231 /* The following note describes the coding system ISO2022 briefly.
2232 Since the intention of this note is to help understand the
2233 functions in this file, some parts are NOT ACCURATE or are OVERLY
2234 SIMPLIFIED. For thorough understanding, please refer to the
2235 original document of ISO2022. This is equivalent to the standard
2236 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2238 ISO2022 provides many mechanisms to encode several character sets
2239 in 7-bit and 8-bit environments. For 7-bit environments, all text
2240 is encoded using bytes less than 128. This may make the encoded
2241 text a little bit longer, but the text passes more easily through
2242 several types of gateway, some of which strip off the MSB (Most
2245 There are two kinds of character sets: control character sets and
2246 graphic character sets. The former contain control characters such
2247 as `newline' and `escape' to provide control functions (control
2248 functions are also provided by escape sequences). The latter
2249 contain graphic characters such as 'A' and '-'. Emacs recognizes
2250 two control character sets and many graphic character sets.
2252 Graphic character sets are classified into one of the following
2253 four classes, according to the number of bytes (DIMENSION) and
2254 number of characters in one dimension (CHARS) of the set:
2255 - DIMENSION1_CHARS94
2256 - DIMENSION1_CHARS96
2257 - DIMENSION2_CHARS94
2258 - DIMENSION2_CHARS96
2260 In addition, each character set is assigned an identification tag,
2261 unique for each set, called the "final character" (denoted as <F>
2262 hereafter). The <F> of each character set is decided by ECMA(*)
2263 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2264 (0x30..0x3F are for private use only).
2266 Note (*): ECMA = European Computer Manufacturers Association
2268 Here are examples of graphic character sets [NAME(<F>)]:
2269 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2270 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2271 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2272 o DIMENSION2_CHARS96 -- none for the moment
2274 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2275 C0 [0x00..0x1F] -- control character plane 0
2276 GL [0x20..0x7F] -- graphic character plane 0
2277 C1 [0x80..0x9F] -- control character plane 1
2278 GR [0xA0..0xFF] -- graphic character plane 1
2280 A control character set is directly designated and invoked to C0 or
2281 C1 by an escape sequence. The most common case is that:
2282 - ISO646's control character set is designated/invoked to C0, and
2283 - ISO6429's control character set is designated/invoked to C1,
2284 and usually these designations/invocations are omitted in encoded
2285 text. In a 7-bit environment, only C0 can be used, and a control
2286 character for C1 is encoded by an appropriate escape sequence to
2287 fit into the environment. All control characters for C1 are
2288 defined to have corresponding escape sequences.
2290 A graphic character set is at first designated to one of four
2291 graphic registers (G0 through G3), then these graphic registers are
2292 invoked to GL or GR. These designations and invocations can be
2293 done independently. The most common case is that G0 is invoked to
2294 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2295 these invocations and designations are omitted in encoded text.
2296 In a 7-bit environment, only GL can be used.
2298 When a graphic character set of CHARS94 is invoked to GL, codes
2299 0x20 and 0x7F of the GL area work as control characters SPACE and
2300 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2303 There are two ways of invocation: locking-shift and single-shift.
2304 With locking-shift, the invocation lasts until the next different
2305 invocation, whereas with single-shift, the invocation affects the
2306 following character only and doesn't affect the locking-shift
2307 state. Invocations are done by the following control characters or
2310 ----------------------------------------------------------------------
2311 abbrev function cntrl escape seq description
2312 ----------------------------------------------------------------------
2313 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2314 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2315 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2316 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2317 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2318 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2319 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2320 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2321 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2322 ----------------------------------------------------------------------
2323 (*) These are not used by any known coding system.
2325 Control characters for these functions are defined by macros
2326 ISO_CODE_XXX in `coding.h'.
2328 Designations are done by the following escape sequences:
2329 ----------------------------------------------------------------------
2330 escape sequence description
2331 ----------------------------------------------------------------------
2332 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2333 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2334 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2335 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2336 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2337 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2338 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2339 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2340 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2341 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2342 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2343 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2344 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2345 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2346 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2347 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2348 ----------------------------------------------------------------------
2350 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2351 of dimension 1, chars 94, and final character <F>, etc...
2353 Note (*): Although these designations are not allowed in ISO2022,
2354 Emacs accepts them on decoding, and produces them on encoding
2355 CHARS96 character sets in a coding system which is characterized as
2356 7-bit environment, non-locking-shift, and non-single-shift.
2358 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2359 '(' must be omitted. We refer to this as "short-form" hereafter.
2361 Now you may notice that there are a lot of ways of encoding the
2362 same multilingual text in ISO2022. Actually, there exist many
2363 coding systems such as Compound Text (used in X11's inter-client
2364 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2365 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2366 localized platforms), and all of these are variants of ISO2022.
2368 In addition to the above, Emacs handles two more kinds of escape
2369 sequences: ISO6429's direction specification and Emacs' private
2370 sequence for specifying character composition.
2372 ISO6429's direction specification takes the following form:
2373 o CSI ']' -- end of the current direction
2374 o CSI '0' ']' -- end of the current direction
2375 o CSI '1' ']' -- start of left-to-right text
2376 o CSI '2' ']' -- start of right-to-left text
2377 The control character CSI (0x9B: control sequence introducer) is
2378 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2380 Character composition specification takes the following form:
2381 o ESC '0' -- start relative composition
2382 o ESC '1' -- end composition
2383 o ESC '2' -- start rule-base composition (*)
2384 o ESC '3' -- start relative composition with alternate chars (**)
2385 o ESC '4' -- start rule-base composition with alternate chars (**)
2386 Since these are not standard escape sequences of any ISO standard,
2387 the use of them with these meanings is restricted to Emacs only.
2389 (*) This form is used only in Emacs 20.7 and older versions,
2390 but newer versions can safely decode it.
2391 (**) This form is used only in Emacs 21.1 and newer versions,
2392 and older versions can't decode it.
2394 Here's a list of example usages of these composition escape
2395 sequences (categorized by `enum composition_method').
2397 COMPOSITION_RELATIVE:
2398 ESC 0 CHAR [ CHAR ] ESC 1
2399 COMPOSITION_WITH_RULE:
2400 ESC 2 CHAR [ RULE CHAR ] ESC 1
2401 COMPOSITION_WITH_ALTCHARS:
2402 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2403 COMPOSITION_WITH_RULE_ALTCHARS:
2404 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2406 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if charset ID lies within CODING's safe-charset table
   (ID <= max_charset_id) and its entry in that table is
   non-negative.  The range test comes first so that the table is
   never indexed past max_charset_id.  */
#define SAFE_CHARSET_P(coding, id) \
  ((coding)->max_charset_id >= (id) \
   && 0 <= (coding)->safe_charsets[id])
/* Nonzero if the coding system registered for CATEGORY in
   coding_categories has a non-negative initial designation for
   graphic register 1.  */
#define SHIFT_OUT_OK(category) \
  (0 <= CODING_ISO_INITIAL (&coding_categories[category], 1))
2417 setup_iso_safe_charsets (attrs
)
2420 Lisp_Object charset_list
, safe_charsets
;
2421 Lisp_Object request
;
2422 Lisp_Object reg_usage
;
2425 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2428 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2429 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2430 && ! EQ (charset_list
, Viso_2022_charset_list
))
2432 CODING_ATTR_CHARSET_LIST (attrs
)
2433 = charset_list
= Viso_2022_charset_list
;
2434 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2437 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2441 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2443 int id
= XINT (XCAR (tail
));
2444 if (max_charset_id
< id
)
2445 max_charset_id
= id
;
2448 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2450 request
= AREF (attrs
, coding_attr_iso_request
);
2451 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2452 reg94
= XINT (XCAR (reg_usage
));
2453 reg96
= XINT (XCDR (reg_usage
));
2455 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2459 struct charset
*charset
;
2462 charset
= CHARSET_FROM_ID (XINT (id
));
2463 reg
= Fcdr (Fassq (id
, request
));
2465 SSET (safe_charsets
, XINT (id
), XINT (reg
));
2466 else if (charset
->iso_chars_96
)
2469 SSET (safe_charsets
, XINT (id
), reg96
);
2474 SSET (safe_charsets
, XINT (id
), reg94
);
2477 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2481 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2482 Check if a text is encoded in one of ISO-2022 based coding systems.
2483 If it is, return 1, else return 0. */
2486 detect_coding_iso_2022 (coding
, detect_info
)
2487 struct coding_system
*coding
;
2488 struct coding_detection_info
*detect_info
;
2490 const unsigned char *src
= coding
->source
, *src_base
= src
;
2491 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2492 int multibytep
= coding
->src_multibyte
;
2493 int single_shifting
= 0;
2496 int consumed_chars
= 0;
2501 detect_info
->checked
|= CATEGORY_MASK_ISO
;
2503 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2505 struct coding_system
*this = &(coding_categories
[i
]);
2506 Lisp_Object attrs
, val
;
2508 attrs
= CODING_ID_ATTRS (this->id
);
2509 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2510 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2511 setup_iso_safe_charsets (attrs
);
2512 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2513 this->max_charset_id
= SCHARS (val
) - 1;
2514 this->safe_charsets
= (char *) SDATA (val
);
2517 /* A coding system of this category is always ASCII compatible. */
2518 src
+= coding
->head_ascii
;
2520 while (rejected
!= CATEGORY_MASK_ISO
)
2527 if (inhibit_iso_escape_detection
)
2529 single_shifting
= 0;
2531 if (c
>= '(' && c
<= '/')
2533 /* Designation sequence for a charset of dimension 1. */
2535 if (c1
< ' ' || c1
>= 0x80
2536 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2537 /* Invalid designation sequence. Just ignore. */
2542 /* Designation sequence for a charset of dimension 2. */
2544 if (c
>= '@' && c
<= 'B')
2545 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2546 id
= iso_charset_table
[1][0][c
];
2547 else if (c
>= '(' && c
<= '/')
2550 if (c1
< ' ' || c1
>= 0x80
2551 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2552 /* Invalid designation sequence. Just ignore. */
2556 /* Invalid designation sequence. Just ignore it. */
2559 else if (c
== 'N' || c
== 'O')
2561 /* ESC <Fe> for SS2 or SS3. */
2562 single_shifting
= 1;
2563 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2566 else if (c
>= '0' && c
<= '4')
2568 /* ESC <Fp> for start/end composition. */
2569 found
|= CATEGORY_MASK_ISO
;
2574 /* Invalid escape sequence. Just ignore it. */
2578 /* We found a valid designation sequence for CHARSET. */
2579 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2580 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2582 found
|= CATEGORY_MASK_ISO_7
;
2584 rejected
|= CATEGORY_MASK_ISO_7
;
2585 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2587 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2589 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2590 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2592 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2594 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2595 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2597 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2599 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2604 /* Locking shift out/in. */
2605 if (inhibit_iso_escape_detection
)
2607 single_shifting
= 0;
2608 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2609 found
|= CATEGORY_MASK_ISO_ELSE
;
2613 /* Control sequence introducer. */
2614 single_shifting
= 0;
2615 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2616 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2617 goto check_extra_latin
;
2622 if (inhibit_iso_escape_detection
)
2624 single_shifting
= 0;
2625 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2626 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2627 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2628 found
|= CATEGORY_MASK_ISO_8_1
, single_shifting
= 1;
2629 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2630 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2631 found
|= CATEGORY_MASK_ISO_8_2
, single_shifting
= 1;
2632 if (single_shifting
)
2634 goto check_extra_latin
;
2641 single_shifting
= 0;
2646 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2647 found
|= CATEGORY_MASK_ISO_8_1
;
2648 /* Check the length of succeeding codes of the range
2649 0xA0..0xFF. If the byte length is even, we include
2650 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2651 only when we are not single shifting. */
2652 if (! single_shifting
2653 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2656 while (src
< src_end
)
2664 if (i
& 1 && src
< src_end
)
2665 rejected
|= CATEGORY_MASK_ISO_8_2
;
2667 found
|= CATEGORY_MASK_ISO_8_2
;
2672 single_shifting
= 0;
2673 if (! VECTORP (Vlatin_extra_code_table
)
2674 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2676 rejected
= CATEGORY_MASK_ISO
;
2679 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2680 & CODING_ISO_FLAG_LATIN_EXTRA
)
2681 found
|= CATEGORY_MASK_ISO_8_1
;
2683 rejected
|= CATEGORY_MASK_ISO_8_1
;
2684 rejected
|= CATEGORY_MASK_ISO_8_2
;
2687 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
2691 detect_info
->rejected
|= rejected
;
2692 detect_info
->found
|= (found
& ~rejected
);
2697 /* Set designation state into CODING. */
2698 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2702 if (final < '0' || final >= 128 \
2703 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2704 || !SAFE_CHARSET_P (coding, id)) \
2706 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2707 goto invalid_code; \
2709 prev = CODING_ISO_DESIGNATION (coding, reg); \
2710 if (id == charset_jisx0201_roman) \
2712 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2713 id = charset_ascii; \
2715 else if (id == charset_jisx0208_1978) \
2717 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2718 id = charset_jisx0208; \
2720 CODING_ISO_DESIGNATION (coding, reg) = id; \
2721 /* If there was an invalid designation to REG previously, and this \
2722 designation is ASCII to REG, we should keep this designation \
2724 if (prev == -2 && id == charset_ascii) \
2725 goto invalid_code; \
2729 #define MAYBE_FINISH_COMPOSITION() \
2732 if (composition_state == COMPOSING_NO) \
2734 /* It is assured that we have enough room for producing \
2735 characters stored in the table `components'. */ \
2736 if (charbuf + component_idx > charbuf_end) \
2737 goto no_more_source; \
2738 composition_state = COMPOSING_NO; \
2739 if (method == COMPOSITION_RELATIVE \
2740 || method == COMPOSITION_WITH_ALTCHARS) \
2742 for (i = 0; i < component_idx; i++) \
2743 *charbuf++ = components[i]; \
2744 char_offset += component_idx; \
2748 for (i = 0; i < component_idx; i += 2) \
2749 *charbuf++ = components[i]; \
2750 char_offset += (component_idx / 2) + 1; \
2755 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2756 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2757 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2758 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2759 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2762 #define DECODE_COMPOSITION_START(c1) \
2765 && composition_state == COMPOSING_COMPONENT_RULE) \
2767 component_len = component_idx; \
2768 composition_state = COMPOSING_CHAR; \
2772 const unsigned char *p; \
2774 MAYBE_FINISH_COMPOSITION (); \
2775 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2776 goto no_more_source; \
2777 for (p = src; p < src_end - 1; p++) \
2778 if (*p == ISO_CODE_ESC && p[1] == '1') \
2780 if (p == src_end - 1) \
2782 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2783 goto invalid_code; \
2784 goto no_more_source; \
2787 /* This is surely the start of a composition. */ \
2788 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2789 : c1 == '2' ? COMPOSITION_WITH_RULE \
2790 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2791 : COMPOSITION_WITH_RULE_ALTCHARS); \
2792 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2793 : COMPOSING_COMPONENT_CHAR); \
2794 component_idx = component_len = 0; \
2799 /* Handle composition end sequence ESC 1. */
2801 #define DECODE_COMPOSITION_END() \
2803 int nchars = (component_len > 0 ? component_idx - component_len \
2804 : method == COMPOSITION_RELATIVE ? component_idx \
2805 : (component_idx + 1) / 2); \
2807 int *saved_charbuf = charbuf; \
2808 int from = char_offset; \
2809 int to = from + nchars; \
2811 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
2812 if (method != COMPOSITION_RELATIVE) \
2814 if (component_len == 0) \
2815 for (i = 0; i < component_idx; i++) \
2816 *charbuf++ = components[i]; \
2818 for (i = 0; i < component_len; i++) \
2819 *charbuf++ = components[i]; \
2820 *saved_charbuf = saved_charbuf - charbuf; \
2822 if (method == COMPOSITION_WITH_RULE) \
2823 for (i = 0; i < component_idx; i += 2, char_offset++) \
2824 *charbuf++ = components[i]; \
2826 for (i = component_len; i < component_idx; i++, char_offset++) \
2827 *charbuf++ = components[i]; \
2828 coding->annotated = 1; \
2829 composition_state = COMPOSING_NO; \
2833 /* Decode a composition rule from the byte C1 (and maybe one more byte
2834 from SRC) and store one encoded composition rule in
2835 coding->cmp_data. */
2837 #define DECODE_COMPOSITION_RULE(c1) \
2840 if (c1 < 81) /* old format (before ver.21) */ \
2842 int gref = (c1) / 9; \
2843 int nref = (c1) % 9; \
2844 if (gref == 4) gref = 10; \
2845 if (nref == 4) nref = 10; \
2846 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2848 else if (c1 < 93) /* new format (after ver.21) */ \
2850 ONE_MORE_BYTE (c2); \
2851 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2858 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2861 decode_coding_iso_2022 (coding
)
2862 struct coding_system
*coding
;
2864 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2865 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2866 const unsigned char *src_base
;
2867 int *charbuf
= coding
->charbuf
;
2869 = charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2870 int consumed_chars
= 0, consumed_chars_base
;
2871 int multibytep
= coding
->src_multibyte
;
2872 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2873 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2874 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2875 struct charset
*charset
;
2877 /* For handling composition sequence. */
2878 #define COMPOSING_NO 0
2879 #define COMPOSING_CHAR 1
2880 #define COMPOSING_RULE 2
2881 #define COMPOSING_COMPONENT_CHAR 3
2882 #define COMPOSING_COMPONENT_RULE 4
2884 int composition_state
= COMPOSING_NO
;
2885 enum composition_method method
;
2886 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2889 Lisp_Object attrs
, charset_list
;
2890 int char_offset
= coding
->produced_char
;
2891 int last_offset
= char_offset
;
2892 int last_id
= charset_ascii
;
2894 CODING_GET_INFO (coding
, attrs
, charset_list
);
2895 setup_iso_safe_charsets (attrs
);
2902 consumed_chars_base
= consumed_chars
;
2904 if (charbuf
>= charbuf_end
)
2911 /* We produce at most one character. */
2912 switch (iso_code_class
[c1
])
2914 case ISO_0x20_or_0x7F
:
2915 if (composition_state
!= COMPOSING_NO
)
2917 if (composition_state
== COMPOSING_RULE
2918 || composition_state
== COMPOSING_COMPONENT_RULE
)
2920 DECODE_COMPOSITION_RULE (c1
);
2921 components
[component_idx
++] = c1
;
2922 composition_state
--;
2926 if (charset_id_0
< 0
2927 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2928 /* This is SPACE or DEL. */
2929 charset
= CHARSET_FROM_ID (charset_ascii
);
2931 charset
= CHARSET_FROM_ID (charset_id_0
);
2934 case ISO_graphic_plane_0
:
2935 if (composition_state
!= COMPOSING_NO
)
2937 if (composition_state
== COMPOSING_RULE
2938 || composition_state
== COMPOSING_COMPONENT_RULE
)
2940 DECODE_COMPOSITION_RULE (c1
);
2941 components
[component_idx
++] = c1
;
2942 composition_state
--;
2946 charset
= CHARSET_FROM_ID (charset_id_0
);
2949 case ISO_0xA0_or_0xFF
:
2950 if (charset_id_1
< 0
2951 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2952 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2954 /* This is a graphic character, we fall down ... */
2956 case ISO_graphic_plane_1
:
2957 if (charset_id_1
< 0)
2959 charset
= CHARSET_FROM_ID (charset_id_1
);
2963 MAYBE_FINISH_COMPOSITION ();
2964 charset
= CHARSET_FROM_ID (charset_ascii
);
2968 MAYBE_FINISH_COMPOSITION ();
2972 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2973 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2975 CODING_ISO_INVOCATION (coding
, 0) = 1;
2976 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2980 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2982 CODING_ISO_INVOCATION (coding
, 0) = 0;
2983 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2986 case ISO_single_shift_2_7
:
2987 case ISO_single_shift_2
:
2988 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2990 /* SS2 is handled as an escape sequence of ESC 'N' */
2992 goto label_escape_sequence
;
2994 case ISO_single_shift_3
:
2995 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2997 /* SS3 is handled as an escape sequence of ESC 'O' */
2999 goto label_escape_sequence
;
3001 case ISO_control_sequence_introducer
:
3002 /* CSI is handled as an escape sequence of ESC '[' ... */
3004 goto label_escape_sequence
;
3008 label_escape_sequence
:
3009 /* Escape sequences handled here are invocation,
3010 designation, direction specification, and character
3011 composition specification. */
3014 case '&': /* revision of following character set */
3016 if (!(c1
>= '@' && c1
<= '~'))
3019 if (c1
!= ISO_CODE_ESC
)
3022 goto label_escape_sequence
;
3024 case '$': /* designation of 2-byte character set */
3025 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3028 if (c1
>= '@' && c1
<= 'B')
3029 { /* designation of JISX0208.1978, GB2312.1980,
3031 DECODE_DESIGNATION (0, 2, 0, c1
);
3033 else if (c1
>= 0x28 && c1
<= 0x2B)
3034 { /* designation of DIMENSION2_CHARS94 character set */
3036 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
3038 else if (c1
>= 0x2C && c1
<= 0x2F)
3039 { /* designation of DIMENSION2_CHARS96 character set */
3041 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
3045 /* We must update these variables now. */
3046 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3047 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3050 case 'n': /* invocation of locking-shift-2 */
3051 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3052 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3054 CODING_ISO_INVOCATION (coding
, 0) = 2;
3055 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3058 case 'o': /* invocation of locking-shift-3 */
3059 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3060 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3062 CODING_ISO_INVOCATION (coding
, 0) = 3;
3063 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3066 case 'N': /* invocation of single-shift-2 */
3067 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3068 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3070 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
3072 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3076 case 'O': /* invocation of single-shift-3 */
3077 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3078 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3080 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
3082 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3086 case '0': case '2': case '3': case '4': /* start composition */
3087 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3089 DECODE_COMPOSITION_START (c1
);
3092 case '1': /* end composition */
3093 if (composition_state
== COMPOSING_NO
)
3095 DECODE_COMPOSITION_END ();
3098 case '[': /* specification of direction */
3099 if (! CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
)
3101 /* For the moment, nested direction is not supported.
3102 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3103 left-to-right, and nonzero means right-to-left. */
3107 case ']': /* end of the current direction */
3108 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3110 case '0': /* end of the current direction */
3111 case '1': /* start of left-to-right direction */
3114 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3119 case '2': /* start of right-to-left direction */
3122 coding
->mode
|= CODING_MODE_DIRECTION
;
3136 /* CTEXT extended segment:
3137 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3138 We keep these bytes as is for the moment.
3139 They may be decoded by post-read-conversion. */
3143 ONE_MORE_BYTE (dim
);
3146 size
= ((M
- 128) * 128) + (L
- 128);
3147 if (charbuf
+ 8 + size
> charbuf_end
)
3149 *charbuf
++ = ISO_CODE_ESC
;
3153 *charbuf
++ = BYTE8_TO_CHAR (M
);
3154 *charbuf
++ = BYTE8_TO_CHAR (L
);
3158 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3163 /* XFree86 extension for embedding UTF-8 in CTEXT:
3164 ESC % G --UTF-8-BYTES-- ESC % @
3165 We keep these bytes as is for the moment.
3166 They may be decoded by post-read-conversion. */
3169 if (p
+ 6 > charbuf_end
)
3171 *p
++ = ISO_CODE_ESC
;
3174 while (p
< charbuf_end
)
3177 if (c1
== ISO_CODE_ESC
3178 && src
+ 1 < src_end
3182 *p
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3184 if (p
+ 3 > charbuf_end
)
3186 *p
++ = ISO_CODE_ESC
;
3197 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3199 if (c1
>= 0x28 && c1
<= 0x2B)
3200 { /* designation of DIMENSION1_CHARS94 character set */
3202 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
3204 else if (c1
>= 0x2C && c1
<= 0x2F)
3205 { /* designation of DIMENSION1_CHARS96 character set */
3207 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
3211 /* We must update these variables now. */
3212 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3213 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3218 if (charset
->id
!= charset_ascii
3219 && last_id
!= charset
->id
)
3221 if (last_id
!= charset_ascii
)
3222 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3223 last_id
= charset
->id
;
3224 last_offset
= char_offset
;
3227 /* Now we know CHARSET and 1st position code C1 of a character.
3228 Produce a decoded character while getting 2nd position code
3231 if (CHARSET_DIMENSION (charset
) > 1)
3234 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3235 /* C2 is not in a valid range. */
3237 c1
= (c1
<< 8) | (c2
& 0x7F);
3238 if (CHARSET_DIMENSION (charset
) > 2)
3241 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3242 /* C2 is not in a valid range. */
3244 c1
= (c1
<< 8) | (c2
& 0x7F);
3248 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3251 MAYBE_FINISH_COMPOSITION ();
3252 for (; src_base
< src
; src_base
++, char_offset
++)
3254 if (ASCII_BYTE_P (*src_base
))
3255 *charbuf
++ = *src_base
;
3257 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3260 else if (composition_state
== COMPOSING_NO
)
3267 components
[component_idx
++] = c
;
3268 if (method
== COMPOSITION_WITH_RULE
3269 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3270 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3271 composition_state
++;
3276 MAYBE_FINISH_COMPOSITION ();
3278 consumed_chars
= consumed_chars_base
;
3280 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3290 if (last_id
!= charset_ascii
)
3291 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3292 coding
->consumed_char
+= consumed_chars_base
;
3293 coding
->consumed
= src_base
- coding
->source
;
3294 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3298 /* ISO2022 encoding stuff. */
3301 It is not enough to say just "ISO2022" on encoding, we have to
3302 specify more details. In Emacs, each coding system of ISO2022
3303 variant has the following specifications:
3304 1. Initial designation to G0 thru G3.
3305 2. Allows short-form designation?
3306 3. ASCII should be designated to G0 before control characters?
3307 4. ASCII should be designated to G0 at end of line?
3308 5. 7-bit environment or 8-bit environment?
3309 6. Use locking-shift?
3310 7. Use Single-shift?
3311 And the following two are only for Japanese:
3312 8. Use ASCII in place of JIS0201-1976-Roman?
3313 9. Use JISX0208-1983 in place of JISX0208-1978?
3314 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3315 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3319 /* Produce codes (escape sequence) for designating CHARSET to graphic
3320 register REG at DST, and increment DST. If <final-char> of CHARSET is
3321 '@', 'A', or 'B' and the coding system CODING allows, produce
3322 designation sequence of short-form. */
3324 #define ENCODE_DESIGNATION(charset, reg, coding) \
3326 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3327 char *intermediate_char_94 = "()*+"; \
3328 char *intermediate_char_96 = ",-./"; \
3329 int revision = -1; \
3332 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3333 revision = CHARSET_ISO_REVISION (charset); \
3335 if (revision >= 0) \
3337 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3338 EMIT_ONE_BYTE ('@' + revision); \
3340 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3341 if (CHARSET_DIMENSION (charset) == 1) \
3343 if (! CHARSET_ISO_CHARS_96 (charset)) \
3344 c = intermediate_char_94[reg]; \
3346 c = intermediate_char_96[reg]; \
3347 EMIT_ONE_ASCII_BYTE (c); \
3351 EMIT_ONE_ASCII_BYTE ('$'); \
3352 if (! CHARSET_ISO_CHARS_96 (charset)) \
3354 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3356 || final_char < '@' || final_char > 'B') \
3357 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3360 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3362 EMIT_ONE_ASCII_BYTE (final_char); \
3364 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3368 /* The following two macros produce codes (control character or escape
3369 sequence) for ISO2022 single-shift functions (single-shift-2 and
3372 #define ENCODE_SINGLE_SHIFT_2 \
3374 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3375 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3377 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3378 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3382 #define ENCODE_SINGLE_SHIFT_3 \
3384 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3385 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3387 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3388 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3392 /* The following four macros produce codes (control character or
3393 escape sequence) for ISO2022 locking-shift functions (shift-in,
3394 shift-out, locking-shift-2, and locking-shift-3). */
3396 #define ENCODE_SHIFT_IN \
3398 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3399 CODING_ISO_INVOCATION (coding, 0) = 0; \
3403 #define ENCODE_SHIFT_OUT \
3405 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3406 CODING_ISO_INVOCATION (coding, 0) = 1; \
3410 #define ENCODE_LOCKING_SHIFT_2 \
3412 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3413 CODING_ISO_INVOCATION (coding, 0) = 2; \
/* Emit locking-shift-3 and record that graphic register 3 is now
   invoked to graphic plane 0.  The escape sequence for
   locking-shift-3 is ESC 'o'; ESC 'n' is locking-shift-2, so
   emitting 'n' here would make a decoder invoke register 2 while
   this encoder believes register 3 is active.  */
#define ENCODE_LOCKING_SHIFT_3 \
  do { \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o'); \
    CODING_ISO_INVOCATION (coding, 0) = 3; \
  } while (0)
3424 /* Produce codes for a DIMENSION1 character whose character set is
3425 CHARSET and whose position-code is C1. Designation and invocation
3426 sequences are also produced in advance if necessary. */
3428 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3430 int id = CHARSET_ID (charset); \
3432 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3433 && id == charset_ascii) \
3435 id = charset_jisx0201_roman; \
3436 charset = CHARSET_FROM_ID (id); \
3439 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3441 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3442 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3444 EMIT_ONE_BYTE (c1 | 0x80); \
3445 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3448 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3450 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3453 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3455 EMIT_ONE_BYTE (c1 | 0x80); \
3459 /* Since CHARSET is not yet invoked to any graphic planes, we \
3460 must invoke it, or, at first, designate it to some graphic \
3461 register. Then repeat the loop to actually produce the \
3463 dst = encode_invocation_designation (charset, coding, dst, \
3468 /* Produce codes for a DIMENSION2 character whose character set is
3469 CHARSET and whose position-codes are C1 and C2. Designation and
3470 invocation codes are also produced in advance if necessary. */
3472 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3474 int id = CHARSET_ID (charset); \
3476 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3477 && id == charset_jisx0208) \
3479 id = charset_jisx0208_1978; \
3480 charset = CHARSET_FROM_ID (id); \
3483 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3485 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3486 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3488 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3489 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3492 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3494 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3497 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3499 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3503 /* Since CHARSET is not yet invoked to any graphic planes, we \
3504 must invoke it, or, at first, designate it to some graphic \
3505 register. Then repeat the loop to actually produce the \
3507 dst = encode_invocation_designation (charset, coding, dst, \
3512 #define ENCODE_ISO_CHARACTER(charset, c) \
3514 int code = ENCODE_CHAR ((charset),(c)); \
3516 if (CHARSET_DIMENSION (charset) == 1) \
3517 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3519 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3523 /* Produce designation and invocation codes at a place pointed by DST
3524 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3528 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3529 struct charset
*charset
;
3530 struct coding_system
*coding
;
3534 int multibytep
= coding
->dst_multibyte
;
3535 int produced_chars
= *p_nchars
;
3536 int reg
; /* graphic register number */
3537 int id
= CHARSET_ID (charset
);
3539 /* At first, check designations. */
3540 for (reg
= 0; reg
< 4; reg
++)
3541 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3546 /* CHARSET is not yet designated to any graphic registers. */
3547 /* At first check the requested designation. */
3548 reg
= CODING_ISO_REQUEST (coding
, id
);
3550 /* Since CHARSET requests no special designation, designate it
3551 to graphic register 0. */
3554 ENCODE_DESIGNATION (charset
, reg
, coding
);
3557 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3558 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3560 /* Since the graphic register REG is not invoked to any graphic
3561 planes, invoke it to graphic plane 0. */
3564 case 0: /* graphic register 0 */
3568 case 1: /* graphic register 1 */
3572 case 2: /* graphic register 2 */
3573 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3574 ENCODE_SINGLE_SHIFT_2
;
3576 ENCODE_LOCKING_SHIFT_2
;
3579 case 3: /* graphic register 3 */
3580 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3581 ENCODE_SINGLE_SHIFT_3
;
3583 ENCODE_LOCKING_SHIFT_3
;
3588 *p_nchars
= produced_chars
;
3592 /* The following three macros produce codes for indicating direction
/* Emit a control sequence introducer: the two-byte form ESC '[' when
   the coding system is a 7-bit environment, otherwise the single
   byte CSI.  The 7-bit property is one bit among several in the flag
   word, so it is tested with `&'; comparing the whole word with `=='
   would select the 7-bit form only when no other flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
  do { \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
    else \
      EMIT_ONE_BYTE (ISO_CODE_CSI); \
  } while (0)
/* Emit the direction sequence CSI '2' ']', which starts
   right-to-left text.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an
   object-like macro, so it is used without an argument list;
   following it with `(dst)' cannot expand to a valid statement.  */
#define ENCODE_DIRECTION_R2L() \
  do { \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER; \
    EMIT_TWO_ASCII_BYTES ('2', ']'); \
  } while (0)
/* Emit the direction sequence CSI '0' ']', which ends the current
   direction.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like
   macro, so it is used without an argument list; following it with
   `(dst)' cannot expand to a valid statement.  */
#define ENCODE_DIRECTION_L2R() \
  do { \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER; \
    EMIT_TWO_ASCII_BYTES ('0', ']'); \
  } while (0)
3617 /* Produce codes for designation and invocation to reset the graphic
3618 planes and registers to initial state. */
3619 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3622 struct charset *charset; \
3624 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3626 for (reg = 0; reg < 4; reg++) \
3627 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3628 && (CODING_ISO_DESIGNATION (coding, reg) \
3629 != CODING_ISO_INITIAL (coding, reg))) \
3631 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3632 ENCODE_DESIGNATION (charset, reg, coding); \
3637 /* Produce designation sequences of charsets in the line started from
3638 SRC to a place pointed by DST, and return updated DST.
3640 If the current block ends before any end-of-line, we may fail to
3641 find all the necessary designations. */
3643 static unsigned char *
3644 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3645 struct coding_system
*coding
;
3646 int *charbuf
, *charbuf_end
;
3649 struct charset
*charset
;
3650 /* Table of charsets to be designated to each graphic register. */
3652 int c
, found
= 0, reg
;
3653 int produced_chars
= 0;
3654 int multibytep
= coding
->dst_multibyte
;
3656 Lisp_Object charset_list
;
3658 attrs
= CODING_ID_ATTRS (coding
->id
);
3659 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3660 if (EQ (charset_list
, Qiso_2022
))
3661 charset_list
= Viso_2022_charset_list
;
3663 for (reg
= 0; reg
< 4; reg
++)
3673 charset
= char_charset (c
, charset_list
, NULL
);
3674 id
= CHARSET_ID (charset
);
3675 reg
= CODING_ISO_REQUEST (coding
, id
);
3676 if (reg
>= 0 && r
[reg
] < 0)
3685 for (reg
= 0; reg
< 4; reg
++)
3687 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3688 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3694 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3697 encode_coding_iso_2022 (coding
)
3698 struct coding_system
*coding
;
3700 int multibytep
= coding
->dst_multibyte
;
3701 int *charbuf
= coding
->charbuf
;
3702 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3703 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3704 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3707 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3708 && CODING_ISO_BOL (coding
));
3709 int produced_chars
= 0;
3710 Lisp_Object attrs
, eol_type
, charset_list
;
3711 int ascii_compatible
;
3713 int preferred_charset_id
= -1;
3715 CODING_GET_INFO (coding
, attrs
, charset_list
);
3716 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
3717 if (VECTORP (eol_type
))
3720 setup_iso_safe_charsets (attrs
);
3721 /* Charset list may have been changed. */
3722 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
); \
3723 coding
->safe_charsets
= (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs
));
3725 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3727 while (charbuf
< charbuf_end
)
3729 ASSURE_DESTINATION (safe_room
);
3731 if (bol_designation
)
3733 unsigned char *dst_prev
= dst
;
3735 /* We have to produce designation sequences if any now. */
3736 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3737 bol_designation
= 0;
3738 /* We are sure that designation sequences are all ASCII bytes. */
3739 produced_chars
+= dst
- dst_prev
;
3746 /* Handle an annotation. */
3749 case CODING_ANNOTATE_COMPOSITION_MASK
:
3750 /* Not yet implemented. */
3752 case CODING_ANNOTATE_CHARSET_MASK
:
3753 preferred_charset_id
= charbuf
[3];
3754 if (preferred_charset_id
>= 0
3755 && NILP (Fmemq (make_number (preferred_charset_id
),
3757 preferred_charset_id
= -1;
3766 /* Now encode the character C. */
3767 if (c
< 0x20 || c
== 0x7F)
3770 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3772 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3773 ENCODE_RESET_PLANE_AND_REGISTER ();
3774 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3778 for (i
= 0; i
< 4; i
++)
3779 CODING_ISO_DESIGNATION (coding
, i
)
3780 = CODING_ISO_INITIAL (coding
, i
);
3783 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3785 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3786 ENCODE_RESET_PLANE_AND_REGISTER ();
3787 EMIT_ONE_ASCII_BYTE (c
);
3789 else if (ASCII_CHAR_P (c
))
3791 if (ascii_compatible
)
3792 EMIT_ONE_ASCII_BYTE (c
);
3795 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3796 ENCODE_ISO_CHARACTER (charset
, c
);
3799 else if (CHAR_BYTE8_P (c
))
3801 c
= CHAR_TO_BYTE8 (c
);
3806 struct charset
*charset
;
3808 if (preferred_charset_id
>= 0)
3810 charset
= CHARSET_FROM_ID (preferred_charset_id
);
3811 if (! CHAR_CHARSET_P (c
, charset
))
3812 charset
= char_charset (c
, charset_list
, NULL
);
3815 charset
= char_charset (c
, charset_list
, NULL
);
3818 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3820 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3821 charset
= CHARSET_FROM_ID (charset_ascii
);
3825 c
= coding
->default_char
;
3826 charset
= char_charset (c
, charset_list
, NULL
);
3829 ENCODE_ISO_CHARACTER (charset
, c
);
3833 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3834 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3836 ASSURE_DESTINATION (safe_room
);
3837 ENCODE_RESET_PLANE_AND_REGISTER ();
3839 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
3840 CODING_ISO_BOL (coding
) = bol_designation
;
3841 coding
->produced_char
+= produced_chars
;
3842 coding
->produced
= dst
- coding
->destination
;
3847 /*** 8. SJIS and BIG5 handlers ***/
3849 /* Although SJIS and BIG5 are not ISO coding systems, they are used
3850 quite widely. So, for the moment, Emacs supports them in the bare
3851 C code. But, in the future, they may be supported only by CCL. */
3853 /* SJIS is a coding system encoding three character sets: ASCII, right
3854 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3855 as is. A character of charset katakana-jisx0201 is encoded by
3856 "position-code + 0x80". A character of charset japanese-jisx0208
3857 is encoded in 2 bytes, but the two position-codes are divided and shifted
3858 so that they fit in the range below.
3860 --- CODE RANGE of SJIS ---
3861 (character set) (range)
3863 KATAKANA-JISX0201 0xA0 .. 0xDF
3864 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3865 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3866 -------------------------------
3870 /* BIG5 is a coding system encoding two character sets: ASCII and
3871 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3872 character set and is encoded in two bytes.
3874 --- CODE RANGE of BIG5 ---
3875 (character set) (range)
3877 Big5 (1st byte) 0xA1 .. 0xFE
3878 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3879 --------------------------
3883 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3884 Check if a text is encoded in SJIS. If it is, return
3885 CATEGORY_MASK_SJIS, else return 0. */
3888 detect_coding_sjis (coding
, detect_info
)
3889 struct coding_system
*coding
;
3890 struct coding_detection_info
*detect_info
;
3892 const unsigned char *src
= coding
->source
, *src_base
;
3893 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3894 int multibytep
= coding
->src_multibyte
;
3895 int consumed_chars
= 0;
3899 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3900 /* A coding system of this category is always ASCII compatible. */
3901 src
+= coding
->head_ascii
;
3909 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3912 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3914 found
= CATEGORY_MASK_SJIS
;
3916 else if (c
>= 0xA0 && c
< 0xE0)
3917 found
= CATEGORY_MASK_SJIS
;
3921 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3925 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3927 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3930 detect_info
->found
|= found
;
3934 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3935 Check if a text is encoded in BIG5. If it is, return
3936 CATEGORY_MASK_BIG5, else return 0. */
3939 detect_coding_big5 (coding
, detect_info
)
3940 struct coding_system
*coding
;
3941 struct coding_detection_info
*detect_info
;
3943 const unsigned char *src
= coding
->source
, *src_base
;
3944 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3945 int multibytep
= coding
->src_multibyte
;
3946 int consumed_chars
= 0;
3950 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
3951 /* A coding system of this category is always ASCII compatible. */
3952 src
+= coding
->head_ascii
;
3963 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3965 found
= CATEGORY_MASK_BIG5
;
3970 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3974 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3976 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3979 detect_info
->found
|= found
;
3983 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3984 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
3987 decode_coding_sjis (coding
)
3988 struct coding_system
*coding
;
3990 const unsigned char *src
= coding
->source
+ coding
->consumed
;
3991 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3992 const unsigned char *src_base
;
3993 int *charbuf
= coding
->charbuf
;
3994 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
3995 int consumed_chars
= 0, consumed_chars_base
;
3996 int multibytep
= coding
->src_multibyte
;
3997 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3998 struct charset
*charset_kanji2
;
3999 Lisp_Object attrs
, charset_list
, val
;
4000 int char_offset
= coding
->produced_char
;
4001 int last_offset
= char_offset
;
4002 int last_id
= charset_ascii
;
4004 CODING_GET_INFO (coding
, attrs
, charset_list
);
4007 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4008 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4009 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4010 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4015 struct charset
*charset
;
4018 consumed_chars_base
= consumed_chars
;
4020 if (charbuf
>= charbuf_end
)
4027 charset
= charset_roman
;
4028 else if (c
== 0x80 || c
== 0xA0)
4030 else if (c
>= 0xA1 && c
<= 0xDF)
4032 /* SJIS -> JISX0201-Kana */
4034 charset
= charset_kana
;
4038 /* SJIS -> JISX0208 */
4040 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4044 charset
= charset_kanji
;
4046 else if (c
<= 0xFC && charset_kanji2
)
4048 /* SJIS -> JISX0213-2 */
4050 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4054 charset
= charset_kanji2
;
4058 if (charset
->id
!= charset_ascii
4059 && last_id
!= charset
->id
)
4061 if (last_id
!= charset_ascii
)
4062 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4063 last_id
= charset
->id
;
4064 last_offset
= char_offset
;
4066 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4073 consumed_chars
= consumed_chars_base
;
4075 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4081 if (last_id
!= charset_ascii
)
4082 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4083 coding
->consumed_char
+= consumed_chars_base
;
4084 coding
->consumed
= src_base
- coding
->source
;
4085 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4089 decode_coding_big5 (coding
)
4090 struct coding_system
*coding
;
4092 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4093 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4094 const unsigned char *src_base
;
4095 int *charbuf
= coding
->charbuf
;
4096 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4097 int consumed_chars
= 0, consumed_chars_base
;
4098 int multibytep
= coding
->src_multibyte
;
4099 struct charset
*charset_roman
, *charset_big5
;
4100 Lisp_Object attrs
, charset_list
, val
;
4101 int char_offset
= coding
->produced_char
;
4102 int last_offset
= char_offset
;
4103 int last_id
= charset_ascii
;
4105 CODING_GET_INFO (coding
, attrs
, charset_list
);
4107 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4108 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4113 struct charset
*charset
;
4116 consumed_chars_base
= consumed_chars
;
4118 if (charbuf
>= charbuf_end
)
4126 charset
= charset_roman
;
4130 if (c
< 0xA1 || c
> 0xFE)
4133 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4136 charset
= charset_big5
;
4138 if (charset
->id
!= charset_ascii
4139 && last_id
!= charset
->id
)
4141 if (last_id
!= charset_ascii
)
4142 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4143 last_id
= charset
->id
;
4144 last_offset
= char_offset
;
4146 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4153 consumed_chars
= consumed_chars_base
;
4155 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4161 if (last_id
!= charset_ascii
)
4162 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4163 coding
->consumed_char
+= consumed_chars_base
;
4164 coding
->consumed
= src_base
- coding
->source
;
4165 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4168 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4169 This function can encode charsets `ascii', `katakana-jisx0201',
4170 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4171 are sure that all these charsets are registered as official charset
4172 (i.e. do not have extended leading-codes). Characters of other
4173 charsets are produced without any encoding. If SJIS_P is 1, encode
4174 SJIS text, else encode BIG5 text. */
4177 encode_coding_sjis (coding
)
4178 struct coding_system
*coding
;
4180 int multibytep
= coding
->dst_multibyte
;
4181 int *charbuf
= coding
->charbuf
;
4182 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4183 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4184 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4186 int produced_chars
= 0;
4187 Lisp_Object attrs
, charset_list
, val
;
4188 int ascii_compatible
;
4189 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4190 struct charset
*charset_kanji2
;
4193 CODING_GET_INFO (coding
, attrs
, charset_list
);
4195 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4196 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4197 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4198 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4200 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4202 while (charbuf
< charbuf_end
)
4204 ASSURE_DESTINATION (safe_room
);
4206 /* Now encode the character C. */
4207 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4208 EMIT_ONE_ASCII_BYTE (c
);
4209 else if (CHAR_BYTE8_P (c
))
4211 c
= CHAR_TO_BYTE8 (c
);
4217 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4221 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4223 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4224 charset
= CHARSET_FROM_ID (charset_ascii
);
4228 c
= coding
->default_char
;
4229 charset
= char_charset (c
, charset_list
, &code
);
4232 if (code
== CHARSET_INVALID_CODE (charset
))
4234 if (charset
== charset_kanji
)
4238 c1
= code
>> 8, c2
= code
& 0xFF;
4239 EMIT_TWO_BYTES (c1
, c2
);
4241 else if (charset
== charset_kana
)
4242 EMIT_ONE_BYTE (code
| 0x80);
4243 else if (charset_kanji2
&& charset
== charset_kanji2
)
4248 if (c1
== 0x21 || (c1
>= 0x23 && c1
< 0x25)
4249 || (c1
>= 0x2C && c1
<= 0x2F) || c1
>= 0x6E)
4251 JIS_TO_SJIS2 (code
);
4252 c1
= code
>> 8, c2
= code
& 0xFF;
4253 EMIT_TWO_BYTES (c1
, c2
);
4256 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4259 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4262 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4263 coding
->produced_char
+= produced_chars
;
4264 coding
->produced
= dst
- coding
->destination
;
4269 encode_coding_big5 (coding
)
4270 struct coding_system
*coding
;
4272 int multibytep
= coding
->dst_multibyte
;
4273 int *charbuf
= coding
->charbuf
;
4274 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4275 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4276 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4278 int produced_chars
= 0;
4279 Lisp_Object attrs
, charset_list
, val
;
4280 int ascii_compatible
;
4281 struct charset
*charset_roman
, *charset_big5
;
4284 CODING_GET_INFO (coding
, attrs
, charset_list
);
4286 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4287 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4288 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4290 while (charbuf
< charbuf_end
)
4292 ASSURE_DESTINATION (safe_room
);
4294 /* Now encode the character C. */
4295 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4296 EMIT_ONE_ASCII_BYTE (c
);
4297 else if (CHAR_BYTE8_P (c
))
4299 c
= CHAR_TO_BYTE8 (c
);
4305 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4309 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4311 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4312 charset
= CHARSET_FROM_ID (charset_ascii
);
4316 c
= coding
->default_char
;
4317 charset
= char_charset (c
, charset_list
, &code
);
4320 if (code
== CHARSET_INVALID_CODE (charset
))
4322 if (charset
== charset_big5
)
4326 c1
= code
>> 8, c2
= code
& 0xFF;
4327 EMIT_TWO_BYTES (c1
, c2
);
4330 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4333 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4334 coding
->produced_char
+= produced_chars
;
4335 coding
->produced
= dst
- coding
->destination
;
4340 /*** 9. CCL handlers ***/
4342 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4343 Check if a text is encoded in a coding system of which
4344 encoder/decoder are written in CCL program. If it is, return
4345 CATEGORY_MASK_CCL, else return 0. */
4348 detect_coding_ccl (coding
, detect_info
)
4349 struct coding_system
*coding
;
4350 struct coding_detection_info
*detect_info
;
4352 const unsigned char *src
= coding
->source
, *src_base
;
4353 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4354 int multibytep
= coding
->src_multibyte
;
4355 int consumed_chars
= 0;
4357 unsigned char *valids
= CODING_CCL_VALIDS (coding
);
4358 int head_ascii
= coding
->head_ascii
;
4361 detect_info
->checked
|= CATEGORY_MASK_CCL
;
4363 coding
= &coding_categories
[coding_category_ccl
];
4364 attrs
= CODING_ID_ATTRS (coding
->id
);
4365 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4374 if (c
< 0 || ! valids
[c
])
4376 if ((valids
[c
] > 1))
4377 found
= CATEGORY_MASK_CCL
;
4379 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4383 detect_info
->found
|= found
;
4388 decode_coding_ccl (coding
)
4389 struct coding_system
*coding
;
4391 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4392 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4393 int *charbuf
= coding
->charbuf
;
4394 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4395 int consumed_chars
= 0;
4396 int multibytep
= coding
->src_multibyte
;
4397 struct ccl_program ccl
;
4398 int source_charbuf
[1024];
4399 int source_byteidx
[1024];
4400 Lisp_Object attrs
, charset_list
;
4402 CODING_GET_INFO (coding
, attrs
, charset_list
);
4403 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4405 while (src
< src_end
)
4407 const unsigned char *p
= src
;
4408 int *source
, *source_end
;
4412 while (i
< 1024 && p
< src_end
)
4414 source_byteidx
[i
] = p
- src
;
4415 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4418 while (i
< 1024 && p
< src_end
)
4419 source_charbuf
[i
++] = *p
++;
4421 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4424 source
= source_charbuf
;
4425 source_end
= source
+ i
;
4426 while (source
< source_end
)
4428 ccl_driver (&ccl
, source
, charbuf
,
4429 source_end
- source
, charbuf_end
- charbuf
,
4431 source
+= ccl
.consumed
;
4432 charbuf
+= ccl
.produced
;
4433 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
4436 if (source
< source_end
)
4437 src
+= source_byteidx
[source
- source_charbuf
];
4440 consumed_chars
+= source
- source_charbuf
;
4442 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4443 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4449 case CCL_STAT_SUSPEND_BY_SRC
:
4450 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4452 case CCL_STAT_SUSPEND_BY_DST
:
4455 case CCL_STAT_INVALID_CMD
:
4456 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4459 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4462 coding
->consumed_char
+= consumed_chars
;
4463 coding
->consumed
= src
- coding
->source
;
4464 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4468 encode_coding_ccl (coding
)
4469 struct coding_system
*coding
;
4471 struct ccl_program ccl
;
4472 int multibytep
= coding
->dst_multibyte
;
4473 int *charbuf
= coding
->charbuf
;
4474 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4475 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4476 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4477 unsigned char *adjusted_dst_end
= dst_end
- 1;
4478 int destination_charbuf
[1024];
4479 int i
, produced_chars
= 0;
4480 Lisp_Object attrs
, charset_list
;
4482 CODING_GET_INFO (coding
, attrs
, charset_list
);
4483 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4485 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4486 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4488 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
4490 int dst_bytes
= dst_end
- dst
;
4491 if (dst_bytes
> 1024)
4494 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4495 charbuf_end
- charbuf
, dst_bytes
, charset_list
);
4496 charbuf
+= ccl
.consumed
;
4498 for (i
= 0; i
< ccl
.produced
; i
++)
4499 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4502 for (i
= 0; i
< ccl
.produced
; i
++)
4503 *dst
++ = destination_charbuf
[i
] & 0xFF;
4504 produced_chars
+= ccl
.produced
;
4510 case CCL_STAT_SUSPEND_BY_SRC
:
4511 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4513 case CCL_STAT_SUSPEND_BY_DST
:
4514 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_DST
);
4517 case CCL_STAT_INVALID_CMD
:
4518 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4521 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4525 coding
->produced_char
+= produced_chars
;
4526 coding
->produced
= dst
- coding
->destination
;
4532 /*** 10, 11. no-conversion handlers ***/
4534 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Decoder entry for raw text.  No byte is converted here: the function
   only sets CODING->chars_at_source to 1, resets CODING->consumed_char
   and CODING->consumed to 0, and records CODING_RESULT_SUCCESS.
   NOTE(review): presumably chars_at_source tells the caller to take
   the characters straight from the source text -- confirm against the
   decoding driver.  */
4537 decode_coding_raw_text (coding
)
4538 struct coding_system
*coding
;
4540 coding
->chars_at_source
= 1;
4541 coding
->consumed_char
= 0;
4542 coding
->consumed
= 0;
4543 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4547 encode_coding_raw_text (coding
)
4548 struct coding_system
*coding
;
4550 int multibytep
= coding
->dst_multibyte
;
4551 int *charbuf
= coding
->charbuf
;
4552 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4553 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4554 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4555 int produced_chars
= 0;
4560 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4562 if (coding
->src_multibyte
)
4563 while (charbuf
< charbuf_end
)
4565 ASSURE_DESTINATION (safe_room
);
4567 if (ASCII_CHAR_P (c
))
4568 EMIT_ONE_ASCII_BYTE (c
);
4569 else if (CHAR_BYTE8_P (c
))
4571 c
= CHAR_TO_BYTE8 (c
);
4576 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4578 CHAR_STRING_ADVANCE (c
, p1
);
4581 EMIT_ONE_BYTE (*p0
);
4587 while (charbuf
< charbuf_end
)
4589 ASSURE_DESTINATION (safe_room
);
4596 if (coding
->src_multibyte
)
4598 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4600 while (charbuf
< charbuf_end
)
4602 ASSURE_DESTINATION (safe_room
);
4604 if (ASCII_CHAR_P (c
))
4606 else if (CHAR_BYTE8_P (c
))
4607 *dst
++ = CHAR_TO_BYTE8 (c
);
4609 CHAR_STRING_ADVANCE (c
, dst
);
4615 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4616 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4617 *dst
++ = *charbuf
++;
4618 produced_chars
= dst
- (coding
->destination
+ coding
->dst_bytes
);
4621 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4622 coding
->produced_char
+= produced_chars
;
4623 coding
->produced
= dst
- coding
->destination
;
4627 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4628 Check if a text is encoded in a charset-based coding system. If it
4629 is, return 1, else return 0. */
4632 detect_coding_charset (coding
, detect_info
)
4633 struct coding_system
*coding
;
4634 struct coding_detection_info
*detect_info
;
4636 const unsigned char *src
= coding
->source
, *src_base
;
4637 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4638 int multibytep
= coding
->src_multibyte
;
4639 int consumed_chars
= 0;
4640 Lisp_Object attrs
, valids
;
4643 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
4645 coding
= &coding_categories
[coding_category_charset
];
4646 attrs
= CODING_ID_ATTRS (coding
->id
);
4647 valids
= AREF (attrs
, coding_attr_charset_valids
);
4649 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4650 src
+= coding
->head_ascii
;
4660 if (NILP (AREF (valids
, c
)))
4663 found
= CATEGORY_MASK_CHARSET
;
4665 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4669 detect_info
->found
|= found
;
4674 decode_coding_charset (coding
)
4675 struct coding_system
*coding
;
4677 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4678 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4679 const unsigned char *src_base
;
4680 int *charbuf
= coding
->charbuf
;
4681 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4682 int consumed_chars
= 0, consumed_chars_base
;
4683 int multibytep
= coding
->src_multibyte
;
4684 Lisp_Object attrs
, charset_list
, valids
;
4685 int char_offset
= coding
->produced_char
;
4686 int last_offset
= char_offset
;
4687 int last_id
= charset_ascii
;
4689 CODING_GET_INFO (coding
, attrs
, charset_list
);
4690 valids
= AREF (attrs
, coding_attr_charset_valids
);
4696 struct charset
*charset
;
4702 consumed_chars_base
= consumed_chars
;
4704 if (charbuf
>= charbuf_end
)
4712 val
= AREF (valids
, c
);
4717 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4718 dim
= CHARSET_DIMENSION (charset
);
4722 code
= (code
<< 8) | c
;
4725 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4730 /* VAL is a list of charset IDs. It is assured that the
4731 list is sorted by charset dimensions (smaller one
4735 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4736 dim
= CHARSET_DIMENSION (charset
);
4740 code
= (code
<< 8) | c
;
4743 CODING_DECODE_CHAR (coding
, src
, src_base
,
4744 src_end
, charset
, code
, c
);
4752 if (charset
->id
!= charset_ascii
4753 && last_id
!= charset
->id
)
4755 if (last_id
!= charset_ascii
)
4756 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4757 last_id
= charset
->id
;
4758 last_offset
= char_offset
;
4767 consumed_chars
= consumed_chars_base
;
4769 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4775 if (last_id
!= charset_ascii
)
4776 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4777 coding
->consumed_char
+= consumed_chars_base
;
4778 coding
->consumed
= src_base
- coding
->source
;
4779 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4783 encode_coding_charset (coding
)
4784 struct coding_system
*coding
;
4786 int multibytep
= coding
->dst_multibyte
;
4787 int *charbuf
= coding
->charbuf
;
4788 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4789 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4790 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4791 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4792 int produced_chars
= 0;
4793 Lisp_Object attrs
, charset_list
;
4794 int ascii_compatible
;
4797 CODING_GET_INFO (coding
, attrs
, charset_list
);
4798 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4800 while (charbuf
< charbuf_end
)
4802 struct charset
*charset
;
4805 ASSURE_DESTINATION (safe_room
);
4807 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4808 EMIT_ONE_ASCII_BYTE (c
);
4809 else if (CHAR_BYTE8_P (c
))
4811 c
= CHAR_TO_BYTE8 (c
);
4816 charset
= char_charset (c
, charset_list
, &code
);
4819 if (CHARSET_DIMENSION (charset
) == 1)
4820 EMIT_ONE_BYTE (code
);
4821 else if (CHARSET_DIMENSION (charset
) == 2)
4822 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4823 else if (CHARSET_DIMENSION (charset
) == 3)
4824 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4826 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4827 (code
>> 8) & 0xFF, code
& 0xFF);
4831 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4832 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4834 c
= coding
->default_char
;
4840 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4841 coding
->produced_char
+= produced_chars
;
4842 coding
->produced
= dst
- coding
->destination
;
4847 /*** 10. C library functions ***/
4849 /* Setup coding context CODING from information about CODING_SYSTEM.
4850 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4851 CODING_SYSTEM is invalid, signal an error. */
4854 setup_coding_system (coding_system
, coding
)
4855 Lisp_Object coding_system
;
4856 struct coding_system
*coding
;
4859 Lisp_Object eol_type
;
4860 Lisp_Object coding_type
;
4863 if (NILP (coding_system
))
4864 coding_system
= Qno_conversion
;
4866 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4868 attrs
= CODING_ID_ATTRS (coding
->id
);
4869 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4872 coding
->head_ascii
= -1;
4873 coding
->common_flags
4874 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4875 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
4876 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
4877 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
4878 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
4879 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs
)))
4880 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4882 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4883 coding
->max_charset_id
= SCHARS (val
) - 1;
4884 coding
->safe_charsets
= (char *) SDATA (val
);
4885 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4887 coding_type
= CODING_ATTR_TYPE (attrs
);
4888 if (EQ (coding_type
, Qundecided
))
4890 coding
->detector
= NULL
;
4891 coding
->decoder
= decode_coding_raw_text
;
4892 coding
->encoder
= encode_coding_raw_text
;
4893 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4895 else if (EQ (coding_type
, Qiso_2022
))
4898 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4900 /* Invoke graphic register 0 to plane 0. */
4901 CODING_ISO_INVOCATION (coding
, 0) = 0;
4902 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4903 CODING_ISO_INVOCATION (coding
, 1)
4904 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4905 /* Setup the initial status of designation. */
4906 for (i
= 0; i
< 4; i
++)
4907 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4908 /* Not single shifting initially. */
4909 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4910 /* Beginning of buffer should also be regarded as bol. */
4911 CODING_ISO_BOL (coding
) = 1;
4912 coding
->detector
= detect_coding_iso_2022
;
4913 coding
->decoder
= decode_coding_iso_2022
;
4914 coding
->encoder
= encode_coding_iso_2022
;
4915 if (flags
& CODING_ISO_FLAG_SAFE
)
4916 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4917 coding
->common_flags
4918 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4919 | CODING_REQUIRE_FLUSHING_MASK
);
4920 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4921 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4922 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
4923 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
4924 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4926 setup_iso_safe_charsets (attrs
);
4927 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4928 coding
->max_charset_id
= SCHARS (val
) - 1;
4929 coding
->safe_charsets
= (char *) SDATA (val
);
4931 CODING_ISO_FLAGS (coding
) = flags
;
4933 else if (EQ (coding_type
, Qcharset
))
4935 coding
->detector
= detect_coding_charset
;
4936 coding
->decoder
= decode_coding_charset
;
4937 coding
->encoder
= encode_coding_charset
;
4938 coding
->common_flags
4939 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4941 else if (EQ (coding_type
, Qutf_8
))
4943 coding
->detector
= detect_coding_utf_8
;
4944 coding
->decoder
= decode_coding_utf_8
;
4945 coding
->encoder
= encode_coding_utf_8
;
4946 coding
->common_flags
4947 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4949 else if (EQ (coding_type
, Qutf_16
))
4951 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4952 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4953 : EQ (val
, Qt
) ? utf_16_with_bom
4954 : utf_16_without_bom
);
4955 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4956 CODING_UTF_16_ENDIAN (coding
) = (EQ (val
, Qbig
) ? utf_16_big_endian
4957 : utf_16_little_endian
);
4958 CODING_UTF_16_SURROGATE (coding
) = 0;
4959 coding
->detector
= detect_coding_utf_16
;
4960 coding
->decoder
= decode_coding_utf_16
;
4961 coding
->encoder
= encode_coding_utf_16
;
4962 coding
->common_flags
4963 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4964 if (CODING_UTF_16_BOM (coding
) == utf_16_detect_bom
)
4965 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4967 else if (EQ (coding_type
, Qccl
))
4969 coding
->detector
= detect_coding_ccl
;
4970 coding
->decoder
= decode_coding_ccl
;
4971 coding
->encoder
= encode_coding_ccl
;
4972 coding
->common_flags
4973 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4974 | CODING_REQUIRE_FLUSHING_MASK
);
4976 else if (EQ (coding_type
, Qemacs_mule
))
4978 coding
->detector
= detect_coding_emacs_mule
;
4979 coding
->decoder
= decode_coding_emacs_mule
;
4980 coding
->encoder
= encode_coding_emacs_mule
;
4981 coding
->common_flags
4982 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4983 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4984 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4986 Lisp_Object tail
, safe_charsets
;
4987 int max_charset_id
= 0;
4989 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4991 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4992 max_charset_id
= XFASTINT (XCAR (tail
));
4993 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
4995 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4997 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
4998 coding
->max_charset_id
= max_charset_id
;
4999 coding
->safe_charsets
= (char *) SDATA (safe_charsets
);
5002 else if (EQ (coding_type
, Qshift_jis
))
5004 coding
->detector
= detect_coding_sjis
;
5005 coding
->decoder
= decode_coding_sjis
;
5006 coding
->encoder
= encode_coding_sjis
;
5007 coding
->common_flags
5008 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5010 else if (EQ (coding_type
, Qbig5
))
5012 coding
->detector
= detect_coding_big5
;
5013 coding
->decoder
= decode_coding_big5
;
5014 coding
->encoder
= encode_coding_big5
;
5015 coding
->common_flags
5016 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5018 else /* EQ (coding_type, Qraw_text) */
5020 coding
->detector
= NULL
;
5021 coding
->decoder
= decode_coding_raw_text
;
5022 coding
->encoder
= encode_coding_raw_text
;
5028 /* Return raw-text or one of its subsidiaries that has the same
5029 eol_type as CODING-SYSTEM. */
5032 raw_text_coding_system (coding_system
)
5033 Lisp_Object coding_system
;
5035 Lisp_Object spec
, attrs
;
5036 Lisp_Object eol_type
, raw_text_eol_type
;
5038 if (NILP (coding_system
))
5040 spec
= CODING_SYSTEM_SPEC (coding_system
);
5041 attrs
= AREF (spec
, 0);
5043 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
5044 return coding_system
;
5046 eol_type
= AREF (spec
, 2);
5047 if (VECTORP (eol_type
))
5049 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
5050 raw_text_eol_type
= AREF (spec
, 2);
5051 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
5052 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
5053 : AREF (raw_text_eol_type
, 2));
5057 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5058 does, return one of the subsidiary that has the same eol-spec as
5059 PARENT. Otherwise, return CODING_SYSTEM. */
5062 coding_inherit_eol_type (coding_system
, parent
)
5063 Lisp_Object coding_system
, parent
;
5065 Lisp_Object spec
, eol_type
;
5067 if (NILP (coding_system
))
5068 coding_system
= Qraw_text
;
5069 spec
= CODING_SYSTEM_SPEC (coding_system
);
5070 eol_type
= AREF (spec
, 2);
5071 if (VECTORP (eol_type
)
5074 Lisp_Object parent_spec
;
5075 Lisp_Object parent_eol_type
;
5078 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
5079 parent_eol_type
= AREF (parent_spec
, 2);
5080 if (EQ (parent_eol_type
, Qunix
))
5081 coding_system
= AREF (eol_type
, 0);
5082 else if (EQ (parent_eol_type
, Qdos
))
5083 coding_system
= AREF (eol_type
, 1);
5084 else if (EQ (parent_eol_type
, Qmac
))
5085 coding_system
= AREF (eol_type
, 2);
5087 return coding_system
;
5090 /* Emacs has a mechanism to automatically detect a coding system if it
5091 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5092 it's impossible to distinguish some coding systems accurately
5093 because they use the same range of codes. So, at first, coding
5094 systems are grouped into the following categories:
5096 o coding-category-emacs-mule
5098 The category for a coding system which has the same code range
5099 as Emacs' internal format. Assigned the coding-system (Lisp
5100 symbol) `emacs-mule' by default.
5102 o coding-category-sjis
5104 The category for a coding system which has the same code range
5105 as SJIS. Assigned the coding-system (Lisp
5106 symbol) `japanese-shift-jis' by default.
5108 o coding-category-iso-7
5110 The category for a coding system which has the same code range
5111 as ISO2022 of 7-bit environment. This doesn't use any locking
5112 shift and single shift functions. This can encode/decode all
5113 charsets. Assigned the coding-system (Lisp symbol)
5114 `iso-2022-7bit' by default.
5116 o coding-category-iso-7-tight
5118 Same as coding-category-iso-7 except that this can
5119 encode/decode only the specified charsets.
5121 o coding-category-iso-8-1
5123 The category for a coding system which has the same code range
5124 as ISO2022 of 8-bit environment and graphic plane 1 used only
5125 for DIMENSION1 charset. This doesn't use any locking shift
5126 and single shift functions. Assigned the coding-system (Lisp
5127 symbol) `iso-latin-1' by default.
5129 o coding-category-iso-8-2
5131 The category for a coding system which has the same code range
5132 as ISO2022 of 8-bit environment and graphic plane 1 used only
5133 for DIMENSION2 charset. This doesn't use any locking shift
5134 and single shift functions. Assigned the coding-system (Lisp
5135 symbol) `japanese-iso-8bit' by default.
5137 o coding-category-iso-7-else
5139 The category for a coding system which has the same code range
5140 as ISO2022 of 7-bit environment but uses locking shift or
5141 single shift functions. Assigned the coding-system (Lisp
5142 symbol) `iso-2022-7bit-lock' by default.
5144 o coding-category-iso-8-else
5146 The category for a coding system which has the same code range
5147 as ISO2022 of 8-bit environment but uses locking shift or
5148 single shift functions. Assigned the coding-system (Lisp
5149 symbol) `iso-2022-8bit-ss2' by default.
5151 o coding-category-big5
5153 The category for a coding system which has the same code range
5154 as BIG5. Assigned the coding-system (Lisp symbol)
5155 `cn-big5' by default.
5157 o coding-category-utf-8
5159 The category for a coding system which has the same code range
5160 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5161 symbol) `utf-8' by default.
5163 o coding-category-utf-16-be
5165 The category for a coding system in which a text has a
5166 Unicode signature (cf. Unicode Standard) in the order of BIG
5167 endian at the head. Assigned the coding-system (Lisp symbol)
5168 `utf-16-be' by default.
5170 o coding-category-utf-16-le
5172 The category for a coding system in which a text has a
5173 Unicode signature (cf. Unicode Standard) in the order of
5174 LITTLE endian at the head. Assigned the coding-system (Lisp
5175 symbol) `utf-16-le' by default.
5177 o coding-category-ccl
5179 The category for a coding system of which encoder/decoder is
5180 written in CCL programs. The default value is nil, i.e., no
5181 coding system is assigned.
5183 o coding-category-binary
5185 The category for a coding system not categorized in any of the
5186 above. Assigned the coding-system (Lisp symbol)
5187 `no-conversion' by default.
5189 Each of them is a Lisp symbol and the value is an actual
5190 `coding-system's (this is also a Lisp symbol) assigned by a user.
5191 What Emacs does actually is to detect a category of coding system.
5192 Then, it uses a `coding-system' assigned to it. If Emacs can't
5193 decide only one possible category, it selects a category of the
5194 highest priority. Priorities of categories are also specified by a
5195 user in a Lisp variable `coding-category-list'.
5199 #define EOL_SEEN_NONE 0
5200 #define EOL_SEEN_LF 1
5201 #define EOL_SEEN_CR 2
5202 #define EOL_SEEN_CRLF 4
5204 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5205 SOURCE is encoded. If CATEGORY is one of
5206 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5207 two-byte, else they are encoded by one-byte.
5209 Return one of EOL_SEEN_XXX. */
5211 #define MAX_EOL_CHECK_COUNT 3
5214 detect_eol (source
, src_bytes
, category
)
5215 unsigned char *source
;
5216 EMACS_INT src_bytes
;
5217 enum coding_category category
;
5219 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5222 int eol_seen
= EOL_SEEN_NONE
;
5224 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
5228 msb
= category
== (coding_category_utf_16_le
5229 | coding_category_utf_16_le_nosig
);
5232 while (src
+ 1 < src_end
)
5235 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5240 this_eol
= EOL_SEEN_LF
;
5241 else if (src
+ 3 >= src_end
5242 || src
[msb
+ 2] != 0
5243 || src
[lsb
+ 2] != '\n')
5244 this_eol
= EOL_SEEN_CR
;
5246 this_eol
= EOL_SEEN_CRLF
;
5248 if (eol_seen
== EOL_SEEN_NONE
)
5249 /* This is the first end-of-line. */
5250 eol_seen
= this_eol
;
5251 else if (eol_seen
!= this_eol
)
5253 /* The type found differs from what was found before. */
5254 eol_seen
= EOL_SEEN_LF
;
5257 if (++total
== MAX_EOL_CHECK_COUNT
)
5265 while (src
< src_end
)
5268 if (c
== '\n' || c
== '\r')
5273 this_eol
= EOL_SEEN_LF
;
5274 else if (src
>= src_end
|| *src
!= '\n')
5275 this_eol
= EOL_SEEN_CR
;
5277 this_eol
= EOL_SEEN_CRLF
, src
++;
5279 if (eol_seen
== EOL_SEEN_NONE
)
5280 /* This is the first end-of-line. */
5281 eol_seen
= this_eol
;
5282 else if (eol_seen
!= this_eol
)
5284 /* The type found differs from what was found before. */
5285 eol_seen
= EOL_SEEN_LF
;
5288 if (++total
== MAX_EOL_CHECK_COUNT
)
5298 adjust_coding_eol_type (coding
, eol_seen
)
5299 struct coding_system
*coding
;
5302 Lisp_Object eol_type
;
5304 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5305 if (eol_seen
& EOL_SEEN_LF
)
5307 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
5310 else if (eol_seen
& EOL_SEEN_CRLF
)
5312 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
5315 else if (eol_seen
& EOL_SEEN_CR
)
5317 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5323 /* Detect how a text specified in CODING is encoded. If a coding
5324 system is detected, update fields of CODING by the detected coding
5328 detect_coding (coding
)
5329 struct coding_system
*coding
;
5331 const unsigned char *src
, *src_end
;
5332 Lisp_Object attrs
, coding_type
;
5334 coding
->consumed
= coding
->consumed_char
= 0;
5335 coding
->produced
= coding
->produced_char
= 0;
5336 coding_set_source (coding
);
5338 src_end
= coding
->source
+ coding
->src_bytes
;
5340 /* If we have not yet decided the text encoding type, detect it
5342 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
5346 for (i
= 0, src
= coding
->source
; src
< src_end
; i
++, src
++)
5349 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
5351 || c
== ISO_CODE_SO
)))
5354 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5356 if (coding
->head_ascii
< coding
->src_bytes
)
5358 struct coding_detection_info detect_info
;
5359 enum coding_category category
;
5360 struct coding_system
*this;
5362 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
5363 for (i
= 0; i
< coding_category_raw_text
; i
++)
5365 category
= coding_priorities
[i
];
5366 this = coding_categories
+ category
;
5369 /* No coding system of this category is defined. */
5370 detect_info
.rejected
|= (1 << category
);
5372 else if (category
>= coding_category_raw_text
)
5374 else if (detect_info
.checked
& (1 << category
))
5376 if (detect_info
.found
& (1 << category
))
5379 else if ((*(this->detector
)) (coding
, &detect_info
)
5380 && detect_info
.found
& (1 << category
))
5382 if (category
== coding_category_utf_16_auto
)
5384 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5385 category
= coding_category_utf_16_le
;
5387 category
= coding_category_utf_16_be
;
5392 if (i
< coding_category_raw_text
)
5393 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5394 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5395 setup_coding_system (Qraw_text
, coding
);
5396 else if (detect_info
.rejected
)
5397 for (i
= 0; i
< coding_category_raw_text
; i
++)
5398 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5400 this = coding_categories
+ coding_priorities
[i
];
5401 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5406 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding
->id
)))
5407 == coding_category_utf_16_auto
)
5409 Lisp_Object coding_systems
;
5410 struct coding_detection_info detect_info
;
5413 = AREF (CODING_ID_ATTRS (coding
->id
), coding_attr_utf_16_bom
);
5414 detect_info
.found
= detect_info
.rejected
= 0;
5415 if (CONSP (coding_systems
)
5416 && detect_coding_utf_16 (coding
, &detect_info
))
5418 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5419 setup_coding_system (XCAR (coding_systems
), coding
);
5420 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
5421 setup_coding_system (XCDR (coding_systems
), coding
);
5429 struct coding_system
*coding
;
5431 Lisp_Object eol_type
;
5432 unsigned char *p
, *pbeg
, *pend
;
5434 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5435 if (EQ (eol_type
, Qunix
))
5438 if (NILP (coding
->dst_object
))
5439 pbeg
= coding
->destination
;
5441 pbeg
= BYTE_POS_ADDR (coding
->dst_pos_byte
);
5442 pend
= pbeg
+ coding
->produced
;
5444 if (VECTORP (eol_type
))
5446 int eol_seen
= EOL_SEEN_NONE
;
5448 for (p
= pbeg
; p
< pend
; p
++)
5451 eol_seen
|= EOL_SEEN_LF
;
5452 else if (*p
== '\r')
5454 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5456 eol_seen
|= EOL_SEEN_CRLF
;
5460 eol_seen
|= EOL_SEEN_CR
;
5463 if (eol_seen
!= EOL_SEEN_NONE
5464 && eol_seen
!= EOL_SEEN_LF
5465 && eol_seen
!= EOL_SEEN_CRLF
5466 && eol_seen
!= EOL_SEEN_CR
)
5467 eol_seen
= EOL_SEEN_LF
;
5468 if (eol_seen
!= EOL_SEEN_NONE
)
5469 eol_type
= adjust_coding_eol_type (coding
, eol_seen
);
5472 if (EQ (eol_type
, Qmac
))
5474 for (p
= pbeg
; p
< pend
; p
++)
5478 else if (EQ (eol_type
, Qdos
))
5482 if (NILP (coding
->dst_object
))
5484 for (p
= pend
- 2; p
>= pbeg
; p
--)
5487 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
-- - p
- 1);
5493 for (p
= pend
- 2; p
>= pbeg
; p
--)
5496 int pos_byte
= coding
->dst_pos_byte
+ (p
- pbeg
);
5497 int pos
= BYTE_TO_CHAR (pos_byte
);
5499 del_range_2 (pos
, pos_byte
, pos
+ 1, pos_byte
+ 1, 0);
5503 coding
->produced
-= n
;
5504 coding
->produced_char
-= n
;
5509 /* Return a translation table (or list of them) from coding system
5510 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5511 decoding (ENCODEP is zero). */
5514 get_translation_table (attrs
, encodep
)
5516 Lisp_Object standard
, translation_table
;
5519 translation_table
= CODING_ATTR_ENCODE_TBL (attrs
),
5520 standard
= Vstandard_translation_table_for_encode
;
5522 translation_table
= CODING_ATTR_DECODE_TBL (attrs
),
5523 standard
= Vstandard_translation_table_for_decode
;
5524 if (NILP (translation_table
))
5526 if (SYMBOLP (translation_table
))
5527 translation_table
= Fget (translation_table
, Qtranslation_table
);
5528 else if (CONSP (translation_table
))
5532 translation_table
= Fcopy_sequence (translation_table
);
5533 for (val
= translation_table
; CONSP (val
); val
= XCDR (val
))
5534 if (SYMBOLP (XCAR (val
)))
5535 XSETCAR (val
, Fget (XCAR (val
), Qtranslation_table
));
5537 if (! NILP (standard
))
5539 if (CONSP (translation_table
))
5540 translation_table
= nconc2 (translation_table
, Fcons (standard
, Qnil
));
5542 translation_table
= Fcons (translation_table
, Fcons (standard
, Qnil
));
5544 return translation_table
;
5549 translate_chars (coding
, table
)
5550 struct coding_system
*coding
;
5553 int *charbuf
= coding
->charbuf
;
5554 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5557 if (coding
->chars_at_source
)
5560 while (charbuf
< charbuf_end
)
5566 *charbuf
++ = translate_char (table
, c
);
5571 produce_chars (coding
)
5572 struct coding_system
*coding
;
5574 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5575 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5577 int produced_chars
= 0;
5579 if (! coding
->chars_at_source
)
5581 /* Characters are in coding->charbuf. */
5582 int *buf
= coding
->charbuf
;
5583 int *buf_end
= buf
+ coding
->charbuf_used
;
5584 unsigned char *adjusted_dst_end
;
5586 if (BUFFERP (coding
->src_object
)
5587 && EQ (coding
->src_object
, coding
->dst_object
))
5588 dst_end
= ((unsigned char *) coding
->source
) + coding
->consumed
;
5589 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5591 while (buf
< buf_end
)
5595 if (dst
>= adjusted_dst_end
)
5597 dst
= alloc_destination (coding
,
5598 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5600 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5601 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5605 if (coding
->dst_multibyte
5606 || ! CHAR_BYTE8_P (c
))
5607 CHAR_STRING_ADVANCE (c
, dst
);
5609 *dst
++ = CHAR_TO_BYTE8 (c
);
5613 /* This is an annotation datum. (-C) is the length of
5620 const unsigned char *src
= coding
->source
;
5621 const unsigned char *src_end
= src
+ coding
->src_bytes
;
5622 Lisp_Object eol_type
;
5624 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5626 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5628 if (coding
->src_multibyte
)
5635 const unsigned char *src_base
= src
;
5641 if (EQ (eol_type
, Qdos
))
5645 record_conversion_result
5646 (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
5647 goto no_more_source
;
5652 else if (EQ (eol_type
, Qmac
))
5657 coding
->consumed
= src
- coding
->source
;
5659 if (EQ (coding
->src_object
, coding
->dst_object
))
5660 dst_end
= (unsigned char *) src
;
5663 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5665 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5666 coding_set_source (coding
);
5667 src
= coding
->source
+ coding
->consumed
;
5668 src_end
= coding
->source
+ coding
->src_bytes
;
5678 while (src
< src_end
)
5685 if (EQ (eol_type
, Qdos
))
5691 else if (EQ (eol_type
, Qmac
))
5694 if (dst
>= dst_end
- 1)
5696 coding
->consumed
= src
- coding
->source
;
5698 if (EQ (coding
->src_object
, coding
->dst_object
))
5699 dst_end
= (unsigned char *) src
;
5700 if (dst
>= dst_end
- 1)
5702 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5704 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5705 coding_set_source (coding
);
5706 src
= coding
->source
+ coding
->consumed
;
5707 src_end
= coding
->source
+ coding
->src_bytes
;
5715 if (!EQ (coding
->src_object
, coding
->dst_object
))
5717 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5721 EMACS_INT offset
= src
- coding
->source
;
5723 dst
= alloc_destination (coding
, require
, dst
);
5724 coding_set_source (coding
);
5725 src
= coding
->source
+ offset
;
5726 src_end
= coding
->source
+ coding
->src_bytes
;
5729 produced_chars
= coding
->src_chars
;
5730 while (src
< src_end
)
5736 if (EQ (eol_type
, Qdos
))
5743 else if (EQ (eol_type
, Qmac
))
5749 coding
->consumed
= coding
->src_bytes
;
5750 coding
->consumed_char
= coding
->src_chars
;
5753 produced
= dst
- (coding
->destination
+ coding
->produced
);
5754 if (BUFFERP (coding
->dst_object
))
5755 insert_from_gap (produced_chars
, produced
);
5756 coding
->produced
+= produced
;
5757 coding
->produced_char
+= produced_chars
;
5758 return produced_chars
;
5761 /* Compose text in CODING->dst_object according to the annotation data at
5762 CHARBUF. CHARBUF is an array:
5763 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5767 produce_composition (coding
, charbuf
)
5768 struct coding_system
*coding
;
5773 enum composition_method method
;
5774 Lisp_Object components
;
5777 from
= coding
->dst_pos
+ charbuf
[2];
5778 to
= coding
->dst_pos
+ charbuf
[3];
5779 method
= (enum composition_method
) (charbuf
[4]);
5781 if (method
== COMPOSITION_RELATIVE
)
5785 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5790 for (i
= 0; i
< len
; i
++)
5791 args
[i
] = make_number (charbuf
[i
]);
5792 components
= (method
== COMPOSITION_WITH_ALTCHARS
5793 ? Fstring (len
, args
) : Fvector (len
, args
));
5795 compose_text (from
, to
, components
, Qnil
, coding
->dst_object
);
5799 /* Put `charset' property on text in CODING->dst_object according to
5800 the annotation data at CHARBUF. CHARBUF is an array:
5801 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5805 produce_charset (coding
, charbuf
)
5806 struct coding_system
*coding
;
5809 EMACS_INT from
= coding
->dst_pos
+ charbuf
[2];
5810 EMACS_INT to
= coding
->dst_pos
+ charbuf
[3];
5811 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[4]);
5813 Fput_text_property (make_number (from
), make_number (to
),
5814 Qcharset
, CHARSET_NAME (charset
),
5815 coding
->dst_object
);
5819 #define CHARBUF_SIZE 0x4000
5821 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5823 int size = CHARBUF_SIZE;; \
5825 coding->charbuf = NULL; \
5826 while (size > 1024) \
5828 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5829 if (coding->charbuf) \
5833 if (! coding->charbuf) \
5835 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5836 return coding->result; \
5838 coding->charbuf_size = size; \
5843 produce_annotation (coding
)
5844 struct coding_system
*coding
;
5846 int *charbuf
= coding
->charbuf
;
5847 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5849 if (NILP (coding
->dst_object
))
5852 while (charbuf
< charbuf_end
)
5858 int len
= -*charbuf
;
5861 case CODING_ANNOTATE_COMPOSITION_MASK
:
5862 produce_composition (coding
, charbuf
);
5864 case CODING_ANNOTATE_CHARSET_MASK
:
5865 produce_charset (coding
, charbuf
);
5875 /* Decode the data at CODING->src_object into CODING->dst_object.
5876 CODING->src_object is a buffer, a string, or nil.
5877 CODING->dst_object is a buffer.
5879 If CODING->src_object is a buffer, it must be the current buffer.
5880 In this case, if CODING->src_pos is positive, it is a position of
5881 the source text in the buffer, otherwise, the source text is in the
5882 gap area of the buffer, and CODING->src_pos specifies the offset of
5883 the text from GPT (which must be the same as PT). If this is the
5884 same buffer as CODING->dst_object, CODING->src_pos must be
5887 If CODING->src_object is a string, CODING->src_pos is an index to
5890 If CODING->src_object is nil, CODING->source must already point to
5891 the non-relocatable memory area. In this case, CODING->src_pos is
5892 an offset from CODING->source.
5894 The decoded data is inserted at the current point of the buffer
5899 decode_coding (coding
)
5900 struct coding_system
*coding
;
5903 Lisp_Object undo_list
;
5904 Lisp_Object translation_table
;
5906 if (BUFFERP (coding
->src_object
)
5907 && coding
->src_pos
> 0
5908 && coding
->src_pos
< GPT
5909 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5910 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
5913 if (BUFFERP (coding
->dst_object
))
5915 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5916 set_buffer_internal (XBUFFER (coding
->dst_object
));
5918 move_gap_both (PT
, PT_BYTE
);
5919 undo_list
= current_buffer
->undo_list
;
5920 current_buffer
->undo_list
= Qt
;
5923 coding
->consumed
= coding
->consumed_char
= 0;
5924 coding
->produced
= coding
->produced_char
= 0;
5925 coding
->chars_at_source
= 0;
5926 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
5929 ALLOC_CONVERSION_WORK_AREA (coding
);
5931 attrs
= CODING_ID_ATTRS (coding
->id
);
5932 translation_table
= get_translation_table (attrs
, 0);
5936 coding_set_source (coding
);
5937 coding
->annotated
= 0;
5938 (*(coding
->decoder
)) (coding
);
5939 if (!NILP (translation_table
))
5940 translate_chars (coding
, translation_table
);
5941 coding_set_destination (coding
);
5942 produce_chars (coding
);
5943 if (coding
->annotated
)
5944 produce_annotation (coding
);
5946 while (coding
->consumed
< coding
->src_bytes
5947 && ! coding
->result
);
5949 coding
->carryover_bytes
= 0;
5950 if (coding
->consumed
< coding
->src_bytes
)
5952 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5953 const unsigned char *src
;
5955 coding_set_source (coding
);
5956 coding_set_destination (coding
);
5957 src
= coding
->source
+ coding
->consumed
;
5959 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5961 /* Flush out unprocessed data as binary chars. We are sure
5962 that the number of data is less than the size of
5964 coding
->charbuf_used
= 0;
5965 while (nbytes
-- > 0)
5969 coding
->charbuf
[coding
->charbuf_used
++] = (c
& 0x80 ? - c
: c
);
5971 produce_chars (coding
);
5975 /* Record unprocessed bytes in coding->carryover. We are
5976 sure that the number of data is less than the size of
5977 coding->carryover. */
5978 unsigned char *p
= coding
->carryover
;
5980 coding
->carryover_bytes
= nbytes
;
5981 while (nbytes
-- > 0)
5984 coding
->consumed
= coding
->src_bytes
;
5987 if (BUFFERP (coding
->dst_object
))
5989 current_buffer
->undo_list
= undo_list
;
5990 record_insert (coding
->dst_pos
, coding
->produced_char
);
5992 if (! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5993 decode_eol (coding
);
5994 return coding
->result
;
5998 /* Extract an annotation datum from a composition starting at POS and
5999 ending before LIMIT of CODING->src_object (buffer or string), store
6000 the data in BUF, set *STOP to a starting position of the next
6001 composition (if any) or to LIMIT, and return the address of the
6002 next element of BUF.
6004 If such an annotation is not found, set *STOP to a starting
6005 position of a composition after POS (if any) or to LIMIT, and
6009 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
6010 EMACS_INT pos
, limit
;
6011 struct coding_system
*coding
;
6015 EMACS_INT start
, end
;
6018 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
6021 else if (start
> pos
)
6027 /* We found a composition. Store the corresponding
6028 annotation data in BUF. */
6030 enum composition_method method
= COMPOSITION_METHOD (prop
);
6031 int nchars
= COMPOSITION_LENGTH (prop
);
6033 ADD_COMPOSITION_DATA (buf
, 0, nchars
, method
);
6034 if (method
!= COMPOSITION_RELATIVE
)
6036 Lisp_Object components
;
6039 components
= COMPOSITION_COMPONENTS (prop
);
6040 if (VECTORP (components
))
6042 len
= XVECTOR (components
)->size
;
6043 for (i
= 0; i
< len
; i
++)
6044 *buf
++ = XINT (AREF (components
, i
));
6046 else if (STRINGP (components
))
6048 len
= SCHARS (components
);
6052 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
6056 else if (INTEGERP (components
))
6059 *buf
++ = XINT (components
);
6061 else if (CONSP (components
))
6063 for (len
= 0; CONSP (components
);
6064 len
++, components
= XCDR (components
))
6065 *buf
++ = XINT (XCAR (components
));
6073 if (find_composition (end
, limit
, &start
, &end
, &prop
,
6084 /* Extract an annotation datum from a text property `charset' at POS of
6085 CODING->src_object (buffer or string), store the data in BUF, set
6086 *STOP to the position where the value of `charset' property changes
6087 (limiting by LIMIT), and return the address of the next element of
6090 If the property value is nil, set *STOP to the position where the
6091 property value is non-nil (limiting by LIMIT), and return BUF. */
6094 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
6095 EMACS_INT pos
, limit
;
6096 struct coding_system
*coding
;
6100 Lisp_Object val
, next
;
6103 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
6104 if (! NILP (val
) && CHARSETP (val
))
6105 id
= XINT (CHARSET_SYMBOL_ID (val
));
6108 ADD_CHARSET_DATA (buf
, 0, 0, id
);
6109 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
6111 make_number (limit
));
6112 *stop
= XINT (next
);
6118 consume_chars (coding
)
6119 struct coding_system
*coding
;
6121 int *buf
= coding
->charbuf
;
6122 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
6123 const unsigned char *src
= coding
->source
+ coding
->consumed
;
6124 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
6125 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
6126 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
6127 int multibytep
= coding
->src_multibyte
;
6128 Lisp_Object eol_type
;
6130 EMACS_INT stop
, stop_composition
, stop_charset
;
6132 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
6133 if (VECTORP (eol_type
))
6136 /* Note: composition handling is not yet implemented. */
6137 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6139 if (NILP (coding
->src_object
))
6140 stop
= stop_composition
= stop_charset
= end_pos
;
6143 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
6144 stop
= stop_composition
= pos
;
6146 stop
= stop_composition
= end_pos
;
6147 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
6148 stop
= stop_charset
= pos
;
6150 stop_charset
= end_pos
;
6153 /* Compensate for CRLF and conversion. */
6154 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
6155 while (buf
< buf_end
)
6161 if (pos
== stop_composition
)
6162 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
6163 buf
, &stop_composition
);
6164 if (pos
== stop_charset
)
6165 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
6166 buf
, &stop_charset
);
6167 stop
= (stop_composition
< stop_charset
6168 ? stop_composition
: stop_charset
);
6175 if (! CODING_FOR_UNIBYTE (coding
)
6176 && (bytes
= MULTIBYTE_LENGTH (src
, src_end
)) > 0)
6177 c
= STRING_CHAR_ADVANCE (src
), pos
+= bytes
;
6182 c
= STRING_CHAR_ADVANCE (src
), pos
++;
6183 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
6185 if (! EQ (eol_type
, Qunix
))
6189 if (EQ (eol_type
, Qdos
))
6198 coding
->consumed
= src
- coding
->source
;
6199 coding
->consumed_char
= pos
- coding
->src_pos
;
6200 coding
->charbuf_used
= buf
- coding
->charbuf
;
6201 coding
->chars_at_source
= 0;
6205 /* Encode the text at CODING->src_object into CODING->dst_object.
6206 CODING->src_object is a buffer or a string.
6207 CODING->dst_object is a buffer or nil.
6209 If CODING->src_object is a buffer, it must be the current buffer.
6210 In this case, if CODING->src_pos is positive, it is a position of
6211 the source text in the buffer, otherwise, the source text is in the
6212 gap area of the buffer, and coding->src_pos specifies the offset of
6213 the text from GPT (which must be the same as PT). If this is the
6214 same buffer as CODING->dst_object, CODING->src_pos must be
6215 negative and CODING should not have `pre-write-conversion'.
6217 If CODING->src_object is a string, CODING should not have
6218 `pre-write-conversion'.
6220 If CODING->dst_object is a buffer, the encoded data is inserted at
6221 the current point of that buffer.
6223 If CODING->dst_object is nil, the encoded data is placed at the
6224 memory area specified by CODING->destination. */
6227 encode_coding (coding
)
6228 struct coding_system
*coding
;
6231 Lisp_Object translation_table
;
6233 attrs
= CODING_ID_ATTRS (coding
->id
);
6234 translation_table
= get_translation_table (attrs
, 1);
6236 if (BUFFERP (coding
->dst_object
))
6238 set_buffer_internal (XBUFFER (coding
->dst_object
));
6239 coding
->dst_multibyte
6240 = ! NILP (current_buffer
->enable_multibyte_characters
);
6243 coding
->consumed
= coding
->consumed_char
= 0;
6244 coding
->produced
= coding
->produced_char
= 0;
6245 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
6248 ALLOC_CONVERSION_WORK_AREA (coding
);
6251 coding_set_source (coding
);
6252 consume_chars (coding
);
6254 if (!NILP (translation_table
))
6255 translate_chars (coding
, translation_table
);
6257 coding_set_destination (coding
);
6258 (*(coding
->encoder
)) (coding
);
6259 } while (coding
->consumed_char
< coding
->src_chars
);
6261 if (BUFFERP (coding
->dst_object
))
6262 insert_from_gap (coding
->produced_char
, coding
->produced
);
6264 return (coding
->result
);
6268 /* Name (or base name) of work buffer for code conversion.
   make_conversion_work_buffer passes it to Fget_buffer_create
   directly, or to Fgenerate_new_buffer_name to derive a fresh name. */
6269 static Lisp_Object Vcode_conversion_workbuf_name
;
6271 /* A working buffer used by the top level conversion. Once it is
6272 created, it is never destroyed. It has the name
6273 Vcode_conversion_workbuf_name. The other working buffers are
6274 destroyed after the use is finished, and their names are modified
6275 versions of Vcode_conversion_workbuf_name. */
6276 static Lisp_Object Vcode_conversion_reused_workbuf
;
6278 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
   make_conversion_work_buffer post-increments it on every call, so
   it can exceed 1; code_conversion_restore resets it to 0 when the
   reused buffer is released. */
6279 static int reused_workbuf_in_use
;
6282 /* Return a working buffer for code conversion. MULTIBYTE specifies the
6283 multibyteness of the returned buffer.

   The test on reused_workbuf_in_use (post-incremented here) selects
   between two ways of naming the buffer: a fresh name obtained from
   Fgenerate_new_buffer_name, or Vcode_conversion_workbuf_name
   itself.  In the latter case the buffer is also remembered in
   Vcode_conversion_reused_workbuf if that variable is still nil.

   The work buffer is made current only long enough to set its
   undo_list to Qt and its enable_multibyte_characters to Qt or Qnil
   according to MULTIBYTE; the previously current buffer is then
   restored. */
6286 make_conversion_work_buffer (multibyte
)
6288 Lisp_Object name
, workbuf
;
6289 struct buffer
*current
;
6291 if (reused_workbuf_in_use
++)
6293 name
= Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name
, Qnil
);
6294 workbuf
= Fget_buffer_create (name
);
6298 name
= Vcode_conversion_workbuf_name
;
6299 workbuf
= Fget_buffer_create (name
);
6300 if (NILP (Vcode_conversion_reused_workbuf
))
6301 Vcode_conversion_reused_workbuf
= workbuf
;
6303 current
= current_buffer
;
6304 set_buffer_internal (XBUFFER (workbuf
));
6306 current_buffer
->undo_list
= Qt
;
6307 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
6308 set_buffer_internal (current
);
/* Unwind handler registered by code_conversion_save.  ARG is a cons
   (CURRENT . WORKBUF) where CURRENT is the buffer that was current
   when the handler was recorded and WORKBUF is a work buffer or nil.

   If WORKBUF is non-nil it is released: when it is
   Vcode_conversion_reused_workbuf the in-use flag is cleared,
   otherwise the buffer is killed provided it is still live.
   Finally CURRENT is made the current buffer again.  */
6314 code_conversion_restore (arg
)
6317 Lisp_Object current
, workbuf
;
6319 current
= XCAR (arg
);
6320 workbuf
= XCDR (arg
);
6321 if (! NILP (workbuf
))
6323 if (EQ (workbuf
, Vcode_conversion_reused_workbuf
))
6324 reused_workbuf_in_use
= 0;
6325 else if (! NILP (Fbuffer_live_p (workbuf
)))
6326 Fkill_buffer (workbuf
);
6328 set_buffer_internal (XBUFFER (current
));
/* Record an unwind-protect entry that will call
   code_conversion_restore with the cons (CURRENT-BUFFER . WORKBUF).
   WORKBUF starts as Qnil and may be replaced by the result of
   make_conversion_work_buffer (MULTIBYTE).

   NOTE(review): presumably the work buffer is created only when
   WITH_WORK_BUF is nonzero and WORKBUF is the return value (callers
   assign the result to a dst_object / src_object field) -- neither
   the guard nor the return statement is visible here; confirm.  */
6333 code_conversion_save (with_work_buf
, multibyte
)
6334 int with_work_buf
, multibyte
;
6336 Lisp_Object workbuf
= Qnil
;
6339 workbuf
= make_conversion_work_buffer (multibyte
);
6340 record_unwind_protect (code_conversion_restore
,
6341 Fcons (Fcurrent_buffer (), workbuf
));
/* Decode CHARS characters (BYTES bytes) with CODING, using the
   current buffer as both source and destination.

   The source position is given as -CHARS / -BYTES and the
   destination position is the current point (PT / PT_BYTE).
   CODING_MODE_LAST_BLOCK is added to CODING->mode, detect_coding is
   run first when CODING_REQUIRE_DETECTION says so, and then
   decode_coding does the conversion.

   If the coding system has a post-read function
   (CODING_ATTR_POST_READ), point is temporarily moved to the
   destination position, the function is called with the number of
   produced characters, and CODING->produced_char / CODING->produced
   are adjusted by however much Z / Z_BYTE changed during the call.

   The specpdl depth saved in COUNT is unwound before returning
   CODING->result.  */
6346 decode_coding_gap (coding
, chars
, bytes
)
6347 struct coding_system
*coding
;
6348 EMACS_INT chars
, bytes
;
6350 int count
= specpdl_ptr
- specpdl
;
6353 code_conversion_save (0, 0);
6355 coding
->src_object
= Fcurrent_buffer ();
6356 coding
->src_chars
= chars
;
6357 coding
->src_bytes
= bytes
;
6358 coding
->src_pos
= -chars
;
6359 coding
->src_pos_byte
= -bytes
;
6360 coding
->src_multibyte
= chars
< bytes
;
6361 coding
->dst_object
= coding
->src_object
;
6362 coding
->dst_pos
= PT
;
6363 coding
->dst_pos_byte
= PT_BYTE
;
6364 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
6365 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6367 if (CODING_REQUIRE_DETECTION (coding
))
6368 detect_coding (coding
);
6370 decode_coding (coding
);
6372 attrs
= CODING_ID_ATTRS (coding
->id
);
6373 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6375 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6378 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6379 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6380 make_number (coding
->produced_char
));
6382 coding
->produced_char
+= Z
- prev_Z
;
6383 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6386 unbind_to (count
, Qnil
);
6387 return coding
->result
;
/* Encode CHARS characters (BYTES bytes) with CODING, using the
   current buffer as both source and destination.

   The source position is given as -CHARS / -BYTES, the destination
   position is the current point (PT / PT_BYTE), and the source is
   treated as multibyte when CHARS is less than BYTES.  The actual
   conversion is delegated to encode_coding.  The specpdl depth
   saved in COUNT is unwound before returning CODING->result.  */
6391 encode_coding_gap (coding
, chars
, bytes
)
6392 struct coding_system
*coding
;
6393 EMACS_INT chars
, bytes
;
6395 int count
= specpdl_ptr
- specpdl
;
6397 code_conversion_save (0, 0);
6399 coding
->src_object
= Fcurrent_buffer ();
6400 coding
->src_chars
= chars
;
6401 coding
->src_bytes
= bytes
;
6402 coding
->src_pos
= -chars
;
6403 coding
->src_pos_byte
= -bytes
;
6404 coding
->src_multibyte
= chars
< bytes
;
6405 coding
->dst_object
= coding
->src_object
;
6406 coding
->dst_pos
= PT
;
6407 coding
->dst_pos_byte
= PT_BYTE
;
6409 encode_coding (coding
);
6411 unbind_to (count
, Qnil
);
6412 return coding
->result
;
/* Reviewer summary of decode_coding_object, in the order the
   statements appear below.

   1. When DST_OBJECT is nil, CODING->destination and
      CODING->dst_bytes are remembered in locals.
   2. The source fields of CODING are filled in.  For a string source
      the source position is FROM / FROM_BYTE.  For a buffer source
      that buffer is made current and the gap is moved to FROM; when
      the source buffer is also the destination, point is saved,
      moved to FROM, the range FROM..TO is deleted, and the source
      position becomes -CHARS / -BYTES.
   3. detect_coding runs if CODING_REQUIRE_DETECTION says so.
   4. Destination setup: when DST_OBJECT is Qt, or when it is nil and
      the coding system has a post-read function, a multibyte work
      buffer from code_conversion_save (1, 1) is used, starting at
      BEG.  A buffer destination receives text at its own point.
      Otherwise CODING->dst_object is Qnil.
   5. decode_coding runs; a post-read function, if any, is called
      with the produced character count and the produced counters
      are adjusted by the change in Z / Z_BYTE.
   6. For Qt the result is Fbuffer_string of the work buffer.  For a
      nil DST_OBJECT that went through a work buffer, the bytes are
      copied to DESTINATION, which is grown with xrealloc when
      CODING->produced exceeds DST_BYTES.
   7. When point was saved in step 2 it is restored, shifted by the
      size change if it lay beyond the replaced range.
   8. The specpdl is unwound with CODING->dst_object as the value.

   NOTE(review): the descriptive comment that follows has no visible
   closing delimiter before the function header; confirm that the
   terminator is present in the real file.  */
6416 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6417 SRC_OBJECT into DST_OBJECT by coding context CODING.
6419 SRC_OBJECT is a buffer, a string, or Qnil.
6421 If it is a buffer, the text is at point of the buffer. FROM and TO
6422 are positions in the buffer.
6424 If it is a string, the text is at the beginning of the string.
6425 FROM and TO are indices to the string.
6427 If it is nil, the text is at coding->source. FROM and TO are
6428 indices to coding->source.
6430 DST_OBJECT is a buffer, Qt, or Qnil.
6432 If it is a buffer, the decoded text is inserted at point of the
6433 buffer. If the buffer is the same as SRC_OBJECT, the source text
6436 If it is Qt, a string is made from the decoded text, and
6437 set in CODING->dst_object.
6439 If it is Qnil, the decoded text is stored at CODING->destination.
6440 The caller must allocate CODING->dst_bytes bytes at
6441 CODING->destination by xmalloc. If the decoded text is longer than
6442 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6446 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6448 struct coding_system
*coding
;
6449 Lisp_Object src_object
;
6450 EMACS_INT from
, from_byte
, to
, to_byte
;
6451 Lisp_Object dst_object
;
6453 int count
= specpdl_ptr
- specpdl
;
6454 unsigned char *destination
;
6455 EMACS_INT dst_bytes
;
6456 EMACS_INT chars
= to
- from
;
6457 EMACS_INT bytes
= to_byte
- from_byte
;
6460 int saved_pt
= -1, saved_pt_byte
;
6462 buffer
= Fcurrent_buffer ();
6464 if (NILP (dst_object
))
6466 destination
= coding
->destination
;
6467 dst_bytes
= coding
->dst_bytes
;
6470 coding
->src_object
= src_object
;
6471 coding
->src_chars
= chars
;
6472 coding
->src_bytes
= bytes
;
6473 coding
->src_multibyte
= chars
< bytes
;
6475 if (STRINGP (src_object
))
6477 coding
->src_pos
= from
;
6478 coding
->src_pos_byte
= from_byte
;
6480 else if (BUFFERP (src_object
))
6482 set_buffer_internal (XBUFFER (src_object
));
6484 move_gap_both (from
, from_byte
);
6485 if (EQ (src_object
, dst_object
))
6487 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6488 TEMP_SET_PT_BOTH (from
, from_byte
);
6489 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6490 coding
->src_pos
= -chars
;
6491 coding
->src_pos_byte
= -bytes
;
6495 coding
->src_pos
= from
;
6496 coding
->src_pos_byte
= from_byte
;
6500 if (CODING_REQUIRE_DETECTION (coding
))
6501 detect_coding (coding
);
6502 attrs
= CODING_ID_ATTRS (coding
->id
);
6504 if (EQ (dst_object
, Qt
)
6505 || (! NILP (CODING_ATTR_POST_READ (attrs
))
6506 && NILP (dst_object
)))
6508 coding
->dst_object
= code_conversion_save (1, 1);
6509 coding
->dst_pos
= BEG
;
6510 coding
->dst_pos_byte
= BEG_BYTE
;
6511 coding
->dst_multibyte
= 1;
6513 else if (BUFFERP (dst_object
))
6515 code_conversion_save (0, 0);
6516 coding
->dst_object
= dst_object
;
6517 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6518 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6519 coding
->dst_multibyte
6520 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6524 code_conversion_save (0, 0);
6525 coding
->dst_object
= Qnil
;
6526 coding
->dst_multibyte
= 1;
6529 decode_coding (coding
);
6531 if (BUFFERP (coding
->dst_object
))
6532 set_buffer_internal (XBUFFER (coding
->dst_object
));
6534 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6536 struct gcpro gcpro1
, gcpro2
;
6537 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6540 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6541 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6542 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6543 make_number (coding
->produced_char
));
6546 coding
->produced_char
+= Z
- prev_Z
;
6547 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6550 if (EQ (dst_object
, Qt
))
6552 coding
->dst_object
= Fbuffer_string ();
6554 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6556 set_buffer_internal (XBUFFER (coding
->dst_object
));
6557 if (dst_bytes
< coding
->produced
)
6560 = (unsigned char *) xrealloc (destination
, coding
->produced
);
6563 record_conversion_result (coding
,
6564 CODING_RESULT_INSUFFICIENT_DST
);
6565 unbind_to (count
, Qnil
);
6568 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6569 move_gap_both (BEGV
, BEGV_BYTE
);
6570 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6571 coding
->destination
= destination
;
6577 /* This is the case of:
6578 (BUFFERP (src_object) && EQ (src_object, dst_object))
6579 As we have moved PT while replacing the original buffer
6580 contents, we must recover it now. */
6581 set_buffer_internal (XBUFFER (src_object
));
6582 if (saved_pt
< from
)
6583 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6584 else if (saved_pt
< from
+ chars
)
6585 TEMP_SET_PT_BOTH (from
, from_byte
);
6586 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6587 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6588 saved_pt_byte
+ (coding
->produced
- bytes
));
6590 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6591 saved_pt_byte
+ (coding
->produced
- bytes
));
6594 unbind_to (count
, coding
->dst_object
);
/* Encode the text in the range FROM/FROM_BYTE .. TO/TO_BYTE of
   SRC_OBJECT into DST_OBJECT using CODING.

   Source setup, by case:
   - The coding system has a pre-write function
     (CODING_ATTR_PRE_WRITE): the text is first copied into a work
     buffer obtained from code_conversion_save (from the string, from
     the buffer, or from CODING->source), the function is called with
     BEG and Z of that buffer, and whatever buffer is current
     afterwards becomes the source, taken from BEG to Z.  If the
     source buffer is also the destination, the original range is
     deleted first and point is saved.
   - String source: the source position is FROM / FROM_BYTE.
   - Buffer source: the buffer is made current.  When it is also the
     destination, point is saved and the source becomes the value of
     del_range_1 (from, to, 1, 1) read from offset 0; otherwise the
     gap is moved out of the range if it lies inside and the source
     position is FROM / FROM_BYTE.

   Destination setup: a buffer destination receives the output at
   FROM when it is the source buffer, else at its own point.  For Qt
   a byte area of CODING->src_chars bytes (at least 1) is allocated
   with xmalloc.  Otherwise CODING->dst_object is Qnil.

   After encode_coding, a Qt destination is turned into a string
   (Fbuffer_string, or make_unibyte_string followed by xfree of the
   area).  Saved point is restored, shifted by the size change when
   it lay beyond the replaced range.  The specpdl is unwound with
   Qnil.  */
6599 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6601 struct coding_system
*coding
;
6602 Lisp_Object src_object
;
6603 EMACS_INT from
, from_byte
, to
, to_byte
;
6604 Lisp_Object dst_object
;
6606 int count
= specpdl_ptr
- specpdl
;
6607 EMACS_INT chars
= to
- from
;
6608 EMACS_INT bytes
= to_byte
- from_byte
;
6611 int saved_pt
= -1, saved_pt_byte
;
6613 buffer
= Fcurrent_buffer ();
6615 coding
->src_object
= src_object
;
6616 coding
->src_chars
= chars
;
6617 coding
->src_bytes
= bytes
;
6618 coding
->src_multibyte
= chars
< bytes
;
6620 attrs
= CODING_ID_ATTRS (coding
->id
);
6622 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6624 coding
->src_object
= code_conversion_save (1, coding
->src_multibyte
);
6625 set_buffer_internal (XBUFFER (coding
->src_object
));
6626 if (STRINGP (src_object
))
6627 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6628 else if (BUFFERP (src_object
))
6629 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6631 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
6633 if (EQ (src_object
, dst_object
))
6635 set_buffer_internal (XBUFFER (src_object
));
6636 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6637 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6638 set_buffer_internal (XBUFFER (coding
->src_object
));
6641 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6642 make_number (BEG
), make_number (Z
));
/* The source is re-read from whatever buffer is current after the
   pre-write function returns.  */
6643 coding
->src_object
= Fcurrent_buffer ();
6645 move_gap_both (BEG
, BEG_BYTE
);
6646 coding
->src_chars
= Z
- BEG
;
6647 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6648 coding
->src_pos
= BEG
;
6649 coding
->src_pos_byte
= BEG_BYTE
;
6650 coding
->src_multibyte
= Z
< Z_BYTE
;
6652 else if (STRINGP (src_object
))
6654 code_conversion_save (0, 0);
6655 coding
->src_pos
= from
;
6656 coding
->src_pos_byte
= from_byte
;
6658 else if (BUFFERP (src_object
))
6660 code_conversion_save (0, 0);
6661 set_buffer_internal (XBUFFER (src_object
));
6662 if (EQ (src_object
, dst_object
))
6664 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6665 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
6666 coding
->src_pos
= 0;
6667 coding
->src_pos_byte
= 0;
6671 if (from
< GPT
&& to
>= GPT
)
6672 move_gap_both (from
, from_byte
);
6673 coding
->src_pos
= from
;
6674 coding
->src_pos_byte
= from_byte
;
6678 code_conversion_save (0, 0);
6680 if (BUFFERP (dst_object
))
6682 coding
->dst_object
= dst_object
;
6683 if (EQ (src_object
, dst_object
))
6685 coding
->dst_pos
= from
;
6686 coding
->dst_pos_byte
= from_byte
;
6690 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6691 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6693 coding
->dst_multibyte
6694 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6696 else if (EQ (dst_object
, Qt
))
6698 coding
->dst_object
= Qnil
;
/* Initial size of the output area: one byte per source character,
   but never zero.  */
6699 coding
->dst_bytes
= coding
->src_chars
;
6700 if (coding
->dst_bytes
== 0)
6701 coding
->dst_bytes
= 1;
6702 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6703 coding
->dst_multibyte
= 0;
6707 coding
->dst_object
= Qnil
;
6708 coding
->dst_multibyte
= 0;
6711 encode_coding (coding
);
6713 if (EQ (dst_object
, Qt
))
6715 if (BUFFERP (coding
->dst_object
))
6716 coding
->dst_object
= Fbuffer_string ();
6720 = make_unibyte_string ((char *) coding
->destination
,
6722 xfree (coding
->destination
);
6728 /* This is the case of:
6729 (BUFFERP (src_object) && EQ (src_object, dst_object))
6730 As we have moved PT while replacing the original buffer
6731 contents, we must recover it now. */
6732 set_buffer_internal (XBUFFER (src_object
));
6733 if (saved_pt
< from
)
6734 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6735 else if (saved_pt
< from
+ chars
)
6736 TEMP_SET_PT_BOTH (from
, from_byte
);
6737 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6738 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6739 saved_pt_byte
+ (coding
->produced
- bytes
));
6741 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6742 saved_pt_byte
+ (coding
->produced
- bytes
));
6745 unbind_to (count
, Qnil
);
/* Return the name (CODING_ID_NAME) of the coding system registered
   in coding_categories for the first entry of coding_priorities,
   i.e. for the coding category at index 0 of the priority list.  */
6750 preferred_coding_system ()
6752 int id
= coding_categories
[coding_priorities
[0]].id
;
6754 return CODING_ID_NAME (id
);
6759 /*** 11. Emacs Lisp library functions ***/
/* Lisp predicate `coding-system-p' (exactly one argument).  The
   value is Qt when OBJ is nil or satisfies CODING_SYSTEM_P, and Qnil
   otherwise.  */
6761 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6762 doc
: /* Return t if OBJECT is nil or a coding-system.
6763 See the documentation of `define-coding-system' for information
6764 about coding-system objects. */)
6768 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
/* Lisp function `read-non-nil-coding-system' (exactly one argument).
   Reads a name with Fcompleting_read, completing over
   Vcoding_system_alist with require-match Qt and
   Qcoding_system_history as history.  The trailing
   `while (SCHARS (val) == 0);' makes the read repeat for as long as
   the input is empty.  The non-empty answer is interned with Fintern
   and returned as a symbol.  */
6771 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6772 Sread_non_nil_coding_system
, 1, 1, 0,
6773 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6780 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6781 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6783 while (SCHARS (val
) == 0);
6784 return (Fintern (val
, Qnil
));
/* Lisp function `read-coding-system' (one or two arguments).  A
   symbol given as DEFAULT-CODING-SYSTEM is first replaced by its
   name string, because Fcompleting_read is handed the default in
   that form.  The name is read once, completing over
   Vcoding_system_alist with require-match Qt.  Empty input yields
   Qnil; anything else is interned with Fintern and returned.  */
6787 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6788 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6789 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6790 (prompt
, default_coding_system
)
6791 Lisp_Object prompt
, default_coding_system
;
6794 if (SYMBOLP (default_coding_system
))
6795 XSETSTRING (default_coding_system
, XPNTR (SYMBOL_NAME (default_coding_system
)));
6796 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6797 Qt
, Qnil
, Qcoding_system_history
,
6798 default_coding_system
, Qnil
);
6799 return (SCHARS (val
) == 0 ? Qnil
: Fintern (val
, Qnil
));
/* Lisp function `check-coding-system'.  CODING-SYSTEM must be a
   symbol (CHECK_SYMBOL).  It is returned unchanged when
   Fcoding_system_p accepts it; otherwise Qcoding_system_error is
   signaled with a one-element list holding CODING-SYSTEM as data.  */
6802 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6804 doc
: /* Check validity of CODING-SYSTEM.
6805 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6807 Lisp_Object coding_system
;
6809 CHECK_SYMBOL (coding_system
);
6810 if (!NILP (Fcoding_system_p (coding_system
)))
6811 return coding_system
;
6813 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6817 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6818 HIGHEST is nonzero, return the coding system of the highest
6819 priority among the detected coding systems. Otherwise return a
6820 list of detected coding systems sorted by their priorities. If
6821 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6822 multibyte form but contains only ASCII and eight-bit chars.
6823 Otherwise, the bytes are raw bytes.
6825 CODING-SYSTEM controls the detection as below:
6827 If it is nil, detect both text-format and eol-format. If the
6828 text-format part of CODING-SYSTEM is already specified
6829 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6830 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6831 detect only text-format. */
/* Outline of the body:

   - A nil CODING_SYSTEM is treated as Qundecided.  A local coding
     structure is set up from it, and its source fields are pointed
     at SRC with CODING_MODE_LAST_BLOCK set.
   - Text-format phase.  When the base category is
     coding_category_undecided, leading ASCII bytes are skipped
     (stopping at bytes with the 0x80 bit or at the ISO2022 controls
     tested below) and the detectors of the categories are consulted
     in coding_priorities order; VAL becomes a list of coding system
     ids.  When the base category is coding_category_utf_16_auto only
     detect_coding_utf_16 is consulted.  Otherwise VAL is the single
     id of CODING_SYSTEM itself.
   - EOL phase.  When the eol type is still a vector, detect_eol is
     run separately for ordinary text, UTF-16 big endian and UTF-16
     little endian, each only if a matching category was found.  A
     fixed eol type (Qunix, Qdos or the remaining case) sets all
     three results at once.
   - Every id in VAL is replaced by a coding system name: the
     subsidiary AREF (eol_type, 0 / 1 / 2) for a seen LF / CRLF / CR
     when that system's own eol type is a vector, else the plain
     CODING_ID_NAME.
   - The value is XCAR (val) when HIGHEST is nonzero, else VAL.  */
6834 detect_coding_system (src
, src_chars
, src_bytes
, highest
, multibytep
,
6836 const unsigned char *src
;
6837 int src_chars
, src_bytes
, highest
;
6839 Lisp_Object coding_system
;
6841 const unsigned char *src_end
= src
+ src_bytes
;
6842 Lisp_Object attrs
, eol_type
;
6844 struct coding_system coding
;
6846 struct coding_detection_info detect_info
;
6847 enum coding_category base_category
;
6849 if (NILP (coding_system
))
6850 coding_system
= Qundecided
;
6851 setup_coding_system (coding_system
, &coding
);
6852 attrs
= CODING_ID_ATTRS (coding
.id
);
6853 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6854 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
6856 coding
.source
= src
;
6857 coding
.src_chars
= src_chars
;
6858 coding
.src_bytes
= src_bytes
;
6859 coding
.src_multibyte
= multibytep
;
6860 coding
.consumed
= 0;
6861 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6863 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
6865 /* At first, detect text-format if necessary. */
6866 base_category
= XINT (CODING_ATTR_CATEGORY (attrs
));
6867 if (base_category
== coding_category_undecided
)
6869 enum coding_category category
;
6870 struct coding_system
*this;
6873 /* Skip all ASCII bytes except for a few ISO2022 controls. */
6874 for (i
= 0; src
< src_end
; i
++, src
++)
6877 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
6879 || c
== ISO_CODE_SO
)))
6882 coding
.head_ascii
= src
- coding
.source
;
6885 for (i
= 0; i
< coding_category_raw_text
; i
++)
6887 category
= coding_priorities
[i
];
6888 this = coding_categories
+ category
;
6892 /* No coding system of this category is defined. */
6893 detect_info
.rejected
|= (1 << category
);
6895 else if (category
>= coding_category_raw_text
)
6897 else if (detect_info
.checked
& (1 << category
))
6900 && (detect_info
.found
& (1 << category
)))
6905 if ((*(this->detector
)) (&coding
, &detect_info
)
6907 && (detect_info
.found
& (1 << category
)))
6909 if (category
== coding_category_utf_16_auto
)
6911 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
6912 category
= coding_category_utf_16_le
;
6914 category
= coding_category_utf_16_be
;
6921 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
6923 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
6924 id
= coding_categories
[coding_category_raw_text
].id
;
6925 val
= Fcons (make_number (id
), Qnil
);
6927 else if (! detect_info
.rejected
&& ! detect_info
.found
)
6929 detect_info
.found
= CATEGORY_MASK_ANY
;
6930 id
= coding_categories
[coding_category_undecided
].id
;
6931 val
= Fcons (make_number (id
), Qnil
);
6935 if (detect_info
.found
)
6937 detect_info
.found
= 1 << category
;
6938 val
= Fcons (make_number (this->id
), Qnil
);
6941 for (i
= 0; i
< coding_category_raw_text
; i
++)
6942 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
6944 detect_info
.found
= 1 << coding_priorities
[i
];
6945 id
= coding_categories
[coding_priorities
[i
]].id
;
6946 val
= Fcons (make_number (id
), Qnil
);
6952 int mask
= detect_info
.rejected
| detect_info
.found
;
6956 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6958 category
= coding_priorities
[i
];
6959 if (! (mask
& (1 << category
)))
6961 found
|= 1 << category
;
6962 id
= coding_categories
[category
].id
;
6963 val
= Fcons (make_number (id
), val
);
6966 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6968 category
= coding_priorities
[i
];
6969 if (detect_info
.found
& (1 << category
))
6971 id
= coding_categories
[category
].id
;
6972 val
= Fcons (make_number (id
), val
);
6975 detect_info
.found
|= found
;
6978 else if (base_category
== coding_category_utf_16_auto
)
6980 if (detect_coding_utf_16 (&coding
, &detect_info
))
6982 enum coding_category category
;
6983 struct coding_system
*this;
6985 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
6986 this = coding_categories
+ coding_category_utf_16_le
;
6987 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
6988 this = coding_categories
+ coding_category_utf_16_be
;
6989 else if (detect_info
.rejected
& CATEGORY_MASK_UTF_16_LE_NOSIG
)
6990 this = coding_categories
+ coding_category_utf_16_be_nosig
;
6992 this = coding_categories
+ coding_category_utf_16_le_nosig
;
6993 val
= Fcons (make_number (this->id
), Qnil
);
6998 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
6999 val
= Fcons (make_number (coding
.id
), Qnil
);
7002 /* Then, detect eol-format if necessary. */
/* All three results start at -1, meaning that no EOL format was
   probed.  The little-endian one used to be left uninitialized; it
   is read below whenever an element of VAL belongs to a UTF-16
   little endian category, even if the probe that would have set it
   was skipped.  With -1 such an element gets its plain coding
   system name, exactly as the other two variables already behave.  */
7004 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
= -1;
7007 if (VECTORP (eol_type
))
7009 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
7010 normal_eol
= detect_eol (coding
.source
, src_bytes
,
7011 coding_category_raw_text
);
7012 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
7013 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
7014 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
7015 coding_category_utf_16_be
);
7016 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
7017 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
7018 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
7019 coding_category_utf_16_le
);
7023 if (EQ (eol_type
, Qunix
))
7024 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
7025 else if (EQ (eol_type
, Qdos
))
7026 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
7028 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
7031 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
7033 enum coding_category category
;
7036 id
= XINT (XCAR (tail
));
7037 attrs
= CODING_ID_ATTRS (id
);
7038 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7039 eol_type
= CODING_ID_EOL_TYPE (id
);
7040 if (VECTORP (eol_type
))
7042 if (category
== coding_category_utf_16_be
7043 || category
== coding_category_utf_16_be_nosig
)
7044 this_eol
= utf_16_be_eol
;
7045 else if (category
== coding_category_utf_16_le
7046 || category
== coding_category_utf_16_le_nosig
)
7047 this_eol
= utf_16_le_eol
;
7049 this_eol
= normal_eol
;
7051 if (this_eol
== EOL_SEEN_LF
)
7052 XSETCAR (tail
, AREF (eol_type
, 0));
7053 else if (this_eol
== EOL_SEEN_CRLF
)
7054 XSETCAR (tail
, AREF (eol_type
, 1));
7055 else if (this_eol
== EOL_SEEN_CR
)
7056 XSETCAR (tail
, AREF (eol_type
, 2));
7058 XSETCAR (tail
, CODING_ID_NAME (id
));
7061 XSETCAR (tail
, CODING_ID_NAME (id
));
7065 return (highest
? XCAR (val
) : val
);
/* Lisp function `detect-coding-region'.  START and END are coerced
   from markers, validated as a region of the current buffer, and
   converted to byte positions with CHAR_TO_BYTE.  When the region
   straddles the gap (FROM before GPT, TO at or after it) the gap is
   moved to TO.  Detection is then delegated to detect_coding_system,
   which is handed the address of FROM_BYTE, the character and byte
   lengths of the region, and the multibyteness of the current
   buffer.  */
7069 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
7071 doc
: /* Detect coding system of the text in the region between START and END.
7072 Return a list of possible coding systems ordered by priority.
7074 If only ASCII characters are found, it returns a list of single element
7075 `undecided' or its subsidiary coding system according to a detected
7078 If optional argument HIGHEST is non-nil, return the coding system of
7079 highest priority. */)
7080 (start
, end
, highest
)
7081 Lisp_Object start
, end
, highest
;
7084 int from_byte
, to_byte
;
7086 CHECK_NUMBER_COERCE_MARKER (start
);
7087 CHECK_NUMBER_COERCE_MARKER (end
);
7089 validate_region (&start
, &end
);
7090 from
= XINT (start
), to
= XINT (end
);
7091 from_byte
= CHAR_TO_BYTE (from
);
7092 to_byte
= CHAR_TO_BYTE (to
);
7094 if (from
< GPT
&& to
>= GPT
)
7095 move_gap_both (to
, to_byte
);
7097 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
7098 to
- from
, to_byte
- from_byte
,
7100 !NILP (current_buffer
7101 ->enable_multibyte_characters
),
/* Lisp function `detect-coding-string'.  STRING must be a string
   (CHECK_STRING).  Detection is delegated to detect_coding_system,
   which is handed the string data, its character and byte lengths,
   whether HIGHEST is non-nil, and the multibyteness of STRING.  */
7105 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
7107 doc
: /* Detect coding system of the text in STRING.
7108 Return a list of possible coding systems ordered by priority.
7110 If only ASCII characters are found, it returns a list of single element
7111 `undecided' or its subsidiary coding system according to a detected
7114 If optional argument HIGHEST is non-nil, return the coding system of
7115 highest priority. */)
7117 Lisp_Object string
, highest
;
7119 CHECK_STRING (string
);
7121 return detect_coding_system (SDATA (string
),
7122 SCHARS (string
), SBYTES (string
),
7123 !NILP (highest
), STRING_MULTIBYTE (string
),
/* Test whether character C is covered by the charsets of the coding
   system attributes ATTRS.

   C is first mapped through the translation table stored in ATTRS
   (CODING_ATTR_TRANS_TBL) when that table is non-nil.  The charset
   list of ATTRS is then walked, testing CHAR_CHARSET_P for each
   charset.  The value is nonzero iff the walk ended with TAIL still
   non-nil.

   NOTE(review): presumably the walk stops at the first charset that
   contains C, so a non-nil TAIL means that a match was found; the
   statement that leaves the loop is not visible here.  */
7129 char_encodable_p (c
, attrs
)
7134 struct charset
*charset
;
7135 Lisp_Object translation_table
;
7137 translation_table
= CODING_ATTR_TRANS_TBL (attrs
);
7138 if (! NILP (translation_table
))
7139 c
= translate_char (translation_table
, c
);
7140 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
7141 CONSP (tail
); tail
= XCDR (tail
))
7143 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7144 if (CHAR_CHARSET_P (c
, charset
))
7147 return (! NILP (tail
));
7151 /* Return a list of coding systems that safely encode the text between
7152 START and END. If EXCLUDE is non-nil, it is a list of coding
7153 systems not to check. The returned list doesn't contain any such
7154 coding systems. In any case, if the text contains only ASCII or is
7155 unibyte, return t. */
/* Outline of the body:

   - START may be a string, in which case the whole string is
     examined; otherwise START and END are buffer positions that are
     range-checked against BEG and Z.  Unibyte text, and text whose
     character and byte counts are equal, is tested for up front.
   - For a buffer region that straddles the gap, the gap is moved to
     whichever end of the region is nearer.
   - coding_attrs_list collects the attribute vectors of the entries
     of Vcoding_system_list that are their own base name, are not of
     type Qundecided, and pass the EXCLUDE test.  Each vector first
     gets the table from get_translation_table (attrs, 1) stored in
     its coding_attr_trans_tbl slot.
   - ASCII bytes are trimmed from both ends of the text, then every
     remaining character is tested with char_encodable_p against each
     list entry.  The branches that handle a failing entry overwrite
     the cell with its successor, or set its car to nil when it is
     the last cell.
   - When charset_map_loaded was set during a test, P, PBEG and PEND
     are recomputed from their saved offsets (presumably because the
     text may have been relocated -- confirm).
   - The value is the list of base names of the entries that remain
     non-nil.  */
7157 DEFUN ("find-coding-systems-region-internal",
7158 Ffind_coding_systems_region_internal
,
7159 Sfind_coding_systems_region_internal
, 2, 3, 0,
7160 doc
: /* Internal use only. */)
7161 (start
, end
, exclude
)
7162 Lisp_Object start
, end
, exclude
;
7164 Lisp_Object coding_attrs_list
, safe_codings
;
7165 EMACS_INT start_byte
, end_byte
;
7166 const unsigned char *p
, *pbeg
, *pend
;
7168 Lisp_Object tail
, elt
;
7170 if (STRINGP (start
))
7172 if (!STRING_MULTIBYTE (start
)
7173 || SCHARS (start
) == SBYTES (start
))
7176 end_byte
= SBYTES (start
);
7180 CHECK_NUMBER_COERCE_MARKER (start
);
7181 CHECK_NUMBER_COERCE_MARKER (end
);
7182 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7183 args_out_of_range (start
, end
);
7184 if (NILP (current_buffer
->enable_multibyte_characters
))
7186 start_byte
= CHAR_TO_BYTE (XINT (start
));
7187 end_byte
= CHAR_TO_BYTE (XINT (end
));
7188 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
7191 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7193 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7194 move_gap_both (XINT (start
), start_byte
);
7196 move_gap_both (XINT (end
), end_byte
);
7200 coding_attrs_list
= Qnil
;
7201 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7203 || NILP (Fmemq (XCAR (tail
), exclude
)))
7207 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
7208 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
7209 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
7211 ASET (attrs
, coding_attr_trans_tbl
,
7212 get_translation_table (attrs
, 1));
7213 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
7217 if (STRINGP (start
))
7218 p
= pbeg
= SDATA (start
);
7220 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7221 pend
= p
+ (end_byte
- start_byte
);
7223 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
7224 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7228 if (ASCII_BYTE_P (*p
))
7232 c
= STRING_CHAR_ADVANCE (p
);
7234 charset_map_loaded
= 0;
7235 for (tail
= coding_attrs_list
; CONSP (tail
);)
7240 else if (char_encodable_p (c
, elt
))
7242 else if (CONSP (XCDR (tail
)))
7244 XSETCAR (tail
, XCAR (XCDR (tail
)));
7245 XSETCDR (tail
, XCDR (XCDR (tail
)));
7249 XSETCAR (tail
, Qnil
);
7253 if (charset_map_loaded
)
7255 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7257 if (STRINGP (start
))
7258 pbeg
= SDATA (start
);
7260 pbeg
= BYTE_POS_ADDR (start_byte
);
7261 p
= pbeg
+ p_offset
;
7262 pend
= pbeg
+ pend_offset
;
7267 safe_codings
= Qnil
;
7268 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
7269 if (! NILP (XCAR (tail
)))
7270 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
7272 return safe_codings
;
/* Lisp function `unencodable-char-position' (three to five
   arguments).  Outline of the body:

   - CODING-SYSTEM is validated by Fcheck_coding_system and set up in
     a local coding structure.  Its attributes supply whether it is
     ASCII compatible, its charset list, and (through
     get_translation_table (attrs, 1)) its translation table.  A
     coding system of type Qraw_text is tested for separately.
   - Buffer case: START and END are validated as a region.  A unibyte
     buffer, or an ASCII-compatible coding system together with equal
     character and byte lengths, is tested for before P and PEND are
     taken from CHAR_POS_ADDR.
   - String case: STRING and START are type-checked, the range is
     checked against SCHARS (string), and P / PEND are computed with
     string_char_to_byte.
   - Scan: with an ASCII-compatible coding system, runs of ASCII
     bytes are stepped over directly.  Every other character is read
     with STRING_CHAR_ADVANCE, and its position is consed onto
     POSITIONS when char_charset finds no charset in the list for the
     translated character.
   - The value is Fcar (positions) when COUNT is nil, otherwise the
     positions in ascending order (Fnreverse).

   NOTE(review): the documentation text after the DEFUN header is
   shown without its comment delimiters; confirm that they are
   present in the real file.  */
7276 DEFUN ("unencodable-char-position", Funencodable_char_position
,
7277 Sunencodable_char_position
, 3, 5, 0,
7279 Return position of first un-encodable character in a region.
7280 START and END specfiy the region and CODING-SYSTEM specifies the
7281 encoding to check. Return nil if CODING-SYSTEM does encode the region.
7283 If optional 4th argument COUNT is non-nil, it specifies at most how
7284 many un-encodable characters to search. In this case, the value is a
7287 If optional 5th argument STRING is non-nil, it is a string to search
7288 for un-encodable characters. In that case, START and END are indexes
7290 (start
, end
, coding_system
, count
, string
)
7291 Lisp_Object start
, end
, coding_system
, count
, string
;
7294 struct coding_system coding
;
7295 Lisp_Object attrs
, charset_list
, translation_table
;
7296 Lisp_Object positions
;
7298 const unsigned char *p
, *stop
, *pend
;
7299 int ascii_compatible
;
7301 setup_coding_system (Fcheck_coding_system (coding_system
), &coding
);
7302 attrs
= CODING_ID_ATTRS (coding
.id
);
7303 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
7305 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
7306 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7307 translation_table
= get_translation_table (attrs
, 1);
7311 validate_region (&start
, &end
);
7312 from
= XINT (start
);
7314 if (NILP (current_buffer
->enable_multibyte_characters
)
7315 || (ascii_compatible
7316 && (to
- from
) == (CHAR_TO_BYTE (to
) - (CHAR_TO_BYTE (from
)))))
7318 p
= CHAR_POS_ADDR (from
);
7319 pend
= CHAR_POS_ADDR (to
);
7320 if (from
< GPT
&& to
>= GPT
)
7327 CHECK_STRING (string
);
7328 CHECK_NATNUM (start
);
7330 from
= XINT (start
);
7333 || to
> SCHARS (string
))
7334 args_out_of_range_3 (string
, start
, end
);
7335 if (! STRING_MULTIBYTE (string
))
7337 p
= SDATA (string
) + string_char_to_byte (string
, from
);
7338 stop
= pend
= SDATA (string
) + string_char_to_byte (string
, to
);
7339 if (ascii_compatible
&& (to
- from
) == (pend
- p
))
7347 CHECK_NATNUM (count
);
7356 if (ascii_compatible
)
7357 while (p
< stop
&& ASCII_BYTE_P (*p
))
7367 c
= STRING_CHAR_ADVANCE (p
);
7368 if (! (ASCII_CHAR_P (c
) && ascii_compatible
)
7369 && ! char_charset (translate_char (translation_table
, c
),
7370 charset_list
, NULL
))
7372 positions
= Fcons (make_number (from
), positions
);
7381 return (NILP (count
) ? Fcar (positions
) : Fnreverse (positions
));
/* Lisp function `check-coding-systems-region'.  Outline of the body:

   - START may be a string, in which case the whole string is
     examined; otherwise START and END are buffer positions that are
     range-checked against BEG and Z.  Unibyte text, and text whose
     character and byte counts are equal, is tested for up front.
   - For a buffer region that straddles the gap, the gap is moved to
     whichever end of the region is nearer.
   - LIST gets one entry (CODING-SYSTEM ATTRS) per element of
     CODING-SYSTEM-LIST, after the table from get_translation_table
     (attrs, 1) has been stored in the coding_attr_trans_tbl slot of
     ATTRS.
   - ASCII bytes are trimmed from both ends of the text.  For every
     remaining character, each entry whose ATTRS fail
     char_encodable_p has the current position consed onto its tail.
   - When charset_map_loaded was set during a test, P, PBEG and PEND
     are recomputed from their saved offsets.
   - The result keeps only the entries that collected at least one
     position, with the positions put in ascending order by
     Fnreverse.  */
7385 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
7386 Scheck_coding_systems_region
, 3, 3, 0,
7387 doc
: /* Check if the region is encodable by coding systems.
7389 START and END are buffer positions specifying the region.
7390 CODING-SYSTEM-LIST is a list of coding systems to check.
7392 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7393 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
7394 whole region, POS0, POS1, ... are buffer positions where non-encodable
7395 characters are found.
7397 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7400 START may be a string. In that case, check if the string is
7401 encodable, and the value contains indices to the string instead of
7402 buffer positions. END is ignored. */)
7403 (start
, end
, coding_system_list
)
7404 Lisp_Object start
, end
, coding_system_list
;
7407 EMACS_INT start_byte
, end_byte
;
7409 const unsigned char *p
, *pbeg
, *pend
;
7411 Lisp_Object tail
, elt
, attrs
;
7413 if (STRINGP (start
))
/* Shortcut for strings that need no per-character check: a unibyte
   string, or a multibyte string whose character and byte counts are
   equal.  The former test (not multibyte AND counts differ) could
   never succeed, since the two counts of a unibyte string are always
   equal.  The test now mirrors the buffer branch below and the
   string branch of find-coding-systems-region-internal.  */
7415 if (!STRING_MULTIBYTE (start
)
7416 || SCHARS (start
) == SBYTES (start
))
7419 end_byte
= SBYTES (start
);
7424 CHECK_NUMBER_COERCE_MARKER (start
);
7425 CHECK_NUMBER_COERCE_MARKER (end
);
7426 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7427 args_out_of_range (start
, end
);
7428 if (NILP (current_buffer
->enable_multibyte_characters
))
7430 start_byte
= CHAR_TO_BYTE (XINT (start
));
7431 end_byte
= CHAR_TO_BYTE (XINT (end
));
7432 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
7435 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7437 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7438 move_gap_both (XINT (start
), start_byte
);
7440 move_gap_both (XINT (end
), end_byte
);
7446 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7449 attrs
= AREF (CODING_SYSTEM_SPEC (elt
), 0);
7450 ASET (attrs
, coding_attr_trans_tbl
, get_translation_table (attrs
, 1));
7451 list
= Fcons (Fcons (elt
, Fcons (attrs
, Qnil
)), list
);
7454 if (STRINGP (start
))
7455 p
= pbeg
= SDATA (start
);
7457 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7458 pend
= p
+ (end_byte
- start_byte
);
7460 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7461 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7465 if (ASCII_BYTE_P (*p
))
7469 c
= STRING_CHAR_ADVANCE (p
);
7471 charset_map_loaded
= 0;
7472 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
7474 elt
= XCDR (XCAR (tail
));
7475 if (! char_encodable_p (c
, XCAR (elt
)))
7476 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
7478 if (charset_map_loaded
)
7480 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7482 if (STRINGP (start
))
7483 pbeg
= SDATA (start
);
7485 pbeg
= BYTE_POS_ADDR (start_byte
);
7486 p
= pbeg
+ p_offset
;
7487 pend
= pbeg
+ pend_offset
;
7495 for (; CONSP (tail
); tail
= XCDR (tail
))
7498 if (CONSP (XCDR (XCDR (elt
))))
7499 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
/* Encode or decode the text between START and END of the current
   buffer with CODING_SYSTEM.

   A nil CODING_SYSTEM is replaced by Qno_conversion.  DST_OBJECT nil
   means the current buffer; otherwise it must be Qt or a buffer.
   The conversion is always run with CODING_MODE_LAST_BLOCK set.

   Returns the number of produced characters (coding.produced_char)
   when DST_OBJECT is a buffer, and coding.dst_object otherwise.

   NOTE(review): the tests on ENCODEP (choosing between the encode and
   decode calls) and on NORECORD (guarding the assignment to
   Vlast_coding_system_used) are presumably on source lines that this
   listing omits -- confirm against the full file.  */
7508 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7509 Lisp_Object start
, end
, coding_system
, dst_object
;
7510 int encodep
, norecord
;
7512 struct coding_system coding
;
7513 EMACS_INT from
, from_byte
, to
, to_byte
;
7514 Lisp_Object src_object
;
7516 CHECK_NUMBER_COERCE_MARKER (start
);
7517 CHECK_NUMBER_COERCE_MARKER (end
);
7518 if (NILP (coding_system
))
7519 coding_system
= Qno_conversion
;
7521 CHECK_CODING_SYSTEM (coding_system
);
/* The source is always the current buffer.  */
7522 src_object
= Fcurrent_buffer ();
7523 if (NILP (dst_object
))
7524 dst_object
= src_object
;
7525 else if (! EQ (dst_object
, Qt
))
7526 CHECK_BUFFER (dst_object
);
7528 validate_region (&start
, &end
);
7529 from
= XFASTINT (start
);
7530 from_byte
= CHAR_TO_BYTE (from
);
7531 to
= XFASTINT (end
);
7532 to_byte
= CHAR_TO_BYTE (to
);
7534 setup_coding_system (coding_system
, &coding
);
7535 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7538 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7541 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7544 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7546 return (BUFFERP (dst_object
)
7547 ? make_number (coding
.produced_char
)
7548 : coding
.dst_object
);
/* Lisp primitive `decode-coding-region': a thin wrapper that forwards
   its four arguments to code_convert_region with ENCODEP = 0 and
   NORECORD = 0.  */
7552 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7553 3, 4, "r\nzCoding system: ",
7554 doc
: /* Decode the current region from the specified coding system.
7555 When called from a program, takes four arguments:
7556 START, END, CODING-SYSTEM, and DESTINATION.
7557 START and END are buffer positions.
7559 Optional 4th argument DESTINATION specifies where the decoded text goes.
7560 If nil, the region between START and END is replaced by the decoded text.
7561 If buffer, the decoded text is inserted in the buffer.
7562 If t, the decoded text is returned.
7564 This function sets `last-coding-system-used' to the precise coding system
7565 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7566 not fully specified.)
7567 It returns the length of the decoded text. */)
7568 (start
, end
, coding_system
, destination
)
7569 Lisp_Object start
, end
, coding_system
, destination
;
7571 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
/* Lisp primitive `encode-coding-region': a thin wrapper that forwards
   its four arguments to code_convert_region with ENCODEP = 1 and
   NORECORD = 0.  */
7574 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7575 3, 4, "r\nzCoding system: ",
7576 doc
: /* Encode the current region by specified coding system.
7577 When called from a program, takes four arguments:
7578 START, END, CODING-SYSTEM, and DESTINATION. START and END are buffer positions.
7580 Optional 4th argument DESTINATION specifies where the encoded text goes.
7581 If nil, the region between START and END is replaced by the encoded text.
7582 If buffer, the encoded text is inserted in the buffer.
7583 If t, the encoded text is returned.
7585 This function sets `last-coding-system-used' to the precise coding system
7586 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7587 not fully specified.)
7588 It returns the length of the encoded text. */)
7589 (start
, end
, coding_system
, destination
)
7590 Lisp_Object start
, end
, coding_system
, destination
;
7592 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
/* Encode or decode STRING with CODING_SYSTEM.

   DST_OBJECT is nil, Qt or a buffer (anything else is rejected by
   CHECK_BUFFER).  NOCOPY non-zero means the caller accepts STRING
   itself as the result when no conversion is needed.

   When CODING_SYSTEM and DST_OBJECT are both nil, no conversion is
   run: STRING itself is returned if NOCOPY is non-zero, otherwise a
   fresh copy made with Fcopy_sequence.  (The previous code had the two
   arms of this choice swapped, which contradicted the NOCOPY contract
   documented for `decode-coding-string' and `encode-coding-string'.)

   Otherwise the whole string is converted with CODING_MODE_LAST_BLOCK
   set, and the result is the number of produced characters when
   DST_OBJECT is a buffer, or coding.dst_object if not.

   NOTE(review): the tests on ENCODEP and NORECORD, and the handling of
   a nil DST_OBJECT after CHECK_CODING_SYSTEM, are presumably on source
   lines that this listing omits -- confirm against the full file.  */
7596 code_convert_string (string
, coding_system
, dst_object
,
7597 encodep
, nocopy
, norecord
)
7598 Lisp_Object string
, coding_system
, dst_object
;
7599 int encodep
, nocopy
, norecord
;
7601 struct coding_system coding
;
7602 EMACS_INT chars
, bytes
;
7604 CHECK_STRING (string
);
7605 if (NILP (coding_system
))
7608 Vlast_coding_system_used
= Qno_conversion
;
7609 if (NILP (dst_object
))
/* Trivial case: hand back STRING itself only when the caller said
   that is acceptable (NOCOPY); otherwise return a copy.  */
7610 return (nocopy
? string
: Fcopy_sequence (string
));
7613 if (NILP (coding_system
))
7614 coding_system
= Qno_conversion
;
7616 CHECK_CODING_SYSTEM (coding_system
);
7617 if (NILP (dst_object
))
7619 else if (! EQ (dst_object
, Qt
))
7620 CHECK_BUFFER (dst_object
);
7622 setup_coding_system (coding_system
, &coding
);
7623 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7624 chars
= SCHARS (string
);
7625 bytes
= SBYTES (string
);
7627 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7629 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7631 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7633 return (BUFFERP (dst_object
)
7634 ? make_number (coding
.produced_char
)
7635 : coding
.dst_object
);
7639 /* Encode or decode STRING according to CODING_SYSTEM.
7640 Do not set Vlast_coding_system_used.
7642 This function is called only from macros DECODE_FILE and
7643 ENCODE_FILE, thus we ignore character composition.
   Implemented as a call to code_convert_string with DST_OBJECT = Qt,
   NOCOPY = 0 and NORECORD = 1; ENCODEP is passed through unchanged. */
7646 code_convert_string_norecord (string
, coding_system
, encodep
)
7647 Lisp_Object string
, coding_system
;
7650 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
/* Lisp primitive `decode-coding-string': forwards to
   code_convert_string with BUFFER as the destination, ENCODEP = 0,
   NOCOPY converted to a C boolean, and NORECORD = 0.

   NOTE(review): the docstring says the return value is BUFFER when
   BUFFER is non-nil, but code_convert_string returns
   make_number (coding.produced_char) when its destination is a
   buffer -- confirm which is intended.  */
7654 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
7656 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7658 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7659 if the decoding operation is trivial.
7661 Optional fourth arg BUFFER non-nil means that the decoded text is
7662 inserted in BUFFER instead of returned as a string. In this case,
7663 the return value is BUFFER.
7665 This function sets `last-coding-system-used' to the precise coding system
7666 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7667 not fully specified.) */)
7668 (string
, coding_system
, nocopy
, buffer
)
7669 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7671 return code_convert_string (string
, coding_system
, buffer
,
7672 0, ! NILP (nocopy
), 0);
/* Lisp primitive `encode-coding-string': forwards to
   code_convert_string with BUFFER as the destination, ENCODEP = 1,
   NOCOPY converted to a C boolean, and NORECORD = 1.

   NOTE(review): NORECORD is passed as 1 here, whereas
   `decode-coding-string' passes 0 and the docstring below says this
   function sets `last-coding-system-used' -- confirm whether the code
   or the docstring is the intended behavior.

   NOTE(review): the docstring says the return value is BUFFER when
   BUFFER is non-nil, but code_convert_string returns
   make_number (coding.produced_char) when its destination is a
   buffer -- confirm which is intended.  */
7675 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
7677 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
7679 Optional third arg NOCOPY non-nil means it is OK to return STRING
7680 itself if the encoding operation is trivial.
7682 Optional fourth arg BUFFER non-nil means that the encoded text is
7683 inserted in BUFFER instead of returned as a string. In this case,
7684 the return value is BUFFER.
7686 This function sets `last-coding-system-used' to the precise coding system
7687 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7688 not fully specified.) */)
7689 (string
, coding_system
, nocopy
, buffer
)
7690 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7692 return code_convert_string (string
, coding_system
, buffer
,
7693 1, ! NILP (nocopy
), 1);
/* Lisp primitive `decode-sjis-char'.

   CODE must be a non-negative integer.  The first three entries of
   the charset list of Vsjis_coding_system are taken as the roman,
   kana and kanji charsets, in that order.  One of them is selected
   from the value of CODE, a two-byte CODE is range-checked byte by
   byte, and the character obtained from DECODE_CHAR is returned as a
   Lisp integer.

   NOTE(review): this listing omits some source lines (see the gaps in
   the embedded line numbers), including the argument list.  */
7697 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
7698 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
7699 Return the corresponding character. */)
7703 Lisp_Object spec
, attrs
, val
;
7704 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
7707 CHECK_NATNUM (code
);
7708 c
= XFASTINT (code
);
7709 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7710 attrs
= AREF (spec
, 0);
7712 if (ASCII_BYTE_P (c
)
7713 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7716 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7717 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7718 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7719 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7722 charset
= charset_roman
;
/* NOTE(review): this range admits 0xA0 and excludes 0xDF; half-width
   katakana in Shift_JIS is usually given as 0xA1..0xDF -- confirm the
   bounds.  */
7723 else if (c
>= 0xA0 && c
< 0xDF)
7725 charset
= charset_kana
;
/* Two-byte code: S1 is the high byte, S2 the low byte.  */
7730 int s1
= c
>> 8, s2
= c
& 0xFF;
7732 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
7733 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
/* NOTE(review): CODE is a Lisp_Object but is formatted with %d here
   and below -- confirm that `error' prints the intended number.  */
7734 error ("Invalid code: %d", code
);
7736 charset
= charset_kanji
;
7738 c
= DECODE_CHAR (charset
, c
);
7740 error ("Invalid code: %d", code
);
7741 return make_number (c
);
/* Lisp primitive `encode-sjis-char'.

   CH must be a character.  The charset list of Vsjis_coding_system is
   searched with char_charset, which also stores the code point in
   CODE.  An error is signaled when CODE equals
   CHARSET_INVALID_CODE (charset); otherwise CODE is returned as a
   Lisp integer.

   NOTE(review): this listing omits some source lines (see the gaps in
   the embedded line numbers), including the argument list and the
   body of the ASCII branch.  */
7745 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
7746 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
7747 Return the corresponding code in SJIS. */)
7751 Lisp_Object spec
, attrs
, charset_list
;
7753 struct charset
*charset
;
7756 CHECK_CHARACTER (ch
);
7758 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7759 attrs
= AREF (spec
, 0);
7761 if (ASCII_CHAR_P (c
)
7762 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7765 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7766 charset
= char_charset (c
, charset_list
, &code
);
7767 if (code
== CHARSET_INVALID_CODE (charset
))
7768 error ("Can't encode by shift_jis encoding: %d", c
);
7771 return make_number (code
);
/* Lisp primitive `decode-big5-char'.

   CODE must be a non-negative integer.  The first two entries of the
   charset list of Vbig5_coding_system are taken as the roman and Big5
   charsets, in that order.  A two-byte CODE is split into lead byte B1
   and trail byte B2 and range-checked; the character obtained from
   DECODE_CHAR is returned as a Lisp integer.

   The trail byte is extracted with a 0xFF mask.  The previous 0x7F
   mask cleared bit 7, so B2 could never exceed 0x7F: the `b2 > 0xFE'
   test was dead, and codes whose trail byte lies in 0xA1..0xFE were
   checked against the wrong value.  Fdecode_sjis_char masks its low
   byte with 0xFF in the same way.

   NOTE(review): this listing omits some source lines (see the gaps in
   the embedded line numbers), including the argument list.  */
7774 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
7775 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
7776 Return the corresponding character. */)
7780 Lisp_Object spec
, attrs
, val
;
7781 struct charset
*charset_roman
, *charset_big5
, *charset
;
7784 CHECK_NATNUM (code
);
7785 c
= XFASTINT (code
);
7786 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7787 attrs
= AREF (spec
, 0);
7789 if (ASCII_BYTE_P (c
)
7790 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7793 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7794 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7795 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7798 charset
= charset_roman
;
/* Two-byte code: B1 is the lead byte, B2 the full 8-bit trail byte.  */
7801 int b1
= c
>> 8, b2
= c
& 0xFF;
7802 if (b1
< 0xA1 || b1
> 0xFE
7803 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
7804 error ("Invalid code: %d", code
);
7805 charset
= charset_big5
;
7807 c
= DECODE_CHAR (charset
, (unsigned )c
);
7809 error ("Invalid code: %d", code
);
7810 return make_number (c
);
/* Lisp primitive `encode-big5-char'.

   CH must be a character.  The charset list of Vbig5_coding_system is
   searched with char_charset, which also stores the code point in
   CODE.  An error is signaled when CODE equals
   CHARSET_INVALID_CODE (charset); otherwise CODE is returned as a
   Lisp integer.

   NOTE(review): this listing omits some source lines (see the gaps in
   the embedded line numbers), including the argument list and the
   body of the ASCII branch.  */
7813 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
7814 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
7815 Return the corresponding character code in Big5. */)
7819 Lisp_Object spec
, attrs
, charset_list
;
7820 struct charset
*charset
;
7824 CHECK_CHARACTER (ch
);
7826 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7827 attrs
= AREF (spec
, 0);
7828 if (ASCII_CHAR_P (c
)
7829 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7832 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7833 charset
= char_charset (c
, charset_list
, &code
);
7834 if (code
== CHARSET_INVALID_CODE (charset
))
7835 error ("Can't encode by Big5 encoding: %d", c
);
7837 return make_number (code
);
/* Lisp primitive `set-terminal-coding-system-internal'.

   CODING_SYSTEM must be a symbol accepted by Fcheck_coding_system.
   After the coding structure is set up, terminal_coding gets
   CODING_MODE_SAFE_ENCODING added to its mode, the composition
   annotation flag cleared, src_multibyte set to 1 and dst_multibyte
   set to 0.  */
7841 DEFUN ("set-terminal-coding-system-internal",
7842 Fset_terminal_coding_system_internal
,
7843 Sset_terminal_coding_system_internal
, 1, 1, 0,
7844 doc
: /* Internal use only. */)
7846 Lisp_Object coding_system
;
7848 CHECK_SYMBOL (coding_system
);
7849 setup_coding_system (Fcheck_coding_system (coding_system
),
7852 /* We had better not send unsafe characters to terminal. */
7853 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
7854 /* Character composition should be disabled. */
7855 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7856 terminal_coding
.src_multibyte
= 1;
7857 terminal_coding
.dst_multibyte
= 0;
/* Lisp primitive `set-safe-terminal-coding-system-internal'.

   CODING_SYSTEM must be a symbol accepted by Fcheck_coding_system.
   Sets up safe_terminal_coding from it, then clears the composition
   annotation flag and sets src_multibyte to 1 and dst_multibyte
   to 0.  */
7861 DEFUN ("set-safe-terminal-coding-system-internal",
7862 Fset_safe_terminal_coding_system_internal
,
7863 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
7864 doc
: /* Internal use only. */)
7866 Lisp_Object coding_system
;
7868 CHECK_SYMBOL (coding_system
);
7869 setup_coding_system (Fcheck_coding_system (coding_system
),
7870 &safe_terminal_coding
);
7871 /* Character composition should be disabled. */
7872 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7873 safe_terminal_coding
.src_multibyte
= 1;
7874 safe_terminal_coding
.dst_multibyte
= 0;
/* Lisp primitive `terminal-coding-system': returns the name that
   CODING_ID_NAME yields for terminal_coding.id.  Takes no
   arguments.  */
7878 DEFUN ("terminal-coding-system",
7879 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
7880 doc
: /* Return coding system specified for terminal output. */)
7883 return CODING_ID_NAME (terminal_coding
.id
);
/* Lisp primitive `set-keyboard-coding-system-internal'.

   CODING_SYSTEM must be a symbol accepted by Fcheck_coding_system.
   After the coding structure is set up, the composition annotation
   flag of keyboard_coding is cleared.  */
7886 DEFUN ("set-keyboard-coding-system-internal",
7887 Fset_keyboard_coding_system_internal
,
7888 Sset_keyboard_coding_system_internal
, 1, 1, 0,
7889 doc
: /* Internal use only. */)
7891 Lisp_Object coding_system
;
7893 CHECK_SYMBOL (coding_system
);
7894 setup_coding_system (Fcheck_coding_system (coding_system
),
7896 /* Character composition should be disabled. */
7897 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* Lisp primitive `keyboard-coding-system': returns the name that
   CODING_ID_NAME yields for keyboard_coding.id.  Takes no
   arguments.  */
7901 DEFUN ("keyboard-coding-system",
7902 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
7903 doc
: /* Return coding system specified for decoding keyboard input. */)
7906 return CODING_ID_NAME (keyboard_coding
.id
);
/* Lisp primitive `find-operation-coding-system'.

   args[0] is OPERATION, a symbol whose Qtarget_idx property must be
   an integer.  The TARGET argument is args[target_idx + 1]; it must
   be a string, or an integer when OPERATION is Qopen_network_stream.
   The alist to search is Vfile_coding_system_alist for
   Qinsert_file_contents and Qwrite_region,
   Vnetwork_coding_system_alist for Qopen_network_stream, and
   Vprocess_coding_system_alist otherwise.

   The argument-count guard requires NARGS to be strictly greater than
   target_idx + 1, because args[target_idx + 1] is read right after
   it.  The previous test (`<') let NARGS == target_idx + 1 through,
   which read one element past the end of ARGS.

   NOTE(review): this listing omits some source lines (see the gaps in
   the embedded line numbers), including the argument list and parts
   of the alist loop.  */
7910 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7911 Sfind_operation_coding_system
, 1, MANY
, 0,
7912 doc
: /* Choose a coding system for an operation based on the target name.
7913 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7914 DECODING-SYSTEM is the coding system to use for decoding
7915 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7916 for encoding (in case OPERATION does encoding).
7918 The first argument OPERATION specifies an I/O primitive:
7919 For file I/O, `insert-file-contents' or `write-region'.
7920 For process I/O, `call-process', `call-process-region', or `start-process'.
7921 For network I/O, `open-network-stream'.
7923 The remaining arguments should be the same arguments that were passed
7924 to the primitive. Depending on which primitive, one of those arguments
7925 is selected as the TARGET. For example, if OPERATION does file I/O,
7926 whichever argument specifies the file name is TARGET.
7928 TARGET has a meaning which depends on OPERATION:
7929 For file I/O, TARGET is a file name.
7930 For process I/O, TARGET is a process name.
7931 For network I/O, TARGET is a service name or a port number
7933 This function looks up what is specified for TARGET in
7934 `file-coding-system-alist', `process-coding-system-alist',
7935 or `network-coding-system-alist' depending on OPERATION.
7936 They may specify a coding system, a cons of coding systems,
7937 or a function symbol to call.
7938 In the last case, we call the function with one argument,
7939 which is a list of all the arguments given to this function.
7941 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7946 Lisp_Object operation
, target_idx
, target
, val
;
7947 register Lisp_Object chain
;
7950 error ("Too few arguments");
7951 operation
= args
[0];
7952 if (!SYMBOLP (operation
)
7953 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7954 error ("Invalid first arguement");
/* args[target_idx + 1] is read below, so NARGS must exceed
   target_idx + 1.  */
7955 if (nargs
<= 1 + XINT (target_idx
))
7956 error ("Too few arguments for operation: %s",
7957 SDATA (SYMBOL_NAME (operation
)));
7958 target
= args
[XINT (target_idx
) + 1];
7959 if (!(STRINGP (target
)
7960 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7961 error ("Invalid %dth argument", XINT (target_idx
) + 1);
/* Pick the alist that corresponds to OPERATION.  */
7963 chain
= ((EQ (operation
, Qinsert_file_contents
)
7964 || EQ (operation
, Qwrite_region
))
7965 ? Vfile_coding_system_alist
7966 : (EQ (operation
, Qopen_network_stream
)
7967 ? Vnetwork_coding_system_alist
7968 : Vprocess_coding_system_alist
));
7972 for (; CONSP (chain
); chain
= XCDR (chain
))
/* A string key is matched against TARGET with fast_string_match; an
   integer TARGET must be EQ to the key.  */
7978 && ((STRINGP (target
)
7979 && STRINGP (XCAR (elt
))
7980 && fast_string_match (XCAR (elt
), target
) >= 0)
7981 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7984 /* Here, if VAL is both a valid coding system and a valid
7985 function symbol, we return VAL as a coding system. */
7988 if (! SYMBOLP (val
))
7990 if (! NILP (Fcoding_system_p (val
)))
7991 return Fcons (val
, val
);
7992 if (! NILP (Ffboundp (val
)))
/* VAL names a function: call it with the list of all arguments.  */
7994 val
= call1 (val
, Flist (nargs
, args
));
7997 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
7998 return Fcons (val
, val
);
/* Lisp primitive `set-coding-system-priority'.

   Each argument is checked as a coding system and mapped to its
   coding category.  The first coding system seen for a category
   claims the next slot in PRIORITIES, is set up as that category's
   coding system when it differs from the current one, and is stored
   with Fset in the category's entry of Vcoding_category_table.  The
   remaining slots are filled with the categories not mentioned, in
   their existing order from coding_priorities.  Finally PRIORITIES is
   copied over coding_priorities and Vcoding_category_list is rebuilt
   in the new order.

   NOTE(review): this listing omits some source lines (see the gaps in
   the embedded line numbers), including the argument list and the
   bodies of some branches.  */
8006 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
8007 Sset_coding_system_priority
, 0, MANY
, 0,
8008 doc
: /* Assign higher priority to the coding systems given as arguments.
8009 If multiple coding systems belong to the same category,
8010 all but the first one are ignored.
8012 usage: (set-coding-system-priority ...) */)
/* CHANGED[CAT] is nonzero once category CAT has been given a slot.  */
8018 int changed
[coding_category_max
];
8019 enum coding_category priorities
[coding_category_max
];
8021 bzero (changed
, sizeof changed
);
8023 for (i
= j
= 0; i
< nargs
; i
++)
8025 enum coding_category category
;
8026 Lisp_Object spec
, attrs
;
8028 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
8029 attrs
= AREF (spec
, 0);
8030 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
8031 if (changed
[category
])
8032 /* Ignore this coding system because a coding system of the
8033 same category already had a higher priority. */
8035 changed
[category
] = 1;
8036 priorities
[j
++] = category
;
/* Re-run setup only when the category currently holds a different
   coding system.  */
8037 if (coding_categories
[category
].id
>= 0
8038 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
8039 setup_coding_system (args
[i
], &coding_categories
[category
]);
8040 Fset (AREF (Vcoding_category_table
, category
), args
[i
]);
8043 /* Now we have decided top J priorities. Reflect the order of the
8044 original priorities to the remaining priorities. */
8046 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
8048 while (j
< coding_category_max
8049 && changed
[coding_priorities
[j
]])
8051 if (j
== coding_category_max
)
8053 priorities
[i
] = coding_priorities
[j
];
8056 bcopy (priorities
, coding_priorities
, sizeof priorities
);
8058 /* Update `coding-category-list'. */
8059 Vcoding_category_list
= Qnil
;
/* Consing from the last slot backwards leaves the list in priority
   order.  */
8060 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8061 Vcoding_category_list
8062 = Fcons (AREF (Vcoding_category_table
, priorities
[i
]),
8063 Vcoding_category_list
);
/* Lisp primitive `coding-system-priority-list'.

   Walks coding_priorities in order, looks up the attributes of each
   category's coding system through its id, and collects
   CODING_ATTR_BASE_NAME of each.  When HIGHESTP is non-nil the first
   base name reached is returned immediately; otherwise the collected
   names are returned in priority order via Fnreverse.

   NOTE(review): this listing omits some source lines (see the gaps in
   the embedded line numbers), including the argument list and
   whatever is done with ID before it is used.  */
8068 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
8069 Scoding_system_priority_list
, 0, 1, 0,
8070 doc
: /* Return a list of coding systems ordered by their priorities.
8071 HIGHESTP non-nil means just return the highest priority one. */)
8073 Lisp_Object highestp
;
8078 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
8080 enum coding_category category
= coding_priorities
[i
];
8081 int id
= coding_categories
[category
].id
;
8086 attrs
= CODING_ID_ATTRS (id
);
8087 if (! NILP (highestp
))
8088 return CODING_ATTR_BASE_NAME (attrs
);
8089 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
8091 return Fnreverse (val
);
/* End-of-line suffixes appended to a base coding system name, in the
   order they are stored by make_subsidiaries.  */
8094 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
/* Return a 3-element vector of symbols named BASE followed by each
   entry of `suffixes', in that order.  The names are assembled in a
   stack buffer and interned.  */
8097 make_subsidiaries (base
)
8100 Lisp_Object subsidiaries
;
8101 int base_name_len
= SBYTES (SYMBOL_NAME (base
));
/* 6 bytes of slack: room for the longest suffix ("-unix", 5 bytes)
   plus the terminating NUL.  */
8102 char *buf
= (char *) alloca (base_name_len
+ 6);
8105 bcopy (SDATA (SYMBOL_NAME (base
)), buf
, base_name_len
);
8106 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
8107 for (i
= 0; i
< 3; i
++)
/* The copy length includes the suffix's NUL, so BUF is always
   terminated before it is interned.  */
8109 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
8110 ASET (subsidiaries
, i
, intern (buf
));
8112 return subsidiaries
;
8116 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
8117 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
8118 doc
: /* For internal use only.
8119 usage: (define-coding-system-internal ...) */)
8125 Lisp_Object spec_vec
; /* [ ATTRS ALIASE EOL_TYPE ] */
8126 Lisp_Object attrs
; /* Vector of attributes. */
8127 Lisp_Object eol_type
;
8128 Lisp_Object aliases
;
8129 Lisp_Object coding_type
, charset_list
, safe_charsets
;
8130 enum coding_category category
;
8131 Lisp_Object tail
, val
;
8132 int max_charset_id
= 0;
8135 if (nargs
< coding_arg_max
)
8138 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
8140 name
= args
[coding_arg_name
];
8141 CHECK_SYMBOL (name
);
8142 CODING_ATTR_BASE_NAME (attrs
) = name
;
8144 val
= args
[coding_arg_mnemonic
];
8145 if (! STRINGP (val
))
8146 CHECK_CHARACTER (val
);
8147 CODING_ATTR_MNEMONIC (attrs
) = val
;
8149 coding_type
= args
[coding_arg_coding_type
];
8150 CHECK_SYMBOL (coding_type
);
8151 CODING_ATTR_TYPE (attrs
) = coding_type
;
8153 charset_list
= args
[coding_arg_charset_list
];
8154 if (SYMBOLP (charset_list
))
8156 if (EQ (charset_list
, Qiso_2022
))
8158 if (! EQ (coding_type
, Qiso_2022
))
8159 error ("Invalid charset-list");
8160 charset_list
= Viso_2022_charset_list
;
8162 else if (EQ (charset_list
, Qemacs_mule
))
8164 if (! EQ (coding_type
, Qemacs_mule
))
8165 error ("Invalid charset-list");
8166 charset_list
= Vemacs_mule_charset_list
;
8168 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8169 if (max_charset_id
< XFASTINT (XCAR (tail
)))
8170 max_charset_id
= XFASTINT (XCAR (tail
));
8174 charset_list
= Fcopy_sequence (charset_list
);
8175 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
8177 struct charset
*charset
;
8180 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8181 if (EQ (coding_type
, Qiso_2022
)
8182 ? CHARSET_ISO_FINAL (charset
) < 0
8183 : EQ (coding_type
, Qemacs_mule
)
8184 ? CHARSET_EMACS_MULE_ID (charset
) < 0
8186 error ("Can't handle charset `%s'",
8187 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8189 XSETCAR (tail
, make_number (charset
->id
));
8190 if (max_charset_id
< charset
->id
)
8191 max_charset_id
= charset
->id
;
8194 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
8196 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
8198 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8199 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
8200 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
8202 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
8204 val
= args
[coding_arg_decode_translation_table
];
8205 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8207 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8209 val
= args
[coding_arg_encode_translation_table
];
8210 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8212 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8214 val
= args
[coding_arg_post_read_conversion
];
8216 CODING_ATTR_POST_READ (attrs
) = val
;
8218 val
= args
[coding_arg_pre_write_conversion
];
8220 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8222 val
= args
[coding_arg_default_char
];
8224 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
8227 CHECK_CHARACTER (val
);
8228 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8231 val
= args
[coding_arg_for_unibyte
];
8232 CODING_ATTR_FOR_UNIBYTE (attrs
) = NILP (val
) ? Qnil
: Qt
;
8234 val
= args
[coding_arg_plist
];
8236 CODING_ATTR_PLIST (attrs
) = val
;
8238 if (EQ (coding_type
, Qcharset
))
8240 /* Generate a lisp vector of 256 elements. Each element is nil,
8241 integer, or a list of charset IDs.
8243 If Nth element is nil, the byte code N is invalid in this
8246 If Nth element is a number NUM, N is the first byte of a
8247 charset whose ID is NUM.
8249 If Nth element is a list of charset IDs, N is the first byte
8250 of one of them. The list is sorted by dimensions of the
8251 charsets. A charset of smaller dimension comes first. */
8252 val
= Fmake_vector (make_number (256), Qnil
);
8254 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8256 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
8257 int dim
= CHARSET_DIMENSION (charset
);
8258 int idx
= (dim
- 1) * 4;
8260 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8261 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8263 for (i
= charset
->code_space
[idx
];
8264 i
<= charset
->code_space
[idx
+ 1]; i
++)
8266 Lisp_Object tmp
, tmp2
;
8269 tmp
= AREF (val
, i
);
8272 else if (NUMBERP (tmp
))
8274 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
8276 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
8278 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
8282 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
8284 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
8289 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
8292 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
8293 XSETCAR (tmp2
, XCAR (tail
));
8299 ASET (attrs
, coding_attr_charset_valids
, val
);
8300 category
= coding_category_charset
;
8302 else if (EQ (coding_type
, Qccl
))
8306 if (nargs
< coding_arg_ccl_max
)
8309 val
= args
[coding_arg_ccl_decoder
];
8310 CHECK_CCL_PROGRAM (val
);
8312 val
= Fcopy_sequence (val
);
8313 ASET (attrs
, coding_attr_ccl_decoder
, val
);
8315 val
= args
[coding_arg_ccl_encoder
];
8316 CHECK_CCL_PROGRAM (val
);
8318 val
= Fcopy_sequence (val
);
8319 ASET (attrs
, coding_attr_ccl_encoder
, val
);
8321 val
= args
[coding_arg_ccl_valids
];
8322 valids
= Fmake_string (make_number (256), make_number (0));
8323 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
8330 from
= to
= XINT (val
);
8331 if (from
< 0 || from
> 255)
8332 args_out_of_range_3 (val
, make_number (0), make_number (255));
8337 CHECK_NATNUM_CAR (val
);
8338 CHECK_NATNUM_CDR (val
);
8339 from
= XINT (XCAR (val
));
8341 args_out_of_range_3 (XCAR (val
),
8342 make_number (0), make_number (255));
8343 to
= XINT (XCDR (val
));
8344 if (to
< from
|| to
> 255)
8345 args_out_of_range_3 (XCDR (val
),
8346 XCAR (val
), make_number (255));
8348 for (i
= from
; i
<= to
; i
++)
8349 SSET (valids
, i
, 1);
8351 ASET (attrs
, coding_attr_ccl_valids
, valids
);
8353 category
= coding_category_ccl
;
8355 else if (EQ (coding_type
, Qutf_16
))
8357 Lisp_Object bom
, endian
;
8359 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8361 if (nargs
< coding_arg_utf16_max
)
8364 bom
= args
[coding_arg_utf16_bom
];
8365 if (! NILP (bom
) && ! EQ (bom
, Qt
))
8369 CHECK_CODING_SYSTEM (val
);
8371 CHECK_CODING_SYSTEM (val
);
8373 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
8375 endian
= args
[coding_arg_utf16_endian
];
8376 CHECK_SYMBOL (endian
);
8379 else if (! EQ (endian
, Qbig
) && ! EQ (endian
, Qlittle
))
8380 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian
)));
8381 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
8383 category
= (CONSP (bom
)
8384 ? coding_category_utf_16_auto
8386 ? (EQ (endian
, Qbig
)
8387 ? coding_category_utf_16_be_nosig
8388 : coding_category_utf_16_le_nosig
)
8389 : (EQ (endian
, Qbig
)
8390 ? coding_category_utf_16_be
8391 : coding_category_utf_16_le
));
8393 else if (EQ (coding_type
, Qiso_2022
))
8395 Lisp_Object initial
, reg_usage
, request
, flags
;
8398 if (nargs
< coding_arg_iso2022_max
)
8401 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
8402 CHECK_VECTOR (initial
);
8403 for (i
= 0; i
< 4; i
++)
8405 val
= Faref (initial
, make_number (i
));
8408 struct charset
*charset
;
8410 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8411 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
8412 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
8413 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8416 ASET (initial
, i
, make_number (-1));
8419 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
8420 CHECK_CONS (reg_usage
);
8421 CHECK_NUMBER_CAR (reg_usage
);
8422 CHECK_NUMBER_CDR (reg_usage
);
8424 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8425 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8433 CHECK_CHARSET_GET_ID (tmp
, id
);
8434 CHECK_NATNUM_CDR (val
);
8435 if (XINT (XCDR (val
)) >= 4)
8436 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8437 XSETCAR (val
, make_number (id
));
8440 flags
= args
[coding_arg_iso2022_flags
];
8441 CHECK_NATNUM (flags
);
8443 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8444 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8446 ASET (attrs
, coding_attr_iso_initial
, initial
);
8447 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8448 ASET (attrs
, coding_attr_iso_request
, request
);
8449 ASET (attrs
, coding_attr_iso_flags
, flags
);
8450 setup_iso_safe_charsets (attrs
);
8452 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8453 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8454 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8455 ? coding_category_iso_7_else
8456 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8457 ? coding_category_iso_7
8458 : coding_category_iso_7_tight
);
8461 int id
= XINT (AREF (initial
, 1));
8463 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8464 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8466 ? coding_category_iso_8_else
8467 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8468 ? coding_category_iso_8_1
8469 : coding_category_iso_8_2
);
8471 if (category
!= coding_category_iso_8_1
8472 && category
!= coding_category_iso_8_2
)
8473 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8475 else if (EQ (coding_type
, Qemacs_mule
))
8477 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8478 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8479 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8480 category
= coding_category_emacs_mule
;
8482 else if (EQ (coding_type
, Qshift_jis
))
8485 struct charset
*charset
;
8487 if (XINT (Flength (charset_list
)) != 3
8488 && XINT (Flength (charset_list
)) != 4)
8489 error ("There should be three or four charsets");
8491 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8492 if (CHARSET_DIMENSION (charset
) != 1)
8493 error ("Dimension of charset %s is not one",
8494 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8495 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8496 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8498 charset_list
= XCDR (charset_list
);
8499 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8500 if (CHARSET_DIMENSION (charset
) != 1)
8501 error ("Dimension of charset %s is not one",
8502 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8504 charset_list
= XCDR (charset_list
);
8505 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8506 if (CHARSET_DIMENSION (charset
) != 2)
8507 error ("Dimension of charset %s is not two",
8508 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8510 charset_list
= XCDR (charset_list
);
8511 if (! NILP (charset_list
))
8513 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8514 if (CHARSET_DIMENSION (charset
) != 2)
8515 error ("Dimension of charset %s is not two",
8516 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8519 category
= coding_category_sjis
;
8520 Vsjis_coding_system
= name
;
8522 else if (EQ (coding_type
, Qbig5
))
8524 struct charset
*charset
;
8526 if (XINT (Flength (charset_list
)) != 2)
8527 error ("There should be just two charsets");
8529 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8530 if (CHARSET_DIMENSION (charset
) != 1)
8531 error ("Dimension of charset %s is not one",
8532 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8533 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8534 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8536 charset_list
= XCDR (charset_list
);
8537 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8538 if (CHARSET_DIMENSION (charset
) != 2)
8539 error ("Dimension of charset %s is not two",
8540 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8542 category
= coding_category_big5
;
8543 Vbig5_coding_system
= name
;
8545 else if (EQ (coding_type
, Qraw_text
))
8547 category
= coding_category_raw_text
;
8548 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8550 else if (EQ (coding_type
, Qutf_8
))
8552 category
= coding_category_utf_8
;
8553 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8555 else if (EQ (coding_type
, Qundecided
))
8556 category
= coding_category_undecided
;
8558 error ("Invalid coding system type: %s",
8559 SDATA (SYMBOL_NAME (coding_type
)));
8561 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8562 CODING_ATTR_PLIST (attrs
)
8563 = Fcons (QCcategory
, Fcons (AREF (Vcoding_category_table
, category
),
8564 CODING_ATTR_PLIST (attrs
)));
8566 eol_type
= args
[coding_arg_eol_type
];
8567 if (! NILP (eol_type
)
8568 && ! EQ (eol_type
, Qunix
)
8569 && ! EQ (eol_type
, Qdos
)
8570 && ! EQ (eol_type
, Qmac
))
8571 error ("Invalid eol-type");
8573 aliases
= Fcons (name
, Qnil
);
8575 if (NILP (eol_type
))
8577 eol_type
= make_subsidiaries (name
);
8578 for (i
= 0; i
< 3; i
++)
8580 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8582 this_name
= AREF (eol_type
, i
);
8583 this_aliases
= Fcons (this_name
, Qnil
);
8584 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
8585 this_spec
= Fmake_vector (make_number (3), attrs
);
8586 ASET (this_spec
, 1, this_aliases
);
8587 ASET (this_spec
, 2, this_eol_type
);
8588 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8589 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8590 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8591 Vcoding_system_alist
);
8595 spec_vec
= Fmake_vector (make_number (3), attrs
);
8596 ASET (spec_vec
, 1, aliases
);
8597 ASET (spec_vec
, 2, eol_type
);
8599 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
8600 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
8601 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
8602 Vcoding_system_alist
);
8605 int id
= coding_categories
[category
].id
;
8607 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
8608 setup_coding_system (name
, &coding_categories
[category
]);
8614 return Fsignal (Qwrong_number_of_arguments
,
8615 Fcons (intern ("define-coding-system-internal"),
8616 make_number (nargs
)));
8620 DEFUN ("coding-system-put", Fcoding_system_put
, Scoding_system_put
,
8622 doc
: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
8623 (coding_system
, prop
, val
)
8624 Lisp_Object coding_system
, prop
, val
;
8626 Lisp_Object spec
, attrs
, plist
;
8628 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8629 attrs
= AREF (spec
, 0);
8630 if (EQ (prop
, QCmnemonic
))
8632 if (! STRINGP (val
))
8633 CHECK_CHARACTER (val
);
8634 CODING_ATTR_MNEMONIC (attrs
) = val
;
8636 else if (EQ (prop
, QCdefalut_char
))
8639 val
= make_number (' ');
8641 CHECK_CHARACTER (val
);
8642 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8644 else if (EQ (prop
, QCdecode_translation_table
))
8646 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8648 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8650 else if (EQ (prop
, QCencode_translation_table
))
8652 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8654 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8656 else if (EQ (prop
, QCpost_read_conversion
))
8659 CODING_ATTR_POST_READ (attrs
) = val
;
8661 else if (EQ (prop
, QCpre_write_conversion
))
8664 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8667 CODING_ATTR_PLIST (attrs
)
8668 = Fplist_put (CODING_ATTR_PLIST (attrs
), prop
, val
);
8673 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
8674 Sdefine_coding_system_alias
, 2, 2, 0,
8675 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8676 (alias
, coding_system
)
8677 Lisp_Object alias
, coding_system
;
8679 Lisp_Object spec
, aliases
, eol_type
;
8681 CHECK_SYMBOL (alias
);
8682 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8683 aliases
= AREF (spec
, 1);
8684 /* ALISES should be a list of length more than zero, and the first
8685 element is a base coding system. Append ALIAS at the tail of the
8687 while (!NILP (XCDR (aliases
)))
8688 aliases
= XCDR (aliases
);
8689 XSETCDR (aliases
, Fcons (alias
, Qnil
));
8691 eol_type
= AREF (spec
, 2);
8692 if (VECTORP (eol_type
))
8694 Lisp_Object subsidiaries
;
8697 subsidiaries
= make_subsidiaries (alias
);
8698 for (i
= 0; i
< 3; i
++)
8699 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
8700 AREF (eol_type
, i
));
8703 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
8704 Vcoding_system_list
= Fcons (alias
, Vcoding_system_list
);
8705 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
8706 Vcoding_system_alist
);
8711 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
8713 doc
: /* Return the base of CODING-SYSTEM.
8714 Any alias or subsidiary coding system is not a base coding system. */)
8716 Lisp_Object coding_system
;
8718 Lisp_Object spec
, attrs
;
8720 if (NILP (coding_system
))
8721 return (Qno_conversion
);
8722 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8723 attrs
= AREF (spec
, 0);
8724 return CODING_ATTR_BASE_NAME (attrs
);
8727 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
8729 doc
: "Return the property list of CODING-SYSTEM.")
8731 Lisp_Object coding_system
;
8733 Lisp_Object spec
, attrs
;
8735 if (NILP (coding_system
))
8736 coding_system
= Qno_conversion
;
8737 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8738 attrs
= AREF (spec
, 0);
8739 return CODING_ATTR_PLIST (attrs
);
8743 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
8745 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
8747 Lisp_Object coding_system
;
8751 if (NILP (coding_system
))
8752 coding_system
= Qno_conversion
;
8753 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8754 return AREF (spec
, 1);
8757 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
8758 Scoding_system_eol_type
, 1, 1, 0,
8759 doc
: /* Return the eol-type of CODING-SYSTEM.
8760 An eol-type is an integer 0, 1, or 2, or a vector of coding systems.
8762 Integer values 0, 1, and 2 indicate the end-of-line format: LF, CRLF,
8763 and CR respectively.
8765 A vector value indicates that the end-of-line format should be
8766 detected automatically. The Nth element of the vector is the subsidiary
8767 coding system whose eol-type is N. */)
8769 Lisp_Object coding_system
;
8771 Lisp_Object spec
, eol_type
;
/* A nil CODING-SYSTEM is treated as `no-conversion'. */
8774 if (NILP (coding_system
))
8775 coding_system
= Qno_conversion
;
8776 if (! CODING_SYSTEM_P (coding_system
))
/* Slot 2 of the spec holds the eol-type. */
8778 spec
= CODING_SYSTEM_SPEC (coding_system
);
8779 eol_type
= AREF (spec
, 2);
/* For a vector, return a copy made with Fcopy_sequence rather than the
   stored vector itself. */
8780 if (VECTORP (eol_type
))
8781 return Fcopy_sequence (eol_type
);
/* Otherwise map the eol-type to its integer code: `unix' is 0, `dos'
   is 1, and anything else is 2. */
8782 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
8783 return make_number (n
);
8789 /*** 9. Post-amble ***/
8796 for (i
= 0; i
< coding_category_max
; i
++)
8798 coding_categories
[i
].id
= -1;
8799 coding_priorities
[i
] = i
;
8802 /* ISO2022 specific initialize routine. */
8803 for (i
= 0; i
< 0x20; i
++)
8804 iso_code_class
[i
] = ISO_control_0
;
8805 for (i
= 0x21; i
< 0x7F; i
++)
8806 iso_code_class
[i
] = ISO_graphic_plane_0
;
8807 for (i
= 0x80; i
< 0xA0; i
++)
8808 iso_code_class
[i
] = ISO_control_1
;
8809 for (i
= 0xA1; i
< 0xFF; i
++)
8810 iso_code_class
[i
] = ISO_graphic_plane_1
;
8811 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
8812 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
8813 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
8814 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
8815 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
8816 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
8817 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
8818 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
8819 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
8821 for (i
= 0; i
< 256; i
++)
8823 emacs_mule_bytes
[i
] = 1;
8825 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
8826 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
8827 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
8828 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
8836 staticpro (&Vcoding_system_hash_table
);
8838 Lisp_Object args
[2];
8841 Vcoding_system_hash_table
= Fmake_hash_table (2, args
);
8844 staticpro (&Vsjis_coding_system
);
8845 Vsjis_coding_system
= Qnil
;
8847 staticpro (&Vbig5_coding_system
);
8848 Vbig5_coding_system
= Qnil
;
8850 staticpro (&Vcode_conversion_reused_workbuf
);
8851 Vcode_conversion_reused_workbuf
= Qnil
;
8853 staticpro (&Vcode_conversion_workbuf_name
);
8854 Vcode_conversion_workbuf_name
= build_string (" *code-conversion-work*");
8856 reused_workbuf_in_use
= 0;
8858 DEFSYM (Qcharset
, "charset");
8859 DEFSYM (Qtarget_idx
, "target-idx");
8860 DEFSYM (Qcoding_system_history
, "coding-system-history");
8861 Fset (Qcoding_system_history
, Qnil
);
8863 /* Target FILENAME is the first argument. */
8864 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
8865 /* Target FILENAME is the third argument. */
8866 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
8868 DEFSYM (Qcall_process
, "call-process");
8869 /* Target PROGRAM is the first argument. */
8870 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
8872 DEFSYM (Qcall_process_region
, "call-process-region");
8873 /* Target PROGRAM is the third argument. */
8874 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
8876 DEFSYM (Qstart_process
, "start-process");
8877 /* Target PROGRAM is the third argument. */
8878 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
8880 DEFSYM (Qopen_network_stream
, "open-network-stream");
8881 /* Target SERVICE is the fourth argument. */
8882 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
8884 DEFSYM (Qcoding_system
, "coding-system");
8885 DEFSYM (Qcoding_aliases
, "coding-aliases");
8887 DEFSYM (Qeol_type
, "eol-type");
8888 DEFSYM (Qunix
, "unix");
8889 DEFSYM (Qdos
, "dos");
8891 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
8892 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
8893 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
8894 DEFSYM (Qdefault_char
, "default-char");
8895 DEFSYM (Qundecided
, "undecided");
8896 DEFSYM (Qno_conversion
, "no-conversion");
8897 DEFSYM (Qraw_text
, "raw-text");
8899 DEFSYM (Qiso_2022
, "iso-2022");
8901 DEFSYM (Qutf_8
, "utf-8");
8902 DEFSYM (Qutf_8_emacs
, "utf-8-emacs");
8904 DEFSYM (Qutf_16
, "utf-16");
8905 DEFSYM (Qbig
, "big");
8906 DEFSYM (Qlittle
, "little");
8908 DEFSYM (Qshift_jis
, "shift-jis");
8909 DEFSYM (Qbig5
, "big5");
8911 DEFSYM (Qcoding_system_p
, "coding-system-p");
8913 DEFSYM (Qcoding_system_error
, "coding-system-error");
8914 Fput (Qcoding_system_error
, Qerror_conditions
,
8915 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
8916 Fput (Qcoding_system_error
, Qerror_message
,
8917 build_string ("Invalid coding system"));
8919 /* Intern this now in case it isn't already done.
8920 Setting this variable twice is harmless.
8921 But don't staticpro it here--that is done in alloc.c. */
8922 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
8924 DEFSYM (Qtranslation_table
, "translation-table");
8925 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
8926 DEFSYM (Qtranslation_table_id
, "translation-table-id");
8927 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
8928 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
8930 DEFSYM (Qvalid_codes
, "valid-codes");
8932 DEFSYM (Qemacs_mule
, "emacs-mule");
8934 DEFSYM (QCcategory
, ":category");
8935 DEFSYM (QCmnemonic
, ":mnemonic");
8936 DEFSYM (QCdefalut_char
, ":default-char");
8937 DEFSYM (QCdecode_translation_table
, ":decode-translation-table");
8938 DEFSYM (QCencode_translation_table
, ":encode-translation-table");
8939 DEFSYM (QCpost_read_conversion
, ":post-read-conversion");
8940 DEFSYM (QCpre_write_conversion
, ":pre-write-conversion");
8942 Vcoding_category_table
8943 = Fmake_vector (make_number (coding_category_max
), Qnil
);
8944 staticpro (&Vcoding_category_table
);
8945 /* The following are targets of code detection. */
8946 ASET (Vcoding_category_table
, coding_category_iso_7
,
8947 intern ("coding-category-iso-7"));
8948 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
8949 intern ("coding-category-iso-7-tight"));
8950 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
8951 intern ("coding-category-iso-8-1"));
8952 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
8953 intern ("coding-category-iso-8-2"));
8954 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
8955 intern ("coding-category-iso-7-else"));
8956 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
8957 intern ("coding-category-iso-8-else"));
8958 ASET (Vcoding_category_table
, coding_category_utf_8
,
8959 intern ("coding-category-utf-8"));
8960 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
8961 intern ("coding-category-utf-16-be"));
8962 ASET (Vcoding_category_table
, coding_category_utf_16_auto
,
8963 intern ("coding-category-utf-16-auto"));
8964 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
8965 intern ("coding-category-utf-16-le"));
8966 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
8967 intern ("coding-category-utf-16-be-nosig"));
8968 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
8969 intern ("coding-category-utf-16-le-nosig"));
8970 ASET (Vcoding_category_table
, coding_category_charset
,
8971 intern ("coding-category-charset"));
8972 ASET (Vcoding_category_table
, coding_category_sjis
,
8973 intern ("coding-category-sjis"));
8974 ASET (Vcoding_category_table
, coding_category_big5
,
8975 intern ("coding-category-big5"));
8976 ASET (Vcoding_category_table
, coding_category_ccl
,
8977 intern ("coding-category-ccl"));
8978 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
8979 intern ("coding-category-emacs-mule"));
8980 /* The following are NOT targets of code detection. */
8981 ASET (Vcoding_category_table
, coding_category_raw_text
,
8982 intern ("coding-category-raw-text"));
8983 ASET (Vcoding_category_table
, coding_category_undecided
,
8984 intern ("coding-category-undecided"));
8986 DEFSYM (Qinsufficient_source
, "insufficient-source");
8987 DEFSYM (Qinconsistent_eol
, "inconsistent-eol");
8988 DEFSYM (Qinvalid_source
, "invalid-source");
8989 DEFSYM (Qinterrupted
, "interrupted");
8990 DEFSYM (Qinsufficient_memory
, "insufficient-memory");
8992 defsubr (&Scoding_system_p
);
8993 defsubr (&Sread_coding_system
);
8994 defsubr (&Sread_non_nil_coding_system
);
8995 defsubr (&Scheck_coding_system
);
8996 defsubr (&Sdetect_coding_region
);
8997 defsubr (&Sdetect_coding_string
);
8998 defsubr (&Sfind_coding_systems_region_internal
);
8999 defsubr (&Sunencodable_char_position
);
9000 defsubr (&Scheck_coding_systems_region
);
9001 defsubr (&Sdecode_coding_region
);
9002 defsubr (&Sencode_coding_region
);
9003 defsubr (&Sdecode_coding_string
);
9004 defsubr (&Sencode_coding_string
);
9005 defsubr (&Sdecode_sjis_char
);
9006 defsubr (&Sencode_sjis_char
);
9007 defsubr (&Sdecode_big5_char
);
9008 defsubr (&Sencode_big5_char
);
9009 defsubr (&Sset_terminal_coding_system_internal
);
9010 defsubr (&Sset_safe_terminal_coding_system_internal
);
9011 defsubr (&Sterminal_coding_system
);
9012 defsubr (&Sset_keyboard_coding_system_internal
);
9013 defsubr (&Skeyboard_coding_system
);
9014 defsubr (&Sfind_operation_coding_system
);
9015 defsubr (&Sset_coding_system_priority
);
9016 defsubr (&Sdefine_coding_system_internal
);
9017 defsubr (&Sdefine_coding_system_alias
);
9018 defsubr (&Scoding_system_put
);
9019 defsubr (&Scoding_system_base
);
9020 defsubr (&Scoding_system_plist
);
9021 defsubr (&Scoding_system_aliases
);
9022 defsubr (&Scoding_system_eol_type
);
9023 defsubr (&Scoding_system_priority_list
);
9025 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
9026 doc
: /* List of coding systems.
9028 Do not alter the value of this variable manually. This variable should be
9029 updated by the functions `define-coding-system' and
9030 `define-coding-system-alias'. */);
9031 Vcoding_system_list
= Qnil
;
9033 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
9034 doc
: /* Alist of coding system names.
9035 Each element is one element list of coding system name.
9036 This variable is given to `completing-read' as TABLE argument.
9038 Do not alter the value of this variable manually. This variable should be
9039 updated by the functions `define-coding-system' and
9040 `define-coding-system-alias'. */);
9041 Vcoding_system_alist
= Qnil
;
9043 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
9044 doc
: /* List of coding-categories (symbols) ordered by priority.
9046 On detecting a coding system, Emacs tries code detection algorithms
9047 associated with each coding-category one by one in this order. When
9048 one algorithm agrees with a byte sequence of source text, the coding
9049 system bound to the corresponding coding-category is selected. */);
9053 Vcoding_category_list
= Qnil
;
9054 for (i
= coding_category_max
- 1; i
>= 0; i
--)
9055 Vcoding_category_list
9056 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
9057 Vcoding_category_list
);
9060 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
9061 doc
: /* Specify the coding system for read operations.
9062 It is useful to bind this variable with `let', but do not set it globally.
9063 If the value is a coding system, it is used for decoding on read operation.
9064 If not, an appropriate element is used from one of the coding system alists:
9065 There are three such tables, `file-coding-system-alist',
9066 `process-coding-system-alist', and `network-coding-system-alist'. */);
9067 Vcoding_system_for_read
= Qnil
;
9069 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
9070 doc
: /* Specify the coding system for write operations.
9071 Programs bind this variable with `let', but you should not set it globally.
9072 If the value is a coding system, it is used for encoding of output,
9073 when writing it to a file and when sending it to a file or subprocess.
9075 If this does not specify a coding system, an appropriate element
9076 is used from one of the coding system alists:
9077 There are three such tables, `file-coding-system-alist',
9078 `process-coding-system-alist', and `network-coding-system-alist'.
9079 For output to files, if the above procedure does not specify a coding system,
9080 the value of `buffer-file-coding-system' is used. */);
9081 Vcoding_system_for_write
= Qnil
;
9083 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
9085 Coding system used in the latest file or process I/O. */);
9086 Vlast_coding_system_used
= Qnil
;
9088 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error
,
9090 Error status of the last code conversion.
9092 When an error was detected in the last code conversion, this variable
9093 is set to one of the following symbols.
9094 `insufficient-source'
9098 `insufficient-memory'
9099 When no error was detected, the value doesn't change. So, to check
9100 the error status of a code conversion by this variable, you must
9101 explicitly set this variable to nil before performing code
9103 Vlast_code_conversion_error
= Qnil
;
9105 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
9107 *Non-nil means always inhibit code conversion of end-of-line format.
9108 See info node `Coding Systems' and info node `Text and Binary' concerning
9109 such conversion. */);
9110 inhibit_eol_conversion
= 0;
9112 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
9114 Non-nil means process buffer inherits coding system of process output.
9115 Bind it to t if the process output is to be treated as if it were a file
9116 read from some filesystem. */);
9117 inherit_process_coding_system
= 0;
9119 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
9121 Alist to decide a coding system to use for a file I/O operation.
9122 The format is ((PATTERN . VAL) ...),
9123 where PATTERN is a regular expression matching a file name,
9124 VAL is a coding system, a cons of coding systems, or a function symbol.
9125 If VAL is a coding system, it is used for both decoding and encoding
9127 If VAL is a cons of coding systems, the car part is used for decoding,
9128 and the cdr part is used for encoding.
9129 If VAL is a function symbol, the function must return a coding system
9130 or a cons of coding systems which are used as above. The function gets
9131 the arguments with which `find-operation-coding-system' was called.
9133 See also the function `find-operation-coding-system'
9134 and the variable `auto-coding-alist'. */);
9135 Vfile_coding_system_alist
= Qnil
;
9137 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
9139 Alist to decide a coding system to use for a process I/O operation.
9140 The format is ((PATTERN . VAL) ...),
9141 where PATTERN is a regular expression matching a program name,
9142 VAL is a coding system, a cons of coding systems, or a function symbol.
9143 If VAL is a coding system, it is used for both decoding what is received
9144 from the program and encoding what is sent to the program.
9145 If VAL is a cons of coding systems, the car part is used for decoding,
9146 and the cdr part is used for encoding.
9147 If VAL is a function symbol, the function must return a coding system
9148 or a cons of coding systems which are used as above.
9150 See also the function `find-operation-coding-system'. */);
9151 Vprocess_coding_system_alist
= Qnil
;
9153 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
9155 Alist to decide a coding system to use for a network I/O operation.
9156 The format is ((PATTERN . VAL) ...),
9157 where PATTERN is a regular expression matching a network service name
9158 or is a port number to connect to,
9159 VAL is a coding system, a cons of coding systems, or a function symbol.
9160 If VAL is a coding system, it is used for both decoding what is received
9161 from the network stream and encoding what is sent to the network stream.
9162 If VAL is a cons of coding systems, the car part is used for decoding,
9163 and the cdr part is used for encoding.
9164 If VAL is a function symbol, the function must return a coding system
9165 or a cons of coding systems which are used as above.
9167 See also the function `find-operation-coding-system'. */);
9168 Vnetwork_coding_system_alist
= Qnil
;
9170 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
9171 doc
: /* Coding system to use with system messages.
9172 Also used for decoding keyboard input on X Window system. */);
9173 Vlocale_coding_system
= Qnil
;
9175 /* The eol mnemonics are reset in startup.el system-dependently. */
9176 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
9178 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
9179 eol_mnemonic_unix
= build_string (":");
9181 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
9183 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
9184 eol_mnemonic_dos
= build_string ("\\");
9186 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
9188 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
9189 eol_mnemonic_mac
= build_string ("/");
9191 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
9193 *String displayed in mode line when end-of-line format is not yet determined. */);
9194 eol_mnemonic_undecided
= build_string (":");
9196 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
9198 *Non-nil enables character translation while encoding and decoding. */);
9199 Venable_character_translation
= Qt
;
9201 DEFVAR_LISP ("standard-translation-table-for-decode",
9202 &Vstandard_translation_table_for_decode
,
9203 doc
: /* Table for translating characters while decoding. */);
9204 Vstandard_translation_table_for_decode
= Qnil
;
9206 DEFVAR_LISP ("standard-translation-table-for-encode",
9207 &Vstandard_translation_table_for_encode
,
9208 doc
: /* Table for translating characters while encoding. */);
9209 Vstandard_translation_table_for_encode
= Qnil
;
9211 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
9212 doc
: /* Alist of charsets vs revision numbers.
9213 While encoding, if a charset (car part of an element) is found,
9214 designate it with the escape sequence identifying revision (cdr part
9215 of the element). */);
9216 Vcharset_revision_table
= Qnil
;
9218 DEFVAR_LISP ("default-process-coding-system",
9219 &Vdefault_process_coding_system
,
9220 doc
: /* Cons of coding systems used for process I/O by default.
9221 The car part is used for decoding a process output,
9222 the cdr part is used for encoding a text to be sent to a process. */);
9223 Vdefault_process_coding_system
= Qnil
;
9225 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
9227 Table of extra Latin codes in the range 128..159 (inclusive).
9228 This is a vector of length 256.
9229 If Nth element is non-nil, the existence of code N in a file
9230 \(or output of subprocess) doesn't prevent it from being detected as
9231 a coding system of ISO 2022 variant which has a flag
9232 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9233 or reading output of a subprocess.
9234 Only the 128th through 159th elements have a meaning. */);
9235 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
9237 DEFVAR_LISP ("select-safe-coding-system-function",
9238 &Vselect_safe_coding_system_function
,
9240 Function to call to select safe coding system for encoding a text.
9242 If set, this function is called to force a user to select a proper
9243 coding system which can encode the text in the case that a default
9244 coding system used in each operation can't encode the text.
9246 The default value is `select-safe-coding-system' (which see). */);
9247 Vselect_safe_coding_system_function
= Qnil
;
9249 DEFVAR_BOOL ("coding-system-require-warning",
9250 &coding_system_require_warning
,
9251 doc
: /* Internal use only.
9252 If non-nil, on writing a file, `select-safe-coding-system-function' is
9253 called even if `coding-system-for-write' is non-nil. The command
9254 `universal-coding-system-argument' binds this variable to t temporarily. */);
9255 coding_system_require_warning
= 0;
9258 DEFVAR_BOOL ("inhibit-iso-escape-detection",
9259 &inhibit_iso_escape_detection
,
9261 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9263 By default, on reading a file, Emacs tries to detect how the text is
9264 encoded. This code detection is sensitive to escape sequences. If
9265 the sequence is valid as ISO2022, the code is determined as one of
9266 the ISO2022 encodings, and the file is decoded by the corresponding
9267 coding system (e.g. `iso-2022-7bit').
9269 However, there may be a case that you want to read escape sequences in
9270 a file as is. In such a case, you can set this variable to non-nil.
9271 Then, as the code detection ignores any escape sequences, no file is
9272 detected as encoded in some ISO2022 encoding. The result is that all
9273 escape sequences become visible in a buffer.
9275 The default value is nil, and it is strongly recommended not to change
9276 it. That is because many Emacs Lisp source files that contain
9277 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9278 in Emacs's distribution, and they won't be decoded correctly on
9279 reading if you suppress escape sequence detection.
9281 The other way to read escape sequences in a file without decoding is
9282 to explicitly specify some coding system that doesn't use ISO2022's
9283 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
9284 inhibit_iso_escape_detection
= 0;
9286 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input
,
9287 doc
: /* Char table for translating self-inserting characters.
9288 This is applied to the result of input methods, not their input. See also
9289 `keyboard-translate-table'. */);
9290 Vtranslation_table_for_input
= Qnil
;
9293 Lisp_Object args
[coding_arg_max
];
9294 Lisp_Object plist
[16];
9297 for (i
= 0; i
< coding_arg_max
; i
++)
9300 plist
[0] = intern (":name");
9301 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
9302 plist
[2] = intern (":mnemonic");
9303 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
9304 plist
[4] = intern (":coding-type");
9305 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
9306 plist
[6] = intern (":ascii-compatible-p");
9307 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
9308 plist
[8] = intern (":default-char");
9309 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
9310 plist
[10] = intern (":for-unibyte");
9311 plist
[11] = args
[coding_arg_for_unibyte
] = Qt
;
9312 plist
[12] = intern (":docstring");
9313 plist
[13] = build_string ("Do no conversion.\n\
9315 When you visit a file with this coding, the file is read into a\n\
9316 unibyte buffer as is, thus each byte of a file is treated as a\n\
9318 plist
[14] = intern (":eol-type");
9319 plist
[15] = args
[coding_arg_eol_type
] = Qunix
;
9320 args
[coding_arg_plist
] = Flist (16, plist
);
9321 Fdefine_coding_system_internal (coding_arg_max
, args
);
9324 setup_coding_system (Qno_conversion
, &keyboard_coding
);
9325 setup_coding_system (Qno_conversion
, &terminal_coding
);
9326 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
9331 for (i
= 0; i
< coding_category_max
; i
++)
9332 Fset (AREF (Vcoding_category_table
, i
), Qno_conversion
);
9337 emacs_strerror (error_number
)
9342 synchronize_system_messages_locale ();
9343 str
= strerror (error_number
);
9345 if (! NILP (Vlocale_coding_system
))
9347 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
9348 Vlocale_coding_system
,
9350 str
= (char *) SDATA (dec
);