1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific coding system.
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
152 Below is the template of these functions. */
156 detect_coding_XXX (coding
, detect_info
)
157 struct coding_system
*coding
;
158 struct coding_detection_info
*detect_info
;
160 unsigned char *src
= coding
->source
;
161 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
162 int multibytep
= coding
->src_multibyte
;
163 int consumed_chars
= 0;
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
173 if (! __C_conforms_to_XXX___ (c
))
175 if (! __C_strongly_suggests_XXX__ (c
))
176 found
= CATEGORY_MASK_XXX
;
178 /* The byte sequence is invalid for XXX. */
179 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
183 /* The source was exhausted successfully. */
184 detect_info
->found
|= found
;
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
201 Below is the template of these functions. */
205 decode_coding_XXXX (coding
)
206 struct coding_system
*coding
;
208 unsigned char *src
= coding
->source
+ coding
->consumed
;
209 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base
;
214 /* A buffer to produce decoded characters. */
215 int *charbuf
= coding
->charbuf
;
216 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
217 int multibytep
= coding
->src_multibyte
;
222 if (charbuf
< charbuf_end
)
223 /* No more room to produce a decoded character. */
230 if (src_base
< src_end
231 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base
< src_end
&& charbuf
< charbuf_end
)
235 *charbuf
++ = *src_base
++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
239 /* Remember how many characters we produced. */
240 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of the not-yet-encoded source text.
260 Below is a template of these functions. */
263 encode_coding_XXX (coding
)
264 struct coding_system
*coding
;
266 int multibytep
= coding
->dst_multibyte
;
267 int *charbuf
= coding
->charbuf
;
268 int *charbuf_end
= charbuf
->charbuf
+ coding
->charbuf_used
;
269 unsigned char *dst
= coding
->destination
+ coding
->produced
;
270 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
271 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
272 int produced_chars
= 0;
274 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
277 /* Encode C into DST, and increment DST. */
279 label_no_more_destination
:
280 /* How many chars and bytes we produced. */
281 coding
->produced_char
+= produced_chars
;
282 coding
->produced
= dst
- coding
->destination
;
287 /*** 1. Preamble ***/
294 #include "character.h"
297 #include "composite.h"
301 Lisp_Object Vcoding_system_hash_table
;
303 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
304 Lisp_Object Qunix
, Qdos
;
305 extern Lisp_Object Qmac
; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system
;
307 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
308 Lisp_Object Qdefault_char
;
309 Lisp_Object Qno_conversion
, Qundecided
;
310 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
311 Lisp_Object Qbig
, Qlittle
;
312 Lisp_Object Qcoding_system_history
;
313 Lisp_Object Qvalid_codes
;
314 Lisp_Object QCcategory
, QCmnemonic
, QCdefalut_char
;
315 Lisp_Object QCdecode_translation_table
, QCencode_translation_table
;
316 Lisp_Object QCpost_read_conversion
, QCpre_write_conversion
;
318 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
319 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
320 Lisp_Object Qstart_process
, Qopen_network_stream
;
321 Lisp_Object Qtarget_idx
;
323 Lisp_Object Qinsufficient_source
, Qinconsistent_eol
, Qinvalid_source
;
324 Lisp_Object Qinterrupted
, Qinsufficient_memory
;
326 int coding_system_require_warning
;
328 Lisp_Object Vselect_safe_coding_system_function
;
330 /* Mnemonic string for each format of end-of-line. */
331 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
332 /* Mnemonic string to indicate that the format of end-of-line is not yet determined. */
334 Lisp_Object eol_mnemonic_undecided
;
338 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
340 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
342 /* Coding systems emacs-mule and raw-text are for converting only
343 end-of-line format. */
344 Lisp_Object Qemacs_mule
, Qraw_text
;
345 Lisp_Object Qutf_8_emacs
;
347 /* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */
349 /* Coding-system for reading files and receiving data from process. */
350 Lisp_Object Vcoding_system_for_read
;
351 /* Coding-system for writing files and sending data to process. */
352 Lisp_Object Vcoding_system_for_write
;
353 /* Coding-system actually used in the latest I/O. */
354 Lisp_Object Vlast_coding_system_used
;
355 /* Set to non-nil when an error is detected during code conversion. */
356 Lisp_Object Vlast_code_conversion_error
;
357 /* A vector of length 256 which contains information about special
358 Latin codes (especially for dealing with Microsoft codes). */
359 Lisp_Object Vlatin_extra_code_table
;
361 /* Flag to inhibit code conversion of end-of-line format. */
362 int inhibit_eol_conversion
;
364 /* Flag to inhibit ISO2022 escape sequence detection. */
365 int inhibit_iso_escape_detection
;
367 /* Flag to make buffer-file-coding-system inherit from process-coding. */
368 int inherit_process_coding_system
;
370 /* Coding system to be used to encode text for terminal display. */
371 struct coding_system terminal_coding
;
373 /* Coding system to be used to encode text for terminal display when
374 terminal coding system is nil. */
375 struct coding_system safe_terminal_coding
;
377 /* Coding system of what is sent from terminal keyboard. */
378 struct coding_system keyboard_coding
;
380 Lisp_Object Vfile_coding_system_alist
;
381 Lisp_Object Vprocess_coding_system_alist
;
382 Lisp_Object Vnetwork_coding_system_alist
;
384 Lisp_Object Vlocale_coding_system
;
388 /* Flag to tell if we look up translation table on character code
390 Lisp_Object Venable_character_translation
;
391 /* Standard translation table to look up on decoding (reading). */
392 Lisp_Object Vstandard_translation_table_for_decode
;
393 /* Standard translation table to look up on encoding (writing). */
394 Lisp_Object Vstandard_translation_table_for_encode
;
396 Lisp_Object Qtranslation_table
;
397 Lisp_Object Qtranslation_table_id
;
398 Lisp_Object Qtranslation_table_for_decode
;
399 Lisp_Object Qtranslation_table_for_encode
;
401 /* Alist of charsets vs revision number. */
402 static Lisp_Object Vcharset_revision_table
;
404 /* Default coding systems used for process I/O. */
405 Lisp_Object Vdefault_process_coding_system
;
407 /* Char table for translating Quail and self-inserting input. */
408 Lisp_Object Vtranslation_table_for_input
;
410 /* Two special coding systems. */
411 Lisp_Object Vsjis_coding_system
;
412 Lisp_Object Vbig5_coding_system
;
414 static void record_conversion_result (struct coding_system
*coding
,
415 enum coding_result_code result
);
416 static int detect_coding_utf_8
P_ ((struct coding_system
*,
417 struct coding_detection_info
*info
));
418 static void decode_coding_utf_8
P_ ((struct coding_system
*));
419 static int encode_coding_utf_8
P_ ((struct coding_system
*));
421 static int detect_coding_utf_16
P_ ((struct coding_system
*,
422 struct coding_detection_info
*info
));
423 static void decode_coding_utf_16
P_ ((struct coding_system
*));
424 static int encode_coding_utf_16
P_ ((struct coding_system
*));
426 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
427 struct coding_detection_info
*info
));
428 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
429 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
431 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
432 struct coding_detection_info
*info
));
433 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
434 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
436 static int detect_coding_sjis
P_ ((struct coding_system
*,
437 struct coding_detection_info
*info
));
438 static void decode_coding_sjis
P_ ((struct coding_system
*));
439 static int encode_coding_sjis
P_ ((struct coding_system
*));
441 static int detect_coding_big5
P_ ((struct coding_system
*,
442 struct coding_detection_info
*info
));
443 static void decode_coding_big5
P_ ((struct coding_system
*));
444 static int encode_coding_big5
P_ ((struct coding_system
*));
446 static int detect_coding_ccl
P_ ((struct coding_system
*,
447 struct coding_detection_info
*info
));
448 static void decode_coding_ccl
P_ ((struct coding_system
*));
449 static int encode_coding_ccl
P_ ((struct coding_system
*));
451 static void decode_coding_raw_text
P_ ((struct coding_system
*));
452 static int encode_coding_raw_text
P_ ((struct coding_system
*));
455 /* ISO2022 section */
457 #define CODING_ISO_INITIAL(coding, reg) \
458 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
459 coding_attr_iso_initial), \
463 #define CODING_ISO_REQUEST(coding, charset_id) \
464 ((charset_id <= (coding)->max_charset_id \
465 ? (coding)->safe_charsets[charset_id] \
/* Accessors for the ISO-2022 specific members kept under
   CODING->spec.iso_2022.  CODING must be a pointer; each macro
   expands to an lvalue-style member expression.  */

/* The `flags' member.  */
469 #define CODING_ISO_FLAGS(coding) \
470 ((coding)->spec.iso_2022.flags)
/* Element REG of the `current_designation' array.  */
471 #define CODING_ISO_DESIGNATION(coding, reg) \
472 ((coding)->spec.iso_2022.current_designation[reg])
/* Element PLANE of the `current_invocation' array.  */
473 #define CODING_ISO_INVOCATION(coding, plane) \
474 ((coding)->spec.iso_2022.current_invocation[plane])
/* The `single_shifting' member.  */
475 #define CODING_ISO_SINGLE_SHIFTING(coding) \
476 ((coding)->spec.iso_2022.single_shifting)
/* The `bol' member.  */
477 #define CODING_ISO_BOL(coding) \
478 ((coding)->spec.iso_2022.bol)
/* The designation entry selected by the current invocation of PLANE,
   i.e. current_designation[current_invocation[PLANE]].  */
479 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
480 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
482 /* Control characters of ISO2022. */
483 /* code */ /* function */
484 #define ISO_CODE_LF 0x0A /* line-feed */
485 #define ISO_CODE_CR 0x0D /* carriage-return */
486 #define ISO_CODE_SO 0x0E /* shift-out */
487 #define ISO_CODE_SI 0x0F /* shift-in */
488 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
489 #define ISO_CODE_ESC 0x1B /* escape */
490 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
491 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
492 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
494 /* All codes (1-byte) of ISO2022 are classified into one of the following types. */
/* Class of a single ISO 2022 byte, one enumerator per code or code
   range as described next to each member.  */
496 enum iso_code_class_type
498 ISO_control_0
, /* Control codes in the range
499 0x00..0x1F and 0x7F, except for the
500 following 4 codes. */
501 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
502 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
503 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
504 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
505 ISO_control_1
, /* Control codes in the range
506 0x80..0x9F, except for the
507 following 3 codes. */
508 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
509 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
510 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
511 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
512 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
513 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
514 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
517 /** The macros CODING_ISO_FLAG_XXX define flag bits of the
518 `iso-flags' attribute of an iso2022 coding system. */
520 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
521 instead of the correct short-form sequence (e.g. ESC $ A). */
522 #define CODING_ISO_FLAG_LONG_FORM 0x0001
524 /* If set, reset graphic planes and registers at end-of-line to the initial state. */
526 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
528 /* If set, reset graphic planes and registers before any control
529 characters to the initial state. */
530 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
532 /* If set, encode by 7-bit environment. */
533 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
535 /* If set, use locking-shift function. */
536 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
538 /* If set, use single-shift function. Overwrite
539 CODING_ISO_FLAG_LOCKING_SHIFT. */
540 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
542 /* If set, use designation escape sequence. */
543 #define CODING_ISO_FLAG_DESIGNATION 0x0040
545 /* If set, produce revision number sequence. */
546 #define CODING_ISO_FLAG_REVISION 0x0080
548 /* If set, produce ISO6429's direction specifying sequence. */
549 #define CODING_ISO_FLAG_DIRECTION 0x0100
551 /* If set, assume designation states are reset at beginning of line on
553 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
555 /* If set, designation sequence should be placed at beginning of line
557 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
559 /* If set, do not encode unsafe characters on output. */
560 #define CODING_ISO_FLAG_SAFE 0x0800
562 /* If set, extra latin codes (128..159) are accepted as a valid code
564 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
566 #define CODING_ISO_FLAG_COMPOSITION 0x2000
568 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
570 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
572 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
574 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
576 /* A character to be produced on output if encoding of the original
577 character is prohibited by CODING_ISO_FLAG_SAFE. */
578 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
/* Accessors for the UTF-16 specific members kept under
   CODING->spec.utf_16.  CODING must be a pointer.  */

/* The `bom' member.  */
582 #define CODING_UTF_16_BOM(coding) \
583 ((coding)->spec.utf_16.bom)
/* The `endian' member.  */
585 #define CODING_UTF_16_ENDIAN(coding) \
586 ((coding)->spec.utf_16.endian)
/* The `surrogate' member.  */
588 #define CODING_UTF_16_SURROGATE(coding) \
589 ((coding)->spec.utf_16.surrogate)
/* Accessors for the CCL related attributes of CODING.  Each one
   looks up the attribute vector with CODING_ID_ATTRS (CODING->id)
   and takes one element of it with AREF.  */

/* The element at index coding_attr_ccl_decoder.  */
593 #define CODING_CCL_DECODER(coding) \
594 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
/* The element at index coding_attr_ccl_encoder.  */
595 #define CODING_CCL_ENCODER(coding) \
596 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
/* SDATA of the element at index coding_attr_ccl_valids.
   NOTE(review): the use of SDATA implies that element is a Lisp
   string -- confirm against where the attribute is set.  */
597 #define CODING_CCL_VALIDS(coding) \
598 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
600 /* Index for each coding category in `coding_categories' */
604 coding_category_iso_7
,
605 coding_category_iso_7_tight
,
606 coding_category_iso_8_1
,
607 coding_category_iso_8_2
,
608 coding_category_iso_7_else
,
609 coding_category_iso_8_else
,
610 coding_category_utf_8
,
611 coding_category_utf_16_auto
,
612 coding_category_utf_16_be
,
613 coding_category_utf_16_le
,
614 coding_category_utf_16_be_nosig
,
615 coding_category_utf_16_le_nosig
,
616 coding_category_charset
,
617 coding_category_sjis
,
618 coding_category_big5
,
620 coding_category_emacs_mule
,
621 /* All above are targets of code detection. */
622 coding_category_raw_text
,
623 coding_category_undecided
,
627 /* Definitions of flag bits used in detect_coding_XXXX. */
/* One mask per coding category.  Each mask has exactly one bit set,
   at the bit position given by the corresponding coding_category_*
   enumerator, so masks of different categories can be OR-ed
   together.  */
628 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
629 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
630 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
631 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
632 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
633 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
634 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
635 #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
636 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
637 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
638 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
639 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
640 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
641 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
642 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
643 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
644 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
645 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
647 /* This value is returned if detect_coding_mask () finds nothing other
648 than ASCII characters. */
/* Union of the per-category masks listed below.
   NOTE(review): CATEGORY_MASK_UTF_16_AUTO and CATEGORY_MASK_RAW_TEXT
   are not part of this union -- confirm that the omission is
   intentional.  */
649 #define CATEGORY_MASK_ANY \
650 (CATEGORY_MASK_ISO_7 \
651 | CATEGORY_MASK_ISO_7_TIGHT \
652 | CATEGORY_MASK_ISO_8_1 \
653 | CATEGORY_MASK_ISO_8_2 \
654 | CATEGORY_MASK_ISO_7_ELSE \
655 | CATEGORY_MASK_ISO_8_ELSE \
656 | CATEGORY_MASK_UTF_8 \
657 | CATEGORY_MASK_UTF_16_BE \
658 | CATEGORY_MASK_UTF_16_LE \
659 | CATEGORY_MASK_UTF_16_BE_NOSIG \
660 | CATEGORY_MASK_UTF_16_LE_NOSIG \
661 | CATEGORY_MASK_CHARSET \
662 | CATEGORY_MASK_SJIS \
663 | CATEGORY_MASK_BIG5 \
664 | CATEGORY_MASK_CCL \
665 | CATEGORY_MASK_EMACS_MULE)
/* Convenience unions of the single-category masks.  */

/* The iso-7 and iso-7-tight categories.  */
668 #define CATEGORY_MASK_ISO_7BIT \
669 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
/* The iso-8-1 and iso-8-2 categories.  */
671 #define CATEGORY_MASK_ISO_8BIT \
672 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
/* The iso-7-else and iso-8-else categories.  */
674 #define CATEGORY_MASK_ISO_ELSE \
675 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
/* Every ISO category except iso-8-1 and iso-8-2.
   NOTE(review): presumably the categories that rely on escape
   sequences -- confirm against detect_coding_iso_2022.  */
677 #define CATEGORY_MASK_ISO_ESCAPE \
678 (CATEGORY_MASK_ISO_7 \
679 | CATEGORY_MASK_ISO_7_TIGHT \
680 | CATEGORY_MASK_ISO_7_ELSE \
681 | CATEGORY_MASK_ISO_8_ELSE)
/* All six ISO categories.  */
683 #define CATEGORY_MASK_ISO \
684 ( CATEGORY_MASK_ISO_7BIT \
685 | CATEGORY_MASK_ISO_8BIT \
686 | CATEGORY_MASK_ISO_ELSE)
/* The four UTF-16 categories with a fixed byte order
   (CATEGORY_MASK_UTF_16_AUTO is not included).  */
688 #define CATEGORY_MASK_UTF_16 \
689 (CATEGORY_MASK_UTF_16_BE \
690 | CATEGORY_MASK_UTF_16_LE \
691 | CATEGORY_MASK_UTF_16_BE_NOSIG \
692 | CATEGORY_MASK_UTF_16_LE_NOSIG)
695 /* List of symbols `coding-category-xxx' ordered by priority. This
696 variable is exposed to Emacs Lisp. */
697 static Lisp_Object Vcoding_category_list
;
699 /* Table of coding categories (Lisp symbols). This variable is for
701 static Lisp_Object Vcoding_category_table
;
703 /* Table of coding-categories ordered by priority. */
704 static enum coding_category coding_priorities
[coding_category_max
];
706 /* Nth element is a coding context for the coding system bound to the
707 Nth coding category. */
708 static struct coding_system coding_categories
[coding_category_max
];
710 /*** Commonly used macros and functions ***/
/* Smaller of A and B.  The selected argument is evaluated twice, so
   do not pass expressions with side effects.  The definition is
   guarded so that a `min' already supplied by a system header is
   left alone instead of being redefined.  */
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

/* Larger of A and B.  Same evaluation caveat and guard as `min'.  */
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
719 #define CODING_GET_INFO(coding, attrs, charset_list) \
721 (attrs) = CODING_ID_ATTRS ((coding)->id); \
722 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
726 /* Safely get one byte from the source text pointed by SRC which ends
727 at SRC_END, and set C to that byte. If there are not enough bytes
728 in the source, it jumps to `no_more_source'. If multibytep is
729 nonzero, and a multibyte character is found at SRC, set C to the
730 negative value of the character code. The caller should declare
731 and set these variables appropriately in advance:
732 src, src_end, multibytep */
734 #define ONE_MORE_BYTE(c) \
736 if (src == src_end) \
738 if (src_base < src) \
739 record_conversion_result \
740 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
741 goto no_more_source; \
744 if (multibytep && (c & 0x80)) \
746 if ((c & 0xFE) == 0xC0) \
747 c = ((c & 1) << 6) | *src++; \
750 c = - string_char (--src, &src, NULL); \
751 record_conversion_result \
752 (coding, CODING_RESULT_INVALID_SRC); \
759 #define ONE_MORE_BYTE_NO_CHECK(c) \
762 if (multibytep && (c & 0x80)) \
764 if ((c & 0xFE) == 0xC0) \
765 c = ((c & 1) << 6) | *src++; \
768 c = - string_char (--src, &src, NULL); \
769 record_conversion_result \
770 (coding, CODING_RESULT_INVALID_SRC); \
777 /* Store a byte C in the place pointed to by DST and increment DST to the
778 next free point, and increment PRODUCED_CHARS. The caller should
779 ensure that C is 0..127, and declare and set the variable `dst'
780 appropriately in advance. */
784 #define EMIT_ONE_ASCII_BYTE(c) \
791 /* Like EMIT_ONE_ASCII_BYTE but store two bytes: C1 and C2. */
793 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
795 produced_chars += 2; \
796 *dst++ = (c1), *dst++ = (c2); \
800 /* Store a byte C in the place pointed to by DST and increment DST to the
801 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
802 nonzero, store in an appropriate multibyte form. The caller should
803 declare and set the variables `dst' and `multibytep' appropriately in advance. */
806 #define EMIT_ONE_BYTE(c) \
813 ch = BYTE8_TO_CHAR (ch); \
814 CHAR_STRING_ADVANCE (ch, dst); \
821 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
823 #define EMIT_TWO_BYTES(c1, c2) \
825 produced_chars += 2; \
832 ch = BYTE8_TO_CHAR (ch); \
833 CHAR_STRING_ADVANCE (ch, dst); \
836 ch = BYTE8_TO_CHAR (ch); \
837 CHAR_STRING_ADVANCE (ch, dst); \
847 #define EMIT_THREE_BYTES(c1, c2, c3) \
849 EMIT_ONE_BYTE (c1); \
850 EMIT_TWO_BYTES (c2, c3); \
854 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
856 EMIT_TWO_BYTES (c1, c2); \
857 EMIT_TWO_BYTES (c3, c4); \
/* Record RESULT as the outcome of the conversion described by CODING.
   RESULT is stored in CODING->result.  For each of the result codes
   listed below, Vlast_code_conversion_error is additionally set to
   the matching symbol:
     CODING_RESULT_INSUFFICIENT_SRC  -> Qinsufficient_source
     CODING_RESULT_INCONSISTENT_EOL  -> Qinconsistent_eol
     CODING_RESULT_INVALID_SRC       -> Qinvalid_source
     CODING_RESULT_INTERRUPT         -> Qinterrupted
     CODING_RESULT_INSUFFICIENT_MEM  -> Qinsufficient_memory
   No case is listed for other result codes (CODING_RESULT_SUCCESS is
   passed by alloc_destination), so Vlast_code_conversion_error is
   not assigned for them here.  */
862 record_conversion_result (struct coding_system
*coding
,
863 enum coding_result_code result
)
865 coding
->result
= result
;
868 case CODING_RESULT_INSUFFICIENT_SRC
:
869 Vlast_code_conversion_error
= Qinsufficient_source
;
871 case CODING_RESULT_INCONSISTENT_EOL
:
872 Vlast_code_conversion_error
= Qinconsistent_eol
;
874 case CODING_RESULT_INVALID_SRC
:
875 Vlast_code_conversion_error
= Qinvalid_source
;
877 case CODING_RESULT_INTERRUPT
:
878 Vlast_code_conversion_error
= Qinterrupted
;
880 case CODING_RESULT_INSUFFICIENT_MEM
:
881 Vlast_code_conversion_error
= Qinsufficient_memory
;
886 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
888 charset_map_loaded = 0; \
889 c = DECODE_CHAR (charset, code); \
890 if (charset_map_loaded) \
892 const unsigned char *orig = coding->source; \
895 coding_set_source (coding); \
896 offset = coding->source - orig; \
898 src_base += offset; \
904 #define ASSURE_DESTINATION(bytes) \
906 if (dst + (bytes) >= dst_end) \
908 int more_bytes = charbuf_end - charbuf + (bytes); \
910 dst = alloc_destination (coding, more_bytes, dst); \
911 dst_end = coding->destination + coding->dst_bytes; \
/* Recompute CODING->source from CODING->src_object.

   If the source object is a buffer, the address is derived from that
   buffer: when CODING->src_pos is negative it is the buffer's gap end
   address plus CODING->src_pos_byte, and the other visible assignment
   uses BUF_BYTE_ADDRESS for CODING->src_pos_byte.  If the source
   object is a string, the address is the string data plus
   CODING->src_pos_byte.  For any other source nothing is changed.  */
918 coding_set_source (coding
)
919 struct coding_system
*coding
;
921 if (BUFFERP (coding
->src_object
))
923 struct buffer
*buf
= XBUFFER (coding
->src_object
);
925 if (coding
->src_pos
< 0)
926 coding
->source
= BUF_GAP_END_ADDR (buf
) + coding
->src_pos_byte
;
928 coding
->source
= BUF_BYTE_ADDRESS (buf
, coding
->src_pos_byte
);
930 else if (STRINGP (coding
->src_object
))
932 coding
->source
= SDATA (coding
->src_object
) + coding
->src_pos_byte
;
935 /* Otherwise, the source is C string and is never relocated
936 automatically. Thus we don't have to update anything. */
941 coding_set_destination (coding
)
942 struct coding_system
*coding
;
944 if (BUFFERP (coding
->dst_object
))
946 if (coding
->src_pos
< 0)
948 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
949 coding
->dst_bytes
= (GAP_END_ADDR
950 - (coding
->src_bytes
- coding
->consumed
)
951 - coding
->destination
);
955 /* We are sure that coding->dst_pos_byte is before the gap of the buffer. */
957 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
958 + coding
->dst_pos_byte
- 1);
959 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
960 - coding
->destination
);
964 /* Otherwise, the destination is C string and is never relocated
965 automatically. Thus we don't have to update anything. */
/* Enlarge the destination area of CODING by BYTES bytes with
   xrealloc.  CODING->destination is replaced by the pointer xrealloc
   returns (so it may move), and CODING->dst_bytes grows by BYTES.
   Callers holding pointers into the old area must recompute them;
   alloc_destination does so through a saved offset.  */
971 coding_alloc_by_realloc (coding
, bytes
)
972 struct coding_system
*coding
;
975 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
976 coding
->dst_bytes
+ bytes
);
977 coding
->dst_bytes
+= bytes
;
/* Make room for BYTES more bytes when the destination of CODING is a
   buffer.

   When the source and destination objects are the same buffer, ADD
   is the number of source bytes not yet consumed
   (CODING->src_bytes - CODING->consumed).  GAP_SIZE is first reduced
   by ADD while ZV, Z, ZV_BYTE and Z_BYTE are increased by ADD, and
   afterwards the same adjustments are undone.
   NOTE(review): presumably this temporarily counts the unconsumed
   source as buffer text while the gap is enlarged -- confirm.

   In the other visible branch the current buffer is remembered,
   CODING->dst_object is made current with set_buffer_internal, and
   the remembered buffer is made current again afterwards.  */
981 coding_alloc_by_making_gap (coding
, bytes
)
982 struct coding_system
*coding
;
985 if (BUFFERP (coding
->dst_object
)
986 && EQ (coding
->src_object
, coding
->dst_object
))
988 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
990 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
992 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
996 Lisp_Object this_buffer
;
998 this_buffer
= Fcurrent_buffer ();
999 set_buffer_internal (XBUFFER (coding
->dst_object
));
1001 set_buffer_internal (XBUFFER (this_buffer
));
/* Get NBYTES more room in the destination of CODING, where DST points
   into the current destination area.

   The offset of DST from CODING->destination is saved first, because
   growing may move the area.  A buffer destination is grown with
   coding_alloc_by_making_gap, the other visible call uses
   coding_alloc_by_realloc.  Afterwards the result of CODING is reset
   to CODING_RESULT_SUCCESS, coding_set_destination refreshes the
   destination members, and DST is recomputed from the new
   CODING->destination plus the saved offset.
   NOTE(review): the recomputed DST is presumably the return value
   (the function is declared to return unsigned char *) -- confirm.  */
1006 static unsigned char *
1007 alloc_destination (coding
, nbytes
, dst
)
1008 struct coding_system
*coding
;
1012 EMACS_INT offset
= dst
- coding
->destination
;
1014 if (BUFFERP (coding
->dst_object
))
1015 coding_alloc_by_making_gap (coding
, nbytes
);
1017 coding_alloc_by_realloc (coding
, nbytes
);
1018 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1019 coding_set_destination (coding
);
1020 dst
= coding
->destination
+ offset
;
1024 /** Macros for annotations. */
1026 /* Maximum length of annotation data (sum of annotations for
1027 composition and charset). */
1028 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1030 /* An annotation data is stored in the array coding->charbuf in this
1032 [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1033 LENGTH is the number of elements in the annotation.
1034 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1035 FROM and TO specify the range of text annotated. They are relative
1036 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1038 The format of the following elements depends on ANNOTATION_MASK.
1040 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1042 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1043 METHOD is one of enum composition_method.
1044 Optional COMPOSITION-COMPONENTS are characters and composition
1047 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1050 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
1052 *(buf)++ = -(len); \
1053 *(buf)++ = (mask); \
1054 *(buf)++ = (from); \
1056 coding->annotated = 1; \
1059 #define ADD_COMPOSITION_DATA(buf, from, to, method) \
1061 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1066 #define ADD_CHARSET_DATA(buf, from, to, id) \
1068 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1073 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1080 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1081 Check if a text is encoded in UTF-8. If it is, return 1, else
1084 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1085 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1086 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1087 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1088 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1089 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1092 detect_coding_utf_8 (coding
, detect_info
)
1093 struct coding_system
*coding
;
1094 struct coding_detection_info
*detect_info
;
1096 const unsigned char *src
= coding
->source
, *src_base
;
1097 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1098 int multibytep
= coding
->src_multibyte
;
1099 int consumed_chars
= 0;
1102 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1103 /* A coding system of this category is always ASCII compatible. */
1104 src
+= coding
->head_ascii
;
1108 int c
, c1
, c2
, c3
, c4
;
1112 if (c
< 0 || UTF_8_1_OCTET_P (c
))
1115 if (c1
< 0 || ! UTF_8_EXTRA_OCTET_P (c1
))
1117 if (UTF_8_2_OCTET_LEADING_P (c
))
1119 found
= CATEGORY_MASK_UTF_8
;
1123 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1125 if (UTF_8_3_OCTET_LEADING_P (c
))
1127 found
= CATEGORY_MASK_UTF_8
;
1131 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1133 if (UTF_8_4_OCTET_LEADING_P (c
))
1135 found
= CATEGORY_MASK_UTF_8
;
1139 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1141 if (UTF_8_5_OCTET_LEADING_P (c
))
1143 found
= CATEGORY_MASK_UTF_8
;
1148 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1152 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1154 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1157 detect_info
->found
|= found
;
1163 decode_coding_utf_8 (coding
)
1164 struct coding_system
*coding
;
1166 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1167 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1168 const unsigned char *src_base
;
1169 int *charbuf
= coding
->charbuf
;
1170 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1171 int consumed_chars
= 0, consumed_chars_base
;
1172 int multibytep
= coding
->src_multibyte
;
1173 Lisp_Object attr
, charset_list
;
1175 CODING_GET_INFO (coding
, attr
, charset_list
);
1179 int c
, c1
, c2
, c3
, c4
, c5
;
1182 consumed_chars_base
= consumed_chars
;
1184 if (charbuf
>= charbuf_end
)
1192 else if (UTF_8_1_OCTET_P(c1
))
1199 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1201 if (UTF_8_2_OCTET_LEADING_P (c1
))
1203 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1204 /* Reject overlong sequences here and below. Encoders
1205 producing them are incorrect, they can be misleading,
1206 and they mess up read/write invariance. */
1213 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1215 if (UTF_8_3_OCTET_LEADING_P (c1
))
1217 c
= (((c1
& 0xF) << 12)
1218 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1220 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1226 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1228 if (UTF_8_4_OCTET_LEADING_P (c1
))
1230 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1231 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1238 if (c5
< 0 || ! UTF_8_EXTRA_OCTET_P (c5
))
1240 if (UTF_8_5_OCTET_LEADING_P (c1
))
1242 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1243 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1245 if ((c
> MAX_CHAR
) || (c
< 0x200000))
1260 consumed_chars
= consumed_chars_base
;
1262 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1267 coding
->consumed_char
+= consumed_chars_base
;
1268 coding
->consumed
= src_base
- coding
->source
;
1269 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1274 encode_coding_utf_8 (coding
)
1275 struct coding_system
*coding
;
1277 int multibytep
= coding
->dst_multibyte
;
1278 int *charbuf
= coding
->charbuf
;
1279 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1280 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1281 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1282 int produced_chars
= 0;
1287 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1289 while (charbuf
< charbuf_end
)
1291 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1293 ASSURE_DESTINATION (safe_room
);
1295 if (CHAR_BYTE8_P (c
))
1297 c
= CHAR_TO_BYTE8 (c
);
1302 CHAR_STRING_ADVANCE (c
, pend
);
1303 for (p
= str
; p
< pend
; p
++)
1310 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1312 while (charbuf
< charbuf_end
)
1314 ASSURE_DESTINATION (safe_room
);
1316 dst
+= CHAR_STRING (c
, dst
);
1320 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1321 coding
->produced_char
+= produced_chars
;
1322 coding
->produced
= dst
- coding
->destination
;
1327 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1328 Check if a text is encoded in one of UTF-16 based coding systems.
1329 If it is, return 1, else return 0. */
1331 #define UTF_16_HIGH_SURROGATE_P(val) \
1332 (((val) & 0xFC00) == 0xD800)
1334 #define UTF_16_LOW_SURROGATE_P(val) \
1335 (((val) & 0xFC00) == 0xDC00)
1337 #define UTF_16_INVALID_P(val) \
1338 (((val) == 0xFFFE) \
1339 || ((val) == 0xFFFF) \
1340 || UTF_16_LOW_SURROGATE_P (val))
1344 detect_coding_utf_16 (coding
, detect_info
)
1345 struct coding_system
*coding
;
1346 struct coding_detection_info
*detect_info
;
1348 const unsigned char *src
= coding
->source
, *src_base
= src
;
1349 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1350 int multibytep
= coding
->src_multibyte
;
1351 int consumed_chars
= 0;
1354 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
1355 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1356 && (coding
->src_chars
& 1))
1358 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
1364 if ((c1
== 0xFF) && (c2
== 0xFE))
1366 detect_info
->found
|= (CATEGORY_MASK_UTF_16_LE
1367 | CATEGORY_MASK_UTF_16_AUTO
);
1368 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_BE
1369 | CATEGORY_MASK_UTF_16_BE_NOSIG
1370 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1372 else if ((c1
== 0xFE) && (c2
== 0xFF))
1374 detect_info
->found
|= (CATEGORY_MASK_UTF_16_BE
1375 | CATEGORY_MASK_UTF_16_AUTO
);
1376 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_LE
1377 | CATEGORY_MASK_UTF_16_BE_NOSIG
1378 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1380 else if (c1
>= 0 && c2
>= 0)
1382 unsigned char b1
[256], b2
[256];
1383 int b1_variants
= 1, b2_variants
= 1;
1386 bzero (b1
, 256), bzero (b2
, 256);
1388 for (n
= 0; n
< 256 && src
< src_end
; n
++)
1393 if (c1
< 0 || c2
< 0)
1395 if (! b1
[c1
++]) b1_variants
++;
1396 if (! b2
[c2
++]) b2_variants
++;
1398 if (b1_variants
< b2_variants
)
1399 detect_info
->found
|= CATEGORY_MASK_UTF_16_BE_NOSIG
;
1401 detect_info
->found
|= CATEGORY_MASK_UTF_16_LE_NOSIG
;
1402 detect_info
->rejected
1403 |= (CATEGORY_MASK_UTF_16_BE
| CATEGORY_MASK_UTF_16_LE
);
1410 decode_coding_utf_16 (coding
)
1411 struct coding_system
*coding
;
1413 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1414 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1415 const unsigned char *src_base
;
1416 int *charbuf
= coding
->charbuf
;
1417 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1418 int consumed_chars
= 0, consumed_chars_base
;
1419 int multibytep
= coding
->src_multibyte
;
1420 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1421 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1422 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1423 Lisp_Object attr
, charset_list
;
1425 CODING_GET_INFO (coding
, attr
, charset_list
);
1427 if (bom
== utf_16_with_bom
)
1436 if (endian
== utf_16_big_endian
1437 ? c
!= 0xFEFF : c
!= 0xFFFE)
1439 /* The first two bytes are not BOM. Treat them as bytes
1440 for a normal character. */
1444 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1446 else if (bom
== utf_16_detect_bom
)
1448 /* We have already tried to detect BOM and failed in
1450 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1458 consumed_chars_base
= consumed_chars
;
1460 if (charbuf
+ 2 >= charbuf_end
)
1472 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
1476 c
= (endian
== utf_16_big_endian
1477 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1480 if (! UTF_16_LOW_SURROGATE_P (c
))
1482 if (endian
== utf_16_big_endian
)
1483 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1485 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1489 if (UTF_16_HIGH_SURROGATE_P (c
))
1490 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1496 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1497 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1503 if (UTF_16_HIGH_SURROGATE_P (c
))
1504 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1511 coding
->consumed_char
+= consumed_chars_base
;
1512 coding
->consumed
= src_base
- coding
->source
;
1513 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1517 encode_coding_utf_16 (coding
)
1518 struct coding_system
*coding
;
1520 int multibytep
= coding
->dst_multibyte
;
1521 int *charbuf
= coding
->charbuf
;
1522 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1523 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1524 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1526 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1527 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1528 int produced_chars
= 0;
1529 Lisp_Object attrs
, charset_list
;
1532 CODING_GET_INFO (coding
, attrs
, charset_list
);
1534 if (bom
!= utf_16_without_bom
)
1536 ASSURE_DESTINATION (safe_room
);
1538 EMIT_TWO_BYTES (0xFE, 0xFF);
1540 EMIT_TWO_BYTES (0xFF, 0xFE);
1541 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1544 while (charbuf
< charbuf_end
)
1546 ASSURE_DESTINATION (safe_room
);
1548 if (c
>= MAX_UNICODE_CHAR
)
1549 c
= coding
->default_char
;
1554 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1556 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1563 c1
= (c
>> 10) + 0xD800;
1564 c2
= (c
& 0x3FF) + 0xDC00;
1566 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1568 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1571 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1572 coding
->produced
= dst
- coding
->destination
;
1573 coding
->produced_char
+= produced_chars
;
1578 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1580 /* Emacs' internal format for representation of multiple character
1581 sets is a kind of multi-byte encoding, i.e. characters are
1582 represented by variable-length sequences of one-byte codes.
1584 ASCII characters and control characters (e.g. `tab', `newline') are
1585 represented by one-byte sequences which are their ASCII codes, in
1586 the range 0x00 through 0x7F.
1588 8-bit characters of the range 0x80..0x9F are represented by
1589 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1592 8-bit characters of the range 0xA0..0xFF are represented by
1593 one-byte sequences which are their 8-bit code.
1595 The other characters are represented by a sequence of `base
1596 leading-code', optional `extended leading-code', and one or two
1597 `position-code's. The length of the sequence is determined by the
1598 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1599 whereas extended leading-code and position-code take the range 0xA0
1600 through 0xFF. See `charset.h' for more details about leading-code
1603 --- CODE RANGE of Emacs' internal format ---
1607 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1608 eight-bit-graphic 0xA0..0xFF
1609 ELSE 0x81..0x9D + [0xA0..0xFF]+
1610 ---------------------------------------------
1612 As this is the internal character representation, the format is
1613 usually not used externally (i.e. in a file or in a data sent to a
1614 process). But, it is possible to have a text externally in this
1615 format (i.e. by encoding by the coding system `emacs-mule').
1617 In that case, a sequence of one-byte codes has a slightly different
1620 At first, all characters in eight-bit-control are represented by
1621 one-byte sequences which are their 8-bit code.
1623 Next, character composition data are represented by the byte
1624 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1626 METHOD is 0xF2 plus one of composition method (enum
1627 composition_method),
1629 BYTES is 0xA0 plus a byte length of this composition data,
1631 CHARS is 0xA0 plus a number of characters composed by this
1634 COMPONENTs are characters of multibyte form or composition
1635 rules encoded by two-byte of ASCII codes.
1637 In addition, for backward compatibility, the following formats are
1638 also recognized as composition data on decoding.
1641 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1644 MSEQ is a multibyte form but in this special format:
1645 ASCII: 0xA0 ASCII_CODE+0x80,
1646 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1647 RULE is a one byte code of the range 0xA0..0xF0 that
1648 represents a composition rule.
1651 char emacs_mule_bytes
[256];
1654 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1655 struct coding_system
*coding
;
1656 const unsigned char *src
;
1657 int *nbytes
, *nchars
, *id
;
1659 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1660 const unsigned char *src_base
= src
;
1661 int multibytep
= coding
->src_multibyte
;
1662 struct charset
*charset
;
1665 int consumed_chars
= 0;
1671 charset
= emacs_mule_charset
[0];
1675 switch (emacs_mule_bytes
[c
])
1678 if (! (charset
= emacs_mule_charset
[c
]))
1687 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1688 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1691 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1700 if (! (charset
= emacs_mule_charset
[c
]))
1705 code
= (c
& 0x7F) << 8;
1715 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1720 code
= (c
& 0x7F) << 8;
1729 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1730 ? charset_ascii
: charset_eight_bit
);
1736 c
= DECODE_CHAR (charset
, code
);
1740 *nbytes
= src
- src_base
;
1741 *nchars
= consumed_chars
;
1754 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1755 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1759 detect_coding_emacs_mule (coding
, detect_info
)
1760 struct coding_system
*coding
;
1761 struct coding_detection_info
*detect_info
;
1763 const unsigned char *src
= coding
->source
, *src_base
;
1764 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1765 int multibytep
= coding
->src_multibyte
;
1766 int consumed_chars
= 0;
1770 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1771 /* A coding system of this category is always ASCII compatible. */
1772 src
+= coding
->head_ascii
;
1782 /* Perhaps the start of composite character. We simply skip
1783 it because analyzing it is too heavy for detecting. But,
1784 at least, we check that the composite character
1785 consists of more than 4 bytes. */
1786 const unsigned char *src_base
;
1796 if (src
- src_base
<= 4)
1798 found
= CATEGORY_MASK_EMACS_MULE
;
1806 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1811 const unsigned char *src_base
= src
- 1;
1818 if (src
- src_base
!= emacs_mule_bytes
[*src_base
])
1820 found
= CATEGORY_MASK_EMACS_MULE
;
1823 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1827 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1829 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1832 detect_info
->found
|= found
;
1837 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1839 /* Decode a character represented as a component of composition
1840 sequence of Emacs 20/21 style at SRC. Set C to that character and
1841 update SRC to the head of next character (or an encoded composition
1842 rule). If SRC doesn't point to a composition component, set C to -1.
1843 If SRC points to an invalid byte sequence, global exit by a return
1846 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1850 int nbytes, nchars; \
1852 if (src == src_end) \
1854 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1859 goto invalid_code; \
1863 consumed_chars += nchars; \
1868 /* Decode a composition rule represented as a component of composition
1869 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1870 and increment BUF. If SRC points an invalid byte sequence, set C
1873 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1875 int c, gref, nref; \
1877 if (src >= src_end) \
1878 goto invalid_code; \
1879 ONE_MORE_BYTE_NO_CHECK (c); \
1881 if (c < 0 || c >= 81) \
1882 goto invalid_code; \
1884 gref = c / 9, nref = c % 9; \
1885 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1889 /* Decode a composition rule represented as a component of composition
1890 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1891 and increment BUF. If SRC points an invalid byte sequence, set C
1894 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1898 if (src + 1>= src_end) \
1899 goto invalid_code; \
1900 ONE_MORE_BYTE_NO_CHECK (gref); \
1902 ONE_MORE_BYTE_NO_CHECK (nref); \
1904 if (gref < 0 || gref >= 81 \
1905 || nref < 0 || nref >= 81) \
1906 goto invalid_code; \
1907 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1911 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1913 /* Emacs 21 style format. The first three bytes at SRC are \
1914 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1915 the byte length of this composition information, CHARS is the \
1916 number of characters composed by this composition. */ \
1917 enum composition_method method = c - 0xF2; \
1918 int *charbuf_base = charbuf; \
1920 int consumed_chars_limit; \
1921 int nbytes, nchars; \
1923 ONE_MORE_BYTE (c); \
1925 goto invalid_code; \
1926 nbytes = c - 0xA0; \
1928 goto invalid_code; \
1929 ONE_MORE_BYTE (c); \
1931 goto invalid_code; \
1932 nchars = c - 0xA0; \
1933 from = coding->produced + char_offset; \
1934 to = from + nchars; \
1935 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1936 consumed_chars_limit = consumed_chars_base + nbytes; \
1937 if (method != COMPOSITION_RELATIVE) \
1940 while (consumed_chars < consumed_chars_limit) \
1942 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1943 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1945 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1948 if (consumed_chars < consumed_chars_limit) \
1949 goto invalid_code; \
1950 charbuf_base[0] -= i; \
1955 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1957 /* Emacs 20 style format for relative composition. */ \
1958 /* Store multibyte form of characters to be composed. */ \
1959 enum composition_method method = COMPOSITION_RELATIVE; \
1960 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1961 int *buf = components; \
1966 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1967 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1968 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1970 goto invalid_code; \
1971 from = coding->produced_char + char_offset; \
1973 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1974 for (j = 0; j < i; j++) \
1975 *charbuf++ = components[j]; \
1979 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1981 /* Emacs 20 style format for rule-base composition. */ \
1982 /* Store multibyte form of characters to be composed. */ \
1983 enum composition_method method = COMPOSITION_WITH_RULE; \
1984 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1985 int *buf = components; \
1989 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1990 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1992 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1993 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1995 if (i < 1 || (buf - components) % 2 == 0) \
1996 goto invalid_code; \
1997 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1998 goto no_more_source; \
1999 from = coding->produced_char + char_offset; \
2001 ADD_COMPOSITION_DATA (buf, from, to, method); \
2002 for (j = 0; j < i; j++) \
2003 *charbuf++ = components[j]; \
2004 for (j = 0; j < i; j += 2) \
2005 *charbuf++ = components[j]; \
2010 decode_coding_emacs_mule (coding
)
2011 struct coding_system
*coding
;
2013 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2014 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2015 const unsigned char *src_base
;
2016 int *charbuf
= coding
->charbuf
;
2017 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
2018 int consumed_chars
= 0, consumed_chars_base
;
2019 int multibytep
= coding
->src_multibyte
;
2020 Lisp_Object attrs
, charset_list
;
2021 int char_offset
= coding
->produced_char
;
2022 int last_offset
= char_offset
;
2023 int last_id
= charset_ascii
;
2025 CODING_GET_INFO (coding
, attrs
, charset_list
);
2032 consumed_chars_base
= consumed_chars
;
2034 if (charbuf
>= charbuf_end
)
2053 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2054 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2055 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2057 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2059 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2063 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2069 consumed_chars
= consumed_chars_base
;
2070 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2079 if (last_id
!= charset_ascii
)
2080 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2082 last_offset
= char_offset
;
2086 consumed_chars
+= nchars
;
2093 consumed_chars
= consumed_chars_base
;
2095 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
2101 if (last_id
!= charset_ascii
)
2102 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
2103 coding
->consumed_char
+= consumed_chars_base
;
2104 coding
->consumed
= src_base
- coding
->source
;
2105 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
2109 #define EMACS_MULE_LEADING_CODES(id, codes) \
2112 codes[0] = id, codes[1] = 0; \
2113 else if (id < 0xE0) \
2114 codes[0] = 0x9A, codes[1] = id; \
2115 else if (id < 0xF0) \
2116 codes[0] = 0x9B, codes[1] = id; \
2117 else if (id < 0xF5) \
2118 codes[0] = 0x9C, codes[1] = id; \
2120 codes[0] = 0x9D, codes[1] = id; \
2125 encode_coding_emacs_mule (coding
)
2126 struct coding_system
*coding
;
2128 int multibytep
= coding
->dst_multibyte
;
2129 int *charbuf
= coding
->charbuf
;
2130 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2131 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2132 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2134 int produced_chars
= 0;
2135 Lisp_Object attrs
, charset_list
;
2137 int preferred_charset_id
= -1;
2139 CODING_GET_INFO (coding
, attrs
, charset_list
);
2140 if (! EQ (charset_list
, Vemacs_mule_charset_list
))
2142 CODING_ATTR_CHARSET_LIST (attrs
)
2143 = charset_list
= Vemacs_mule_charset_list
;
2146 while (charbuf
< charbuf_end
)
2148 ASSURE_DESTINATION (safe_room
);
2153 /* Handle an annotation. */
2156 case CODING_ANNOTATE_COMPOSITION_MASK
:
2157 /* Not yet implemented. */
2159 case CODING_ANNOTATE_CHARSET_MASK
:
2160 preferred_charset_id
= charbuf
[3];
2161 if (preferred_charset_id
>= 0
2162 && NILP (Fmemq (make_number (preferred_charset_id
),
2164 preferred_charset_id
= -1;
2173 if (ASCII_CHAR_P (c
))
2174 EMIT_ONE_ASCII_BYTE (c
);
2175 else if (CHAR_BYTE8_P (c
))
2177 c
= CHAR_TO_BYTE8 (c
);
2182 struct charset
*charset
;
2186 unsigned char leading_codes
[2];
2188 if (preferred_charset_id
>= 0)
2190 charset
= CHARSET_FROM_ID (preferred_charset_id
);
2191 if (! CHAR_CHARSET_P (c
, charset
))
2192 charset
= char_charset (c
, charset_list
, NULL
);
2195 charset
= char_charset (c
, charset_list
, &code
);
2198 c
= coding
->default_char
;
2199 if (ASCII_CHAR_P (c
))
2201 EMIT_ONE_ASCII_BYTE (c
);
2204 charset
= char_charset (c
, charset_list
, &code
);
2206 dimension
= CHARSET_DIMENSION (charset
);
2207 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2208 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2209 EMIT_ONE_BYTE (leading_codes
[0]);
2210 if (leading_codes
[1])
2211 EMIT_ONE_BYTE (leading_codes
[1]);
2213 EMIT_ONE_BYTE (code
| 0x80);
2217 EMIT_ONE_BYTE (code
>> 8);
2218 EMIT_ONE_BYTE (code
& 0xFF);
2222 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
2223 coding
->produced_char
+= produced_chars
;
2224 coding
->produced
= dst
- coding
->destination
;
2229 /*** 7. ISO2022 handlers ***/
2231 /* The following note describes the coding system ISO2022 briefly.
2232 Since the intention of this note is to help understand the
2233 functions in this file, some parts are NOT ACCURATE or are OVERLY
2234 SIMPLIFIED. For thorough understanding, please refer to the
2235 original document of ISO2022. This is equivalent to the standard
2236 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2238 ISO2022 provides many mechanisms to encode several character sets
2239 in 7-bit and 8-bit environments. For 7-bit environments, all text
2240 is encoded using bytes less than 128. This may make the encoded
2241 text a little bit longer, but the text passes more easily through
2242 several types of gateway, some of which strip off the MSB (Most
2245 There are two kinds of character sets: control character sets and
2246 graphic character sets. The former contain control characters such
2247 as `newline' and `escape' to provide control functions (control
2248 functions are also provided by escape sequences). The latter
2249 contain graphic characters such as 'A' and '-'. Emacs recognizes
2250 two control character sets and many graphic character sets.
2252 Graphic character sets are classified into one of the following
2253 four classes, according to the number of bytes (DIMENSION) and
2254 number of characters in one dimension (CHARS) of the set:
2255 - DIMENSION1_CHARS94
2256 - DIMENSION1_CHARS96
2257 - DIMENSION2_CHARS94
2258 - DIMENSION2_CHARS96
2260 In addition, each character set is assigned an identification tag,
2261 unique for each set, called the "final character" (denoted as <F>
2262 hereafter). The <F> of each character set is decided by ECMA(*)
2263 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2264 (0x30..0x3F are for private use only).
2266 Note (*): ECMA = European Computer Manufacturers Association
2268 Here are examples of graphic character sets [NAME(<F>)]:
2269 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2270 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2271 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2272 o DIMENSION2_CHARS96 -- none for the moment
2274 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2275 C0 [0x00..0x1F] -- control character plane 0
2276 GL [0x20..0x7F] -- graphic character plane 0
2277 C1 [0x80..0x9F] -- control character plane 1
2278 GR [0xA0..0xFF] -- graphic character plane 1
2280 A control character set is directly designated and invoked to C0 or
2281 C1 by an escape sequence. The most common case is that:
2282 - ISO646's control character set is designated/invoked to C0, and
2283 - ISO6429's control character set is designated/invoked to C1,
2284 and usually these designations/invocations are omitted in encoded
2285 text. In a 7-bit environment, only C0 can be used, and a control
2286 character for C1 is encoded by an appropriate escape sequence to
2287 fit into the environment. All control characters for C1 are
2288 defined to have corresponding escape sequences.
2290 A graphic character set is at first designated to one of four
2291 graphic registers (G0 through G3), then these graphic registers are
2292 invoked to GL or GR. These designations and invocations can be
2293 done independently. The most common case is that G0 is invoked to
2294 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2295 these invocations and designations are omitted in encoded text.
2296 In a 7-bit environment, only GL can be used.
2298 When a graphic character set of CHARS94 is invoked to GL, codes
2299 0x20 and 0x7F of the GL area work as control characters SPACE and
2300 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2303 There are two ways of invocation: locking-shift and single-shift.
2304 With locking-shift, the invocation lasts until the next different
2305 invocation, whereas with single-shift, the invocation affects the
2306 following character only and doesn't affect the locking-shift
2307 state. Invocations are done by the following control characters or
2310 ----------------------------------------------------------------------
2311 abbrev function cntrl escape seq description
2312 ----------------------------------------------------------------------
2313 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2314 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2315 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2316 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2317 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2318 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2319 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2320 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2321 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2322 ----------------------------------------------------------------------
2323 (*) These are not used by any known coding system.
2325 Control characters for these functions are defined by macros
2326 ISO_CODE_XXX in `coding.h'.
2328 Designations are done by the following escape sequences:
2329 ----------------------------------------------------------------------
2330 escape sequence description
2331 ----------------------------------------------------------------------
2332 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2333 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2334 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2335 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2336 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2337 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2338 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2339 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2340 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2341 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2342 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2343 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2344 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2345 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2346 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2347 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2348 ----------------------------------------------------------------------
2350 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2351 of dimension 1, chars 94, and final character <F>, etc...
2353 Note (*): Although these designations are not allowed in ISO2022,
2354 Emacs accepts them on decoding, and produces them on encoding
2355 CHARS96 character sets in a coding system which is characterized as
2356 7-bit environment, non-locking-shift, and non-single-shift.
2358 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2359 '(' must be omitted. We refer to this as "short-form" hereafter.
2361 Now you may notice that there are a lot of ways of encoding the
2362 same multilingual text in ISO2022. Actually, there exist many
2363 coding systems such as Compound Text (used in X11's inter-client
2364 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2365 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2366 localized platforms), and all of these are variants of ISO2022.
2368 In addition to the above, Emacs handles two more kinds of escape
2369 sequences: ISO6429's direction specification and Emacs' private
2370 sequence for specifying character composition.
2372 ISO6429's direction specification takes the following form:
2373 o CSI ']' -- end of the current direction
2374 o CSI '0' ']' -- end of the current direction
2375 o CSI '1' ']' -- start of left-to-right text
2376 o CSI '2' ']' -- start of right-to-left text
2377 The control character CSI (0x9B: control sequence introducer) is
2378 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2380 Character composition specification takes the following form:
2381 o ESC '0' -- start relative composition
2382 o ESC '1' -- end composition
2383 o ESC '2' -- start rule-base composition (*)
2384 o ESC '3' -- start relative composition with alternate chars (**)
2385 o ESC '4' -- start rule-base composition with alternate chars (**)
2386 Since these are not standard escape sequences of any ISO standard,
2387 the use of them with these meanings is restricted to Emacs only.
2389 (*) This form is used only in Emacs 20.7 and older versions,
2390 but newer versions can safely decode it.
2391 (**) This form is used only in Emacs 21.1 and newer versions,
2392 and older versions can't decode it.
2394 Here's a list of example usages of these composition escape
2395 sequences (categorized by `enum composition_method').
2397 COMPOSITION_RELATIVE:
2398 ESC 0 CHAR [ CHAR ] ESC 1
2399 COMPOSITION_WITH_RULE:
2400 ESC 2 CHAR [ RULE CHAR ] ESC 1
2401 COMPOSITION_WITH_ALTCHARS:
2402 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2403 COMPOSITION_WITH_RULE_ALTCHARS:
2404 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table with 256 entries of `enum iso_code_class_type'.
   decode_coding_iso_2022 dispatches on iso_code_class[c1] to decide
   how the code C1 it has just read is to be handled.  */
2406 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if charset ID lies within the safe_charsets table of CODING
   (i.e. is not above CODING->max_charset_id) and its entry in that
   table is not negative.  The range test is evaluated first, so the
   table is never indexed beyond max_charset_id.  */
#define SAFE_CHARSET_P(coding, id)			\
  (! ((id) > (coding)->max_charset_id)			\
   && ! ((coding)->safe_charsets[id] < 0))
/* Nonzero if the coding system stored for CATEGORY in
   coding_categories has a non-negative initial designation for
   graphic register 1.  */
#define SHIFT_OUT_OK(category)						\
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
/* Prepare the safe-charsets string of the ISO-2022 coding system
   whose attribute vector is ATTRS.

   If CODING_ISO_FLAG_FULL_SUPPORT is set in the coding_attr_iso_flags
   slot and the charset list of ATTRS is not Viso_2022_charset_list,
   the charset list is replaced by Viso_2022_charset_list and the
   coding_attr_safe_charsets slot is reset to Qnil.

   A string one element longer than the largest charset ID found in
   the charset list is created.  For each charset in the list, the
   element at index ID is set either to the value associated with ID
   in the coding_attr_iso_request alist, or to REG96 / REG94 (the cdr
   and car of coding_attr_iso_usage), the choice between the latter
   two depending on charset->iso_chars_96.  The string is finally
   stored in the coding_attr_safe_charsets slot of ATTRS.  */
2417 setup_iso_safe_charsets (attrs
)
2420 Lisp_Object charset_list
, safe_charsets
;
2421 Lisp_Object request
;
2422 Lisp_Object reg_usage
;
2425 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2428 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
/* A fully supporting coding system always uses
   Viso_2022_charset_list; switching to it invalidates any safe
   charsets string computed for the previous list.  */
2429 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2430 && ! EQ (charset_list
, Viso_2022_charset_list
))
2432 CODING_ATTR_CHARSET_LIST (attrs
)
2433 = charset_list
= Viso_2022_charset_list
;
2434 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
/* NOTE(review): the statement guarded by this test is not visible
   here; presumably the function returns early when the string has
   already been built -- confirm.  */
2437 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
/* Find the largest charset ID in the list; it determines the length
   of the string.  */
2441 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2443 int id
= XINT (XCAR (tail
));
2444 if (max_charset_id
< id
)
2445 max_charset_id
= id
;
2448 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2450 request
= AREF (attrs
, coding_attr_iso_request
);
2451 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2452 reg94
= XINT (XCAR (reg_usage
));
2453 reg96
= XINT (XCDR (reg_usage
));
/* Fill in the element of each charset of the list.  */
2455 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2459 struct charset
*charset
;
2462 charset
= CHARSET_FROM_ID (XINT (id
));
2463 reg
= Fcdr (Fassq (id
, request
));
2465 SSET (safe_charsets
, XINT (id
), XINT (reg
));
2466 else if (charset
->iso_chars_96
)
2469 SSET (safe_charsets
, XINT (id
), reg96
);
2474 SSET (safe_charsets
, XINT (id
), reg94
);
2477 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2481 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2482 Check if a text is encoded in one of ISO-2022 based coding systems.
2483 If it is, return 1, else return 0. */
/* The source examined is CODING->source .. CODING->source +
   CODING->src_bytes, starting after the first CODING->head_ascii
   bytes.  The categories seen and ruled out are accumulated in the
   local masks `found' and `rejected' and reported through
   DETECT_INFO->checked, DETECT_INFO->rejected and
   DETECT_INFO->found.  */
2486 detect_coding_iso_2022 (coding
, detect_info
)
2487 struct coding_system
*coding
;
2488 struct coding_detection_info
*detect_info
;
2490 const unsigned char *src
= coding
->source
, *src_base
= src
;
2491 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2492 int multibytep
= coding
->src_multibyte
;
2493 int single_shifting
= 0;
2496 int consumed_chars
= 0;
2501 detect_info
->checked
|= CATEGORY_MASK_ISO
;
/* For every ISO category, cache the safe charsets string of its
   coding system in the max_charset_id / safe_charsets members so
   that SAFE_CHARSET_P can be used below.  */
2503 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2505 struct coding_system
*this = &(coding_categories
[i
]);
2506 Lisp_Object attrs
, val
;
2508 attrs
= CODING_ID_ATTRS (this->id
);
/* NOTE(review): this compares the safe charsets attribute with
   Viso_2022_charset_list, whereas setup_iso_safe_charsets compares
   the charset list with it; possibly CODING_ATTR_CHARSET_LIST was
   intended here -- confirm.  */
2509 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2510 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2511 setup_iso_safe_charsets (attrs
);
2512 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2513 this->max_charset_id
= SCHARS (val
) - 1;
2514 this->safe_charsets
= (char *) SDATA (val
);
2517 /* A coding system of this category is always ASCII compatible. */
2518 src
+= coding
->head_ascii
;
/* Scan until every ISO category has been rejected.  */
2520 while (rejected
!= CATEGORY_MASK_ISO
)
2527 if (inhibit_iso_escape_detection
)
2529 single_shifting
= 0;
2531 if (c
>= '(' && c
<= '/')
2533 /* Designation sequence for a charset of dimension 1. */
2535 if (c1
< ' ' || c1
>= 0x80
2536 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2537 /* Invalid designation sequence. Just ignore. */
2542 /* Designation sequence for a charset of dimension 2. */
2544 if (c
>= '@' && c
<= 'B')
2545 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2546 id
= iso_charset_table
[1][0][c
];
2547 else if (c
>= '(' && c
<= '/')
2550 if (c1
< ' ' || c1
>= 0x80
2551 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2552 /* Invalid designation sequence. Just ignore. */
2556 /* Invalid designation sequence. Just ignore it. */
2559 else if (c
== 'N' || c
== 'O')
2561 /* ESC <Fe> for SS2 or SS3. */
2562 single_shifting
= 1;
2563 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2566 else if (c
>= '0' && c
<= '4')
2568 /* ESC <Fp> for start/end composition. */
2569 found
|= CATEGORY_MASK_ISO
;
2574 /* Invalid escape sequence. Just ignore it. */
2578 /* We found a valid designation sequence for CHARSET. */
2579 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2580 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2582 found
|= CATEGORY_MASK_ISO_7
;
2584 rejected
|= CATEGORY_MASK_ISO_7
;
2585 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2587 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2589 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2590 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2592 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2594 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2595 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2597 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2599 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2604 /* Locking shift out/in. */
2605 if (inhibit_iso_escape_detection
)
2607 single_shifting
= 0;
2608 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2609 found
|= CATEGORY_MASK_ISO_ELSE
;
2613 /* Control sequence introducer. */
2614 single_shifting
= 0;
2615 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2616 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2617 goto check_extra_latin
;
2623 if (inhibit_iso_escape_detection
)
2625 single_shifting
= 1;
2626 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2627 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2628 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2629 found
|= CATEGORY_MASK_ISO_8_1
;
2630 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2631 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2632 found
|= CATEGORY_MASK_ISO_8_2
;
2633 goto check_extra_latin
;
2640 single_shifting
= 0;
2645 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2646 found
|= CATEGORY_MASK_ISO_8_1
;
2647 /* Check the length of succeeding codes of the range
2648 0xA0..0xFF. If the byte length is even, we include
2649 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2650 only when we are not single shifting. */
2651 if (! single_shifting
2652 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2655 while (src
< src_end
)
2663 if (i
& 1 && src
< src_end
)
2664 rejected
|= CATEGORY_MASK_ISO_8_2
;
2666 found
|= CATEGORY_MASK_ISO_8_2
;
2671 single_shifting
= 0;
/* A code that has no entry in Vlatin_extra_code_table rejects every
   ISO category.  */
2672 if (! VECTORP (Vlatin_extra_code_table
)
2673 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2675 rejected
= CATEGORY_MASK_ISO
;
2678 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2679 & CODING_ISO_FLAG_LATIN_EXTRA
)
2680 found
|= CATEGORY_MASK_ISO_8_1
;
2682 rejected
|= CATEGORY_MASK_ISO_8_1
;
2683 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2684 & CODING_ISO_FLAG_LATIN_EXTRA
)
2685 found
|= CATEGORY_MASK_ISO_8_2
;
2687 rejected
|= CATEGORY_MASK_ISO_8_2
;
2690 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
/* Report the result; a category that was both found and rejected is
   not reported as found.  */
2694 detect_info
->rejected
|= rejected
;
2695 detect_info
->found
|= (found
& ~rejected
);
/* DECODE_DESIGNATION (REG, DIM, CHARS_96, FINAL): look up the charset
   ISO_CHARSET_TABLE (DIM, CHARS_96, FINAL).  If FINAL is below '0' or
   not below 128, if the lookup yields a negative ID, or if the
   charset is not SAFE_CHARSET_P for `coding', -2 is recorded as the
   designation of REG and control jumps to `invalid_code'.  Otherwise
   the ID becomes the designation of REG, with charset_jisx0201_roman
   mapped to charset_ascii under CODING_ISO_FLAG_USE_ROMAN and
   charset_jisx0208_1978 mapped to charset_jisx0208 under
   CODING_ISO_FLAG_USE_OLDJIS.  The expansion uses the names `coding',
   `id', `prev' and the label `invalid_code' of the expanding
   scope.  */
2700 /* Set designation state into CODING. */
2701 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2705 if (final < '0' || final >= 128 \
2706 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2707 || !SAFE_CHARSET_P (coding, id)) \
2709 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2710 goto invalid_code; \
2712 prev = CODING_ISO_DESIGNATION (coding, reg); \
2713 if (id == charset_jisx0201_roman) \
2715 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2716 id = charset_ascii; \
2718 else if (id == charset_jisx0208_1978) \
2720 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2721 id = charset_jisx0208; \
2723 CODING_ISO_DESIGNATION (coding, reg) = id; \
2724 /* If there was an invalid designation to REG previously, and this \
2725 designation is ASCII to REG, we should keep this designation \
2727 if (prev == -2 && id == charset_ascii) \
2728 goto invalid_code; \
/* MAYBE_FINISH_COMPOSITION (): flush the characters collected in
   `components' into `charbuf' and reset composition_state to
   COMPOSING_NO.  For COMPOSITION_RELATIVE and
   COMPOSITION_WITH_ALTCHARS every collected element is copied and
   char_offset advances by component_idx; in the other visible branch
   only every second element is copied and char_offset advances by
   component_idx / 2 + 1.  Control jumps to `no_more_source' when
   charbuf has no room for component_idx elements.
   NOTE(review): the statement guarded by the COMPOSING_NO test is not
   visible here; presumably nothing is done in that state --
   confirm.  */
2732 #define MAYBE_FINISH_COMPOSITION() \
2735 if (composition_state == COMPOSING_NO) \
2737 /* It is assured that we have enough room for producing \
2738 characters stored in the table `components'. */ \
2739 if (charbuf + component_idx > charbuf_end) \
2740 goto no_more_source; \
2741 composition_state = COMPOSING_NO; \
2742 if (method == COMPOSITION_RELATIVE \
2743 || method == COMPOSITION_WITH_ALTCHARS) \
2745 for (i = 0; i < component_idx; i++) \
2746 *charbuf++ = components[i]; \
2747 char_offset += component_idx; \
2751 for (i = 0; i < component_idx; i += 2) \
2752 *charbuf++ = components[i]; \
2753 char_offset += (component_idx / 2) + 1; \
/* DECODE_COMPOSITION_START (C1): C1 is the byte that followed ESC
   ('0', '2', '3' or '4').  In the COMPOSING_COMPONENT_RULE case the
   macro records component_len = component_idx and switches to
   COMPOSING_CHAR.  In the other visible branch it finishes any
   pending composition, requires room for MAX_COMPOSITION_COMPONENTS
   elements in charbuf, and scans ahead from `src' for ESC '1'; when
   the scan stops at src_end - 1 it jumps to `invalid_code' if
   CODING_MODE_LAST_BLOCK is set in coding->mode and to
   `no_more_source' otherwise.  Finally `method' and
   `composition_state' are chosen from C1 and component_idx and
   component_len are reset to 0.  */
2758 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2759 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2760 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2761 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2762 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2765 #define DECODE_COMPOSITION_START(c1) \
2768 && composition_state == COMPOSING_COMPONENT_RULE) \
2770 component_len = component_idx; \
2771 composition_state = COMPOSING_CHAR; \
2775 const unsigned char *p; \
2777 MAYBE_FINISH_COMPOSITION (); \
2778 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2779 goto no_more_source; \
2780 for (p = src; p < src_end - 1; p++) \
2781 if (*p == ISO_CODE_ESC && p[1] == '1') \
2783 if (p == src_end - 1) \
2785 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2786 goto invalid_code; \
2787 goto no_more_source; \
2790 /* This is surely the start of a composition. */ \
2791 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2792 : c1 == '2' ? COMPOSITION_WITH_RULE \
2793 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2794 : COMPOSITION_WITH_RULE_ALTCHARS); \
2795 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2796 : COMPOSING_COMPONENT_CHAR); \
2797 component_idx = component_len = 0; \
2802 /* Handle composition end sequence ESC 1. */
/* The number of composed characters is component_idx - component_len
   when component_len > 0, component_idx for COMPOSITION_RELATIVE, and
   (component_idx + 1) / 2 otherwise.  The macro adds composition data
   covering char_offset .. char_offset + nchars with
   ADD_COMPOSITION_DATA, copies elements of `components' into charbuf
   (advancing char_offset for the composed characters), sets
   coding->annotated and resets composition_state to COMPOSING_NO.
   For methods other than COMPOSITION_RELATIVE, *saved_charbuf is set
   to the (negative) difference saved_charbuf - charbuf.  */
2804 #define DECODE_COMPOSITION_END() \
2806 int nchars = (component_len > 0 ? component_idx - component_len \
2807 : method == COMPOSITION_RELATIVE ? component_idx \
2808 : (component_idx + 1) / 2); \
2810 int *saved_charbuf = charbuf; \
2811 int from = char_offset; \
2812 int to = from + nchars; \
2814 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
2815 if (method != COMPOSITION_RELATIVE) \
2817 if (component_len == 0) \
2818 for (i = 0; i < component_idx; i++) \
2819 *charbuf++ = components[i]; \
2821 for (i = 0; i < component_len; i++) \
2822 *charbuf++ = components[i]; \
2823 *saved_charbuf = saved_charbuf - charbuf; \
2825 if (method == COMPOSITION_WITH_RULE) \
2826 for (i = 0; i < component_idx; i += 2, char_offset++) \
2827 *charbuf++ = components[i]; \
2829 for (i = component_len; i < component_idx; i++, char_offset++) \
2830 *charbuf++ = components[i]; \
2831 coding->annotated = 1; \
2832 composition_state = COMPOSING_NO; \
2836 /* Decode a composition rule from the byte C1 (and maybe one more byte
2837 from SRC) and store one encoded composition rule in
2838 coding->cmp_data. */
/* In the visible code the result replaces C1 itself: for C1 < 81 the
   rule is split into GREF = C1 / 9 and NREF = C1 % 9 (a value of 4 is
   replaced by 10 in each); for 81 <= C1 < 93 one more byte C2 is read
   with ONE_MORE_BYTE and the rule is built from C1 - 81 and
   C2 - 32.  Both forms go through COMPOSITION_ENCODE_RULE.  */
2840 #define DECODE_COMPOSITION_RULE(c1) \
2843 if (c1 < 81) /* old format (before ver.21) */ \
2845 int gref = (c1) / 9; \
2846 int nref = (c1) % 9; \
2847 if (gref == 4) gref = 10; \
2848 if (nref == 4) nref = 10; \
2849 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2851 else if (c1 < 93) /* new format (after ver.21) */ \
2853 ONE_MORE_BYTE (c2); \
2854 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2861 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Decode the source of CODING, from coding->source + coding->consumed
   up to coding->source + coding->src_bytes, as ISO-2022 text into
   coding->charbuf.  The designation and invocation state kept in
   CODING is updated as escape sequences and shift codes are met.  On
   exit coding->consumed_char, coding->consumed and
   coding->charbuf_used are updated, and charset data is added for the
   last run of non-ASCII characters.  */
2864 decode_coding_iso_2022 (coding
)
2865 struct coding_system
*coding
;
2867 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2868 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2869 const unsigned char *src_base
;
2870 int *charbuf
= coding
->charbuf
;
2872 = charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2873 int consumed_chars
= 0, consumed_chars_base
;
2874 int multibytep
= coding
->src_multibyte
;
2875 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2876 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2877 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2878 struct charset
*charset
;
2880 /* For handling composition sequence. */
2881 #define COMPOSING_NO 0
2882 #define COMPOSING_CHAR 1
2883 #define COMPOSING_RULE 2
2884 #define COMPOSING_COMPONENT_CHAR 3
2885 #define COMPOSING_COMPONENT_RULE 4
2887 int composition_state
= COMPOSING_NO
;
2888 enum composition_method method
;
2889 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2892 Lisp_Object attrs
, charset_list
;
2893 int char_offset
= coding
->produced_char
;
2894 int last_offset
= char_offset
;
2895 int last_id
= charset_ascii
;
2897 CODING_GET_INFO (coding
, attrs
, charset_list
);
2898 setup_iso_safe_charsets (attrs
);
2905 consumed_chars_base
= consumed_chars
;
2907 if (charbuf
>= charbuf_end
)
2914 /* We produce at most one character. */
2915 switch (iso_code_class
[c1
])
2917 case ISO_0x20_or_0x7F
:
2918 if (composition_state
!= COMPOSING_NO
)
2920 if (composition_state
== COMPOSING_RULE
2921 || composition_state
== COMPOSING_COMPONENT_RULE
)
2923 DECODE_COMPOSITION_RULE (c1
);
2924 components
[component_idx
++] = c1
;
2925 composition_state
--;
2929 if (charset_id_0
< 0
2930 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2931 /* This is SPACE or DEL. */
2932 charset
= CHARSET_FROM_ID (charset_ascii
);
2934 charset
= CHARSET_FROM_ID (charset_id_0
);
2937 case ISO_graphic_plane_0
:
2938 if (composition_state
!= COMPOSING_NO
)
2940 if (composition_state
== COMPOSING_RULE
2941 || composition_state
== COMPOSING_COMPONENT_RULE
)
2943 DECODE_COMPOSITION_RULE (c1
);
2944 components
[component_idx
++] = c1
;
2945 composition_state
--;
2949 charset
= CHARSET_FROM_ID (charset_id_0
);
2952 case ISO_0xA0_or_0xFF
:
2953 if (charset_id_1
< 0
2954 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2955 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2957 /* This is a graphic character, we fall down ... */
2959 case ISO_graphic_plane_1
:
2960 if (charset_id_1
< 0)
2962 charset
= CHARSET_FROM_ID (charset_id_1
);
2966 MAYBE_FINISH_COMPOSITION ();
2967 charset
= CHARSET_FROM_ID (charset_ascii
);
2971 MAYBE_FINISH_COMPOSITION ();
2975 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2976 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2978 CODING_ISO_INVOCATION (coding
, 0) = 1;
2979 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2983 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2985 CODING_ISO_INVOCATION (coding
, 0) = 0;
2986 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2989 case ISO_single_shift_2_7
:
2990 case ISO_single_shift_2
:
2991 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2993 /* SS2 is handled as an escape sequence of ESC 'N' */
2995 goto label_escape_sequence
;
2997 case ISO_single_shift_3
:
2998 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
3000 /* SS3 is handled as an escape sequence of ESC 'O' */
3002 goto label_escape_sequence
;
3004 case ISO_control_sequence_introducer
:
3005 /* CSI is handled as an escape sequence of ESC '[' ... */
3007 goto label_escape_sequence
;
3011 label_escape_sequence
:
3012 /* Escape sequences handled here are invocation,
3013 designation, direction specification, and character
3014 composition specification. */
3017 case '&': /* revision of following character set */
3019 if (!(c1
>= '@' && c1
<= '~'))
3022 if (c1
!= ISO_CODE_ESC
)
3025 goto label_escape_sequence
;
3027 case '$': /* designation of 2-byte character set */
3028 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3031 if (c1
>= '@' && c1
<= 'B')
3032 { /* designation of JISX0208.1978, GB2312.1980,
3034 DECODE_DESIGNATION (0, 2, 0, c1
);
3036 else if (c1
>= 0x28 && c1
<= 0x2B)
3037 { /* designation of DIMENSION2_CHARS94 character set */
3039 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
3041 else if (c1
>= 0x2C && c1
<= 0x2F)
3042 { /* designation of DIMENSION2_CHARS96 character set */
3044 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
3048 /* We must update these variables now. */
3049 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3050 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3053 case 'n': /* invocation of locking-shift-2 */
3054 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3055 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3057 CODING_ISO_INVOCATION (coding
, 0) = 2;
3058 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3061 case 'o': /* invocation of locking-shift-3 */
3062 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3063 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3065 CODING_ISO_INVOCATION (coding
, 0) = 3;
3066 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3069 case 'N': /* invocation of single-shift-2 */
3070 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3071 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3073 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
3075 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3079 case 'O': /* invocation of single-shift-3 */
3080 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3081 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3083 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
3085 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3089 case '0': case '2': case '3': case '4': /* start composition */
3090 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3092 DECODE_COMPOSITION_START (c1
);
3095 case '1': /* end composition */
3096 if (composition_state
== COMPOSING_NO
)
3098 DECODE_COMPOSITION_END ();
3101 case '[': /* specification of direction */
/* The flag test must be parenthesized as a whole: `!' binds tighter
   than `&', so without the parentheses the negation applied to the
   flags word alone and the test could not detect a cleared
   CODING_ISO_FLAG_DIRECTION.  */
3102 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
))
3104 /* For the moment, nested direction is not supported.
3105 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3106 left-to-right, and nonzero means right-to-left. */
3110 case ']': /* end of the current direction */
3111 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3113 case '0': /* end of the current direction */
3114 case '1': /* start of left-to-right direction */
3117 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3122 case '2': /* start of right-to-left direction */
3125 coding
->mode
|= CODING_MODE_DIRECTION
;
3139 /* CTEXT extended segment:
3140 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3141 We keep these bytes as is for the moment.
3142 They may be decoded by post-read-conversion. */
3146 ONE_MORE_BYTE (dim
);
3149 size
= ((M
- 128) * 128) + (L
- 128);
3150 if (charbuf
+ 8 + size
> charbuf_end
)
3152 *charbuf
++ = ISO_CODE_ESC
;
3156 *charbuf
++ = BYTE8_TO_CHAR (M
);
3157 *charbuf
++ = BYTE8_TO_CHAR (L
);
3161 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3166 /* XFree86 extension for embedding UTF-8 in CTEXT:
3167 ESC % G --UTF-8-BYTES-- ESC % @
3168 We keep these bytes as is for the moment.
3169 They may be decoded by post-read-conversion. */
3172 if (p
+ 6 > charbuf_end
)
3174 *p
++ = ISO_CODE_ESC
;
3177 while (p
< charbuf_end
)
3180 if (c1
== ISO_CODE_ESC
3181 && src
+ 1 < src_end
3185 *p
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3187 if (p
+ 3 > charbuf_end
)
3189 *p
++ = ISO_CODE_ESC
;
3200 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3202 if (c1
>= 0x28 && c1
<= 0x2B)
3203 { /* designation of DIMENSION1_CHARS94 character set */
3205 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
3207 else if (c1
>= 0x2C && c1
<= 0x2F)
3208 { /* designation of DIMENSION1_CHARS96 character set */
3210 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
3214 /* We must update these variables now. */
3215 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3216 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3221 if (charset
->id
!= charset_ascii
3222 && last_id
!= charset
->id
)
3224 if (last_id
!= charset_ascii
)
3225 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3226 last_id
= charset
->id
;
3227 last_offset
= char_offset
;
3230 /* Now we know CHARSET and 1st position code C1 of a character.
3231 Produce a decoded character while getting 2nd position code
3234 if (CHARSET_DIMENSION (charset
) > 1)
3237 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3238 /* C2 is not in a valid range. */
3240 c1
= (c1
<< 8) | (c2
& 0x7F);
3241 if (CHARSET_DIMENSION (charset
) > 2)
3244 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3245 /* C2 is not in a valid range. */
3247 c1
= (c1
<< 8) | (c2
& 0x7F);
3251 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3254 MAYBE_FINISH_COMPOSITION ();
3255 for (; src_base
< src
; src_base
++, char_offset
++)
3257 if (ASCII_BYTE_P (*src_base
))
3258 *charbuf
++ = *src_base
;
3260 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3263 else if (composition_state
== COMPOSING_NO
)
3270 components
[component_idx
++] = c
;
3271 if (method
== COMPOSITION_WITH_RULE
3272 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3273 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3274 composition_state
++;
3279 MAYBE_FINISH_COMPOSITION ();
3281 consumed_chars
= consumed_chars_base
;
3283 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
/* Record the charset of the last run of non-ASCII characters and
   report how much was consumed and produced.  */
3293 if (last_id
!= charset_ascii
)
3294 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
3295 coding
->consumed_char
+= consumed_chars_base
;
3296 coding
->consumed
= src_base
- coding
->source
;
3297 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3301 /* ISO2022 encoding stuff. */
3304 It is not enough to say just "ISO2022" on encoding, we have to
3305 specify more details. In Emacs, each coding system of ISO2022
3306 variant has the following specifications:
3307 1. Initial designation to G0 thru G3.
3308 2. Allows short-form designation?
3309 3. ASCII should be designated to G0 before control characters?
3310 4. ASCII should be designated to G0 at end of line?
3311 5. 7-bit environment or 8-bit environment?
3312 6. Use locking-shift?
3313 7. Use Single-shift?
3314 And the following two are only for Japanese:
3315 8. Use ASCII in place of JIS0201-1976-Roman?
3316 9. Use JISX0208-1983 in place of JISX0208-1978?
3317 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3318 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3322 /* Produce codes (escape sequence) for designating CHARSET to graphic
3323 register REG at DST, and increment DST. If <final-char> of CHARSET is
3324 '@', 'A', or 'B' and the coding system CODING allows, produce
3325 designation sequence of short-form. */
/* When CODING_ISO_FLAG_REVISION is set and the revision of CHARSET is
   not negative, ESC '&' followed by '@' + revision is emitted first.
   The intermediate character is taken from "()*+" (94-character sets)
   or ",-./" (96-character sets), indexed by REG.  For dimensions
   other than 1, '$' precedes the intermediate character.  After the
   final character is emitted, CHARSET's ID is recorded as the
   designation of REG in CODING.  */
3327 #define ENCODE_DESIGNATION(charset, reg, coding) \
3329 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3330 char *intermediate_char_94 = "()*+"; \
3331 char *intermediate_char_96 = ",-./"; \
3332 int revision = -1; \
3335 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3336 revision = CHARSET_ISO_REVISION (charset); \
3338 if (revision >= 0) \
3340 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3341 EMIT_ONE_BYTE ('@' + revision); \
3343 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3344 if (CHARSET_DIMENSION (charset) == 1) \
3346 if (! CHARSET_ISO_CHARS_96 (charset)) \
3347 c = intermediate_char_94[reg]; \
3349 c = intermediate_char_96[reg]; \
3350 EMIT_ONE_ASCII_BYTE (c); \
3354 EMIT_ONE_ASCII_BYTE ('$'); \
3355 if (! CHARSET_ISO_CHARS_96 (charset)) \
3357 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3359 || final_char < '@' || final_char > 'B') \
3360 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3363 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3365 EMIT_ONE_ASCII_BYTE (final_char); \
3367 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
/* ENCODE_SINGLE_SHIFT_2: emit the single-shift-2 function for
   `coding' -- ESC 'N' when CODING_ISO_FLAG_SEVEN_BITS is set in its
   flags; the other visible emission is the single byte ISO_CODE_SS2
   -- and mark `coding' as single-shifting.  */
3371 /* The following two macros produce codes (control character or escape
3372 sequence) for ISO2022 single-shift functions (single-shift-2 and
3375 #define ENCODE_SINGLE_SHIFT_2 \
3377 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3378 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3380 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3381 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
/* ENCODE_SINGLE_SHIFT_3: emit the single-shift-3 function for
   `coding' -- ESC 'O' when CODING_ISO_FLAG_SEVEN_BITS is set in its
   flags; the other visible emission is the single byte ISO_CODE_SS3
   -- and mark `coding' as single-shifting.  */
3385 #define ENCODE_SINGLE_SHIFT_3 \
3387 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3388 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3390 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3391 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3395 /* The following four macros produce codes (control character or
3396 escape sequence) for ISO2022 locking-shift functions (shift-in,
3397 shift-out, locking-shift-2, and locking-shift-3). */
/* ENCODE_SHIFT_IN: emit ISO_CODE_SI and record that graphic register
   0 is invoked to graphic plane 0 of `coding'.  */
3399 #define ENCODE_SHIFT_IN \
3401 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3402 CODING_ISO_INVOCATION (coding, 0) = 0; \
/* ENCODE_SHIFT_OUT: emit ISO_CODE_SO and record that graphic register
   1 is invoked to graphic plane 0 of `coding'.  */
3406 #define ENCODE_SHIFT_OUT \
3408 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3409 CODING_ISO_INVOCATION (coding, 0) = 1; \
/* ENCODE_LOCKING_SHIFT_2: emit ESC 'n' and record that graphic
   register 2 is invoked to graphic plane 0 of `coding'.  */
3413 #define ENCODE_LOCKING_SHIFT_2 \
3415 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3416 CODING_ISO_INVOCATION (coding, 0) = 2; \
/* ENCODE_LOCKING_SHIFT_3: emit the locking-shift-3 sequence ESC 'o'
   and record that graphic register 3 is invoked to graphic plane 0 of
   `coding'.  The final byte must be 'o': decode_coding_iso_2022
   treats ESC 'o' as locking-shift-3 and ESC 'n' as locking-shift-2,
   so emitting 'n' here would make a decoder invoke register 2 while
   the encoder state says register 3.  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
3427 /* Produce codes for a DIMENSION1 character whose character set is
3428 CHARSET and whose position-code is C1. Designation and invocation
3429 sequences are also produced in advance if necessary. */
/* Under CODING_ISO_FLAG_USE_ROMAN, charset_ascii is replaced by
   charset_jisx0201_roman first.  While `coding' is single-shifting,
   the code is emitted as C1 & 0x7F (with CODING_ISO_FLAG_SEVEN_BITS)
   or C1 | 0x80 and the single-shifting state is cleared.  Otherwise
   C1 & 0x7F is emitted if the charset is invoked to graphic plane 0
   and C1 | 0x80 if it is invoked to graphic plane 1; failing both,
   encode_invocation_designation is called to update `dst'.  */
3431 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3433 int id = CHARSET_ID (charset); \
3435 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3436 && id == charset_ascii) \
3438 id = charset_jisx0201_roman; \
3439 charset = CHARSET_FROM_ID (id); \
3442 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3444 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3445 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3447 EMIT_ONE_BYTE (c1 | 0x80); \
3448 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3451 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3453 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3456 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3458 EMIT_ONE_BYTE (c1 | 0x80); \
3462 /* Since CHARSET is not yet invoked to any graphic planes, we \
3463 must invoke it, or, at first, designate it to some graphic \
3464 register. Then repeat the loop to actually produce the \
3466 dst = encode_invocation_designation (charset, coding, dst, \
3471 /* Produce codes for a DIMENSION2 character whose character set is
3472 CHARSET and whose position-codes are C1 and C2. Designation and
3473 invocation codes are also produced in advance if necessary. */
/* Under CODING_ISO_FLAG_USE_OLDJIS, charset_jisx0208 is replaced by
   charset_jisx0208_1978 first.  While `coding' is single-shifting,
   both codes are emitted with the high bit cleared (with
   CODING_ISO_FLAG_SEVEN_BITS) or set, and the single-shifting state
   is cleared.  Otherwise the codes are emitted with the high bit
   cleared if the charset is invoked to graphic plane 0 and with the
   high bit set if it is invoked to graphic plane 1; failing both,
   encode_invocation_designation is called to update `dst'.  */
3475 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3477 int id = CHARSET_ID (charset); \
3479 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3480 && id == charset_jisx0208) \
3482 id = charset_jisx0208_1978; \
3483 charset = CHARSET_FROM_ID (id); \
3486 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3488 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3489 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3491 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3492 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3495 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3497 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3500 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3502 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3506 /* Since CHARSET is not yet invoked to any graphic planes, we \
3507 must invoke it, or, at first, designate it to some graphic \
3508 register. Then repeat the loop to actually produce the \
3510 dst = encode_invocation_designation (charset, coding, dst, \
/* ENCODE_ISO_CHARACTER (CHARSET, C): obtain the code of character C
   in CHARSET with ENCODE_CHAR and emit it through
   ENCODE_ISO_CHARACTER_DIMENSION1 when CHARSET has dimension 1; the
   other visible branch passes code >> 8 and code & 0xFF as the two
   position codes to ENCODE_ISO_CHARACTER_DIMENSION2.  */
3515 #define ENCODE_ISO_CHARACTER(charset, c) \
3517 int code = ENCODE_CHAR ((charset),(c)); \
3519 if (CHARSET_DIMENSION (charset) == 1) \
3520 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3522 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
/* encode_invocation_designation (CHARSET, CODING, DST, P_NCHARS):
   make CHARSET usable for the next character to be encoded.  The
   visible code first looks for a graphic register of CODING that
   already designates CHARSET; for a charset designated to none it
   takes the register from CODING_ISO_REQUEST (CODING, id) and emits
   the designation with ENCODE_DESIGNATION.  When the chosen register
   is invoked to neither graphic plane 0 nor 1, a shift function is
   emitted according to the register number; for registers 2 and 3 a
   single shift is used if CODING_ISO_FLAG_SINGLE_SHIFT is set, with
   a locking shift as the other visible alternative.  *P_NCHARS is
   read on entry as the count of produced characters and written back
   before returning.  */
3526 /* Produce designation and invocation codes at a place pointed by DST
3527 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3531 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3532 struct charset
*charset
;
3533 struct coding_system
*coding
;
3537 int multibytep
= coding
->dst_multibyte
;
3538 int produced_chars
= *p_nchars
;
3539 int reg
; /* graphic register number */
3540 int id
= CHARSET_ID (charset
);
3542 /* At first, check designations. */
3543 for (reg
= 0; reg
< 4; reg
++)
3544 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3549 /* CHARSET is not yet designated to any graphic registers. */
3550 /* At first check the requested designation. */
3551 reg
= CODING_ISO_REQUEST (coding
, id
);
3553 /* Since CHARSET requests no special designation, designate it
3554 to graphic register 0. */
3557 ENCODE_DESIGNATION (charset
, reg
, coding
);
3560 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3561 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3563 /* Since the graphic register REG is not invoked to any graphic
3564 planes, invoke it to graphic plane 0. */
3567 case 0: /* graphic register 0 */
3571 case 1: /* graphic register 1 */
3575 case 2: /* graphic register 2 */
3576 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3577 ENCODE_SINGLE_SHIFT_2
;
3579 ENCODE_LOCKING_SHIFT_2
;
3582 case 3: /* graphic register 3 */
3583 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3584 ENCODE_SINGLE_SHIFT_3
;
3586 ENCODE_LOCKING_SHIFT_3
;
3591 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating direction
   of text.  */

/* ENCODE_CONTROL_SEQUENCE_INTRODUCER: emit the control sequence
   introducer for `coding' -- the two bytes ESC '[' when
   CODING_ISO_FLAG_SEVEN_BITS is set in its flags, the single byte
   ISO_CODE_CSI otherwise.  The flag is tested with `&', as the
   single-shift macros do: the flags word normally has other bits set
   as well, so comparing it for equality with the flag would select
   the 8-bit form even in a 7-bit environment.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)
/* ENCODE_DIRECTION_R2L (): emit the sequence that starts
   right-to-left text: the control sequence introducer followed by
   '2' ']'.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like
   macro, so it is invoked without an argument list.  */
#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)
/* ENCODE_DIRECTION_L2R (): emit the control sequence introducer
   followed by '0' ']', which decode_coding_iso_2022 handles by
   clearing CODING_MODE_DIRECTION (i.e. back to left-to-right).
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is invoked without an argument list.  */
#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
3620 /* Produce codes for designation and invocation to reset the graphic
3621 planes and registers to initial state. */
/* The visible code tests whether graphic plane 0 of `coding' invokes
   a register other than 0, and then, for each of the four graphic
   registers whose initial designation is not negative and differs
   from the current one, emits ENCODE_DESIGNATION for the initial
   charset.
   NOTE(review): the statement guarded by the invocation test is not
   visible here; presumably a shift-in is emitted -- confirm.  */
3622 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3625 struct charset *charset; \
3627 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3629 for (reg = 0; reg < 4; reg++) \
3630 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3631 && (CODING_ISO_DESIGNATION (coding, reg) \
3632 != CODING_ISO_INITIAL (coding, reg))) \
3634 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3635 ENCODE_DESIGNATION (charset, reg, coding); \
3640 /* Produce designation sequences of charsets in the line started from
3641 SRC to a place pointed by DST, and return updated DST.
3643 If the current block ends before any end-of-line, we may fail to
3644 find all the necessary designations. */
/* CODING is the coding system being encoded with, CHARBUF and
   CHARBUF_END delimit the characters to examine, and DST is where
   designation sequences are written.  The function is declared to
   return `unsigned char *'.  */
3646 static unsigned char *
3647 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3648 struct coding_system
*coding
;
3649 int *charbuf
, *charbuf_end
;
3652 struct charset
*charset
;
3653 /* Table of charsets to be designated to each graphic register. */
3655 int c
, found
= 0, reg
;
3656 int produced_chars
= 0;
3657 int multibytep
= coding
->dst_multibyte
;
3659 Lisp_Object charset_list
;
3661 attrs
= CODING_ID_ATTRS (coding
->id
);
3662 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
/* A charset list equal to the symbol Qiso_2022 stands for the global
   list Viso_2022_charset_list.  */
3663 if (EQ (charset_list
, Qiso_2022
))
3664 charset_list
= Viso_2022_charset_list
;
3666 for (reg
= 0; reg
< 4; reg
++)
/* Find the charset of C in CHARSET_LIST and the graphic register that
   CODING requests for that charset id.  */
3676 charset
= char_charset (c
, charset_list
, NULL
);
3677 id
= CHARSET_ID (charset
);
3678 reg
= CODING_ISO_REQUEST (coding
, id
);
/* Only a requested register (REG >= 0) whose slot in R is still
   negative is acted upon.  */
3679 if (reg
>= 0 && r
[reg
] < 0)
/* Emit a designation for each register whose current designation
   differs from the charset id recorded in R.  */
3688 for (reg
= 0; reg
< 4; reg
++)
3690 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3691 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3697 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   elements) as ISO-2022 bytes appended to CODING->destination after
   the first CODING->produced bytes.  On completion the function
   records CODING_RESULT_SUCCESS, saves the beginning-of-line
   designation state, and updates CODING->produced_char and
   CODING->produced.  */
3700 encode_coding_iso_2022 (coding
)
3701 struct coding_system
*coding
;
3703 int multibytep
= coding
->dst_multibyte
;
3704 int *charbuf
= coding
->charbuf
;
3705 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3706 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3707 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
/* The value below is nonzero only when the coding system designates
   at beginning of line and the saved BOL state of CODING is set.  */
3710 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3711 && CODING_ISO_BOL (coding
));
3712 int produced_chars
= 0;
3713 Lisp_Object attrs
, eol_type
, charset_list
;
3714 int ascii_compatible
;
/* No preferred charset until a charset annotation supplies one.  */
3716 int preferred_charset_id
= -1;
3718 CODING_GET_INFO (coding
, attrs
, charset_list
);
3719 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
3720 if (VECTORP (eol_type
))
3723 setup_iso_safe_charsets (attrs
);
3724 /* Charset list may have been changed. */
/* NOTE(review): the assignment below ends with a backslash line
   continuation although it is not inside a macro definition; it looks
   like a leftover -- confirm and remove.  */
3725 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
); \
3726 coding
->safe_charsets
= (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs
));
3728 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3730 while (charbuf
< charbuf_end
)
3732 ASSURE_DESTINATION (safe_room
);
3734 if (bol_designation
)
3736 unsigned char *dst_prev
= dst
;
3738 /* We have to produce designation sequences if any now. */
3739 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3740 bol_designation
= 0;
3741 /* We are sure that designation sequences are all ASCII bytes. */
3742 produced_chars
+= dst
- dst_prev
;
3749 /* Handle an annotation. */
3752 case CODING_ANNOTATE_COMPOSITION_MASK
:
3753 /* Not yet implemented. */
/* A charset annotation carries the preferred charset id in
   charbuf[3]; the id is reset to -1 when the Fmemq lookup below yields
   nil (the list searched is not visible in this listing).  */
3755 case CODING_ANNOTATE_CHARSET_MASK
:
3756 preferred_charset_id
= charbuf
[3];
3757 if (preferred_charset_id
>= 0
3758 && NILP (Fmemq (make_number (preferred_charset_id
),
3760 preferred_charset_id
= -1;
3769 /* Now encode the character C. */
/* Control characters: codes below 0x20 and 0x7F.  */
3770 if (c
< 0x20 || c
== 0x7F)
3773 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3775 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3776 ENCODE_RESET_PLANE_AND_REGISTER ();
3777 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
/* Restore every register's designation to its initial charset.  */
3781 for (i
= 0; i
< 4; i
++)
3782 CODING_ISO_DESIGNATION (coding
, i
)
3783 = CODING_ISO_INITIAL (coding
, i
);
3786 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3788 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3789 ENCODE_RESET_PLANE_AND_REGISTER ();
3790 EMIT_ONE_ASCII_BYTE (c
);
/* ASCII characters are emitted directly when the coding system is
   ASCII compatible; the other visible path encodes them through the
   charset whose id is charset_ascii.  */
3792 else if (ASCII_CHAR_P (c
))
3794 if (ascii_compatible
)
3795 EMIT_ONE_ASCII_BYTE (c
);
3798 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3799 ENCODE_ISO_CHARACTER (charset
, c
);
3802 else if (CHAR_BYTE8_P (c
))
3804 c
= CHAR_TO_BYTE8 (c
);
3809 struct charset
*charset
;
/* Try the preferred charset first; if it does not contain C, fall
   back to searching CHARSET_LIST.  */
3811 if (preferred_charset_id
>= 0)
3813 charset
= CHARSET_FROM_ID (preferred_charset_id
);
3814 if (! CHAR_CHARSET_P (c
, charset
))
3815 charset
= char_charset (c
, charset_list
, NULL
);
3818 charset
= char_charset (c
, charset_list
, NULL
);
/* Substitution: in safe-encoding mode C becomes
   CODING_INHIBIT_CHARACTER_SUBSTITUTION encoded via charset_ascii;
   the other visible assignment uses CODING->default_char and looks up
   its charset.  The condition guarding this section is not visible
   here.  */
3821 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3823 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3824 charset
= CHARSET_FROM_ID (charset_ascii
);
3828 c
= coding
->default_char
;
3829 charset
= char_charset (c
, charset_list
, NULL
);
3832 ENCODE_ISO_CHARACTER (charset
, c
);
/* On the last block, reset planes and registers if the coding system
   resets at end of line.  */
3836 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3837 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3839 ASSURE_DESTINATION (safe_room
);
3840 ENCODE_RESET_PLANE_AND_REGISTER ();
/* Publish the result: status, saved BOL state, and the produced
   character and byte counts.  */
3842 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
3843 CODING_ISO_BOL (coding
) = bol_designation
;
3844 coding
->produced_char
+= produced_chars
;
3845 coding
->produced
= dst
- coding
->destination
;
3850 /*** 8. SJIS and BIG5 handlers ***/
3852 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3853 quite widely. So, for the moment, Emacs supports them in the bare
3854 C code. But, in the future, they may be supported only by CCL. */
3856 /* SJIS is a coding system encoding three character sets: ASCII, right
3857 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3858 as is. A character of charset katakana-jisx0201 is encoded by
3859 "position-code + 0x80". A character of charset japanese-jisx0208
3860 is encoded in two bytes, but the two position-codes are divided and
3861 shifted so that they fit in the ranges below.
3863 --- CODE RANGE of SJIS ---
3864 (character set) (range)
3866 KATAKANA-JISX0201 0xA0 .. 0xDF
3867 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3868 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3869 -------------------------------
*/
3873 /* BIG5 is a coding system encoding two character sets: ASCII and
3874 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3875 character set and is encoded in two bytes.
3877 --- CODE RANGE of BIG5 ---
3878 (character set) (range)
3880 Big5 (1st byte) 0xA1 .. 0xFE
3881 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3882 --------------------------
*/
3882 --------------------------
3886 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3887 Check if a text is encoded in SJIS. If it is, return
3888 CATEGORY_MASK_SJIS, else return 0. */
/* Detector for the SJIS category.  CODING supplies the source bytes
   and DETECT_INFO receives the outcome: CATEGORY_MASK_SJIS is set in
   its `checked' field up front, evidence gathered in FOUND is OR-ed
   into its `found' field at the end, and the visible rejection paths
   set CATEGORY_MASK_SJIS in its `rejected' field.  */
3891 detect_coding_sjis (coding
, detect_info
)
3892 struct coding_system
*coding
;
3893 struct coding_detection_info
*detect_info
;
3895 const unsigned char *src
= coding
->source
, *src_base
;
3896 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3897 int multibytep
= coding
->src_multibyte
;
3898 int consumed_chars
= 0;
3902 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3903 /* A coding system of this category is always ASCII compatible. */
3904 src
+= coding
->head_ascii
;
/* A byte in 0x81..0x9F or 0xE0..0xEF starts a two-byte sequence.  */
3912 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
/* Range test applied to the following byte: below 0x40, equal to
   0x7F, or above 0xFC.  */
3915 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3917 found
= CATEGORY_MASK_SJIS
;
/* A single byte in 0xA0..0xDF also counts as evidence for SJIS.  */
3919 else if (c
>= 0xA0 && c
< 0xE0)
3920 found
= CATEGORY_MASK_SJIS
;
3924 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
/* If SRC has moved past SRC_BASE when the last block runs out,
   reject SJIS.  */
3928 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3930 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3933 detect_info
->found
|= found
;
3937 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3938 Check if a text is encoded in BIG5. If it is, return
3939 CATEGORY_MASK_BIG5, else return 0. */
/* Detector for the BIG5 category.  CODING supplies the source bytes
   and DETECT_INFO receives the outcome: CATEGORY_MASK_BIG5 is set in
   its `checked' field up front, evidence gathered in FOUND is OR-ed
   into its `found' field at the end, and the visible rejection paths
   set CATEGORY_MASK_BIG5 in its `rejected' field.  */
3942 detect_coding_big5 (coding
, detect_info
)
3943 struct coding_system
*coding
;
3944 struct coding_detection_info
*detect_info
;
3946 const unsigned char *src
= coding
->source
, *src_base
;
3947 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3948 int multibytep
= coding
->src_multibyte
;
3949 int consumed_chars
= 0;
3953 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
3954 /* A coding system of this category is always ASCII compatible. */
3955 src
+= coding
->head_ascii
;
/* Range test on a byte: below 0x40, or within 0x7F..0xA0.  */
3966 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3968 found
= CATEGORY_MASK_BIG5
;
3973 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
/* If SRC has moved past SRC_BASE when the last block runs out,
   reject BIG5.  */
3977 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3979 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3982 detect_info
->found
|= found
;
3986 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3987 Decode SJIS text; BIG5 text is handled by decode_coding_big5. */
/* Decode bytes of CODING->source, starting CODING->consumed bytes in,
   into characters stored in CODING->charbuf.  On exit the function
   updates CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used.  */
3990 decode_coding_sjis (coding
)
3991 struct coding_system
*coding
;
3993 const unsigned char *src
= coding
->source
+ coding
->consumed
;
3994 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3995 const unsigned char *src_base
;
3996 int *charbuf
= coding
->charbuf
;
/* MAX_ANNOTATION_LENGTH slots are held back from the usable end of
   the character buffer.  */
3997 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
3998 int consumed_chars
= 0, consumed_chars_base
;
3999 int multibytep
= coding
->src_multibyte
;
4000 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4001 struct charset
*charset_kanji2
;
4002 Lisp_Object attrs
, charset_list
, val
;
4003 int char_offset
= coding
->produced_char
;
4004 int last_offset
= char_offset
;
4005 int last_id
= charset_ascii
;
4007 CODING_GET_INFO (coding
, attrs
, charset_list
);
/* Walk VAL taking, in order, the roman, kana and kanji charsets; a
   further element, when present, gives charset_kanji2, which is NULL
   otherwise.  */
4010 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4011 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4012 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4013 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4018 struct charset
*charset
;
/* Remember the consumed-character count at the start of this
   sequence; it is restored further down.  */
4021 consumed_chars_base
= consumed_chars
;
4023 if (charbuf
>= charbuf_end
)
4030 charset
= charset_roman
;
4031 else if (c
== 0x80 || c
== 0xA0)
4033 else if (c
>= 0xA1 && c
<= 0xDF)
4035 /* SJIS -> JISX0201-Kana */
4037 charset
= charset_kana
;
4041 /* SJIS -> JISX0208 */
/* Range test on the second byte C1: below 0x40, equal to 0x7F, or
   above 0xFC.  */
4043 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4047 charset
= charset_kanji
;
4049 else if (c
<= 0xFC && charset_kanji2
)
4051 /* SJIS -> JISX0212 */
4053 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4057 charset
= charset_kanji2
;
/* When the charset changes to a different non-ASCII one, flush the
   pending charset annotation (if the previous charset was not ASCII)
   and start tracking the new charset from the current offset.  */
4061 if (charset
->id
!= charset_ascii
4062 && last_id
!= charset
->id
)
4064 if (last_id
!= charset_ascii
)
4065 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4066 last_id
= charset
->id
;
4067 last_offset
= char_offset
;
4069 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4076 consumed_chars
= consumed_chars_base
;
/* A negative C is stored as -C; any other value is stored as the
   corresponding BYTE8 character.  */
4078 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
/* Flush the last pending charset annotation and publish progress.  */
4084 if (last_id
!= charset_ascii
)
4085 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4086 coding
->consumed_char
+= consumed_chars_base
;
4087 coding
->consumed
= src_base
- coding
->source
;
4088 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Decode bytes of CODING->source, starting CODING->consumed bytes in,
   into characters stored in CODING->charbuf, using the roman and Big5
   charsets taken from the charset list.  On exit the function updates
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used.  */
4092 decode_coding_big5 (coding
)
4093 struct coding_system
*coding
;
4095 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4096 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4097 const unsigned char *src_base
;
4098 int *charbuf
= coding
->charbuf
;
/* MAX_ANNOTATION_LENGTH slots are held back from the usable end of
   the character buffer.  */
4099 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4100 int consumed_chars
= 0, consumed_chars_base
;
4101 int multibytep
= coding
->src_multibyte
;
4102 struct charset
*charset_roman
, *charset_big5
;
4103 Lisp_Object attrs
, charset_list
, val
;
4104 int char_offset
= coding
->produced_char
;
4105 int last_offset
= char_offset
;
4106 int last_id
= charset_ascii
;
4108 CODING_GET_INFO (coding
, attrs
, charset_list
);
/* Walk VAL taking the roman charset first and the Big5 charset
   second.  */
4110 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4111 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4116 struct charset
*charset
;
4119 consumed_chars_base
= consumed_chars
;
4121 if (charbuf
>= charbuf_end
)
4129 charset
= charset_roman
;
/* Range test on the first byte: outside 0xA1..0xFE.  */
4133 if (c
< 0xA1 || c
> 0xFE)
/* Range test on the second byte C1: outside both 0x40..0x7E and
   0xA1..0xFE.  */
4136 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4139 charset
= charset_big5
;
/* When the charset changes to a different non-ASCII one, flush the
   pending charset annotation (if the previous charset was not ASCII)
   and start tracking the new charset from the current offset.  */
4141 if (charset
->id
!= charset_ascii
4142 && last_id
!= charset
->id
)
4144 if (last_id
!= charset_ascii
)
4145 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4146 last_id
= charset
->id
;
4147 last_offset
= char_offset
;
4149 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4156 consumed_chars
= consumed_chars_base
;
/* A negative C is stored as -C; any other value is stored as the
   corresponding BYTE8 character.  */
4158 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
/* Flush the last pending charset annotation and publish progress.  */
4164 if (last_id
!= charset_ascii
)
4165 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4166 coding
->consumed_char
+= consumed_chars_base
;
4167 coding
->consumed
= src_base
- coding
->source
;
4168 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4171 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4172 This function can encode charsets `ascii', `katakana-jisx0201',
4173 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4174 are sure that all these charsets are registered as official charset
4175 (i.e. do not have extended leading-codes). Characters of other
4176 charsets are produced without any encoding. SJIS text is encoded
4177 by encode_coding_sjis and BIG5 text by encode_coding_big5. */
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   elements) as bytes appended to CODING->destination after the first
   CODING->produced bytes.  On completion the function records
   CODING_RESULT_SUCCESS and updates CODING->produced_char and
   CODING->produced.  */
4180 encode_coding_sjis (coding
)
4181 struct coding_system
*coding
;
4183 int multibytep
= coding
->dst_multibyte
;
4184 int *charbuf
= coding
->charbuf
;
4185 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4186 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4187 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4189 int produced_chars
= 0;
4190 Lisp_Object attrs
, charset_list
, val
;
4191 int ascii_compatible
;
4192 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4193 struct charset
*charset_kanji2
;
4196 CODING_GET_INFO (coding
, attrs
, charset_list
);
/* Walk VAL taking, in order, the roman, kana and kanji charsets; a
   further element, when present, gives charset_kanji2, which is NULL
   otherwise.  */
4198 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4199 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4200 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4201 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4203 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4205 while (charbuf
< charbuf_end
)
4207 ASSURE_DESTINATION (safe_room
);
4209 /* Now encode the character C. */
4210 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4211 EMIT_ONE_ASCII_BYTE (c
);
4212 else if (CHAR_BYTE8_P (c
))
4214 c
= CHAR_TO_BYTE8 (c
);
/* Look up the charset of C in CHARSET_LIST; the code point is
   returned through CODE.  */
4220 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
/* Substitution: in safe-encoding mode CODE becomes
   CODING_INHIBIT_CHARACTER_SUBSTITUTION with the charset whose id is
   charset_ascii; the other visible assignment retries the lookup with
   CODING->default_char.  The condition guarding this section is not
   visible here.  */
4224 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4226 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4227 charset
= CHARSET_FROM_ID (charset_ascii
);
4231 c
= coding
->default_char
;
4232 charset
= char_charset (c
, charset_list
, &code
);
4235 if (code
== CHARSET_INVALID_CODE (charset
))
/* Kanji: CODE is split into its high and low bytes and emitted as two
   bytes.  */
4237 if (charset
== charset_kanji
)
4241 c1
= code
>> 8, c2
= code
& 0xFF;
4242 EMIT_TWO_BYTES (c1
, c2
);
/* Kana: one byte, CODE with bit 0x80 set.  */
4244 else if (charset
== charset_kana
)
4245 EMIT_ONE_BYTE (code
| 0x80);
4246 else if (charset_kanji2
&& charset
== charset_kanji2
)
/* Only these values of C1 reach the JIS_TO_SJIS2 conversion: 0x21,
   0x23..0x24, 0x2C..0x2F, and 0x6E upward.  */
4251 if (c1
== 0x21 || (c1
>= 0x23 && c1
< 0x25)
4252 || (c1
>= 0x2C && c1
<= 0x2F) || c1
>= 0x6E)
4254 JIS_TO_SJIS2 (code
);
4255 c1
= code
>> 8, c2
= code
& 0xFF;
4256 EMIT_TWO_BYTES (c1
, c2
);
/* The remaining visible paths emit the low seven bits of CODE as one
   ASCII byte.  */
4259 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4262 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4265 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4266 coding
->produced_char
+= produced_chars
;
4267 coding
->produced
= dst
- coding
->destination
;
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   elements) as bytes appended to CODING->destination after the first
   CODING->produced bytes, using the roman and Big5 charsets taken
   from the charset list.  On completion the function records
   CODING_RESULT_SUCCESS and updates CODING->produced_char and
   CODING->produced.  */
4272 encode_coding_big5 (coding
)
4273 struct coding_system
*coding
;
4275 int multibytep
= coding
->dst_multibyte
;
4276 int *charbuf
= coding
->charbuf
;
4277 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4278 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4279 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4281 int produced_chars
= 0;
4282 Lisp_Object attrs
, charset_list
, val
;
4283 int ascii_compatible
;
4284 struct charset
*charset_roman
, *charset_big5
;
4287 CODING_GET_INFO (coding
, attrs
, charset_list
);
/* Walk VAL taking the roman charset first and the Big5 charset
   second.  */
4289 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4290 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4291 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4293 while (charbuf
< charbuf_end
)
4295 ASSURE_DESTINATION (safe_room
);
4297 /* Now encode the character C. */
4298 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4299 EMIT_ONE_ASCII_BYTE (c
);
4300 else if (CHAR_BYTE8_P (c
))
4302 c
= CHAR_TO_BYTE8 (c
);
/* Look up the charset of C in CHARSET_LIST; the code point is
   returned through CODE.  */
4308 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
/* Substitution: in safe-encoding mode CODE becomes
   CODING_INHIBIT_CHARACTER_SUBSTITUTION with the charset whose id is
   charset_ascii; the other visible assignment retries the lookup with
   CODING->default_char.  The condition guarding this section is not
   visible here.  */
4312 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4314 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4315 charset
= CHARSET_FROM_ID (charset_ascii
);
4319 c
= coding
->default_char
;
4320 charset
= char_charset (c
, charset_list
, &code
);
4323 if (code
== CHARSET_INVALID_CODE (charset
))
/* Big5: CODE is split into its high and low bytes and emitted as two
   bytes.  */
4325 if (charset
== charset_big5
)
4329 c1
= code
>> 8, c2
= code
& 0xFF;
4330 EMIT_TWO_BYTES (c1
, c2
);
/* The remaining visible path emits the low seven bits of CODE as one
   ASCII byte.  */
4333 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4336 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4337 coding
->produced_char
+= produced_chars
;
4338 coding
->produced
= dst
- coding
->destination
;
4343 /*** 9. CCL handlers ***/
4345 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4346 Check if a text is encoded in a coding system of which
4347 encoder/decoder are written in CCL program. If it is, return
4348 CATEGORY_MASK_CCL, else return 0. */
/* Detector for the CCL category.  CODING supplies the source bytes
   and the table of valid bytes; DETECT_INFO receives the outcome:
   CATEGORY_MASK_CCL is set in its `checked' field up front, evidence
   gathered in FOUND is OR-ed into its `found' field at the end, and
   the visible rejection path sets CATEGORY_MASK_CCL in its `rejected'
   field.  */
4351 detect_coding_ccl (coding
, detect_info
)
4352 struct coding_system
*coding
;
4353 struct coding_detection_info
*detect_info
;
4355 const unsigned char *src
= coding
->source
, *src_base
;
4356 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4357 int multibytep
= coding
->src_multibyte
;
4358 int consumed_chars
= 0;
/* VALIDS and HEAD_ASCII are read from the CODING passed in, before
   CODING is repointed below.  */
4360 unsigned char *valids
= CODING_CCL_VALIDS (coding
);
4361 int head_ascii
= coding
->head_ascii
;
4364 detect_info
->checked
|= CATEGORY_MASK_CCL
;
/* From here on CODING refers to the coding_categories entry for the
   CCL category, whose attributes are consulted.  */
4366 coding
= &coding_categories
[coding_category_ccl
];
4367 attrs
= CODING_ID_ATTRS (coding
->id
);
4368 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
/* Test for a negative C or a byte whose entry in VALIDS is zero.  */
4377 if (c
< 0 || ! valids
[c
])
/* A VALIDS entry greater than 1 counts as evidence for CCL.  */
4379 if ((valids
[c
] > 1))
4380 found
= CATEGORY_MASK_CCL
;
4382 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4386 detect_info
->found
|= found
;
/* Decode bytes of CODING->source, starting CODING->consumed bytes in,
   by running the CCL decoder program of CODING.  Source elements are
   staged in SOURCE_CHARBUF, up to 1024 per round, and ccl_driver
   writes its output into CODING->charbuf.  On exit the function
   records a conversion result and updates CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used.  */
4391 decode_coding_ccl (coding
)
4392 struct coding_system
*coding
;
4394 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4395 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4396 int *charbuf
= coding
->charbuf
;
4397 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4398 int consumed_chars
= 0;
4399 int multibytep
= coding
->src_multibyte
;
4400 struct ccl_program ccl
;
4401 int source_charbuf
[1024];
4402 int source_byteidx
[1024];
4403 Lisp_Object attrs
, charset_list
;
4405 CODING_GET_INFO (coding
, attrs
, charset_list
);
4406 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4408 while (src
< src_end
)
4410 const unsigned char *p
= src
;
4411 int *source
, *source_end
;
/* First filling loop: read one character at a time with
   STRING_CHAR_ADVANCE, remembering each character's byte offset from
   SRC in SOURCE_BYTEIDX.  */
4415 while (i
< 1024 && p
< src_end
)
4417 source_byteidx
[i
] = p
- src
;
4418 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
/* Second filling loop: copy raw bytes.  The condition choosing
   between the two loops is not visible in this listing.  */
4421 while (i
< 1024 && p
< src_end
)
4422 source_charbuf
[i
++] = *p
++;
4424 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4427 source
= source_charbuf
;
4428 source_end
= source
+ i
;
/* Run the driver over the staged elements, advancing SOURCE and
   CHARBUF by the amounts it reports.  */
4429 while (source
< source_end
)
4431 ccl_driver (&ccl
, source
, charbuf
,
4432 source_end
- source
, charbuf_end
- charbuf
,
4434 source
+= ccl
.consumed
;
4435 charbuf
+= ccl
.produced
;
4436 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
/* If staged elements remain, advance SRC by the byte offset recorded
   for the first unconsumed element.  */
4439 if (source
< source_end
)
4440 src
+= source_byteidx
[source
- source_charbuf
];
4443 consumed_chars
+= source
- source_charbuf
;
/* NOTE(review): ccl.status is compared with
   CODING_RESULT_INSUFFICIENT_SRC here, while every other test of
   ccl.status in this function uses a CCL_STAT_ constant; confirm
   which constant is intended.  */
4445 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4446 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
/* Map the driver status to a conversion result.  */
4452 case CCL_STAT_SUSPEND_BY_SRC
:
4453 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4455 case CCL_STAT_SUSPEND_BY_DST
:
4458 case CCL_STAT_INVALID_CMD
:
4459 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4462 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4465 coding
->consumed_char
+= consumed_chars
;
4466 coding
->consumed
= src
- coding
->source
;
4467 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   elements) by running the CCL encoder program of CODING.  The driver
   writes into DESTINATION_CHARBUF, and the low eight bits of each
   element are then copied to CODING->destination.  On exit the
   function records a conversion result and updates
   CODING->produced_char and CODING->produced.  */
4471 encode_coding_ccl (coding
)
4472 struct coding_system
*coding
;
4474 struct ccl_program ccl
;
4475 int multibytep
= coding
->dst_multibyte
;
4476 int *charbuf
= coding
->charbuf
;
4477 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4478 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4479 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
/* The main loop stops one byte short of DST_END.  */
4480 unsigned char *adjusted_dst_end
= dst_end
- 1;
4481 int destination_charbuf
[1024];
4482 int i
, produced_chars
= 0;
4483 Lisp_Object attrs
, charset_list
;
4485 CODING_GET_INFO (coding
, attrs
, charset_list
);
4486 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
/* Tell the CCL program whether this is the last block and whether
   the destination is multibyte.  */
4488 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4489 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4491 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
/* Space offered to the driver this round: what is left up to DST_END,
   tested against 1024, the size of DESTINATION_CHARBUF.  */
4493 int dst_bytes
= dst_end
- dst
;
4494 if (dst_bytes
> 1024)
4497 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4498 charbuf_end
- charbuf
, dst_bytes
, charset_list
);
4499 charbuf
+= ccl
.consumed
;
/* Two copy-out loops: the first goes through EMIT_ONE_BYTE, the
   second stores the low eight bits directly and then adds
   ccl.produced to PRODUCED_CHARS.  The condition choosing between
   them is not visible in this listing.  */
4501 for (i
= 0; i
< ccl
.produced
; i
++)
4502 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4505 for (i
= 0; i
< ccl
.produced
; i
++)
4506 *dst
++ = destination_charbuf
[i
] & 0xFF;
4507 produced_chars
+= ccl
.produced
;
/* Map the driver status to a conversion result.  */
4513 case CCL_STAT_SUSPEND_BY_SRC
:
4514 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4516 case CCL_STAT_SUSPEND_BY_DST
:
4517 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_DST
);
4520 case CCL_STAT_INVALID_CMD
:
4521 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4524 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4528 coding
->produced_char
+= produced_chars
;
4529 coding
->produced
= dst
- coding
->destination
;
4535 /*** 10, 11. no-conversion handlers ***/
4537 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* No conversion is performed here: the function sets
   CODING->chars_at_source to 1, zeroes CODING->consumed_char and
   CODING->consumed, and records CODING_RESULT_SUCCESS.  */
4540 decode_coding_raw_text (coding
)
4541 struct coding_system
*coding
;
4543 coding
->chars_at_source
= 1;
4544 coding
->consumed_char
= 0;
4545 coding
->consumed
= 0;
4546 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
/* Copy the characters in CODING->charbuf (CODING->charbuf_used
   elements) to CODING->destination after the first CODING->produced
   bytes, without charset conversion.  Several loops are visible,
   selected by CODING->src_multibyte and by conditions that are not
   all shown in this listing.  On completion the function records
   CODING_RESULT_SUCCESS and updates CODING->produced_char and
   CODING->produced.  */
4550 encode_coding_raw_text (coding
)
4551 struct coding_system
*coding
;
4553 int multibytep
= coding
->dst_multibyte
;
4554 int *charbuf
= coding
->charbuf
;
4555 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4556 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4557 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4558 int produced_chars
= 0;
/* Room reserved per character in the loops that follow: twice
   MAX_MULTIBYTE_LENGTH.  */
4563 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4565 if (coding
->src_multibyte
)
4566 while (charbuf
< charbuf_end
)
4568 ASSURE_DESTINATION (safe_room
);
4570 if (ASCII_CHAR_P (c
))
4571 EMIT_ONE_ASCII_BYTE (c
);
4572 else if (CHAR_BYTE8_P (c
))
4574 c
= CHAR_TO_BYTE8 (c
);
/* Any other character is first written to the scratch buffer STR
   with CHAR_STRING_ADVANCE, and its bytes are then emitted through
   EMIT_ONE_BYTE.  */
4579 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4581 CHAR_STRING_ADVANCE (c
, p1
);
4584 EMIT_ONE_BYTE (*p0
);
4590 while (charbuf
< charbuf_end
)
4592 ASSURE_DESTINATION (safe_room
);
4599 if (coding
->src_multibyte
)
/* This section reserves only MAX_MULTIBYTE_LENGTH per character and
   writes to DST directly.  */
4601 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4603 while (charbuf
< charbuf_end
)
4605 ASSURE_DESTINATION (safe_room
);
4607 if (ASCII_CHAR_P (c
))
4609 else if (CHAR_BYTE8_P (c
))
4610 *dst
++ = CHAR_TO_BYTE8 (c
);
4612 CHAR_STRING_ADVANCE (c
, dst
);
/* Plain copy: room for all remaining elements is requested at once
   and each element is stored as one byte.  */
4618 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4619 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4620 *dst
++ = *charbuf
++;
/* NOTE(review): this subtracts destination + dst_bytes, which is
   DST_END as initialised above, so the result cannot be positive
   while DST stays below DST_END.  DST started at destination +
   produced, so `produced' was presumably intended -- confirm.  */
4621 produced_chars
= dst
- (coding
->destination
+ coding
->dst_bytes
);
4624 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4625 coding
->produced_char
+= produced_chars
;
4626 coding
->produced
= dst
- coding
->destination
;
4630 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4631 Check if a text is encoded in a charset-based coding system. If it
4632 is, return 1, else return 0. */
/* Detector for the charset category.  CODING supplies the source
   bytes and DETECT_INFO receives the outcome: CATEGORY_MASK_CHARSET
   is set in its `checked' field up front, evidence gathered in FOUND
   is OR-ed into its `found' field at the end, and the visible
   rejection path sets CATEGORY_MASK_CHARSET in its `rejected'
   field.  */
4635 detect_coding_charset (coding
, detect_info
)
4636 struct coding_system
*coding
;
4637 struct coding_detection_info
*detect_info
;
4639 const unsigned char *src
= coding
->source
, *src_base
;
4640 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4641 int multibytep
= coding
->src_multibyte
;
4642 int consumed_chars
= 0;
4643 Lisp_Object attrs
, valids
;
4646 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
/* From here on CODING refers to the coding_categories entry for the
   charset category; its attributes supply the VALIDS table.  */
4648 coding
= &coding_categories
[coding_category_charset
];
4649 attrs
= CODING_ID_ATTRS (coding
->id
);
4650 valids
= AREF (attrs
, coding_attr_charset_valids
);
/* An ASCII-compatible coding system skips CODING->head_ascii bytes.
   NOTE(review): head_ascii is read after CODING was repointed, so it
   comes from the category entry rather than from the argument --
   confirm this is intended.  */
4652 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4653 src
+= coding
->head_ascii
;
/* Test whether the VALIDS entry for byte C is nil.  */
4663 if (NILP (AREF (valids
, c
)))
4666 found
= CATEGORY_MASK_CHARSET
;
4668 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4672 detect_info
->found
|= found
;
/* Decode bytes of CODING->source, starting CODING->consumed bytes in,
   into characters stored in CODING->charbuf, choosing the charset for
   each sequence from the VALIDS table of the coding system's
   attributes.  On exit the function updates CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used.  */
4677 decode_coding_charset (coding
)
4678 struct coding_system
*coding
;
4680 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4681 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4682 const unsigned char *src_base
;
4683 int *charbuf
= coding
->charbuf
;
/* MAX_ANNOTATION_LENGTH slots are held back from the usable end of
   the character buffer.  */
4684 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4685 int consumed_chars
= 0, consumed_chars_base
;
4686 int multibytep
= coding
->src_multibyte
;
4687 Lisp_Object attrs
, charset_list
, valids
;
4688 int char_offset
= coding
->produced_char
;
4689 int last_offset
= char_offset
;
4690 int last_id
= charset_ascii
;
4692 CODING_GET_INFO (coding
, attrs
, charset_list
);
4693 valids
= AREF (attrs
, coding_attr_charset_valids
);
4699 struct charset
*charset
;
4705 consumed_chars_base
= consumed_chars
;
4707 if (charbuf
>= charbuf_end
)
/* Look up the entry of VALIDS for byte C.  In the first case shown
   the entry is used as a single charset id; further bytes are
   shifted into CODE with the earlier bytes in the higher positions.  */
4715 val
= AREF (valids
, c
);
4720 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4721 dim
= CHARSET_DIMENSION (charset
);
4725 code
= (code
<< 8) | c
;
4728 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4733 /* VAL is a list of charset IDs. It is assured that the
4734 list is sorted by charset dimensions (smaller one
4738 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4739 dim
= CHARSET_DIMENSION (charset
);
4743 code
= (code
<< 8) | c
;
4746 CODING_DECODE_CHAR (coding
, src
, src_base
,
4747 src_end
, charset
, code
, c
);
4755 if (charset
->id
!= charset_ascii
4756 && last_id
!= charset
->id
)
4758 if (last_id
!= charset_ascii
)
4759 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4760 last_id
= charset
->id
;
4761 last_offset
= char_offset
;
4770 consumed_chars
= consumed_chars_base
;
4772 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4778 if (last_id
!= charset_ascii
)
4779 ADD_CHARSET_DATA (charbuf
, last_offset
, char_offset
, last_id
);
4780 coding
->consumed_char
+= consumed_chars_base
;
4781 coding
->consumed
= src_base
- coding
->source
;
4782 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   elements) as bytes appended to CODING->destination after the first
   CODING->produced bytes, using the charsets in the coding system's
   charset list.  On completion the function records
   CODING_RESULT_SUCCESS and updates CODING->produced_char and
   CODING->produced.  */
4786 encode_coding_charset (coding
)
4787 struct coding_system
*coding
;
4789 int multibytep
= coding
->dst_multibyte
;
4790 int *charbuf
= coding
->charbuf
;
4791 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4792 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4793 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4794 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4795 int produced_chars
= 0;
4796 Lisp_Object attrs
, charset_list
;
4797 int ascii_compatible
;
4800 CODING_GET_INFO (coding
, attrs
, charset_list
);
4801 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4803 while (charbuf
< charbuf_end
)
4805 struct charset
*charset
;
4808 ASSURE_DESTINATION (safe_room
);
4810 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4811 EMIT_ONE_ASCII_BYTE (c
);
4812 else if (CHAR_BYTE8_P (c
))
4814 c
= CHAR_TO_BYTE8 (c
);
/* Look up the charset of C in CHARSET_LIST; the code point is
   returned through CODE.  */
4819 charset
= char_charset (c
, charset_list
, &code
);
/* Emit CODE using as many bytes as the charset's dimension, with the
   higher-order bytes first; the four-byte form is the remaining
   visible case.  */
4822 if (CHARSET_DIMENSION (charset
) == 1)
4823 EMIT_ONE_BYTE (code
);
4824 else if (CHARSET_DIMENSION (charset
) == 2)
4825 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4826 else if (CHARSET_DIMENSION (charset
) == 3)
4827 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4829 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4830 (code
>> 8) & 0xFF, code
& 0xFF);
/* Replacement character: CODING_INHIBIT_CHARACTER_SUBSTITUTION in
   safe-encoding mode; the other visible assignment uses
   CODING->default_char.  */
4834 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4835 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4837 c
= coding
->default_char
;
4843 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4844 coding
->produced_char
+= produced_chars
;
4845 coding
->produced
= dst
- coding
->destination
;
4850 /*** 10. C library functions ***/
4852 /* Setup coding context CODING from information about CODING_SYSTEM.
4853 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4854 CODING_SYSTEM is invalid, signal an error. */
4857 setup_coding_system (coding_system
, coding
)
4858 Lisp_Object coding_system
;
4859 struct coding_system
*coding
;
4862 Lisp_Object eol_type
;
4863 Lisp_Object coding_type
;
4866 if (NILP (coding_system
))
4867 coding_system
= Qno_conversion
;
4869 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4871 attrs
= CODING_ID_ATTRS (coding
->id
);
4872 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4875 coding
->head_ascii
= -1;
4876 coding
->common_flags
4877 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4878 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
4879 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
4880 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
4881 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
4882 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs
)))
4883 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4885 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4886 coding
->max_charset_id
= SCHARS (val
) - 1;
4887 coding
->safe_charsets
= (char *) SDATA (val
);
4888 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4890 coding_type
= CODING_ATTR_TYPE (attrs
);
4891 if (EQ (coding_type
, Qundecided
))
4893 coding
->detector
= NULL
;
4894 coding
->decoder
= decode_coding_raw_text
;
4895 coding
->encoder
= encode_coding_raw_text
;
4896 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4898 else if (EQ (coding_type
, Qiso_2022
))
4901 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4903 /* Invoke graphic register 0 to plane 0. */
4904 CODING_ISO_INVOCATION (coding
, 0) = 0;
4905 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4906 CODING_ISO_INVOCATION (coding
, 1)
4907 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4908 /* Setup the initial status of designation. */
4909 for (i
= 0; i
< 4; i
++)
4910 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4911 /* Not single shifting initially. */
4912 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4913 /* Beginning of buffer should also be regarded as bol. */
4914 CODING_ISO_BOL (coding
) = 1;
4915 coding
->detector
= detect_coding_iso_2022
;
4916 coding
->decoder
= decode_coding_iso_2022
;
4917 coding
->encoder
= encode_coding_iso_2022
;
4918 if (flags
& CODING_ISO_FLAG_SAFE
)
4919 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4920 coding
->common_flags
4921 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4922 | CODING_REQUIRE_FLUSHING_MASK
);
4923 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4924 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4925 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
4926 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
4927 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4929 setup_iso_safe_charsets (attrs
);
4930 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4931 coding
->max_charset_id
= SCHARS (val
) - 1;
4932 coding
->safe_charsets
= (char *) SDATA (val
);
4934 CODING_ISO_FLAGS (coding
) = flags
;
4936 else if (EQ (coding_type
, Qcharset
))
4938 coding
->detector
= detect_coding_charset
;
4939 coding
->decoder
= decode_coding_charset
;
4940 coding
->encoder
= encode_coding_charset
;
4941 coding
->common_flags
4942 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4944 else if (EQ (coding_type
, Qutf_8
))
4946 coding
->detector
= detect_coding_utf_8
;
4947 coding
->decoder
= decode_coding_utf_8
;
4948 coding
->encoder
= encode_coding_utf_8
;
4949 coding
->common_flags
4950 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4952 else if (EQ (coding_type
, Qutf_16
))
4954 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4955 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4956 : EQ (val
, Qt
) ? utf_16_with_bom
4957 : utf_16_without_bom
);
4958 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4959 CODING_UTF_16_ENDIAN (coding
) = (EQ (val
, Qbig
) ? utf_16_big_endian
4960 : utf_16_little_endian
);
4961 CODING_UTF_16_SURROGATE (coding
) = 0;
4962 coding
->detector
= detect_coding_utf_16
;
4963 coding
->decoder
= decode_coding_utf_16
;
4964 coding
->encoder
= encode_coding_utf_16
;
4965 coding
->common_flags
4966 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4967 if (CODING_UTF_16_BOM (coding
) == utf_16_detect_bom
)
4968 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4970 else if (EQ (coding_type
, Qccl
))
4972 coding
->detector
= detect_coding_ccl
;
4973 coding
->decoder
= decode_coding_ccl
;
4974 coding
->encoder
= encode_coding_ccl
;
4975 coding
->common_flags
4976 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4977 | CODING_REQUIRE_FLUSHING_MASK
);
4979 else if (EQ (coding_type
, Qemacs_mule
))
4981 coding
->detector
= detect_coding_emacs_mule
;
4982 coding
->decoder
= decode_coding_emacs_mule
;
4983 coding
->encoder
= encode_coding_emacs_mule
;
4984 coding
->common_flags
4985 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4986 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4987 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4989 Lisp_Object tail
, safe_charsets
;
4990 int max_charset_id
= 0;
4992 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4994 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4995 max_charset_id
= XFASTINT (XCAR (tail
));
4996 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
4998 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
5000 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
5001 coding
->max_charset_id
= max_charset_id
;
5002 coding
->safe_charsets
= (char *) SDATA (safe_charsets
);
5005 else if (EQ (coding_type
, Qshift_jis
))
5007 coding
->detector
= detect_coding_sjis
;
5008 coding
->decoder
= decode_coding_sjis
;
5009 coding
->encoder
= encode_coding_sjis
;
5010 coding
->common_flags
5011 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5013 else if (EQ (coding_type
, Qbig5
))
5015 coding
->detector
= detect_coding_big5
;
5016 coding
->decoder
= decode_coding_big5
;
5017 coding
->encoder
= encode_coding_big5
;
5018 coding
->common_flags
5019 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5021 else /* EQ (coding_type, Qraw_text) */
5023 coding
->detector
= NULL
;
5024 coding
->decoder
= decode_coding_raw_text
;
5025 coding
->encoder
= encode_coding_raw_text
;
5031 /* Return raw-text or one of its subsidiaries that has the same
5032 eol_type as CODING-SYSTEM. */
5035 raw_text_coding_system (coding_system
)
5036 Lisp_Object coding_system
;
5038 Lisp_Object spec
, attrs
;
5039 Lisp_Object eol_type
, raw_text_eol_type
;
5041 if (NILP (coding_system
))
5043 spec
= CODING_SYSTEM_SPEC (coding_system
);
5044 attrs
= AREF (spec
, 0);
5046 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
5047 return coding_system
;
5049 eol_type
= AREF (spec
, 2);
5050 if (VECTORP (eol_type
))
5052 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
5053 raw_text_eol_type
= AREF (spec
, 2);
5054 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
5055 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
5056 : AREF (raw_text_eol_type
, 2));
5060 /* If CODING_SYSTEM doesn't specify an end-of-line format but PARENT
5061 does, return the subsidiary of CODING_SYSTEM that has the same
5062 eol-spec as PARENT. Otherwise, return CODING_SYSTEM. */
5065 coding_inherit_eol_type (coding_system
, parent
)
5066 Lisp_Object coding_system
, parent
;
5068 Lisp_Object spec
, eol_type
;
/* A nil CODING_SYSTEM is treated as raw-text. */
5070 if (NILP (coding_system
))
5071 coding_system
= Qraw_text
;
/* Element 2 of the coding system spec is its eol-type. */
5072 spec
= CODING_SYSTEM_SPEC (coding_system
);
5073 eol_type
= AREF (spec
, 2);
/* A vector eol-type is indexed below with 0, 1 and 2 to pick the
   subsidiary for the unix, dos and mac end-of-line formats. */
5074 if (VECTORP (eol_type
)
5077 Lisp_Object parent_spec
;
5078 Lisp_Object parent_eol_type
;
/* NOTE(review): the spec consulted here is the one of
   buffer_defaults.buffer_file_coding_system.  PARENT is not
   referenced by any statement visible in this function, although the
   header comment says it is PARENT's eol-spec that is inherited.
   Confirm whether CODING_SYSTEM_SPEC (parent) was intended. */
5081 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
5082 parent_eol_type
= AREF (parent_spec
, 2);
/* If the consulted eol-type is none of Qunix, Qdos and Qmac,
   CODING_SYSTEM is left as it is. */
5083 if (EQ (parent_eol_type
, Qunix
))
5084 coding_system
= AREF (eol_type
, 0);
5085 else if (EQ (parent_eol_type
, Qdos
))
5086 coding_system
= AREF (eol_type
, 1);
5087 else if (EQ (parent_eol_type
, Qmac
))
5088 coding_system
= AREF (eol_type
, 2);
5090 return coding_system
;
5093 /* Emacs has a mechanism to automatically detect a coding system if it
5094 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5095 it's impossible to distinguish some coding systems accurately
5096 because they use the same range of codes. So, at first, coding
5097 systems are categorized into the following categories:
5099 o coding-category-emacs-mule
5101 The category for a coding system which has the same code range
5102 as Emacs' internal format. Assigned the coding-system (Lisp
5103 symbol) `emacs-mule' by default.
5105 o coding-category-sjis
5107 The category for a coding system which has the same code range
5108 as SJIS. Assigned the coding-system (Lisp
5109 symbol) `japanese-shift-jis' by default.
5111 o coding-category-iso-7
5113 The category for a coding system which has the same code range
5114 as ISO2022 of 7-bit environment. This doesn't use any locking
5115 shift and single shift functions. This can encode/decode all
5116 charsets. Assigned the coding-system (Lisp symbol)
5117 `iso-2022-7bit' by default.
5119 o coding-category-iso-7-tight
5121 Same as coding-category-iso-7 except that this can
5122 encode/decode only the specified charsets.
5124 o coding-category-iso-8-1
5126 The category for a coding system which has the same code range
5127 as ISO2022 of 8-bit environment and graphic plane 1 used only
5128 for DIMENSION1 charset. This doesn't use any locking shift
5129 and single shift functions. Assigned the coding-system (Lisp
5130 symbol) `iso-latin-1' by default.
5132 o coding-category-iso-8-2
5134 The category for a coding system which has the same code range
5135 as ISO2022 of 8-bit environment and graphic plane 1 used only
5136 for DIMENSION2 charset. This doesn't use any locking shift
5137 and single shift functions. Assigned the coding-system (Lisp
5138 symbol) `japanese-iso-8bit' by default.
5140 o coding-category-iso-7-else
5142 The category for a coding system which has the same code range
5143 as ISO2022 of 7-bit environment but uses locking shift or
5144 single shift functions. Assigned the coding-system (Lisp
5145 symbol) `iso-2022-7bit-lock' by default.
5147 o coding-category-iso-8-else
5149 The category for a coding system which has the same code range
5150 as ISO2022 of 8-bit environment but uses locking shift or
5151 single shift functions. Assigned the coding-system (Lisp
5152 symbol) `iso-2022-8bit-ss2' by default.
5154 o coding-category-big5
5156 The category for a coding system which has the same code range
5157 as BIG5. Assigned the coding-system (Lisp symbol)
5158 `cn-big5' by default.
5160 o coding-category-utf-8
5162 The category for a coding system which has the same code range
5163 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
5164 symbol) `utf-8' by default.
5166 o coding-category-utf-16-be
5168 The category for a coding system in which a text has a
5169 Unicode signature (cf. Unicode Standard) in the order of BIG
5170 endian at the head. Assigned the coding-system (Lisp symbol)
5171 `utf-16-be' by default.
5173 o coding-category-utf-16-le
5175 The category for a coding system in which a text has a
5176 Unicode signature (cf. Unicode Standard) in the order of
5177 LITTLE endian at the head. Assigned the coding-system (Lisp
5178 symbol) `utf-16-le' by default.
5180 o coding-category-ccl
5182 The category for a coding system of which encoder/decoder is
5183 written in CCL programs. The default value is nil, i.e., no
5184 coding system is assigned.
5186 o coding-category-binary
5188 The category for a coding system not categorized in any of the
5189 above. Assigned the coding-system (Lisp symbol)
5190 `no-conversion' by default.
5192 Each of them is a Lisp symbol whose value is an actual
5193 `coding-system' (also a Lisp symbol) assigned by a user.
5194 What Emacs actually does is detect the category of a coding system
5195 and then use the `coding-system' assigned to that category. If Emacs
5196 can't narrow the text down to a single category, it selects the
5197 candidate category of the highest priority. Priorities of categories
5198 are also specified by a user in the Lisp variable `coding-category-list'.
/* Values describing which end-of-line sequences were found in a
   text.  The non-zero values are distinct bits, so that several of
   them can be accumulated with `|=' and tested with `&'. */
5202 #define EOL_SEEN_NONE 0 /* No end-of-line found. */
5203 #define EOL_SEEN_LF 1 /* LF ('\n'). */
5204 #define EOL_SEEN_CR 2 /* CR ('\r'). */
5205 #define EOL_SEEN_CRLF 4 /* CR followed by LF. */
5207 /* Detect how end-of-line is encoded in the text of length SRC_BYTES
5208 pointed to by SOURCE. If CATEGORY is one of
5209 coding_category_utf_16_XXXX, assume that CR and LF are each encoded
5210 in two bytes; otherwise assume they are encoded in one byte.
5212 Return one of EOL_SEEN_XXX. */
/* The running count of end-of-lines found (TOTAL below) is compared
   against this value after each one.  Presumably the scan stops when
   they are equal -- the statement following that comparison is not
   visible here; confirm. */
5214 #define MAX_EOL_CHECK_COUNT 3
5217 detect_eol (source
, src_bytes
, category
)
5218 unsigned char *source
;
5219 EMACS_INT src_bytes
;
5220 enum coding_category category
;
5222 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5225 int eol_seen
= EOL_SEEN_NONE
;
/* UTF-16 categories: the text is examined in two-byte units. */
5227 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
/* MSB indexes the byte of a two-byte unit that must be zero for the
   unit to be CR or LF (see the tests on src[msb] below).
   NOTE(review): CATEGORY is compared with the bitwise OR of two
   enumerators, not with each enumerator in turn, so the test holds
   only when CATEGORY equals that combined value.  Confirm whether
   (category == ..._le || category == ..._le_nosig) was intended. */
5231 msb
= category
== (coding_category_utf_16_le
5232 | coding_category_utf_16_le_nosig
);
5235 while (src
+ 1 < src_end
)
5238 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5243 this_eol
= EOL_SEEN_LF
;
/* Treated as a lone CR when there is no complete following two-byte
   unit or that unit is not LF. */
5244 else if (src
+ 3 >= src_end
5245 || src
[msb
+ 2] != 0
5246 || src
[lsb
+ 2] != '\n')
5247 this_eol
= EOL_SEEN_CR
;
5249 this_eol
= EOL_SEEN_CRLF
;
5251 if (eol_seen
== EOL_SEEN_NONE
)
5252 /* This is the first end-of-line. */
5253 eol_seen
= this_eol
;
5254 else if (eol_seen
!= this_eol
)
5256 /* This end-of-line differs from the kind found before; settle on LF. */
5257 eol_seen
= EOL_SEEN_LF
;
5260 if (++total
== MAX_EOL_CHECK_COUNT
)
/* All other categories: the text is examined byte by byte. */
5268 while (src
< src_end
)
5271 if (c
== '\n' || c
== '\r')
5276 this_eol
= EOL_SEEN_LF
;
/* Treated as a lone CR when the text ends here or the next byte is
   not LF. */
5277 else if (src
>= src_end
|| *src
!= '\n')
5278 this_eol
= EOL_SEEN_CR
;
/* CR followed by LF: also step over the LF. */
5280 this_eol
= EOL_SEEN_CRLF
, src
++;
5282 if (eol_seen
== EOL_SEEN_NONE
)
5283 /* This is the first end-of-line. */
5284 eol_seen
= this_eol
;
5285 else if (eol_seen
!= this_eol
)
5287 /* This end-of-line differs from the kind found before; settle on LF. */
5288 eol_seen
= EOL_SEEN_LF
;
5291 if (++total
== MAX_EOL_CHECK_COUNT
)
/* Switch CODING to the subsidiary coding system selected by EOL_SEEN
   (a combination of EOL_SEEN_XXX bits).  The eol-type of CODING->id
   is indexed as a vector: element 0 is used if EOL_SEEN_LF is set,
   otherwise element 1 if EOL_SEEN_CRLF is set, otherwise element 2 if
   EOL_SEEN_CR is set.  The first matching test wins, and CODING->id
   is replaced by the ID of the chosen element.

   NOTE(review): the eol-type is passed to AREF without a check here,
   so callers are presumably expected to call this only when it is a
   vector -- confirm. */
5301 adjust_coding_eol_type (coding
, eol_seen
)
5302 struct coding_system
*coding
;
5305 Lisp_Object eol_type
;
5307 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5308 if (eol_seen
& EOL_SEEN_LF
)
5310 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
5313 else if (eol_seen
& EOL_SEEN_CRLF
)
5315 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
5318 else if (eol_seen
& EOL_SEEN_CR
)
5320 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5326 /* Detect how a text specified in CODING is encoded. If a coding
5327 system is detected, update fields of CODING by the detected coding system.
5331 detect_coding (coding
)
5332 struct coding_system
*coding
;
5334 const unsigned char *src
, *src_end
;
5335 Lisp_Object attrs
, coding_type
;
5337 coding
->consumed
= coding
->consumed_char
= 0;
5338 coding
->produced
= coding
->produced_char
= 0;
5339 coding_set_source (coding
);
5341 src_end
= coding
->source
+ coding
->src_bytes
;
5343 /* If we have not yet decided the text encoding type, detect it
5345 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
5349 for (i
= 0, src
= coding
->source
; src
< src_end
; i
++, src
++)
5352 if (c
& 0x80 || (c
< 0x20 && (c
== 0
5353 || c
== ISO_CODE_ESC
5355 || c
== ISO_CODE_SO
)))
5358 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5360 if (coding
->head_ascii
< coding
->src_bytes
)
5362 struct coding_detection_info detect_info
;
5363 enum coding_category category
;
5364 struct coding_system
*this;
5366 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
5367 for (i
= 0; i
< coding_category_raw_text
; i
++)
5369 category
= coding_priorities
[i
];
5370 this = coding_categories
+ category
;
5373 /* No coding system of this category is defined. */
5374 detect_info
.rejected
|= (1 << category
);
5376 else if (category
>= coding_category_raw_text
)
5378 else if (detect_info
.checked
& (1 << category
))
5380 if (detect_info
.found
& (1 << category
))
5383 else if ((*(this->detector
)) (coding
, &detect_info
)
5384 && detect_info
.found
& (1 << category
))
5386 if (category
== coding_category_utf_16_auto
)
5388 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5389 category
= coding_category_utf_16_le
;
5391 category
= coding_category_utf_16_be
;
5396 if (i
< coding_category_raw_text
)
5397 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5398 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5399 setup_coding_system (Qraw_text
, coding
);
5400 else if (detect_info
.rejected
)
5401 for (i
= 0; i
< coding_category_raw_text
; i
++)
5402 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5404 this = coding_categories
+ coding_priorities
[i
];
5405 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5410 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding
->id
)))
5411 == coding_category_utf_16_auto
)
5413 Lisp_Object coding_systems
;
5414 struct coding_detection_info detect_info
;
5417 = AREF (CODING_ID_ATTRS (coding
->id
), coding_attr_utf_16_bom
);
5418 detect_info
.found
= detect_info
.rejected
= 0;
5419 if (CONSP (coding_systems
)
5420 && detect_coding_utf_16 (coding
, &detect_info
))
5422 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5423 setup_coding_system (XCAR (coding_systems
), coding
);
5424 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
5425 setup_coding_system (XCDR (coding_systems
), coding
);
5433 struct coding_system
*coding
;
5435 Lisp_Object eol_type
;
5436 unsigned char *p
, *pbeg
, *pend
;
5438 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5439 if (EQ (eol_type
, Qunix
))
5442 if (NILP (coding
->dst_object
))
5443 pbeg
= coding
->destination
;
5445 pbeg
= BYTE_POS_ADDR (coding
->dst_pos_byte
);
5446 pend
= pbeg
+ coding
->produced
;
5448 if (VECTORP (eol_type
))
5450 int eol_seen
= EOL_SEEN_NONE
;
5452 for (p
= pbeg
; p
< pend
; p
++)
5455 eol_seen
|= EOL_SEEN_LF
;
5456 else if (*p
== '\r')
5458 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5460 eol_seen
|= EOL_SEEN_CRLF
;
5464 eol_seen
|= EOL_SEEN_CR
;
5467 if (eol_seen
!= EOL_SEEN_NONE
5468 && eol_seen
!= EOL_SEEN_LF
5469 && eol_seen
!= EOL_SEEN_CRLF
5470 && eol_seen
!= EOL_SEEN_CR
)
5471 eol_seen
= EOL_SEEN_LF
;
5472 if (eol_seen
!= EOL_SEEN_NONE
)
5473 eol_type
= adjust_coding_eol_type (coding
, eol_seen
);
5476 if (EQ (eol_type
, Qmac
))
5478 for (p
= pbeg
; p
< pend
; p
++)
5482 else if (EQ (eol_type
, Qdos
))
5486 if (NILP (coding
->dst_object
))
5488 for (p
= pend
- 2; p
>= pbeg
; p
--)
5491 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
-- - p
- 1);
5497 for (p
= pend
- 2; p
>= pbeg
; p
--)
5500 int pos_byte
= coding
->dst_pos_byte
+ (p
- pbeg
);
5501 int pos
= BYTE_TO_CHAR (pos_byte
);
5503 del_range_2 (pos
, pos_byte
, pos
+ 1, pos_byte
+ 1, 0);
5507 coding
->produced
-= n
;
5508 coding
->produced_char
-= n
;
5513 /* Return a translation table (or list of them) from coding system
5514 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5515 decoding (ENCODEP is zero). */
5518 get_translation_table (attrs
, encodep
)
5520 Lisp_Object standard
, translation_table
;
5523 translation_table
= CODING_ATTR_ENCODE_TBL (attrs
),
5524 standard
= Vstandard_translation_table_for_encode
;
5526 translation_table
= CODING_ATTR_DECODE_TBL (attrs
),
5527 standard
= Vstandard_translation_table_for_decode
;
5528 if (NILP (translation_table
))
5530 if (SYMBOLP (translation_table
))
5531 translation_table
= Fget (translation_table
, Qtranslation_table
);
5532 else if (CONSP (translation_table
))
5536 translation_table
= Fcopy_sequence (translation_table
);
5537 for (val
= translation_table
; CONSP (val
); val
= XCDR (val
))
5538 if (SYMBOLP (XCAR (val
)))
5539 XSETCAR (val
, Fget (XCAR (val
), Qtranslation_table
));
5541 if (! NILP (standard
))
5543 if (CONSP (translation_table
))
5544 translation_table
= nconc2 (translation_table
, Fcons (standard
, Qnil
));
5546 translation_table
= Fcons (translation_table
, Fcons (standard
, Qnil
));
5548 return translation_table
;
5553 translate_chars (coding
, table
)
5554 struct coding_system
*coding
;
5557 int *charbuf
= coding
->charbuf
;
5558 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5561 if (coding
->chars_at_source
)
5564 while (charbuf
< charbuf_end
)
5570 *charbuf
++ = translate_char (table
, c
);
5575 produce_chars (coding
)
5576 struct coding_system
*coding
;
5578 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5579 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5581 int produced_chars
= 0;
5583 if (! coding
->chars_at_source
)
5585 /* Characters are in coding->charbuf. */
5586 int *buf
= coding
->charbuf
;
5587 int *buf_end
= buf
+ coding
->charbuf_used
;
5588 unsigned char *adjusted_dst_end
;
5590 if (BUFFERP (coding
->src_object
)
5591 && EQ (coding
->src_object
, coding
->dst_object
))
5592 dst_end
= ((unsigned char *) coding
->source
) + coding
->consumed
;
5593 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5595 while (buf
< buf_end
)
5599 if (dst
>= adjusted_dst_end
)
5601 dst
= alloc_destination (coding
,
5602 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5604 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5605 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5609 if (coding
->dst_multibyte
5610 || ! CHAR_BYTE8_P (c
))
5611 CHAR_STRING_ADVANCE (c
, dst
);
5613 *dst
++ = CHAR_TO_BYTE8 (c
);
5617 /* This is an annotation datum. (-C) is the length of
5624 const unsigned char *src
= coding
->source
;
5625 const unsigned char *src_end
= src
+ coding
->src_bytes
;
5626 Lisp_Object eol_type
;
5628 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5630 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5632 if (coding
->src_multibyte
)
5639 const unsigned char *src_base
= src
;
5645 if (EQ (eol_type
, Qdos
))
5649 record_conversion_result
5650 (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
5651 goto no_more_source
;
5656 else if (EQ (eol_type
, Qmac
))
5661 coding
->consumed
= src
- coding
->source
;
5663 if (EQ (coding
->src_object
, coding
->dst_object
))
5664 dst_end
= (unsigned char *) src
;
5667 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5669 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5670 coding_set_source (coding
);
5671 src
= coding
->source
+ coding
->consumed
;
5672 src_end
= coding
->source
+ coding
->src_bytes
;
5682 while (src
< src_end
)
5689 if (EQ (eol_type
, Qdos
))
5695 else if (EQ (eol_type
, Qmac
))
5698 if (dst
>= dst_end
- 1)
5700 coding
->consumed
= src
- coding
->source
;
5702 if (EQ (coding
->src_object
, coding
->dst_object
))
5703 dst_end
= (unsigned char *) src
;
5704 if (dst
>= dst_end
- 1)
5706 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5708 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5709 coding_set_source (coding
);
5710 src
= coding
->source
+ coding
->consumed
;
5711 src_end
= coding
->source
+ coding
->src_bytes
;
5719 if (!EQ (coding
->src_object
, coding
->dst_object
))
5721 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5725 EMACS_INT offset
= src
- coding
->source
;
5727 dst
= alloc_destination (coding
, require
, dst
);
5728 coding_set_source (coding
);
5729 src
= coding
->source
+ offset
;
5730 src_end
= coding
->source
+ coding
->src_bytes
;
5733 produced_chars
= coding
->src_chars
;
5734 while (src
< src_end
)
5740 if (EQ (eol_type
, Qdos
))
5747 else if (EQ (eol_type
, Qmac
))
5753 coding
->consumed
= coding
->src_bytes
;
5754 coding
->consumed_char
= coding
->src_chars
;
5757 produced
= dst
- (coding
->destination
+ coding
->produced
);
5758 if (BUFFERP (coding
->dst_object
))
5759 insert_from_gap (produced_chars
, produced
);
5760 coding
->produced
+= produced
;
5761 coding
->produced_char
+= produced_chars
;
5762 return produced_chars
;
5765 /* Compose text in CODING->dst_object according to the annotation data at
5766 CHARBUF. CHARBUF is an array:
5767 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5771 produce_composition (coding
, charbuf
)
5772 struct coding_system
*coding
;
5777 enum composition_method method
;
5778 Lisp_Object components
;
5781 from
= coding
->dst_pos
+ charbuf
[2];
5782 to
= coding
->dst_pos
+ charbuf
[3];
5783 method
= (enum composition_method
) (charbuf
[4]);
5785 if (method
== COMPOSITION_RELATIVE
)
5789 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5794 for (i
= 0; i
< len
; i
++)
5795 args
[i
] = make_number (charbuf
[i
]);
5796 components
= (method
== COMPOSITION_WITH_ALTCHARS
5797 ? Fstring (len
, args
) : Fvector (len
, args
));
5799 compose_text (from
, to
, components
, Qnil
, coding
->dst_object
);
5803 /* Put `charset' property on text in CODING->dst_object according to
5804 the annotation data at CHARBUF. CHARBUF is an array:
5805 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5809 produce_charset (coding
, charbuf
)
5810 struct coding_system
*coding
;
5813 EMACS_INT from
= coding
->dst_pos
+ charbuf
[2];
5814 EMACS_INT to
= coding
->dst_pos
+ charbuf
[3];
5815 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[4]);
5817 Fput_text_property (make_number (from
), make_number (to
),
5818 Qcharset
, CHARSET_NAME (charset
),
5819 coding
->dst_object
);
/* Starting size, counted in `int' elements, of the work area that
   ALLOC_CONVERSION_WORK_AREA sets up. */
5823 #define CHARBUF_SIZE 0x4000
/* Set up CODING->charbuf and CODING->charbuf_size.  The area is
   obtained with alloca, so it belongs to the stack frame of the
   function in which this macro is expanded.  If CODING->charbuf is
   still null afterwards, CODING_RESULT_INSUFFICIENT_MEM is recorded
   and the expanding function returns CODING->result: the macro
   contains a `return' statement, so it can only be used in a function
   for which that is a valid return value. */
5825 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5827 int size = CHARBUF_SIZE;; \
5829 coding->charbuf = NULL; \
5830 while (size > 1024) \
5832 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5833 if (coding->charbuf) \
5837 if (! coding->charbuf) \
5839 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5840 return coding->result; \
5842 coding->charbuf_size = size; \
5847 produce_annotation (coding
)
5848 struct coding_system
*coding
;
5850 int *charbuf
= coding
->charbuf
;
5851 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5853 if (NILP (coding
->dst_object
))
5856 while (charbuf
< charbuf_end
)
5862 int len
= -*charbuf
;
5865 case CODING_ANNOTATE_COMPOSITION_MASK
:
5866 produce_composition (coding
, charbuf
);
5868 case CODING_ANNOTATE_CHARSET_MASK
:
5869 produce_charset (coding
, charbuf
);
5879 /* Decode the data at CODING->src_object into CODING->dst_object.
5880 CODING->src_object is a buffer, a string, or nil.
5881 CODING->dst_object is a buffer.
5883 If CODING->src_object is a buffer, it must be the current buffer.
5884 In this case, if CODING->src_pos is positive, it is a position of
5885 the source text in the buffer, otherwise, the source text is in the
5886 gap area of the buffer, and CODING->src_pos specifies the offset of
5887 the text from GPT (which must be the same as PT). If this is the
5888 same buffer as CODING->dst_object, CODING->src_pos must be negative.
5891 If CODING->src_object is a string, CODING->src_pos is an index to that string.
5894 If CODING->src_object is nil, CODING->source must already point to
5895 the non-relocatable memory area. In this case, CODING->src_pos is
5896 an offset from CODING->source.
5898 The decoded data is inserted at the current point of the buffer CODING->dst_object.
5903 decode_coding (coding
)
5904 struct coding_system
*coding
;
5907 Lisp_Object undo_list
;
5908 Lisp_Object translation_table
;
5910 if (BUFFERP (coding
->src_object
)
5911 && coding
->src_pos
> 0
5912 && coding
->src_pos
< GPT
5913 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5914 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
5917 if (BUFFERP (coding
->dst_object
))
5919 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5920 set_buffer_internal (XBUFFER (coding
->dst_object
));
5922 move_gap_both (PT
, PT_BYTE
);
5923 undo_list
= current_buffer
->undo_list
;
5924 current_buffer
->undo_list
= Qt
;
5927 coding
->consumed
= coding
->consumed_char
= 0;
5928 coding
->produced
= coding
->produced_char
= 0;
5929 coding
->chars_at_source
= 0;
5930 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
5933 ALLOC_CONVERSION_WORK_AREA (coding
);
5935 attrs
= CODING_ID_ATTRS (coding
->id
);
5936 translation_table
= get_translation_table (attrs
, 0);
5940 coding_set_source (coding
);
5941 coding
->annotated
= 0;
5942 (*(coding
->decoder
)) (coding
);
5943 if (!NILP (translation_table
))
5944 translate_chars (coding
, translation_table
);
5945 coding_set_destination (coding
);
5946 produce_chars (coding
);
5947 if (coding
->annotated
)
5948 produce_annotation (coding
);
5950 while (coding
->consumed
< coding
->src_bytes
5951 && ! coding
->result
);
5953 coding
->carryover_bytes
= 0;
5954 if (coding
->consumed
< coding
->src_bytes
)
5956 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5957 const unsigned char *src
;
5959 coding_set_source (coding
);
5960 coding_set_destination (coding
);
5961 src
= coding
->source
+ coding
->consumed
;
5963 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5965 /* Flush out unprocessed data as binary chars. We are sure
5966 that the number of data is less than the size of
5968 coding
->charbuf_used
= 0;
5969 while (nbytes
-- > 0)
5973 coding
->charbuf
[coding
->charbuf_used
++] = (c
& 0x80 ? - c
: c
);
5975 produce_chars (coding
);
5979 /* Record unprocessed bytes in coding->carryover. We are
5980 sure that the number of data is less than the size of
5981 coding->carryover. */
5982 unsigned char *p
= coding
->carryover
;
5984 coding
->carryover_bytes
= nbytes
;
5985 while (nbytes
-- > 0)
5988 coding
->consumed
= coding
->src_bytes
;
5991 if (BUFFERP (coding
->dst_object
))
5993 current_buffer
->undo_list
= undo_list
;
5994 record_insert (coding
->dst_pos
, coding
->produced_char
);
5996 if (! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5997 decode_eol (coding
);
5998 return coding
->result
;
6002 /* Extract an annotation datum from a composition starting at POS and
6003 ending before LIMIT of CODING->src_object (buffer or string), store
6004 the data in BUF, set *STOP to a starting position of the next
6005 composition (if any) or to LIMIT, and return the address of the
6006 next element of BUF.
6008 If such an annotation is not found, set *STOP to a starting
6009 position of a composition after POS (if any) or to LIMIT, and return BUF.
6013 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
6014 EMACS_INT pos
, limit
;
6015 struct coding_system
*coding
;
6019 EMACS_INT start
, end
;
6022 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
6025 else if (start
> pos
)
6031 /* We found a composition. Store the corresponding
6032 annotation data in BUF. */
6034 enum composition_method method
= COMPOSITION_METHOD (prop
);
6035 int nchars
= COMPOSITION_LENGTH (prop
);
6037 ADD_COMPOSITION_DATA (buf
, 0, nchars
, method
);
6038 if (method
!= COMPOSITION_RELATIVE
)
6040 Lisp_Object components
;
6043 components
= COMPOSITION_COMPONENTS (prop
);
6044 if (VECTORP (components
))
6046 len
= XVECTOR (components
)->size
;
6047 for (i
= 0; i
< len
; i
++)
6048 *buf
++ = XINT (AREF (components
, i
));
6050 else if (STRINGP (components
))
6052 len
= SCHARS (components
);
6056 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
6060 else if (INTEGERP (components
))
6063 *buf
++ = XINT (components
);
6065 else if (CONSP (components
))
6067 for (len
= 0; CONSP (components
);
6068 len
++, components
= XCDR (components
))
6069 *buf
++ = XINT (XCAR (components
));
6077 if (find_composition (end
, limit
, &start
, &end
, &prop
,
6088 /* Extract an annotation datum from a text property `charset' at POS of
6089 CODING->src_object (buffer or string), store the data in BUF, set
6090 *STOP to the position where the value of `charset' property changes
6091 (limiting by LIMIT), and return the address of the next element of BUF.
6094 If the property value is nil, set *STOP to the position where the
6095 property value is non-nil (limiting by LIMIT), and return BUF. */
6098 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
6099 EMACS_INT pos
, limit
;
6100 struct coding_system
*coding
;
6104 Lisp_Object val
, next
;
6107 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
6108 if (! NILP (val
) && CHARSETP (val
))
6109 id
= XINT (CHARSET_SYMBOL_ID (val
));
6112 ADD_CHARSET_DATA (buf
, 0, 0, id
);
6113 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
6115 make_number (limit
));
6116 *stop
= XINT (next
);
6122 consume_chars (coding
)
6123 struct coding_system
*coding
;
6125 int *buf
= coding
->charbuf
;
6126 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
6127 const unsigned char *src
= coding
->source
+ coding
->consumed
;
6128 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
6129 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
6130 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
6131 int multibytep
= coding
->src_multibyte
;
6132 Lisp_Object eol_type
;
6134 EMACS_INT stop
, stop_composition
, stop_charset
;
6136 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
6137 if (VECTORP (eol_type
))
6140 /* Note: composition handling is not yet implemented. */
6141 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6143 if (NILP (coding
->src_object
))
6144 stop
= stop_composition
= stop_charset
= end_pos
;
6147 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
6148 stop
= stop_composition
= pos
;
6150 stop
= stop_composition
= end_pos
;
6151 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
6152 stop
= stop_charset
= pos
;
6154 stop_charset
= end_pos
;
6157 /* Compensate for CRLF and conversion. */
6158 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
6159 while (buf
< buf_end
)
6165 if (pos
== stop_composition
)
6166 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
6167 buf
, &stop_composition
);
6168 if (pos
== stop_charset
)
6169 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
6170 buf
, &stop_charset
);
6171 stop
= (stop_composition
< stop_charset
6172 ? stop_composition
: stop_charset
);
6179 if (! CODING_FOR_UNIBYTE (coding
)
6180 && (bytes
= MULTIBYTE_LENGTH (src
, src_end
)) > 0)
6181 c
= STRING_CHAR_ADVANCE (src
), pos
+= bytes
;
6186 c
= STRING_CHAR_ADVANCE (src
), pos
++;
6187 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
6189 if (! EQ (eol_type
, Qunix
))
6193 if (EQ (eol_type
, Qdos
))
6202 coding
->consumed
= src
- coding
->source
;
6203 coding
->consumed_char
= pos
- coding
->src_pos
;
6204 coding
->charbuf_used
= buf
- coding
->charbuf
;
6205 coding
->chars_at_source
= 0;
6209 /* Encode the text at CODING->src_object into CODING->dst_object.
6210 CODING->src_object is a buffer or a string.
6211 CODING->dst_object is a buffer or nil.
6213 If CODING->src_object is a buffer, it must be the current buffer.
6214 In this case, if CODING->src_pos is positive, it is a position of
6215 the source text in the buffer, otherwise, the source text is in the
6216 gap area of the buffer, and coding->src_pos specifies the offset of
6217 the text from GPT (which must be the same as PT). If this is the
6218 same buffer as CODING->dst_object, CODING->src_pos must be
6219 negative and CODING should not have `pre-write-conversion'.
6221 If CODING->src_object is a string, CODING should not have
6222 `pre-write-conversion'.
6224 If CODING->dst_object is a buffer, the encoded data is inserted at
6225 the current point of that buffer.
6227 If CODING->dst_object is nil, the encoded data is placed at the
6228 memory area specified by CODING->destination. */
/* Visible flow of encode_coding (this listing omits some lines, e.g.
   the opener of the loop closed by `} while' below):
     - look up the attribute vector for CODING->id and the translation
       table obtained with get_translation_table (attrs, 1);
     - if CODING->dst_object is a buffer, make it current and take
       CODING->dst_multibyte from its enable_multibyte_characters;
     - zero the consumed/produced counters and record
       CODING_RESULT_SUCCESS;
     - repeatedly set up the source, consume characters, apply the
       translation table when it is non-nil, set up the destination
       and call CODING->encoder, as long as CODING->consumed_char is
       less than CODING->src_chars;
     - for a buffer destination, call insert_from_gap with the
       produced character and byte counts;
     - return CODING->result.  */
6231 encode_coding (coding
)
6232 struct coding_system
*coding
;
6235 Lisp_Object translation_table
;
6237 attrs
= CODING_ID_ATTRS (coding
->id
);
6238 translation_table
= get_translation_table (attrs
, 1);
6240 if (BUFFERP (coding
->dst_object
))
6242 set_buffer_internal (XBUFFER (coding
->dst_object
));
6243 coding
->dst_multibyte
6244 = ! NILP (current_buffer
->enable_multibyte_characters
);
/* Start from a clean state: nothing consumed, nothing produced.  */
6247 coding
->consumed
= coding
->consumed_char
= 0;
6248 coding
->produced
= coding
->produced_char
= 0;
6249 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
6252 ALLOC_CONVERSION_WORK_AREA (coding
);
6255 coding_set_source (coding
);
6256 consume_chars (coding
);
6258 if (!NILP (translation_table
))
6259 translate_chars (coding
, translation_table
);
6261 coding_set_destination (coding
);
6262 (*(coding
->encoder
)) (coding
);
6263 } while (coding
->consumed_char
< coding
->src_chars
);
6265 if (BUFFERP (coding
->dst_object
))
6266 insert_from_gap (coding
->produced_char
, coding
->produced
);
6268 return (coding
->result
);
6272 /* Name (or base name) of work buffer for code conversion. */
6273 static Lisp_Object Vcode_conversion_workbuf_name
;
6275 /* A working buffer used by the top level conversion. Once it is
6276 created, it is never destroyed. It has the name
6277 Vcode_conversion_workbuf_name. The other working buffers are
6278 destroyed after the use is finished, and their names are modified
6279 versions of Vcode_conversion_workbuf_name. */
6280 static Lisp_Object Vcode_conversion_reused_workbuf
;
6282 /* 1 iff Vcode_conversion_reused_workbuf is already in use. */
/* It is post-incremented by make_conversion_work_buffer and reset to
   0 by code_conversion_restore when the reused buffer is released.  */
6283 static int reused_workbuf_in_use
;
6286 /* Return a working buffer of code conversion. MULTIBYTE specifies the
6287 multibyteness of returning buffer. */
/* Visible flow: reused_workbuf_in_use is tested and post-incremented.
   When it was already nonzero, a buffer is created under a name
   produced by Fgenerate_new_buffer_name from
   Vcode_conversion_workbuf_name.  The second group of assignments
   (presumably the `else' branch; the keyword is not shown in this
   listing) uses Vcode_conversion_workbuf_name itself and remembers
   the buffer in Vcode_conversion_reused_workbuf if that is still nil.
   Afterwards the work buffer is made current just long enough to set
   its undo_list to Qt and its enable_multibyte_characters according
   to MULTIBYTE, and the previously current buffer is restored.  */
6290 make_conversion_work_buffer (multibyte
)
6292 Lisp_Object name
, workbuf
;
6293 struct buffer
*current
;
6295 if (reused_workbuf_in_use
++)
6297 name
= Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name
, Qnil
);
6298 workbuf
= Fget_buffer_create (name
);
6302 name
= Vcode_conversion_workbuf_name
;
6303 workbuf
= Fget_buffer_create (name
);
6304 if (NILP (Vcode_conversion_reused_workbuf
))
6305 Vcode_conversion_reused_workbuf
= workbuf
;
/* Configure the work buffer, then switch back to the caller's buffer.  */
6307 current
= current_buffer
;
6308 set_buffer_internal (XBUFFER (workbuf
));
6310 current_buffer
->undo_list
= Qt
;
6311 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
6312 set_buffer_internal (current
);
/* Unwind handler registered by code_conversion_save.  ARG is a cons
   whose car is the buffer to make current again and whose cdr is a
   work buffer or nil.  A non-nil work buffer is released: the reused
   one is only marked as free (reused_workbuf_in_use = 0); any other
   one is killed if it is still live.  Finally the saved buffer is
   made current.  */
6318 code_conversion_restore (arg
)
6321 Lisp_Object current
, workbuf
;
6323 current
= XCAR (arg
);
6324 workbuf
= XCDR (arg
);
6325 if (! NILP (workbuf
))
6327 if (EQ (workbuf
, Vcode_conversion_reused_workbuf
))
6328 reused_workbuf_in_use
= 0;
6329 else if (! NILP (Fbuffer_live_p (workbuf
)))
6330 Fkill_buffer (workbuf
);
6332 set_buffer_internal (XBUFFER (current
));
/* Arrange for the current buffer to be restored on unwind by pushing
   code_conversion_restore with the cons (current-buffer . WORKBUF).
   WORKBUF starts as Qnil and is replaced by the result of
   make_conversion_work_buffer (MULTIBYTE); presumably that call is
   guarded by WITH_WORK_BUF (the guarding line is not shown in this
   listing).  Callers in this file use the return value as a buffer
   object (e.g. `coding->dst_object = code_conversion_save (1, 1)'),
   so the function presumably returns WORKBUF -- TODO confirm.  */
6337 code_conversion_save (with_work_buf
, multibyte
)
6338 int with_work_buf
, multibyte
;
6340 Lisp_Object workbuf
= Qnil
;
6343 workbuf
= make_conversion_work_buffer (multibyte
);
6344 record_unwind_protect (code_conversion_restore
,
6345 Fcons (Fcurrent_buffer (), workbuf
));
/* Decode CHARS characters / BYTES bytes with CODING, using the
   current buffer as both source and destination.  The source
   position is given as the negative offsets -CHARS / -BYTES and the
   destination position is PT / PT_BYTE.  CODING_MODE_LAST_BLOCK is
   set, detect_coding is run first when CODING requires detection,
   and after decode_coding a non-nil post-read function from the
   coding attributes is called with the number of produced
   characters.  The specpdl depth recorded in COUNT is unwound before
   returning CODING->result.  */
6350 decode_coding_gap (coding
, chars
, bytes
)
6351 struct coding_system
*coding
;
6352 EMACS_INT chars
, bytes
;
6354 int count
= specpdl_ptr
- specpdl
;
6357 code_conversion_save (0, 0);
6359 coding
->src_object
= Fcurrent_buffer ();
6360 coding
->src_chars
= chars
;
6361 coding
->src_bytes
= bytes
;
6362 coding
->src_pos
= -chars
;
6363 coding
->src_pos_byte
= -bytes
;
/* Fewer characters than bytes means the source is multibyte text.  */
6364 coding
->src_multibyte
= chars
< bytes
;
6365 coding
->dst_object
= coding
->src_object
;
6366 coding
->dst_pos
= PT
;
6367 coding
->dst_pos_byte
= PT_BYTE
;
6368 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
6369 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6371 if (CODING_REQUIRE_DETECTION (coding
))
6372 detect_coding (coding
);
6374 decode_coding (coding
);
6376 attrs
= CODING_ID_ATTRS (coding
->id
);
6377 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
/* Remember the buffer end so that any size change made by the
   post-read function can be added to the produced counts below.  */
6379 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6382 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6383 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6384 make_number (coding
->produced_char
));
6386 coding
->produced_char
+= Z
- prev_Z
;
6387 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6390 unbind_to (count
, Qnil
);
6391 return coding
->result
;
/* Encode CHARS characters / BYTES bytes with CODING, using the
   current buffer as both source and destination.  The source
   position is given as the negative offsets -CHARS / -BYTES and the
   destination position is PT / PT_BYTE.  Unlike decode_coding_gap,
   the visible code sets neither dst_multibyte nor
   CODING_MODE_LAST_BLOCK and runs no detection or post-processing.
   The specpdl depth recorded in COUNT is unwound before returning
   CODING->result.  */
6395 encode_coding_gap (coding
, chars
, bytes
)
6396 struct coding_system
*coding
;
6397 EMACS_INT chars
, bytes
;
6399 int count
= specpdl_ptr
- specpdl
;
6401 code_conversion_save (0, 0);
6403 coding
->src_object
= Fcurrent_buffer ();
6404 coding
->src_chars
= chars
;
6405 coding
->src_bytes
= bytes
;
6406 coding
->src_pos
= -chars
;
6407 coding
->src_pos_byte
= -bytes
;
6408 coding
->src_multibyte
= chars
< bytes
;
6409 coding
->dst_object
= coding
->src_object
;
6410 coding
->dst_pos
= PT
;
6411 coding
->dst_pos_byte
= PT_BYTE
;
6413 encode_coding (coding
);
6415 unbind_to (count
, Qnil
);
6416 return coding
->result
;
6420 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6421 SRC_OBJECT into DST_OBJECT by coding context CODING.
6423 SRC_OBJECT is a buffer, a string, or Qnil.
6425 If it is a buffer, the text is at point of the buffer. FROM and TO
6426 are positions in the buffer.
6428 If it is a string, the text is at the beginning of the string.
6429 FROM and TO are indices to the string.
6431 If it is nil, the text is at coding->source. FROM and TO are
6432 indices to coding->source.
6434 DST_OBJECT is a buffer, Qt, or Qnil.
6436 If it is a buffer, the decoded text is inserted at point of the
6437 buffer. If the buffer is the same as SRC_OBJECT, the source text
6440 If it is Qt, a string is made from the decoded text, and
6441 set in CODING->dst_object.
6443 If it is Qnil, the decoded text is stored at CODING->destination.
6444 The caller must allocate CODING->dst_bytes bytes at
6445 CODING->destination by xmalloc. If the decoded text is longer than
6446 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
/* Outline of the visible code (several lines, including some braces,
   `else' keywords and declarations, are missing from this listing):

   1. Record the source description in CODING.  For a string source
      the position is FROM/FROM_BYTE.  For a buffer source the buffer
      is made current and the gap moved to FROM; when source and
      destination are the same buffer, point is saved, the source
      range is deleted with del_range_both and the source position
      becomes the negative offsets -CHARS/-BYTES.
   2. Run detect_coding if CODING requires detection, then fetch the
      attributes.
   3. Choose the destination: a work buffer from code_conversion_save
      (1, 1) when DST_OBJECT is Qt, or when it is nil and a post-read
      function exists; DST_OBJECT itself when it is a buffer; Qnil in
      the remaining case.
   4. Call decode_coding, then the post-read function if any, adding
      the resulting change of Z/Z_BYTE to the produced counts.
   5. Deliver the result: a string via Fbuffer_string for Qt, or a
      copy of the work buffer text into DESTINATION (grown with
      xrealloc when DST_BYTES is smaller than CODING->produced) for
      nil.
   6. When point was saved in step 1, restore it, adjusted for the
      size change of the replaced text.
   7. Unwind to COUNT with CODING->dst_object as the value.  */
6450 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6452 struct coding_system
*coding
;
6453 Lisp_Object src_object
;
6454 EMACS_INT from
, from_byte
, to
, to_byte
;
6455 Lisp_Object dst_object
;
6457 int count
= specpdl_ptr
- specpdl
;
6458 unsigned char *destination
;
6459 EMACS_INT dst_bytes
;
6460 EMACS_INT chars
= to
- from
;
6461 EMACS_INT bytes
= to_byte
- from_byte
;
/* saved_pt stays -1 unless source and destination are the same buffer.  */
6464 int saved_pt
= -1, saved_pt_byte
;
6466 buffer
= Fcurrent_buffer ();
6468 if (NILP (dst_object
))
6470 destination
= coding
->destination
;
6471 dst_bytes
= coding
->dst_bytes
;
6474 coding
->src_object
= src_object
;
6475 coding
->src_chars
= chars
;
6476 coding
->src_bytes
= bytes
;
6477 coding
->src_multibyte
= chars
< bytes
;
6479 if (STRINGP (src_object
))
6481 coding
->src_pos
= from
;
6482 coding
->src_pos_byte
= from_byte
;
6484 else if (BUFFERP (src_object
))
6486 set_buffer_internal (XBUFFER (src_object
));
6488 move_gap_both (from
, from_byte
);
6489 if (EQ (src_object
, dst_object
))
6491 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6492 TEMP_SET_PT_BOTH (from
, from_byte
);
6493 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6494 coding
->src_pos
= -chars
;
6495 coding
->src_pos_byte
= -bytes
;
6499 coding
->src_pos
= from
;
6500 coding
->src_pos_byte
= from_byte
;
6504 if (CODING_REQUIRE_DETECTION (coding
))
6505 detect_coding (coding
);
6506 attrs
= CODING_ID_ATTRS (coding
->id
);
/* A post-read function operates on buffer text, so a work buffer is
   also used when the caller asked for raw memory output (nil).  */
6508 if (EQ (dst_object
, Qt
)
6509 || (! NILP (CODING_ATTR_POST_READ (attrs
))
6510 && NILP (dst_object
)))
6512 coding
->dst_object
= code_conversion_save (1, 1);
6513 coding
->dst_pos
= BEG
;
6514 coding
->dst_pos_byte
= BEG_BYTE
;
6515 coding
->dst_multibyte
= 1;
6517 else if (BUFFERP (dst_object
))
6519 code_conversion_save (0, 0);
6520 coding
->dst_object
= dst_object
;
6521 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6522 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6523 coding
->dst_multibyte
6524 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6528 code_conversion_save (0, 0);
6529 coding
->dst_object
= Qnil
;
6530 coding
->dst_multibyte
= 1;
6533 decode_coding (coding
);
6535 if (BUFFERP (coding
->dst_object
))
6536 set_buffer_internal (XBUFFER (coding
->dst_object
));
6538 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6540 struct gcpro gcpro1
, gcpro2
;
6541 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6544 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6545 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6546 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6547 make_number (coding
->produced_char
));
6550 coding
->produced_char
+= Z
- prev_Z
;
6551 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6554 if (EQ (dst_object
, Qt
))
6556 coding
->dst_object
= Fbuffer_string ();
6558 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6560 set_buffer_internal (XBUFFER (coding
->dst_object
));
6561 if (dst_bytes
< coding
->produced
)
6564 = (unsigned char *) xrealloc (destination
, coding
->produced
);
/* NOTE(review): the condition guarding the following error path is
   not shown in this listing; presumably it is a failed reallocation.  */
6567 record_conversion_result (coding
,
6568 CODING_RESULT_INSUFFICIENT_DST
);
6569 unbind_to (count
, Qnil
);
/* Make the produced text contiguous before copying it out.  */
6572 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6573 move_gap_both (BEGV
, BEGV_BYTE
);
6574 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6575 coding
->destination
= destination
;
6581 /* This is the case of:
6582 (BUFFERP (src_object) && EQ (src_object, dst_object))
6583 As we have moved PT while replacing the original buffer
6584 contents, we must recover it now. */
6585 set_buffer_internal (XBUFFER (src_object
));
6586 if (saved_pt
< from
)
6587 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6588 else if (saved_pt
< from
+ chars
)
6589 TEMP_SET_PT_BOTH (from
, from_byte
);
6590 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6591 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6592 saved_pt_byte
+ (coding
->produced
- bytes
));
/* Remaining case (unibyte buffer): the character position is shifted
   by the byte delta, the same amount as the byte position.  */
6594 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6595 saved_pt_byte
+ (coding
->produced
- bytes
));
6598 unbind_to (count
, coding
->dst_object
);
/* Outline of the visible code (several lines, including some braces,
   `else' keywords and declarations, are missing from this listing):

   1. Record the source description in CODING and fetch the coding
      attributes.
   2. If the attributes have a pre-write function, copy the source
      text into a work buffer obtained from code_conversion_save,
      delete the source range first when source and destination are
      the same buffer, call the function with BEG and Z, and encode
      from whatever buffer is current afterwards.
   3. Otherwise use the source in place: a string at FROM/FROM_BYTE;
      a buffer at FROM/FROM_BYTE with the gap moved out of the range;
      or, when source and destination are the same buffer, the value
      returned by del_range_1 at position 0.
   4. Choose the destination: DST_OBJECT when it is a buffer (at FROM
      when it is also the source, else at that buffer's point); a
      freshly xmalloc'ed area of at least one byte when DST_OBJECT is
      Qt; Qnil in the remaining case.
   5. Call encode_coding.  For Qt, turn the result into a string and
      free the temporary area.
   6. When point was saved, restore it, adjusted for the size change
      of the replaced text, and unwind to COUNT.  */
6603 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6605 struct coding_system
*coding
;
6606 Lisp_Object src_object
;
6607 EMACS_INT from
, from_byte
, to
, to_byte
;
6608 Lisp_Object dst_object
;
6610 int count
= specpdl_ptr
- specpdl
;
6611 EMACS_INT chars
= to
- from
;
6612 EMACS_INT bytes
= to_byte
- from_byte
;
/* saved_pt stays -1 unless source and destination are the same buffer.  */
6615 int saved_pt
= -1, saved_pt_byte
;
6617 buffer
= Fcurrent_buffer ();
6619 coding
->src_object
= src_object
;
6620 coding
->src_chars
= chars
;
6621 coding
->src_bytes
= bytes
;
6622 coding
->src_multibyte
= chars
< bytes
;
6624 attrs
= CODING_ID_ATTRS (coding
->id
);
6626 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6628 coding
->src_object
= code_conversion_save (1, coding
->src_multibyte
);
6629 set_buffer_internal (XBUFFER (coding
->src_object
));
6630 if (STRINGP (src_object
))
6631 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6632 else if (BUFFERP (src_object
))
6633 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6635 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
6637 if (EQ (src_object
, dst_object
))
6639 set_buffer_internal (XBUFFER (src_object
));
6640 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6641 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6642 set_buffer_internal (XBUFFER (coding
->src_object
));
6645 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6646 make_number (BEG
), make_number (Z
));
/* The source is re-read from the buffer that is current after the
   pre-write call, over its whole BEG..Z range.  */
6647 coding
->src_object
= Fcurrent_buffer ();
6649 move_gap_both (BEG
, BEG_BYTE
);
6650 coding
->src_chars
= Z
- BEG
;
6651 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6652 coding
->src_pos
= BEG
;
6653 coding
->src_pos_byte
= BEG_BYTE
;
6654 coding
->src_multibyte
= Z
< Z_BYTE
;
6656 else if (STRINGP (src_object
))
6658 code_conversion_save (0, 0);
6659 coding
->src_pos
= from
;
6660 coding
->src_pos_byte
= from_byte
;
6662 else if (BUFFERP (src_object
))
6664 code_conversion_save (0, 0);
6665 set_buffer_internal (XBUFFER (src_object
));
6666 if (EQ (src_object
, dst_object
))
6668 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6669 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
6670 coding
->src_pos
= 0;
6671 coding
->src_pos_byte
= 0;
6675 if (from
< GPT
&& to
>= GPT
)
6676 move_gap_both (from
, from_byte
);
6677 coding
->src_pos
= from
;
6678 coding
->src_pos_byte
= from_byte
;
6682 code_conversion_save (0, 0);
6684 if (BUFFERP (dst_object
))
6686 coding
->dst_object
= dst_object
;
6687 if (EQ (src_object
, dst_object
))
6689 coding
->dst_pos
= from
;
6690 coding
->dst_pos_byte
= from_byte
;
6694 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6695 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6697 coding
->dst_multibyte
6698 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6700 else if (EQ (dst_object
, Qt
))
6702 coding
->dst_object
= Qnil
;
/* Initial size guess: one byte per source character, never zero.  */
6703 coding
->dst_bytes
= coding
->src_chars
;
6704 if (coding
->dst_bytes
== 0)
6705 coding
->dst_bytes
= 1;
6706 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6707 coding
->dst_multibyte
= 0;
6711 coding
->dst_object
= Qnil
;
6712 coding
->dst_multibyte
= 0;
6715 encode_coding (coding
);
6717 if (EQ (dst_object
, Qt
))
6719 if (BUFFERP (coding
->dst_object
))
6720 coding
->dst_object
= Fbuffer_string ();
6724 = make_unibyte_string ((char *) coding
->destination
,
6726 xfree (coding
->destination
);
6732 /* This is the case of:
6733 (BUFFERP (src_object) && EQ (src_object, dst_object))
6734 As we have moved PT while replacing the original buffer
6735 contents, we must recover it now. */
6736 set_buffer_internal (XBUFFER (src_object
));
6737 if (saved_pt
< from
)
6738 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6739 else if (saved_pt
< from
+ chars
)
6740 TEMP_SET_PT_BOTH (from
, from_byte
);
6741 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6742 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6743 saved_pt_byte
+ (coding
->produced
- bytes
));
/* Remaining case (unibyte buffer): the character position is shifted
   by the byte delta, the same amount as the byte position.  */
6745 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6746 saved_pt_byte
+ (coding
->produced
- bytes
));
6749 unbind_to (count
, Qnil
);
/* Return the name (CODING_ID_NAME) of the coding system registered
   for the category stored first in coding_priorities.  */
6754 preferred_coding_system ()
6756 int id
= coding_categories
[coding_priorities
[0]].id
;
6758 return CODING_ID_NAME (id
);
6763 /*** 11. Emacs Lisp library functions ***/
/* Lisp predicate: Qt when OBJ is nil or satisfies CODING_SYSTEM_P,
   Qnil otherwise.  */
6765 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6766 doc
: /* Return t if OBJECT is nil or a coding-system.
6767 See the documentation of `define-coding-system' for information
6768 about coding-system objects. */)
6772 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
/* Prompt with Fcompleting_read over Vcoding_system_alist (match
   required, history in Qcoding_system_history).  The `while' line
   below tests for an empty answer; presumably it closes a do-loop
   that repeats the prompt (the opener is not shown in this listing).
   The answer is interned and returned as a symbol.  */
6775 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6776 Sread_non_nil_coding_system
, 1, 1, 0,
6777 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6784 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6785 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6787 while (SCHARS (val
) == 0);
6788 return (Fintern (val
, Qnil
));
/* Prompt once with Fcompleting_read over Vcoding_system_alist.  A
   symbol DEFAULT-CODING-SYSTEM is first replaced by its name string
   so that it can be passed as the default.  An empty answer yields
   Qnil; anything else is interned and returned as a symbol.  */
6791 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6792 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6793 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6794 (prompt
, default_coding_system
)
6795 Lisp_Object prompt
, default_coding_system
;
6798 if (SYMBOLP (default_coding_system
))
6799 XSETSTRING (default_coding_system
, XPNTR (SYMBOL_NAME (default_coding_system
)));
6800 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6801 Qt
, Qnil
, Qcoding_system_history
,
6802 default_coding_system
, Qnil
);
6803 return (SCHARS (val
) == 0 ? Qnil
: Fintern (val
, Qnil
));
/* Validate CODING-SYSTEM: it must be a symbol (CHECK_SYMBOL), and it
   is returned unchanged when Fcoding_system_p accepts it.  Otherwise
   Qcoding_system_error is signalled with the offending value as the
   only datum.  */
6806 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6808 doc
: /* Check validity of CODING-SYSTEM.
6809 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6811 Lisp_Object coding_system
;
6813 CHECK_SYMBOL (coding_system
);
6814 if (!NILP (Fcoding_system_p (coding_system
)))
6815 return coding_system
;
6817 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6821 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6822 HIGHEST is nonzero, return the coding system of the highest
6823 priority among the detected coding systems. Otherwise return a
6824 list of detected coding systems sorted by their priorities. If
6825 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6826 multibyte form but contains only ASCII and eight-bit chars.
6827 Otherwise, the bytes are raw bytes.
6829 CODING-SYSTEM controls the detection as below:
6831 If it is nil, detect both text-format and eol-format. If the
6832 text-format part of CODING-SYSTEM is already specified
6833 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6834 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6835 detect only text-format. */
/* Outline of the visible code (many lines, including braces, `else'
   keywords, `break'/`continue' statements and some declarations, are
   missing from this listing, so the exact nesting cannot be read off
   here):

   - A nil CODING_SYSTEM is treated as Qundecided.  A local `coding'
     structure is set up for it and pointed at SRC.
   - Text-format phase, chosen by the category of the base coding
     system:
       * coding_category_undecided: skip leading bytes until one with
         the high bit set or one of a few control codes is seen,
         record the skipped length in coding.head_ascii, then run the
         per-category detectors in priority order and build VAL, a
         list of coding-system ids.
       * coding_category_utf_16_auto: run detect_coding_utf_16 and
         pick one of the four UTF-16 categories from the found and
         rejected masks.
       * otherwise: VAL holds just the id of `coding'.
   - Eol-format phase: when the eol type is a vector (i.e. still
     undecided), call detect_eol separately for normal text and for
     UTF-16 BE/LE text; otherwise derive the EOL_SEEN_* value from the
     fixed eol type.
   - Each id in VAL is then replaced by a coding-system name: the
     matching subsidiary from the eol-type vector when one eol format
     was seen, else the base name.
   - Return the first element when HIGHEST is nonzero, else the whole
     list.  */
6838 detect_coding_system (src
, src_chars
, src_bytes
, highest
, multibytep
,
6840 const unsigned char *src
;
6841 int src_chars
, src_bytes
, highest
;
6843 Lisp_Object coding_system
;
6845 const unsigned char *src_end
= src
+ src_bytes
;
6846 Lisp_Object attrs
, eol_type
;
6848 struct coding_system coding
;
6850 struct coding_detection_info detect_info
;
6851 enum coding_category base_category
;
6853 if (NILP (coding_system
))
6854 coding_system
= Qundecided
;
6855 setup_coding_system (coding_system
, &coding
);
6856 attrs
= CODING_ID_ATTRS (coding
.id
);
6857 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6858 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
6860 coding
.source
= src
;
6861 coding
.src_chars
= src_chars
;
6862 coding
.src_bytes
= src_bytes
;
6863 coding
.src_multibyte
= multibytep
;
6864 coding
.consumed
= 0;
6865 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6867 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
6869 /* At first, detect text-format if necessary. */
6870 base_category
= XINT (CODING_ATTR_CATEGORY (attrs
));
6871 if (base_category
== coding_category_undecided
)
6873 enum coding_category category
;
6874 struct coding_system
*this;
6877 /* Skip all ASCII bytes except for a few ISO2022 controls. */
6878 for (i
= 0; src
< src_end
; i
++, src
++)
6881 if (c
& 0x80 || (c
< 0x20 && (c
== 0
6882 || c
== ISO_CODE_ESC
6884 || c
== ISO_CODE_SO
)))
6887 coding
.head_ascii
= src
- coding
.source
;
6890 for (i
= 0; i
< coding_category_raw_text
; i
++)
6892 category
= coding_priorities
[i
];
6893 this = coding_categories
+ category
;
6897 /* No coding system of this category is defined. */
6898 detect_info
.rejected
|= (1 << category
);
6900 else if (category
>= coding_category_raw_text
)
6902 else if (detect_info
.checked
& (1 << category
))
6905 && (detect_info
.found
& (1 << category
)))
6910 if ((*(this->detector
)) (&coding
, &detect_info
)
6912 && (detect_info
.found
& (1 << category
)))
6914 if (category
== coding_category_utf_16_auto
)
6916 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
6917 category
= coding_category_utf_16_le
;
6919 category
= coding_category_utf_16_be
;
/* Every category was rejected: fall back to raw-text.  */
6926 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
6928 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
6929 id
= coding_categories
[coding_category_raw_text
].id
;
6930 val
= Fcons (make_number (id
), Qnil
);
/* Nothing was rejected and nothing was found: report `undecided'.  */
6932 else if (! detect_info
.rejected
&& ! detect_info
.found
)
6934 detect_info
.found
= CATEGORY_MASK_ANY
;
6935 id
= coding_categories
[coding_category_undecided
].id
;
6936 val
= Fcons (make_number (id
), Qnil
);
6940 if (detect_info
.found
)
6942 detect_info
.found
= 1 << category
;
6943 val
= Fcons (make_number (this->id
), Qnil
);
6946 for (i
= 0; i
< coding_category_raw_text
; i
++)
6947 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
6949 detect_info
.found
= 1 << coding_priorities
[i
];
6950 id
= coding_categories
[coding_priorities
[i
]].id
;
6951 val
= Fcons (make_number (id
), Qnil
);
6957 int mask
= detect_info
.rejected
| detect_info
.found
;
/* Both loops below walk the priorities from lowest to highest and
   push onto the front of VAL.  The first handles categories that are
   neither rejected nor found, the second the found ones.  */
6961 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6963 category
= coding_priorities
[i
];
6964 if (! (mask
& (1 << category
)))
6966 found
|= 1 << category
;
6967 id
= coding_categories
[category
].id
;
6968 val
= Fcons (make_number (id
), val
);
6971 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6973 category
= coding_priorities
[i
];
6974 if (detect_info
.found
& (1 << category
))
6976 id
= coding_categories
[category
].id
;
6977 val
= Fcons (make_number (id
), val
);
6980 detect_info
.found
|= found
;
6983 else if (base_category
== coding_category_utf_16_auto
)
6985 if (detect_coding_utf_16 (&coding
, &detect_info
))
6987 enum coding_category category
;
6988 struct coding_system
*this;
6990 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
6991 this = coding_categories
+ coding_category_utf_16_le
;
6992 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
6993 this = coding_categories
+ coding_category_utf_16_be
;
6994 else if (detect_info
.rejected
& CATEGORY_MASK_UTF_16_LE_NOSIG
)
6995 this = coding_categories
+ coding_category_utf_16_be_nosig
;
6997 this = coding_categories
+ coding_category_utf_16_le_nosig
;
6998 val
= Fcons (make_number (this->id
), Qnil
);
7003 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
7004 val
= Fcons (make_number (coding
.id
), Qnil
);
7007 /* Then, detect eol-format if necessary. */
7009 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
;
7012 if (VECTORP (eol_type
))
7014 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
7015 normal_eol
= detect_eol (coding
.source
, src_bytes
,
7016 coding_category_raw_text
);
7017 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
7018 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
7019 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
7020 coding_category_utf_16_be
);
7021 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
7022 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
7023 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
7024 coding_category_utf_16_le
);
7028 if (EQ (eol_type
, Qunix
))
7029 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
7030 else if (EQ (eol_type
, Qdos
))
7031 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
7033 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
/* Replace each coding-system id in VAL by a coding-system name.  */
7036 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
7038 enum coding_category category
;
7041 id
= XINT (XCAR (tail
));
7042 attrs
= CODING_ID_ATTRS (id
);
7043 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7044 eol_type
= CODING_ID_EOL_TYPE (id
);
7045 if (VECTORP (eol_type
))
7047 if (category
== coding_category_utf_16_be
7048 || category
== coding_category_utf_16_be_nosig
)
7049 this_eol
= utf_16_be_eol
;
7050 else if (category
== coding_category_utf_16_le
7051 || category
== coding_category_utf_16_le_nosig
)
7052 this_eol
= utf_16_le_eol
;
7054 this_eol
= normal_eol
;
/* Elements 0, 1 and 2 of the eol-type vector are used for LF, CRLF
   and CR respectively.  */
7056 if (this_eol
== EOL_SEEN_LF
)
7057 XSETCAR (tail
, AREF (eol_type
, 0));
7058 else if (this_eol
== EOL_SEEN_CRLF
)
7059 XSETCAR (tail
, AREF (eol_type
, 1));
7060 else if (this_eol
== EOL_SEEN_CR
)
7061 XSETCAR (tail
, AREF (eol_type
, 2));
7063 XSETCAR (tail
, CODING_ID_NAME (id
));
7066 XSETCAR (tail
, CODING_ID_NAME (id
));
7070 return (highest
? XCAR (val
) : val
);
/* Lisp entry point for detection on buffer text.  START and END are
   coerced to numbers and validated, converted to byte positions, and
   when the region spans the gap the gap is moved to its end so that
   the text is contiguous at BYTE_POS_ADDR (from_byte).  The work is
   done by detect_coding_system; the argument line between the byte
   count and the multibyte flag is not shown in this listing.  */
7074 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
7076 doc
: /* Detect coding system of the text in the region between START and END.
7077 Return a list of possible coding systems ordered by priority.
7079 If only ASCII characters are found, it returns a list of single element
7080 `undecided' or its subsidiary coding system according to a detected
7083 If optional argument HIGHEST is non-nil, return the coding system of
7084 highest priority. */)
7085 (start
, end
, highest
)
7086 Lisp_Object start
, end
, highest
;
7089 int from_byte
, to_byte
;
7091 CHECK_NUMBER_COERCE_MARKER (start
);
7092 CHECK_NUMBER_COERCE_MARKER (end
);
7094 validate_region (&start
, &end
);
7095 from
= XINT (start
), to
= XINT (end
);
7096 from_byte
= CHAR_TO_BYTE (from
);
7097 to_byte
= CHAR_TO_BYTE (to
);
7099 if (from
< GPT
&& to
>= GPT
)
7100 move_gap_both (to
, to_byte
);
7102 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
7103 to
- from
, to_byte
- from_byte
,
7105 !NILP (current_buffer
7106 ->enable_multibyte_characters
),
/* Lisp entry point for detection on a string.  After CHECK_STRING,
   the string's data, character count, byte count, the HIGHEST flag
   and its multibyteness are handed to detect_coding_system; the
   final argument line is not shown in this listing.  */
7110 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
7112 doc
: /* Detect coding system of the text in STRING.
7113 Return a list of possible coding systems ordered by priority.
7115 If only ASCII characters are found, it returns a list of single element
7116 `undecided' or its subsidiary coding system according to a detected
7119 If optional argument HIGHEST is non-nil, return the coding system of
7120 highest priority. */)
7122 Lisp_Object string
, highest
;
7124 CHECK_STRING (string
);
7126 return detect_coding_system (SDATA (string
),
7127 SCHARS (string
), SBYTES (string
),
7128 !NILP (highest
), STRING_MULTIBYTE (string
),
/* Return nonzero if character C can be encoded by the coding system
   whose attribute vector is ATTRS.  C is first mapped through the
   translation table stored in ATTRS, if any, and then tested with
   CHAR_CHARSET_P against each charset in the attribute's charset
   list.  The result is whether TAIL is still non-nil after the
   loop, so the loop presumably stops at the first matching charset
   (the statement following the test is not shown in this listing).  */
7134 char_encodable_p (c
, attrs
)
7139 struct charset
*charset
;
7140 Lisp_Object translation_table
;
7142 translation_table
= CODING_ATTR_TRANS_TBL (attrs
);
7143 if (! NILP (translation_table
))
7144 c
= translate_char (translation_table
, c
);
7145 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
7146 CONSP (tail
); tail
= XCDR (tail
))
7148 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7149 if (CHAR_CHARSET_P (c
, charset
))
7152 return (! NILP (tail
));
7156 /* Return a list of coding systems that safely encode the text between
7157 START and END. If EXCLUDE is non-nil, it is a list of coding
7158 systems not to check. The returned list doesn't contain any such
7159 coding systems. In any case, if the text contains only ASCII or is
7160 unibyte, return t. */
/* Outline of the visible code (several lines, including braces,
   `else' keywords and early returns, are missing from this listing):

   - Work out the byte range to scan.  For a string START, the test
     below singles out unibyte strings and strings whose character
     and byte counts are equal.  For buffer positions, the arguments
     are range-checked, a unibyte buffer and an all-single-byte region
     are singled out, and the gap is moved to whichever end of the
     region is nearer.
   - Collect into coding_attrs_list the attribute vectors of all base
     coding systems in Vcoding_system_list (name equal to its own
     base name, type not `undecided'), storing each one's translation
     table in its coding_attr_trans_tbl slot.
   - Trim ASCII bytes from both ends of the range, then decode each
     remaining character and drop from the list every entry for which
     char_encodable_p fails.  An entry is dropped by splicing the
     following cell into its place, or by setting its car to nil when
     it is the last cell.
   - Collect the base names of the surviving entries and return them.  */
7162 DEFUN ("find-coding-systems-region-internal",
7163 Ffind_coding_systems_region_internal
,
7164 Sfind_coding_systems_region_internal
, 2, 3, 0,
7165 doc
: /* Internal use only. */)
7166 (start
, end
, exclude
)
7167 Lisp_Object start
, end
, exclude
;
7169 Lisp_Object coding_attrs_list
, safe_codings
;
7170 EMACS_INT start_byte
, end_byte
;
7171 const unsigned char *p
, *pbeg
, *pend
;
7173 Lisp_Object tail
, elt
;
7175 if (STRINGP (start
))
7177 if (!STRING_MULTIBYTE (start
)
7178 || SCHARS (start
) == SBYTES (start
))
7181 end_byte
= SBYTES (start
);
7185 CHECK_NUMBER_COERCE_MARKER (start
);
7186 CHECK_NUMBER_COERCE_MARKER (end
);
7187 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7188 args_out_of_range (start
, end
);
7189 if (NILP (current_buffer
->enable_multibyte_characters
))
7191 start_byte
= CHAR_TO_BYTE (XINT (start
));
7192 end_byte
= CHAR_TO_BYTE (XINT (end
));
7193 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* The region spans the gap: move the gap to the nearer end.  */
7196 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7198 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7199 move_gap_both (XINT (start
), start_byte
);
7201 move_gap_both (XINT (end
), end_byte
);
7205 coding_attrs_list
= Qnil
;
7206 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7208 || NILP (Fmemq (XCAR (tail
), exclude
)))
7212 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
7213 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
7214 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
7216 ASET (attrs
, coding_attr_trans_tbl
,
7217 get_translation_table (attrs
, 1));
7218 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
7222 if (STRINGP (start
))
7223 p
= pbeg
= SDATA (start
);
7225 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7226 pend
= p
+ (end_byte
- start_byte
);
/* Trim leading and trailing ASCII bytes.  */
7228 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
7229 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7233 if (ASCII_BYTE_P (*p
))
7237 c
= STRING_CHAR_ADVANCE (p
);
7239 charset_map_loaded
= 0;
7240 for (tail
= coding_attrs_list
; CONSP (tail
);)
7245 else if (char_encodable_p (c
, elt
))
7247 else if (CONSP (XCDR (tail
)))
7249 XSETCAR (tail
, XCAR (XCDR (tail
)));
7250 XSETCDR (tail
, XCDR (XCDR (tail
)));
7254 XSETCAR (tail
, Qnil
);
/* When charset_map_loaded was set during the checks above, the text
   pointers are recomputed from their saved offsets; presumably
   loading a charset map can relocate the text -- TODO confirm.  */
7258 if (charset_map_loaded
)
7260 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7262 if (STRINGP (start
))
7263 pbeg
= SDATA (start
);
7265 pbeg
= BYTE_POS_ADDR (start_byte
);
7266 p
= pbeg
+ p_offset
;
7267 pend
= pbeg
+ pend_offset
;
7272 safe_codings
= Qnil
;
7273 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
7274 if (! NILP (XCAR (tail
)))
7275 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
7277 return safe_codings
;
/* Outline of the visible code (the docstring delimiters and several
   other lines are missing from this listing):

   - Set up `coding' from the checked CODING-SYSTEM and read its
     attributes.  A raw-text coding system is singled out first;
     otherwise the ASCII-compatibility flag, charset list and
     translation table are fetched.
   - Buffer case: validate the region, single out a unibyte buffer or
     an all-single-byte region of an ASCII-compatible coding system,
     and take P / PEND from CHAR_POS_ADDR.
   - String case: check STRING and the indices, single out unibyte
     strings, and take P / PEND from the string data.
   - Scan: skip ASCII bytes when the coding system is ASCII
     compatible; for every other character, record its position when
     char_charset finds no charset in the list for the translated
     character.
   - Return the first recorded position when COUNT is nil, else the
     list of positions in ascending order (Fnreverse).  */
7281 DEFUN ("unencodable-char-position", Funencodable_char_position
,
7282 Sunencodable_char_position
, 3, 5, 0,
7284 Return position of first un-encodable character in a region.
7285 START and END specfiy the region and CODING-SYSTEM specifies the
7286 encoding to check. Return nil if CODING-SYSTEM does encode the region.
7288 If optional 4th argument COUNT is non-nil, it specifies at most how
7289 many un-encodable characters to search. In this case, the value is a
7292 If optional 5th argument STRING is non-nil, it is a string to search
7293 for un-encodable characters. In that case, START and END are indexes
7295 (start
, end
, coding_system
, count
, string
)
7296 Lisp_Object start
, end
, coding_system
, count
, string
;
7299 struct coding_system coding
;
7300 Lisp_Object attrs
, charset_list
, translation_table
;
7301 Lisp_Object positions
;
7303 const unsigned char *p
, *stop
, *pend
;
7304 int ascii_compatible
;
7306 setup_coding_system (Fcheck_coding_system (coding_system
), &coding
);
7307 attrs
= CODING_ID_ATTRS (coding
.id
);
7308 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
7310 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
7311 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7312 translation_table
= get_translation_table (attrs
, 1);
7316 validate_region (&start
, &end
);
7317 from
= XINT (start
);
7319 if (NILP (current_buffer
->enable_multibyte_characters
)
7320 || (ascii_compatible
7321 && (to
- from
) == (CHAR_TO_BYTE (to
) - (CHAR_TO_BYTE (from
)))))
7323 p
= CHAR_POS_ADDR (from
);
7324 pend
= CHAR_POS_ADDR (to
);
7325 if (from
< GPT
&& to
>= GPT
)
7332 CHECK_STRING (string
);
7333 CHECK_NATNUM (start
);
7335 from
= XINT (start
);
7338 || to
> SCHARS (string
))
7339 args_out_of_range_3 (string
, start
, end
);
7340 if (! STRING_MULTIBYTE (string
))
7342 p
= SDATA (string
) + string_char_to_byte (string
, from
);
7343 stop
= pend
= SDATA (string
) + string_char_to_byte (string
, to
);
7344 if (ascii_compatible
&& (to
- from
) == (pend
- p
))
7352 CHECK_NATNUM (count
);
7361 if (ascii_compatible
)
7362 while (p
< stop
&& ASCII_BYTE_P (*p
))
7372 c
= STRING_CHAR_ADVANCE (p
);
/* An ASCII character under an ASCII-compatible coding system is
   accepted without consulting the charset list.  */
7373 if (! (ASCII_CHAR_P (c
) && ascii_compatible
)
7374 && ! char_charset (translate_char (translation_table
, c
),
7375 charset_list
, NULL
))
7377 positions
= Fcons (make_number (from
), positions
);
7386 return (NILP (count
) ? Fcar (positions
) : Fnreverse (positions
));
/* Outline of the visible code (several lines, including braces,
   `else' keywords and early returns, are missing from this listing):

   - Work out the byte range to scan, for a string START or for
     buffer positions, moving the gap to the nearer end of the region
     when the region spans it.
   - For each element of CODING-SYSTEM-LIST build an entry
     (NAME ATTRS) in `list', storing the translation table in the
     coding_attr_trans_tbl slot of ATTRS.
   - Trim ASCII bytes from both ends of the range, then decode each
     remaining character and, for every entry whose char_encodable_p
     test fails, push the current position after ATTRS in that entry.
   - Finally rebuild `list' from the entries that collected at least
     one position, as (NAME POS0 POS1 ...) with the positions put
     back in ascending order by Fnreverse.  */
7390 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
7391 Scheck_coding_systems_region
, 3, 3, 0,
7392 doc
: /* Check if the region is encodable by coding systems.
7394 START and END are buffer positions specifying the region.
7395 CODING-SYSTEM-LIST is a list of coding systems to check.
7397 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7398 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
7399 whole region, POS0, POS1, ... are buffer positions where non-encodable
7400 characters are found.
7402 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7405 START may be a string. In that case, check if the string is
7406 encodable, and the value contains indices to the string instead of
7407 buffer positions. END is ignored. */)
7408 (start
, end
, coding_system_list
)
7409 Lisp_Object start
, end
, coding_system_list
;
7412 EMACS_INT start_byte
, end_byte
;
7414 const unsigned char *p
, *pbeg
, *pend
;
7416 Lisp_Object tail
, elt
, attrs
;
7418 if (STRINGP (start
))
/* NOTE(review): the parallel test in
   Ffind_coding_systems_region_internal reads
   `!STRING_MULTIBYTE (start) || SCHARS (start) == SBYTES (start)'.
   Here the operators are `&&' and `!='.  If a unibyte string always
   has equal character and byte counts, this condition can never be
   true and the shortcut for ASCII-only / unibyte strings is lost.
   Confirm against the intended behavior.  */
7420 if (!STRING_MULTIBYTE (start
)
7421 && SCHARS (start
) != SBYTES (start
))
7424 end_byte
= SBYTES (start
);
7429 CHECK_NUMBER_COERCE_MARKER (start
);
7430 CHECK_NUMBER_COERCE_MARKER (end
);
7431 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7432 args_out_of_range (start
, end
);
7433 if (NILP (current_buffer
->enable_multibyte_characters
))
7435 start_byte
= CHAR_TO_BYTE (XINT (start
));
7436 end_byte
= CHAR_TO_BYTE (XINT (end
));
7437 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* The region spans the gap: move the gap to the nearer end.  */
7440 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7442 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7443 move_gap_both (XINT (start
), start_byte
);
7445 move_gap_both (XINT (end
), end_byte
);
7451 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7454 attrs
= AREF (CODING_SYSTEM_SPEC (elt
), 0);
7455 ASET (attrs
, coding_attr_trans_tbl
, get_translation_table (attrs
, 1));
7456 list
= Fcons (Fcons (elt
, Fcons (attrs
, Qnil
)), list
);
7459 if (STRINGP (start
))
7460 p
= pbeg
= SDATA (start
);
7462 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7463 pend
= p
+ (end_byte
- start_byte
);
/* Trim leading ASCII (keeping POS in step) and trailing ASCII.  */
7465 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7466 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7470 if (ASCII_BYTE_P (*p
))
7474 c
= STRING_CHAR_ADVANCE (p
);
7476 charset_map_loaded
= 0;
7477 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
7479 elt
= XCDR (XCAR (tail
));
7480 if (! char_encodable_p (c
, XCAR (elt
)))
7481 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
/* When charset_map_loaded was set during the checks above, the text
   pointers are recomputed from their saved offsets; presumably
   loading a charset map can relocate the text -- TODO confirm.  */
7483 if (charset_map_loaded
)
7485 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7487 if (STRINGP (start
))
7488 pbeg
= SDATA (start
);
7490 pbeg
= BYTE_POS_ADDR (start_byte
);
7491 p
= pbeg
+ p_offset
;
7492 pend
= pbeg
+ pend_offset
;
7500 for (; CONSP (tail
); tail
= XCDR (tail
))
7503 if (CONSP (XCDR (XCDR (elt
))))
7504 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
7513 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7514 Lisp_Object start
, end
, coding_system
, dst_object
;
7515 int encodep
, norecord
;
7517 struct coding_system coding
;
7518 EMACS_INT from
, from_byte
, to
, to_byte
;
7519 Lisp_Object src_object
;
7521 CHECK_NUMBER_COERCE_MARKER (start
);
7522 CHECK_NUMBER_COERCE_MARKER (end
);
7523 if (NILP (coding_system
))
7524 coding_system
= Qno_conversion
;
7526 CHECK_CODING_SYSTEM (coding_system
);
7527 src_object
= Fcurrent_buffer ();
7528 if (NILP (dst_object
))
7529 dst_object
= src_object
;
7530 else if (! EQ (dst_object
, Qt
))
7531 CHECK_BUFFER (dst_object
);
7533 validate_region (&start
, &end
);
7534 from
= XFASTINT (start
);
7535 from_byte
= CHAR_TO_BYTE (from
);
7536 to
= XFASTINT (end
);
7537 to_byte
= CHAR_TO_BYTE (to
);
7539 setup_coding_system (coding_system
, &coding
);
7540 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7543 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7546 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7549 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7551 return (BUFFERP (dst_object
)
7552 ? make_number (coding
.produced_char
)
7553 : coding
.dst_object
);
7557 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7558 3, 4, "r\nzCoding system: ",
7559 doc
: /* Decode the current region from the specified coding system.
7560 When called from a program, takes four arguments:
7561 START, END, CODING-SYSTEM, and DESTINATION.
7562 START and END are buffer positions.
7564 Optional 4th argument DESTINATION specifies where the decoded text goes.
7565 If nil, the region between START and END is replaced by the decoded text.
7566 If a buffer, the decoded text is inserted in that buffer.
7567 If t, the decoded text is returned.
7569 This function sets `last-coding-system-used' to the precise coding system
7570 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7571 not fully specified.)
7572 It returns the length of the decoded text. */)
7573 (start
, end
, coding_system
, destination
)
7574 Lisp_Object start
, end
, coding_system
, destination
;
7576 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
7579 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7580 3, 4, "r\nzCoding system: ",
7581 doc
: /* Encode the current region by specified coding system.
7582 When called from a program, takes three arguments:
7583 START, END, and CODING-SYSTEM. START and END are buffer positions.
7585 Optional 4th argument DESTINATION specifies where the encoded text goes.
7586 If nil, the region between START and END is replaced by the encoded text.
7587 If a buffer, the encoded text is inserted in that buffer.
7588 If t, the encoded text is returned.
7590 This function sets `last-coding-system-used' to the precise coding system
7591 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7592 not fully specified.)
7593 It returns the length of the encoded text. */)
7594 (start
, end
, coding_system
, destination
)
7595 Lisp_Object start
, end
, coding_system
, destination
;
7597 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
/* Encode or decode STRING with CODING_SYSTEM; a nil CODING_SYSTEM is
   replaced by Qno_conversion.  The conversion itself is done by
   encode_coding_object or decode_coding_object (ENCODEP presumably
   selects which one -- the selecting statements are not visible in
   this listing; confirm).  DST_OBJECT must be nil, t or a buffer.
   The value is coding.produced_char as a Lisp integer when DST_OBJECT
   is a buffer, and coding.dst_object otherwise.  */
7601 code_convert_string (string
, coding_system
, dst_object
,
7602 encodep
, nocopy
, norecord
)
7603 Lisp_Object string
, coding_system
, dst_object
;
7604 int encodep
, nocopy
, norecord
;
7606 struct coding_system coding
;
7607 EMACS_INT chars
, bytes
;
7609 CHECK_STRING (string
);
7610 if (NILP (coding_system
))
7613 Vlast_coding_system_used
= Qno_conversion
;
/* NOTE(review): with a nil CODING_SYSTEM and a nil DST_OBJECT this
   returns a copy of STRING when NOCOPY is non-zero and STRING itself
   when it is zero.  The doc strings of the Lisp callers say a non-nil
   NOCOPY means it is OK to return STRING itself, so the two arms of
   the conditional look swapped -- confirm.  */
7614 if (NILP (dst_object
))
7615 return (nocopy
? Fcopy_sequence (string
) : string
);
7618 if (NILP (coding_system
))
7619 coding_system
= Qno_conversion
;
7621 CHECK_CODING_SYSTEM (coding_system
);
7622 if (NILP (dst_object
))
7624 else if (! EQ (dst_object
, Qt
))
7625 CHECK_BUFFER (dst_object
);
7627 setup_coding_system (coding_system
, &coding
);
/* The whole string is handed over in one call, so mark it as the
   last block.  */
7628 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7629 chars
= SCHARS (string
);
7630 bytes
= SBYTES (string
);
7632 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7634 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7636 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7638 return (BUFFERP (dst_object
)
7639 ? make_number (coding
.produced_char
)
7640 : coding
.dst_object
);
7644 /* Encode or decode STRING according to CODING_SYSTEM.
7645 Do not set Vlast_coding_system_used.
7647 This function is called only from macros DECODE_FILE and
7648 ENCODE_FILE, thus we ignore character composition. */
7651 code_convert_string_norecord (string
, coding_system
, encodep
)
7652 Lisp_Object string
, coding_system
;
7655 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
7659 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
7661 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7663 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7664 if the decoding operation is trivial.
7666 Optional fourth arg BUFFER non-nil means that the decoded text is
7667 inserted in BUFFER instead of returned as a string. In this case,
7668 the return value is the length of the decoded text.
7670 This function sets `last-coding-system-used' to the precise coding system
7671 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7672 not fully specified.) */)
7673 (string
, coding_system
, nocopy
, buffer
)
7674 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7676 return code_convert_string (string
, coding_system
, buffer
,
7677 0, ! NILP (nocopy
), 0);
7680 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
7682 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
7684 Optional third arg NOCOPY non-nil means it is OK to return STRING
7685 itself if the encoding operation is trivial.
7687 Optional fourth arg BUFFER non-nil means that the encoded text is
7688 inserted in BUFFER instead of returned as a string. In this case,
7689 the return value is the length of the encoded text.
7691 This function sets `last-coding-system-used' to the precise coding system
7692 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7693 not fully specified.) */)
7694 (string
, coding_system
, nocopy
, buffer
)
7695 Lisp_Object string
, coding_system
, nocopy
, buffer
;
/* Arguments after BUFFER are ENCODEP = 1, NOCOPY, NORECORD = 1.
   NOTE(review): NORECORD is 1 here but 0 in Fdecode_coding_string,
   although the doc string of this function says that
   `last-coding-system-used' is set.  Confirm which is intended.  */
7697 return code_convert_string (string
, coding_system
, buffer
,
7698 1, ! NILP (nocopy
), 1);
7702 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
7703 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
7704 Return the corresponding character. */)
7708 Lisp_Object spec
, attrs
, val
;
7709 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
7712 CHECK_NATNUM (code
);
7713 c
= XFASTINT (code
);
7714 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7715 attrs
= AREF (spec
, 0);
7717 if (ASCII_BYTE_P (c
)
7718 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7721 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7722 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7723 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7724 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7727 charset
= charset_roman
;
7728 else if (c
>= 0xA0 && c
< 0xDF)
7730 charset
= charset_kana
;
7735 int s1
= c
>> 8, s2
= c
& 0xFF;
7737 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
7738 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
7739 error ("Invalid code: %d", code
);
7741 charset
= charset_kanji
;
7743 c
= DECODE_CHAR (charset
, c
);
7745 error ("Invalid code: %d", code
);
7746 return make_number (c
);
7750 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
7751 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
7752 Return the corresponding code in SJIS. */)
7756 Lisp_Object spec
, attrs
, charset_list
;
7758 struct charset
*charset
;
7761 CHECK_CHARACTER (ch
);
7763 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7764 attrs
= AREF (spec
, 0);
7766 if (ASCII_CHAR_P (c
)
7767 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7770 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7771 charset
= char_charset (c
, charset_list
, &code
);
7772 if (code
== CHARSET_INVALID_CODE (charset
))
7773 error ("Can't encode by shift_jis encoding: %d", c
);
7776 return make_number (code
);
7779 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
7780 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
7781 Return the corresponding character. */)
7785 Lisp_Object spec
, attrs
, val
;
7786 struct charset
*charset_roman
, *charset_big5
, *charset
;
7789 CHECK_NATNUM (code
);
7790 c
= XFASTINT (code
);
/* The charsets are taken from the coding system currently recorded
   in Vbig5_coding_system.  */
7791 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7792 attrs
= AREF (spec
, 0);
7794 if (ASCII_BYTE_P (c
)
7795 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
/* The first two elements of the charset list are used as the
   one-byte (roman) charset and the Big5 charset, in that order.  */
7798 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7799 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7800 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7803 charset
= charset_roman
;
/* NOTE(review): b2 is masked with 0x7F, so it can never exceed 0x7F
   and the `b2 > 0x7E && b2 < 0xA1' and `b2 > 0xFE' tests below can
   only fire for 0x7F.  Those bounds, and the 0xFF mask used for the
   second byte in Fdecode_sjis_char, suggest 0xFF was intended --
   confirm.  */
7806 int b1
= c
>> 8, b2
= c
& 0x7F;
7807 if (b1
< 0xA1 || b1
> 0xFE
7808 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
/* NOTE(review): `code' is the Lisp object checked by CHECK_NATNUM
   above, not the C integer `c', yet it is formatted with %d -- confirm
   that this prints the intended value.  */
7809 error ("Invalid code: %d", code
);
7810 charset
= charset_big5
;
7812 c
= DECODE_CHAR (charset
, (unsigned )c
);
7814 error ("Invalid code: %d", code
);
7815 return make_number (c
);
7818 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
7819 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
7820 Return the corresponding character code in Big5. */)
7824 Lisp_Object spec
, attrs
, charset_list
;
7825 struct charset
*charset
;
7829 CHECK_CHARACTER (ch
);
7831 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7832 attrs
= AREF (spec
, 0);
7833 if (ASCII_CHAR_P (c
)
7834 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7837 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7838 charset
= char_charset (c
, charset_list
, &code
);
7839 if (code
== CHARSET_INVALID_CODE (charset
))
7840 error ("Can't encode by Big5 encoding: %d", c
);
7842 return make_number (code
);
7846 DEFUN ("set-terminal-coding-system-internal",
7847 Fset_terminal_coding_system_internal
,
7848 Sset_terminal_coding_system_internal
, 1, 1, 0,
7849 doc
: /* Internal use only. */)
7851 Lisp_Object coding_system
;
7853 CHECK_SYMBOL (coding_system
);
7854 setup_coding_system (Fcheck_coding_system (coding_system
),
7857 /* We had better not send unsafe characters to terminal. */
7858 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
7859 /* Character composition should be disabled. */
7860 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7861 terminal_coding
.src_multibyte
= 1;
7862 terminal_coding
.dst_multibyte
= 0;
7866 DEFUN ("set-safe-terminal-coding-system-internal",
7867 Fset_safe_terminal_coding_system_internal
,
7868 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
7869 doc
: /* Internal use only. */)
7871 Lisp_Object coding_system
;
7873 CHECK_SYMBOL (coding_system
);
7874 setup_coding_system (Fcheck_coding_system (coding_system
),
7875 &safe_terminal_coding
);
7876 /* Character composition should be disabled. */
7877 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7878 safe_terminal_coding
.src_multibyte
= 1;
7879 safe_terminal_coding
.dst_multibyte
= 0;
7883 DEFUN ("terminal-coding-system",
7884 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
7885 doc
: /* Return coding system specified for terminal output. */)
7888 return CODING_ID_NAME (terminal_coding
.id
);
7891 DEFUN ("set-keyboard-coding-system-internal",
7892 Fset_keyboard_coding_system_internal
,
7893 Sset_keyboard_coding_system_internal
, 1, 1, 0,
7894 doc
: /* Internal use only. */)
7896 Lisp_Object coding_system
;
7898 CHECK_SYMBOL (coding_system
);
7899 setup_coding_system (Fcheck_coding_system (coding_system
),
7901 /* Character composition should be disabled. */
7902 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7906 DEFUN ("keyboard-coding-system",
7907 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
7908 doc
: /* Return coding system specified for decoding keyboard input. */)
7911 return CODING_ID_NAME (keyboard_coding
.id
);
7915 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7916 Sfind_operation_coding_system
, 1, MANY
, 0,
7917 doc
: /* Choose a coding system for an operation based on the target name.
7918 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7919 DECODING-SYSTEM is the coding system to use for decoding
7920 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7921 for encoding (in case OPERATION does encoding).
7923 The first argument OPERATION specifies an I/O primitive:
7924 For file I/O, `insert-file-contents' or `write-region'.
7925 For process I/O, `call-process', `call-process-region', or `start-process'.
7926 For network I/O, `open-network-stream'.
7928 The remaining arguments should be the same arguments that were passed
7929 to the primitive. Depending on which primitive, one of those arguments
7930 is selected as the TARGET. For example, if OPERATION does file I/O,
7931 whichever argument specifies the file name is TARGET.
7933 TARGET has a meaning which depends on OPERATION:
7934 For file I/O, TARGET is a file name.
7935 For process I/O, TARGET is a process name.
7936 For network I/O, TARGET is a service name or a port number.
7938 This function looks up what is specified for TARGET in
7939 `file-coding-system-alist', `process-coding-system-alist',
7940 or `network-coding-system-alist' depending on OPERATION.
7941 They may specify a coding system, a cons of coding systems,
7942 or a function symbol to call.
7943 In the last case, we call the function with one argument,
7944 which is a list of all the arguments given to this function.
7946 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7951 Lisp_Object operation
, target_idx
, target
, val
;
7952 register Lisp_Object chain
;
7955 error ("Too few arguments");
7956 operation
= args
[0];
7957 if (!SYMBOLP (operation
)
7958 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7959 error ("Invalid first arguement");
7960 if (nargs
< 1 + XINT (target_idx
))
7961 error ("Too few arguments for operation: %s",
7962 SDATA (SYMBOL_NAME (operation
)));
7963 target
= args
[XINT (target_idx
) + 1];
7964 if (!(STRINGP (target
)
7965 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7966 error ("Invalid %dth argument", XINT (target_idx
) + 1);
7968 chain
= ((EQ (operation
, Qinsert_file_contents
)
7969 || EQ (operation
, Qwrite_region
))
7970 ? Vfile_coding_system_alist
7971 : (EQ (operation
, Qopen_network_stream
)
7972 ? Vnetwork_coding_system_alist
7973 : Vprocess_coding_system_alist
));
7977 for (; CONSP (chain
); chain
= XCDR (chain
))
7983 && ((STRINGP (target
)
7984 && STRINGP (XCAR (elt
))
7985 && fast_string_match (XCAR (elt
), target
) >= 0)
7986 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7989 /* Here, if VAL is both a valid coding system and a valid
7990 function symbol, we return VAL as a coding system. */
7993 if (! SYMBOLP (val
))
7995 if (! NILP (Fcoding_system_p (val
)))
7996 return Fcons (val
, val
);
7997 if (! NILP (Ffboundp (val
)))
7999 val
= call1 (val
, Flist (nargs
, args
));
8002 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
8003 return Fcons (val
, val
);
8011 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
8012 Sset_coding_system_priority
, 0, MANY
, 0,
8013 doc
: /* Assign higher priority to the coding systems given as arguments.
8014 If multiple coding systems belong to the same category,
8015 all but the first one are ignored.
8017 usage: (set-coding-system-priority ...) */)
8023 int changed
[coding_category_max
];
8024 enum coding_category priorities
[coding_category_max
];
8026 bzero (changed
, sizeof changed
);
8028 for (i
= j
= 0; i
< nargs
; i
++)
8030 enum coding_category category
;
8031 Lisp_Object spec
, attrs
;
8033 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
8034 attrs
= AREF (spec
, 0);
8035 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
8036 if (changed
[category
])
8037 /* Ignore this coding system because a coding system of the
8038 same category already had a higher priority. */
8040 changed
[category
] = 1;
8041 priorities
[j
++] = category
;
8042 if (coding_categories
[category
].id
>= 0
8043 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
8044 setup_coding_system (args
[i
], &coding_categories
[category
]);
8045 Fset (AREF (Vcoding_category_table
, category
), args
[i
]);
8048 /* Now we have decided top J priorities. Reflect the order of the
8049 original priorities to the remaining priorities. */
8051 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
8053 while (j
< coding_category_max
8054 && changed
[coding_priorities
[j
]])
8056 if (j
== coding_category_max
)
8058 priorities
[i
] = coding_priorities
[j
];
8061 bcopy (priorities
, coding_priorities
, sizeof priorities
);
8063 /* Update `coding-category-list'. */
8064 Vcoding_category_list
= Qnil
;
8065 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8066 Vcoding_category_list
8067 = Fcons (AREF (Vcoding_category_table
, priorities
[i
]),
8068 Vcoding_category_list
);
8073 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
8074 Scoding_system_priority_list
, 0, 1, 0,
8075 doc
: /* Return a list of coding systems ordered by their priorities.
8076 HIGHESTP non-nil means just return the highest priority one. */)
8078 Lisp_Object highestp
;
8083 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
8085 enum coding_category category
= coding_priorities
[i
];
8086 int id
= coding_categories
[category
].id
;
8091 attrs
= CODING_ID_ATTRS (id
);
8092 if (! NILP (highestp
))
8093 return CODING_ATTR_BASE_NAME (attrs
);
8094 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
8096 return Fnreverse (val
);
/* Suffixes appended to a base coding system name by
   make_subsidiaries, in vector order.  */
8099 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
/* Return a vector of three symbols whose names are the name of BASE
   followed by each entry of `suffixes', in that order.  */
8102 make_subsidiaries (base
)
8105 Lisp_Object subsidiaries
;
8106 int base_name_len
= SBYTES (SYMBOL_NAME (base
));
/* 6 extra bytes: the longest suffix ("-unix", 5 bytes) plus the
   terminating NUL copied by the bcopy in the loop below.  */
8107 char *buf
= (char *) alloca (base_name_len
+ 6);
8110 bcopy (SDATA (SYMBOL_NAME (base
)), buf
, base_name_len
);
8111 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
8112 for (i
= 0; i
< 3; i
++)
/* Overwrite the previous suffix in place; strlen + 1 copies the
   terminating NUL as well, so BUF is a C string for intern.  */
8114 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
8115 ASET (subsidiaries
, i
, intern (buf
));
8117 return subsidiaries
;
8121 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
8122 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
8123 doc
: /* For internal use only.
8124 usage: (define-coding-system-internal ...) */)
8130 Lisp_Object spec_vec
; /* [ ATTRS ALIASES EOL_TYPE ] */
8131 Lisp_Object attrs
; /* Vector of attributes. */
8132 Lisp_Object eol_type
;
8133 Lisp_Object aliases
;
8134 Lisp_Object coding_type
, charset_list
, safe_charsets
;
8135 enum coding_category category
;
8136 Lisp_Object tail
, val
;
8137 int max_charset_id
= 0;
8140 if (nargs
< coding_arg_max
)
8143 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
8145 name
= args
[coding_arg_name
];
8146 CHECK_SYMBOL (name
);
8147 CODING_ATTR_BASE_NAME (attrs
) = name
;
8149 val
= args
[coding_arg_mnemonic
];
8150 if (! STRINGP (val
))
8151 CHECK_CHARACTER (val
);
8152 CODING_ATTR_MNEMONIC (attrs
) = val
;
8154 coding_type
= args
[coding_arg_coding_type
];
8155 CHECK_SYMBOL (coding_type
);
8156 CODING_ATTR_TYPE (attrs
) = coding_type
;
8158 charset_list
= args
[coding_arg_charset_list
];
8159 if (SYMBOLP (charset_list
))
8161 if (EQ (charset_list
, Qiso_2022
))
8163 if (! EQ (coding_type
, Qiso_2022
))
8164 error ("Invalid charset-list");
8165 charset_list
= Viso_2022_charset_list
;
8167 else if (EQ (charset_list
, Qemacs_mule
))
8169 if (! EQ (coding_type
, Qemacs_mule
))
8170 error ("Invalid charset-list");
8171 charset_list
= Vemacs_mule_charset_list
;
8173 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8174 if (max_charset_id
< XFASTINT (XCAR (tail
)))
8175 max_charset_id
= XFASTINT (XCAR (tail
));
8179 charset_list
= Fcopy_sequence (charset_list
);
8180 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
8182 struct charset
*charset
;
8185 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8186 if (EQ (coding_type
, Qiso_2022
)
8187 ? CHARSET_ISO_FINAL (charset
) < 0
8188 : EQ (coding_type
, Qemacs_mule
)
8189 ? CHARSET_EMACS_MULE_ID (charset
) < 0
8191 error ("Can't handle charset `%s'",
8192 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8194 XSETCAR (tail
, make_number (charset
->id
));
8195 if (max_charset_id
< charset
->id
)
8196 max_charset_id
= charset
->id
;
8199 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
8201 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
8203 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8204 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
8205 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
8207 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
8209 val
= args
[coding_arg_decode_translation_table
];
8210 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8212 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8214 val
= args
[coding_arg_encode_translation_table
];
8215 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8217 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8219 val
= args
[coding_arg_post_read_conversion
];
8221 CODING_ATTR_POST_READ (attrs
) = val
;
8223 val
= args
[coding_arg_pre_write_conversion
];
8225 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8227 val
= args
[coding_arg_default_char
];
8229 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
8232 CHECK_CHARACTER (val
);
8233 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8236 val
= args
[coding_arg_for_unibyte
];
8237 CODING_ATTR_FOR_UNIBYTE (attrs
) = NILP (val
) ? Qnil
: Qt
;
8239 val
= args
[coding_arg_plist
];
8241 CODING_ATTR_PLIST (attrs
) = val
;
8243 if (EQ (coding_type
, Qcharset
))
8245 /* Generate a lisp vector of 256 elements. Each element is nil,
8246 integer, or a list of charset IDs.
8248 If Nth element is nil, the byte code N is invalid in this
8251 If Nth element is a number NUM, N is the first byte of a
8252 charset whose ID is NUM.
8254 If Nth element is a list of charset IDs, N is the first byte
8255 of one of them. The list is sorted by dimensions of the
8256 charsets. A charset of smaller dimension comes first. */
8257 val
= Fmake_vector (make_number (256), Qnil
);
8259 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8261 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
8262 int dim
= CHARSET_DIMENSION (charset
);
8263 int idx
= (dim
- 1) * 4;
8265 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8266 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8268 for (i
= charset
->code_space
[idx
];
8269 i
<= charset
->code_space
[idx
+ 1]; i
++)
8271 Lisp_Object tmp
, tmp2
;
8274 tmp
= AREF (val
, i
);
8277 else if (NUMBERP (tmp
))
8279 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
8281 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
8283 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
8287 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
8289 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
8294 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
8297 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
8298 XSETCAR (tmp2
, XCAR (tail
));
8304 ASET (attrs
, coding_attr_charset_valids
, val
);
8305 category
= coding_category_charset
;
8307 else if (EQ (coding_type
, Qccl
))
8311 if (nargs
< coding_arg_ccl_max
)
8314 val
= args
[coding_arg_ccl_decoder
];
8315 CHECK_CCL_PROGRAM (val
);
8317 val
= Fcopy_sequence (val
);
8318 ASET (attrs
, coding_attr_ccl_decoder
, val
);
8320 val
= args
[coding_arg_ccl_encoder
];
8321 CHECK_CCL_PROGRAM (val
);
8323 val
= Fcopy_sequence (val
);
8324 ASET (attrs
, coding_attr_ccl_encoder
, val
);
8326 val
= args
[coding_arg_ccl_valids
];
8327 valids
= Fmake_string (make_number (256), make_number (0));
8328 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
8335 from
= to
= XINT (val
);
8336 if (from
< 0 || from
> 255)
8337 args_out_of_range_3 (val
, make_number (0), make_number (255));
8342 CHECK_NATNUM_CAR (val
);
8343 CHECK_NATNUM_CDR (val
);
8344 from
= XINT (XCAR (val
));
8346 args_out_of_range_3 (XCAR (val
),
8347 make_number (0), make_number (255));
8348 to
= XINT (XCDR (val
));
8349 if (to
< from
|| to
> 255)
8350 args_out_of_range_3 (XCDR (val
),
8351 XCAR (val
), make_number (255));
8353 for (i
= from
; i
<= to
; i
++)
8354 SSET (valids
, i
, 1);
8356 ASET (attrs
, coding_attr_ccl_valids
, valids
);
8358 category
= coding_category_ccl
;
8360 else if (EQ (coding_type
, Qutf_16
))
8362 Lisp_Object bom
, endian
;
8364 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8366 if (nargs
< coding_arg_utf16_max
)
8369 bom
= args
[coding_arg_utf16_bom
];
8370 if (! NILP (bom
) && ! EQ (bom
, Qt
))
8374 CHECK_CODING_SYSTEM (val
);
8376 CHECK_CODING_SYSTEM (val
);
8378 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
8380 endian
= args
[coding_arg_utf16_endian
];
8381 CHECK_SYMBOL (endian
);
8384 else if (! EQ (endian
, Qbig
) && ! EQ (endian
, Qlittle
))
8385 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian
)));
8386 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
8388 category
= (CONSP (bom
)
8389 ? coding_category_utf_16_auto
8391 ? (EQ (endian
, Qbig
)
8392 ? coding_category_utf_16_be_nosig
8393 : coding_category_utf_16_le_nosig
)
8394 : (EQ (endian
, Qbig
)
8395 ? coding_category_utf_16_be
8396 : coding_category_utf_16_le
));
8398 else if (EQ (coding_type
, Qiso_2022
))
8400 Lisp_Object initial
, reg_usage
, request
, flags
;
8403 if (nargs
< coding_arg_iso2022_max
)
8406 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
8407 CHECK_VECTOR (initial
);
8408 for (i
= 0; i
< 4; i
++)
8410 val
= Faref (initial
, make_number (i
));
8413 struct charset
*charset
;
8415 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8416 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
8417 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
8418 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8421 ASET (initial
, i
, make_number (-1));
8424 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
8425 CHECK_CONS (reg_usage
);
8426 CHECK_NUMBER_CAR (reg_usage
);
8427 CHECK_NUMBER_CDR (reg_usage
);
8429 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8430 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8438 CHECK_CHARSET_GET_ID (tmp
, id
);
8439 CHECK_NATNUM_CDR (val
);
8440 if (XINT (XCDR (val
)) >= 4)
8441 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8442 XSETCAR (val
, make_number (id
));
8445 flags
= args
[coding_arg_iso2022_flags
];
8446 CHECK_NATNUM (flags
);
8448 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8449 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8451 ASET (attrs
, coding_attr_iso_initial
, initial
);
8452 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8453 ASET (attrs
, coding_attr_iso_request
, request
);
8454 ASET (attrs
, coding_attr_iso_flags
, flags
);
8455 setup_iso_safe_charsets (attrs
);
8457 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8458 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8459 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8460 ? coding_category_iso_7_else
8461 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8462 ? coding_category_iso_7
8463 : coding_category_iso_7_tight
);
8466 int id
= XINT (AREF (initial
, 1));
8468 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8469 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8471 ? coding_category_iso_8_else
8472 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8473 ? coding_category_iso_8_1
8474 : coding_category_iso_8_2
);
8476 if (category
!= coding_category_iso_8_1
8477 && category
!= coding_category_iso_8_2
)
8478 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8480 else if (EQ (coding_type
, Qemacs_mule
))
8482 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8483 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8484 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8485 category
= coding_category_emacs_mule
;
8487 else if (EQ (coding_type
, Qshift_jis
))
8490 struct charset
*charset
;
8492 if (XINT (Flength (charset_list
)) != 3
8493 && XINT (Flength (charset_list
)) != 4)
8494 error ("There should be three or four charsets");
8496 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8497 if (CHARSET_DIMENSION (charset
) != 1)
8498 error ("Dimension of charset %s is not one",
8499 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8500 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8501 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8503 charset_list
= XCDR (charset_list
);
8504 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8505 if (CHARSET_DIMENSION (charset
) != 1)
8506 error ("Dimension of charset %s is not one",
8507 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8509 charset_list
= XCDR (charset_list
);
8510 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8511 if (CHARSET_DIMENSION (charset
) != 2)
8512 error ("Dimension of charset %s is not two",
8513 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8515 charset_list
= XCDR (charset_list
);
8516 if (! NILP (charset_list
))
8518 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8519 if (CHARSET_DIMENSION (charset
) != 2)
8520 error ("Dimension of charset %s is not two",
8521 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8524 category
= coding_category_sjis
;
8525 Vsjis_coding_system
= name
;
8527 else if (EQ (coding_type
, Qbig5
))
8529 struct charset
*charset
;
8531 if (XINT (Flength (charset_list
)) != 2)
8532 error ("There should be just two charsets");
8534 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8535 if (CHARSET_DIMENSION (charset
) != 1)
8536 error ("Dimension of charset %s is not one",
8537 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8538 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8539 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8541 charset_list
= XCDR (charset_list
);
8542 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8543 if (CHARSET_DIMENSION (charset
) != 2)
8544 error ("Dimension of charset %s is not two",
8545 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8547 category
= coding_category_big5
;
8548 Vbig5_coding_system
= name
;
8550 else if (EQ (coding_type
, Qraw_text
))
8552 category
= coding_category_raw_text
;
8553 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8555 else if (EQ (coding_type
, Qutf_8
))
8557 category
= coding_category_utf_8
;
8558 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8560 else if (EQ (coding_type
, Qundecided
))
8561 category
= coding_category_undecided
;
8563 error ("Invalid coding system type: %s",
8564 SDATA (SYMBOL_NAME (coding_type
)));
8566 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8567 CODING_ATTR_PLIST (attrs
)
8568 = Fcons (QCcategory
, Fcons (AREF (Vcoding_category_table
, category
),
8569 CODING_ATTR_PLIST (attrs
)));
8571 eol_type
= args
[coding_arg_eol_type
];
8572 if (! NILP (eol_type
)
8573 && ! EQ (eol_type
, Qunix
)
8574 && ! EQ (eol_type
, Qdos
)
8575 && ! EQ (eol_type
, Qmac
))
8576 error ("Invalid eol-type");
8578 aliases
= Fcons (name
, Qnil
);
8580 if (NILP (eol_type
))
8582 eol_type
= make_subsidiaries (name
);
8583 for (i
= 0; i
< 3; i
++)
8585 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8587 this_name
= AREF (eol_type
, i
);
8588 this_aliases
= Fcons (this_name
, Qnil
);
8589 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
8590 this_spec
= Fmake_vector (make_number (3), attrs
);
8591 ASET (this_spec
, 1, this_aliases
);
8592 ASET (this_spec
, 2, this_eol_type
);
8593 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8594 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8595 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8596 Vcoding_system_alist
);
8600 spec_vec
= Fmake_vector (make_number (3), attrs
);
8601 ASET (spec_vec
, 1, aliases
);
8602 ASET (spec_vec
, 2, eol_type
);
8604 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
8605 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
8606 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
8607 Vcoding_system_alist
);
8610 int id
= coding_categories
[category
].id
;
8612 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
8613 setup_coding_system (name
, &coding_categories
[category
]);
8619 return Fsignal (Qwrong_number_of_arguments
,
8620 Fcons (intern ("define-coding-system-internal"),
8621 make_number (nargs
)));
8625 DEFUN ("coding-system-put", Fcoding_system_put
, Scoding_system_put
,
8627 doc
: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
8628 (coding_system
, prop
, val
)
8629 Lisp_Object coding_system
, prop
, val
;
8631 Lisp_Object spec
, attrs
, plist
;
8633 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8634 attrs
= AREF (spec
, 0);
8635 if (EQ (prop
, QCmnemonic
))
8637 if (! STRINGP (val
))
8638 CHECK_CHARACTER (val
);
8639 CODING_ATTR_MNEMONIC (attrs
) = val
;
8641 else if (EQ (prop
, QCdefalut_char
))
8644 val
= make_number (' ');
8646 CHECK_CHARACTER (val
);
8647 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8649 else if (EQ (prop
, QCdecode_translation_table
))
8651 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8653 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8655 else if (EQ (prop
, QCencode_translation_table
))
8657 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8659 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8661 else if (EQ (prop
, QCpost_read_conversion
))
8664 CODING_ATTR_POST_READ (attrs
) = val
;
8666 else if (EQ (prop
, QCpre_write_conversion
))
8669 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8672 CODING_ATTR_PLIST (attrs
)
8673 = Fplist_put (CODING_ATTR_PLIST (attrs
), prop
, val
);
8678 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
8679 Sdefine_coding_system_alias
, 2, 2, 0,
8680 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8681 (alias
, coding_system
)
8682 Lisp_Object alias
, coding_system
;
8684 Lisp_Object spec
, aliases
, eol_type
;
8686 CHECK_SYMBOL (alias
);
8687 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8688 aliases
= AREF (spec
, 1);
8689 /* ALISES should be a list of length more than zero, and the first
8690 element is a base coding system. Append ALIAS at the tail of the
8692 while (!NILP (XCDR (aliases
)))
8693 aliases
= XCDR (aliases
);
8694 XSETCDR (aliases
, Fcons (alias
, Qnil
));
8696 eol_type
= AREF (spec
, 2);
8697 if (VECTORP (eol_type
))
8699 Lisp_Object subsidiaries
;
8702 subsidiaries
= make_subsidiaries (alias
);
8703 for (i
= 0; i
< 3; i
++)
8704 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
8705 AREF (eol_type
, i
));
8708 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
8709 Vcoding_system_list
= Fcons (alias
, Vcoding_system_list
);
8710 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
8711 Vcoding_system_alist
);
8716 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
8718 doc
: /* Return the base of CODING-SYSTEM.
8719 Any alias or subsidiary coding system is not a base coding system. */)
8721 Lisp_Object coding_system
;
8723 Lisp_Object spec
, attrs
;
8725 if (NILP (coding_system
))
8726 return (Qno_conversion
);
8727 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8728 attrs
= AREF (spec
, 0);
8729 return CODING_ATTR_BASE_NAME (attrs
);
8732 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
8734 doc
: "Return the property list of CODING-SYSTEM.")
8736 Lisp_Object coding_system
;
8738 Lisp_Object spec
, attrs
;
8740 if (NILP (coding_system
))
8741 coding_system
= Qno_conversion
;
8742 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8743 attrs
= AREF (spec
, 0);
8744 return CODING_ATTR_PLIST (attrs
);
8748 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
8750 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
8752 Lisp_Object coding_system
;
8756 if (NILP (coding_system
))
8757 coding_system
= Qno_conversion
;
8758 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8759 return AREF (spec
, 1);
8762 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
8763 Scoding_system_eol_type
, 1, 1, 0,
8764 doc
: /* Return eol-type of CODING-SYSTEM.
8765 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8767 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8768 and CR respectively.
8770 A vector value indicates that a format of end-of-line should be
8771 detected automatically. Nth element of the vector is the subsidiary
8772 coding system whose eol-type is N. */)
8774 Lisp_Object coding_system
;
8776 Lisp_Object spec
, eol_type
;
8779 if (NILP (coding_system
))
8780 coding_system
= Qno_conversion
;
8781 if (! CODING_SYSTEM_P (coding_system
))
8783 spec
= CODING_SYSTEM_SPEC (coding_system
);
8784 eol_type
= AREF (spec
, 2);
8785 if (VECTORP (eol_type
))
8786 return Fcopy_sequence (eol_type
);
8787 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
8788 return make_number (n
);
8794 /*** 9. Post-amble ***/
8801 for (i
= 0; i
< coding_category_max
; i
++)
8803 coding_categories
[i
].id
= -1;
8804 coding_priorities
[i
] = i
;
8807 /* ISO2022 specific initialize routine. */
8808 for (i
= 0; i
< 0x20; i
++)
8809 iso_code_class
[i
] = ISO_control_0
;
8810 for (i
= 0x21; i
< 0x7F; i
++)
8811 iso_code_class
[i
] = ISO_graphic_plane_0
;
8812 for (i
= 0x80; i
< 0xA0; i
++)
8813 iso_code_class
[i
] = ISO_control_1
;
8814 for (i
= 0xA1; i
< 0xFF; i
++)
8815 iso_code_class
[i
] = ISO_graphic_plane_1
;
8816 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
8817 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
8818 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
8819 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
8820 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
8821 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
8822 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
8823 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
8824 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
8826 for (i
= 0; i
< 256; i
++)
8828 emacs_mule_bytes
[i
] = 1;
8830 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
8831 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
8832 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
8833 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
8841 staticpro (&Vcoding_system_hash_table
);
8843 Lisp_Object args
[2];
8846 Vcoding_system_hash_table
= Fmake_hash_table (2, args
);
8849 staticpro (&Vsjis_coding_system
);
8850 Vsjis_coding_system
= Qnil
;
8852 staticpro (&Vbig5_coding_system
);
8853 Vbig5_coding_system
= Qnil
;
8855 staticpro (&Vcode_conversion_reused_workbuf
);
8856 Vcode_conversion_reused_workbuf
= Qnil
;
8858 staticpro (&Vcode_conversion_workbuf_name
);
8859 Vcode_conversion_workbuf_name
= build_string (" *code-conversion-work*");
8861 reused_workbuf_in_use
= 0;
8863 DEFSYM (Qcharset
, "charset");
8864 DEFSYM (Qtarget_idx
, "target-idx");
8865 DEFSYM (Qcoding_system_history
, "coding-system-history");
8866 Fset (Qcoding_system_history
, Qnil
);
8868 /* Target FILENAME is the first argument. */
8869 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
8870 /* Target FILENAME is the third argument. */
8871 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
8873 DEFSYM (Qcall_process
, "call-process");
8874 /* Target PROGRAM is the first argument. */
8875 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
8877 DEFSYM (Qcall_process_region
, "call-process-region");
8878 /* Target PROGRAM is the third argument. */
8879 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
8881 DEFSYM (Qstart_process
, "start-process");
8882 /* Target PROGRAM is the third argument. */
8883 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
8885 DEFSYM (Qopen_network_stream
, "open-network-stream");
8886 /* Target SERVICE is the fourth argument. */
8887 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
8889 DEFSYM (Qcoding_system
, "coding-system");
8890 DEFSYM (Qcoding_aliases
, "coding-aliases");
8892 DEFSYM (Qeol_type
, "eol-type");
8893 DEFSYM (Qunix
, "unix");
8894 DEFSYM (Qdos
, "dos");
8896 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
8897 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
8898 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
8899 DEFSYM (Qdefault_char
, "default-char");
8900 DEFSYM (Qundecided
, "undecided");
8901 DEFSYM (Qno_conversion
, "no-conversion");
8902 DEFSYM (Qraw_text
, "raw-text");
8904 DEFSYM (Qiso_2022
, "iso-2022");
8906 DEFSYM (Qutf_8
, "utf-8");
8907 DEFSYM (Qutf_8_emacs
, "utf-8-emacs");
8909 DEFSYM (Qutf_16
, "utf-16");
8910 DEFSYM (Qbig
, "big");
8911 DEFSYM (Qlittle
, "little");
8913 DEFSYM (Qshift_jis
, "shift-jis");
8914 DEFSYM (Qbig5
, "big5");
8916 DEFSYM (Qcoding_system_p
, "coding-system-p");
8918 DEFSYM (Qcoding_system_error
, "coding-system-error");
8919 Fput (Qcoding_system_error
, Qerror_conditions
,
8920 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
8921 Fput (Qcoding_system_error
, Qerror_message
,
8922 build_string ("Invalid coding system"));
8924 /* Intern this now in case it isn't already done.
8925 Setting this variable twice is harmless.
8926 But don't staticpro it here--that is done in alloc.c. */
8927 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
8929 DEFSYM (Qtranslation_table
, "translation-table");
8930 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
8931 DEFSYM (Qtranslation_table_id
, "translation-table-id");
8932 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
8933 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
8935 DEFSYM (Qvalid_codes
, "valid-codes");
8937 DEFSYM (Qemacs_mule
, "emacs-mule");
8939 DEFSYM (QCcategory
, ":category");
8940 DEFSYM (QCmnemonic
, ":mnemonic");
8941 DEFSYM (QCdefalut_char
, ":default-char");
8942 DEFSYM (QCdecode_translation_table
, ":decode-translation-table");
8943 DEFSYM (QCencode_translation_table
, ":encode-translation-table");
8944 DEFSYM (QCpost_read_conversion
, ":post-read-conversion");
8945 DEFSYM (QCpre_write_conversion
, ":pre-write-conversion");
8947 Vcoding_category_table
8948 = Fmake_vector (make_number (coding_category_max
), Qnil
);
8949 staticpro (&Vcoding_category_table
);
8950 /* Followings are target of code detection. */
8951 ASET (Vcoding_category_table
, coding_category_iso_7
,
8952 intern ("coding-category-iso-7"));
8953 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
8954 intern ("coding-category-iso-7-tight"));
8955 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
8956 intern ("coding-category-iso-8-1"));
8957 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
8958 intern ("coding-category-iso-8-2"));
8959 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
8960 intern ("coding-category-iso-7-else"));
8961 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
8962 intern ("coding-category-iso-8-else"));
8963 ASET (Vcoding_category_table
, coding_category_utf_8
,
8964 intern ("coding-category-utf-8"));
8965 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
8966 intern ("coding-category-utf-16-be"));
8967 ASET (Vcoding_category_table
, coding_category_utf_16_auto
,
8968 intern ("coding-category-utf-16-auto"));
8969 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
8970 intern ("coding-category-utf-16-le"));
8971 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
8972 intern ("coding-category-utf-16-be-nosig"));
8973 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
8974 intern ("coding-category-utf-16-le-nosig"));
8975 ASET (Vcoding_category_table
, coding_category_charset
,
8976 intern ("coding-category-charset"));
8977 ASET (Vcoding_category_table
, coding_category_sjis
,
8978 intern ("coding-category-sjis"));
8979 ASET (Vcoding_category_table
, coding_category_big5
,
8980 intern ("coding-category-big5"));
8981 ASET (Vcoding_category_table
, coding_category_ccl
,
8982 intern ("coding-category-ccl"));
8983 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
8984 intern ("coding-category-emacs-mule"));
8985 /* Followings are NOT target of code detection. */
8986 ASET (Vcoding_category_table
, coding_category_raw_text
,
8987 intern ("coding-category-raw-text"));
8988 ASET (Vcoding_category_table
, coding_category_undecided
,
8989 intern ("coding-category-undecided"));
8991 DEFSYM (Qinsufficient_source
, "insufficient-source");
8992 DEFSYM (Qinconsistent_eol
, "inconsistent-eol");
8993 DEFSYM (Qinvalid_source
, "invalid-source");
8994 DEFSYM (Qinterrupted
, "interrupted");
8995 DEFSYM (Qinsufficient_memory
, "insufficient-memory");
8997 defsubr (&Scoding_system_p
);
8998 defsubr (&Sread_coding_system
);
8999 defsubr (&Sread_non_nil_coding_system
);
9000 defsubr (&Scheck_coding_system
);
9001 defsubr (&Sdetect_coding_region
);
9002 defsubr (&Sdetect_coding_string
);
9003 defsubr (&Sfind_coding_systems_region_internal
);
9004 defsubr (&Sunencodable_char_position
);
9005 defsubr (&Scheck_coding_systems_region
);
9006 defsubr (&Sdecode_coding_region
);
9007 defsubr (&Sencode_coding_region
);
9008 defsubr (&Sdecode_coding_string
);
9009 defsubr (&Sencode_coding_string
);
9010 defsubr (&Sdecode_sjis_char
);
9011 defsubr (&Sencode_sjis_char
);
9012 defsubr (&Sdecode_big5_char
);
9013 defsubr (&Sencode_big5_char
);
9014 defsubr (&Sset_terminal_coding_system_internal
);
9015 defsubr (&Sset_safe_terminal_coding_system_internal
);
9016 defsubr (&Sterminal_coding_system
);
9017 defsubr (&Sset_keyboard_coding_system_internal
);
9018 defsubr (&Skeyboard_coding_system
);
9019 defsubr (&Sfind_operation_coding_system
);
9020 defsubr (&Sset_coding_system_priority
);
9021 defsubr (&Sdefine_coding_system_internal
);
9022 defsubr (&Sdefine_coding_system_alias
);
9023 defsubr (&Scoding_system_put
);
9024 defsubr (&Scoding_system_base
);
9025 defsubr (&Scoding_system_plist
);
9026 defsubr (&Scoding_system_aliases
);
9027 defsubr (&Scoding_system_eol_type
);
9028 defsubr (&Scoding_system_priority_list
);
9030 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
9031 doc
: /* List of coding systems.
9033 Do not alter the value of this variable manually. This variable should be
9034 updated by the functions `define-coding-system' and
9035 `define-coding-system-alias'. */);
9036 Vcoding_system_list
= Qnil
;
9038 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
9039 doc
: /* Alist of coding system names.
9040 Each element is one element list of coding system name.
9041 This variable is given to `completing-read' as TABLE argument.
9043 Do not alter the value of this variable manually. This variable should be
9044 updated by the functions `make-coding-system' and
9045 `define-coding-system-alias'. */);
9046 Vcoding_system_alist
= Qnil
;
9048 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
9049 doc
: /* List of coding-categories (symbols) ordered by priority.
9051 On detecting a coding system, Emacs tries code detection algorithms
9052 associated with each coding-category one by one in this order. When
9053 one algorithm agrees with a byte sequence of source text, the coding
9054 system bound to the corresponding coding-category is selected. */);
9058 Vcoding_category_list
= Qnil
;
9059 for (i
= coding_category_max
- 1; i
>= 0; i
--)
9060 Vcoding_category_list
9061 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
9062 Vcoding_category_list
);
9065 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
9066 doc
: /* Specify the coding system for read operations.
9067 It is useful to bind this variable with `let', but do not set it globally.
9068 If the value is a coding system, it is used for decoding on read operation.
9069 If not, an appropriate element is used from one of the coding system alists:
9070 There are three such tables, `file-coding-system-alist',
9071 `process-coding-system-alist', and `network-coding-system-alist'. */);
9072 Vcoding_system_for_read
= Qnil
;
9074 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
9075 doc
: /* Specify the coding system for write operations.
9076 Programs bind this variable with `let', but you should not set it globally.
9077 If the value is a coding system, it is used for encoding of output,
9078 when writing it to a file and when sending it to a file or subprocess.
9080 If this does not specify a coding system, an appropriate element
9081 is used from one of the coding system alists:
9082 There are three such tables, `file-coding-system-alist',
9083 `process-coding-system-alist', and `network-coding-system-alist'.
9084 For output to files, if the above procedure does not specify a coding system,
9085 the value of `buffer-file-coding-system' is used. */);
9086 Vcoding_system_for_write
= Qnil
;
9088 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
9090 Coding system used in the latest file or process I/O. */);
9091 Vlast_coding_system_used
= Qnil
;
9093 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error
,
9095 Error status of the last code conversion.
9097 When an error was detected in the last code conversion, this variable
9098 is set to one of the following symbols.
9099 `insufficient-source'
9103 `insufficient-memory'
9104 When no error was detected, the value doesn't change. So, to check
9105 the error status of a code conversion by this variable, you must
9106 explicitly set this variable to nil before performing code
9108 Vlast_code_conversion_error
= Qnil
;
9110 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
9112 *Non-nil means always inhibit code conversion of end-of-line format.
9113 See info node `Coding Systems' and info node `Text and Binary' concerning
9114 such conversion. */);
9115 inhibit_eol_conversion
= 0;
9117 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
9119 Non-nil means process buffer inherits coding system of process output.
9120 Bind it to t if the process output is to be treated as if it were a file
9121 read from some filesystem. */);
9122 inherit_process_coding_system
= 0;
9124 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
9126 Alist to decide a coding system to use for a file I/O operation.
9127 The format is ((PATTERN . VAL) ...),
9128 where PATTERN is a regular expression matching a file name,
9129 VAL is a coding system, a cons of coding systems, or a function symbol.
9130 If VAL is a coding system, it is used for both decoding and encoding
9132 If VAL is a cons of coding systems, the car part is used for decoding,
9133 and the cdr part is used for encoding.
9134 If VAL is a function symbol, the function must return a coding system
9135 or a cons of coding systems which are used as above. The function gets
9136 the arguments with which `find-operation-coding-systems' was called.
9138 See also the function `find-operation-coding-system'
9139 and the variable `auto-coding-alist'. */);
9140 Vfile_coding_system_alist
= Qnil
;
9142 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
9144 Alist to decide a coding system to use for a process I/O operation.
9145 The format is ((PATTERN . VAL) ...),
9146 where PATTERN is a regular expression matching a program name,
9147 VAL is a coding system, a cons of coding systems, or a function symbol.
9148 If VAL is a coding system, it is used for both decoding what received
9149 from the program and encoding what sent to the program.
9150 If VAL is a cons of coding systems, the car part is used for decoding,
9151 and the cdr part is used for encoding.
9152 If VAL is a function symbol, the function must return a coding system
9153 or a cons of coding systems which are used as above.
9155 See also the function `find-operation-coding-system'. */);
9156 Vprocess_coding_system_alist
= Qnil
;
9158 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
9160 Alist to decide a coding system to use for a network I/O operation.
9161 The format is ((PATTERN . VAL) ...),
9162 where PATTERN is a regular expression matching a network service name
9163 or is a port number to connect to,
9164 VAL is a coding system, a cons of coding systems, or a function symbol.
9165 If VAL is a coding system, it is used for both decoding what received
9166 from the network stream and encoding what sent to the network stream.
9167 If VAL is a cons of coding systems, the car part is used for decoding,
9168 and the cdr part is used for encoding.
9169 If VAL is a function symbol, the function must return a coding system
9170 or a cons of coding systems which are used as above.
9172 See also the function `find-operation-coding-system'. */);
9173 Vnetwork_coding_system_alist
= Qnil
;
9175 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
9176 doc
: /* Coding system to use with system messages.
9177 Also used for decoding keyboard input on X Window system. */);
9178 Vlocale_coding_system
= Qnil
;
9180 /* The eol mnemonics are reset in startup.el system-dependently. */
9181 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
9183 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
9184 eol_mnemonic_unix
= build_string (":");
9186 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
9188 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
9189 eol_mnemonic_dos
= build_string ("\\");
9191 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
9193 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
9194 eol_mnemonic_mac
= build_string ("/");
9196 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
9198 *String displayed in mode line when end-of-line format is not yet determined. */);
9199 eol_mnemonic_undecided
= build_string (":");
9201 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
9203 *Non-nil enables character translation while encoding and decoding. */);
9204 Venable_character_translation
= Qt
;
9206 DEFVAR_LISP ("standard-translation-table-for-decode",
9207 &Vstandard_translation_table_for_decode
,
9208 doc
: /* Table for translating characters while decoding. */);
9209 Vstandard_translation_table_for_decode
= Qnil
;
9211 DEFVAR_LISP ("standard-translation-table-for-encode",
9212 &Vstandard_translation_table_for_encode
,
9213 doc
: /* Table for translating characters while encoding. */);
9214 Vstandard_translation_table_for_encode
= Qnil
;
9216 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
9217 doc
: /* Alist of charsets vs revision numbers.
9218 While encoding, if a charset (car part of an element) is found,
9219 designate it with the escape sequence identifying revision (cdr part
9220 of the element). */);
9221 Vcharset_revision_table
= Qnil
;
9223 DEFVAR_LISP ("default-process-coding-system",
9224 &Vdefault_process_coding_system
,
9225 doc
: /* Cons of coding systems used for process I/O by default.
9226 The car part is used for decoding a process output,
9227 the cdr part is used for encoding a text to be sent to a process. */);
9228 Vdefault_process_coding_system
= Qnil
;
9230 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
9232 Table of extra Latin codes in the range 128..159 (inclusive).
9233 This is a vector of length 256.
9234 If Nth element is non-nil, the existence of code N in a file
9235 \(or output of subprocess) doesn't prevent it to be detected as
9236 a coding system of ISO 2022 variant which has a flag
9237 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9238 or reading output of a subprocess.
9239 Only 128th through 159th elements has a meaning. */);
9240 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
9242 DEFVAR_LISP ("select-safe-coding-system-function",
9243 &Vselect_safe_coding_system_function
,
9245 Function to call to select safe coding system for encoding a text.
9247 If set, this function is called to force a user to select a proper
9248 coding system which can encode the text in the case that a default
9249 coding system used in each operation can't encode the text.
9251 The default value is `select-safe-coding-system' (which see). */);
9252 Vselect_safe_coding_system_function
= Qnil
;
9254 DEFVAR_BOOL ("coding-system-require-warning",
9255 &coding_system_require_warning
,
9256 doc
: /* Internal use only.
9257 If non-nil, on writing a file, `select-safe-coding-system-function' is
9258 called even if `coding-system-for-write' is non-nil. The command
9259 `universal-coding-system-argument' binds this variable to t temporarily. */);
9260 coding_system_require_warning
= 0;
9263 DEFVAR_BOOL ("inhibit-iso-escape-detection",
9264 &inhibit_iso_escape_detection
,
9266 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9268 By default, on reading a file, Emacs tries to detect how the text is
9269 encoded. This code detection is sensitive to escape sequences. If
9270 the sequence is valid as ISO2022, the code is determined as one of
9271 the ISO2022 encodings, and the file is decoded by the corresponding
9272 coding system (e.g. `iso-2022-7bit').
9274 However, there may be a case that you want to read escape sequences in
9275 a file as is. In such a case, you can set this variable to non-nil.
9276 Then, as the code detection ignores any escape sequences, no file is
9277 detected as encoded in some ISO2022 encoding. The result is that all
9278 escape sequences become visible in a buffer.
9280 The default value is nil, and it is strongly recommended not to change
9281 it. That is because many Emacs Lisp source files that contain
9282 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9283 in Emacs's distribution, and they won't be decoded correctly on
9284 reading if you suppress escape sequence detection.
9286 The other way to read escape sequences in a file without decoding is
9287 to explicitly specify some coding system that doesn't use ISO2022's
9288 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
9289 inhibit_iso_escape_detection
= 0;
9291 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input
,
9292 doc
: /* Char table for translating self-inserting characters.
9293 This is applied to the result of input methods, not their input. See also
9294 `keyboard-translate-table'. */);
9295 Vtranslation_table_for_input
= Qnil
;
9298 Lisp_Object args
[coding_arg_max
];
9299 Lisp_Object plist
[16];
9302 for (i
= 0; i
< coding_arg_max
; i
++)
9305 plist
[0] = intern (":name");
9306 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
9307 plist
[2] = intern (":mnemonic");
9308 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
9309 plist
[4] = intern (":coding-type");
9310 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
9311 plist
[6] = intern (":ascii-compatible-p");
9312 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
9313 plist
[8] = intern (":default-char");
9314 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
9315 plist
[10] = intern (":for-unibyte");
9316 plist
[11] = args
[coding_arg_for_unibyte
] = Qt
;
9317 plist
[12] = intern (":docstring");
9318 plist
[13] = build_string ("Do no conversion.\n\
9320 When you visit a file with this coding, the file is read into a\n\
9321 unibyte buffer as is, thus each byte of a file is treated as a\n\
9323 plist
[14] = intern (":eol-type");
9324 plist
[15] = args
[coding_arg_eol_type
] = Qunix
;
9325 args
[coding_arg_plist
] = Flist (16, plist
);
9326 Fdefine_coding_system_internal (coding_arg_max
, args
);
9329 setup_coding_system (Qno_conversion
, &keyboard_coding
);
9330 setup_coding_system (Qno_conversion
, &terminal_coding
);
9331 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
9336 for (i
= 0; i
< coding_category_max
; i
++)
9337 Fset (AREF (Vcoding_category_table
, i
), Qno_conversion
);
9342 emacs_strerror (error_number
)
9347 synchronize_system_messages_locale ();
9348 str
= strerror (error_number
);
9350 if (! NILP (Vlocale_coding_system
))
9352 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
9353 Vlocale_coding_system
,
9355 str
= (char *) SDATA (dec
);