1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
152 Below is the template of these functions. */
156 detect_coding_XXX (coding
, detect_info
)
157 struct coding_system
*coding
;
158 struct coding_detection_info
*detect_info
;
160 unsigned char *src
= coding
->source
;
161 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
162 int multibytep
= coding
->src_multibyte
;
163 int consumed_chars
= 0;
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
173 if (! __C_conforms_to_XXX___ (c
))
175 if (! __C_strongly_suggests_XXX__ (c
))
176 found
= CATEGORY_MASK_XXX
;
178 /* The byte sequence is invalid for XXX. */
179 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
183 /* The source was exhausted successfully. */
184 detect_info
->found
|= found
;
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
201 Below is the template of these functions. */
205 decode_coding_XXXX (coding
)
206 struct coding_system
*coding
;
208 unsigned char *src
= coding
->source
+ coding
->consumed
;
209 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base
;
214 /* A buffer to produce decoded characters. */
215 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
216 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
217 int multibytep
= coding
->src_multibyte
;
222 if (charbuf
>= charbuf_end
)
223 /* No more room to produce a decoded character. */
230 if (src_base
< src_end
231 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base
< src_end
&& charbuf
< charbuf_end
)
235 *charbuf
++ = *src_base
++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
239 /* Remember how many characters we produced. */
240 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce an encoded text until it
258 reaches the head of not-yet-encoded source text.
260 Below is a template of these functions. */
263 encode_coding_XXX (coding
)
264 struct coding_system
*coding
;
266 int multibytep
= coding
->dst_multibyte
;
267 int *charbuf
= coding
->charbuf
;
268 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
269 unsigned char *dst
= coding
->destination
+ coding
->produced
;
270 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
271 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
272 int produced_chars
= 0;
274 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
277 /* Encode C into DST, and increment DST. */
279 label_no_more_destination
:
280 /* How many chars and bytes we produced. */
281 coding
->produced_char
+= produced_chars
;
282 coding
->produced
= dst
- coding
->destination
;
287 /*** 1. Preamble ***/
294 #include "character.h"
297 #include "composite.h"
301 Lisp_Object Vcoding_system_hash_table
;
303 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
304 Lisp_Object Qunix
, Qdos
;
305 extern Lisp_Object Qmac
; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system
;
307 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
308 Lisp_Object Qdefault_char
;
309 Lisp_Object Qno_conversion
, Qundecided
;
310 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
311 Lisp_Object Qbig
, Qlittle
;
312 Lisp_Object Qcoding_system_history
;
313 Lisp_Object Qvalid_codes
;
314 Lisp_Object QCcategory
, QCmnemonic
, QCdefalut_char
;
315 Lisp_Object QCdecode_translation_table
, QCencode_translation_table
;
316 Lisp_Object QCpost_read_conversion
, QCpre_write_conversion
;
318 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
319 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
320 Lisp_Object Qstart_process
, Qopen_network_stream
;
321 Lisp_Object Qtarget_idx
;
323 Lisp_Object Qinsufficient_source
, Qinconsistent_eol
, Qinvalid_source
;
324 Lisp_Object Qinterrupted
, Qinsufficient_memory
;
326 int coding_system_require_warning
;
328 Lisp_Object Vselect_safe_coding_system_function
;
330 /* Mnemonic string for each format of end-of-line. */
331 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
332 /* Mnemonic string to indicate format of end-of-line is not yet
334 Lisp_Object eol_mnemonic_undecided
;
338 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
340 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
342 /* Coding system emacs-mule and raw-text are for converting only
343 end-of-line format. */
344 Lisp_Object Qemacs_mule
, Qraw_text
;
345 Lisp_Object Qutf_8_emacs
;
347 /* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */
349 /* Coding-system for reading files and receiving data from process. */
350 Lisp_Object Vcoding_system_for_read
;
351 /* Coding-system for writing files and sending data to process. */
352 Lisp_Object Vcoding_system_for_write
;
353 /* Coding-system actually used in the latest I/O. */
354 Lisp_Object Vlast_coding_system_used
;
355 /* Set to non-nil when an error is detected during code conversion. */
356 Lisp_Object Vlast_code_conversion_error
;
357 /* A vector of length 256 which contains information about special
358 Latin codes (especially for dealing with Microsoft codes). */
359 Lisp_Object Vlatin_extra_code_table
;
361 /* Flag to inhibit code conversion of end-of-line format. */
362 int inhibit_eol_conversion
;
364 /* Flag to inhibit ISO2022 escape sequence detection. */
365 int inhibit_iso_escape_detection
;
367 /* Flag to make buffer-file-coding-system inherit from process-coding. */
368 int inherit_process_coding_system
;
370 /* Coding system to be used to encode text for terminal display. */
371 struct coding_system terminal_coding
;
373 /* Coding system to be used to encode text for terminal display when
374 terminal coding system is nil. */
375 struct coding_system safe_terminal_coding
;
377 /* Coding system of what is sent from terminal keyboard. */
378 struct coding_system keyboard_coding
;
380 Lisp_Object Vfile_coding_system_alist
;
381 Lisp_Object Vprocess_coding_system_alist
;
382 Lisp_Object Vnetwork_coding_system_alist
;
384 Lisp_Object Vlocale_coding_system
;
388 /* Flag to tell if we look up translation table on character code
390 Lisp_Object Venable_character_translation
;
391 /* Standard translation table to look up on decoding (reading). */
392 Lisp_Object Vstandard_translation_table_for_decode
;
393 /* Standard translation table to look up on encoding (writing). */
394 Lisp_Object Vstandard_translation_table_for_encode
;
396 Lisp_Object Qtranslation_table
;
397 Lisp_Object Qtranslation_table_id
;
398 Lisp_Object Qtranslation_table_for_decode
;
399 Lisp_Object Qtranslation_table_for_encode
;
401 /* Alist of charsets vs revision number. */
402 static Lisp_Object Vcharset_revision_table
;
404 /* Default coding systems used for process I/O. */
405 Lisp_Object Vdefault_process_coding_system
;
407 /* Char table for translating Quail and self-inserting input. */
408 Lisp_Object Vtranslation_table_for_input
;
410 /* Two special coding systems. */
411 Lisp_Object Vsjis_coding_system
;
412 Lisp_Object Vbig5_coding_system
;
414 static void record_conversion_result (struct coding_system
*coding
,
415 enum coding_result_code result
);
416 static int detect_coding_utf_8
P_ ((struct coding_system
*,
417 struct coding_detection_info
*info
));
418 static void decode_coding_utf_8
P_ ((struct coding_system
*));
419 static int encode_coding_utf_8
P_ ((struct coding_system
*));
421 static int detect_coding_utf_16
P_ ((struct coding_system
*,
422 struct coding_detection_info
*info
));
423 static void decode_coding_utf_16
P_ ((struct coding_system
*));
424 static int encode_coding_utf_16
P_ ((struct coding_system
*));
426 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
427 struct coding_detection_info
*info
));
428 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
429 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
431 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
432 struct coding_detection_info
*info
));
433 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
434 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
436 static int detect_coding_sjis
P_ ((struct coding_system
*,
437 struct coding_detection_info
*info
));
438 static void decode_coding_sjis
P_ ((struct coding_system
*));
439 static int encode_coding_sjis
P_ ((struct coding_system
*));
441 static int detect_coding_big5
P_ ((struct coding_system
*,
442 struct coding_detection_info
*info
));
443 static void decode_coding_big5
P_ ((struct coding_system
*));
444 static int encode_coding_big5
P_ ((struct coding_system
*));
446 static int detect_coding_ccl
P_ ((struct coding_system
*,
447 struct coding_detection_info
*info
));
448 static void decode_coding_ccl
P_ ((struct coding_system
*));
449 static int encode_coding_ccl
P_ ((struct coding_system
*));
451 static void decode_coding_raw_text
P_ ((struct coding_system
*));
452 static int encode_coding_raw_text
P_ ((struct coding_system
*));
455 /* ISO2022 section */
457 #define CODING_ISO_INITIAL(coding, reg) \
458 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
459 coding_attr_iso_initial), \
463 #define CODING_ISO_REQUEST(coding, charset_id) \
464 ((charset_id <= (coding)->max_charset_id \
465 ? (coding)->safe_charsets[charset_id] \
469 #define CODING_ISO_FLAGS(coding) \
470 ((coding)->spec.iso_2022.flags)
471 #define CODING_ISO_DESIGNATION(coding, reg) \
472 ((coding)->spec.iso_2022.current_designation[reg])
473 #define CODING_ISO_INVOCATION(coding, plane) \
474 ((coding)->spec.iso_2022.current_invocation[plane])
475 #define CODING_ISO_SINGLE_SHIFTING(coding) \
476 ((coding)->spec.iso_2022.single_shifting)
477 #define CODING_ISO_BOL(coding) \
478 ((coding)->spec.iso_2022.bol)
479 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
480 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
482 /* Control characters of ISO2022. */
483 /* code */ /* function */
484 #define ISO_CODE_LF 0x0A /* line-feed */
485 #define ISO_CODE_CR 0x0D /* carriage-return */
486 #define ISO_CODE_SO 0x0E /* shift-out */
487 #define ISO_CODE_SI 0x0F /* shift-in */
488 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
489 #define ISO_CODE_ESC 0x1B /* escape */
490 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
491 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
492 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
494 /* All code (1-byte) of ISO2022 is classified into one of the
496 enum iso_code_class_type
498 ISO_control_0
, /* Control codes in the range
499 0x00..0x1F and 0x7F, except for the
500 following 4 codes. */
501 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
502 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
503 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
504 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
505 ISO_control_1
, /* Control codes in the range
506 0x80..0x9F, except for the
507 following 3 codes. */
508 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
509 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
510 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
511 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
512 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
513 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
514 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
517 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
518 `iso-flags' attribute of an iso2022 coding system. */
520 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
521 instead of the correct short-form sequence (e.g. ESC $ A). */
522 #define CODING_ISO_FLAG_LONG_FORM 0x0001
524 /* If set, reset graphic planes and registers at end-of-line to the
526 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
528 /* If set, reset graphic planes and registers before any control
529 characters to the initial state. */
530 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
532 /* If set, encode by 7-bit environment. */
533 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
535 /* If set, use locking-shift function. */
536 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
538 /* If set, use single-shift function. Overwrite
539 CODING_ISO_FLAG_LOCKING_SHIFT. */
540 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
542 /* If set, use designation escape sequence. */
543 #define CODING_ISO_FLAG_DESIGNATION 0x0040
545 /* If set, produce revision number sequence. */
546 #define CODING_ISO_FLAG_REVISION 0x0080
548 /* If set, produce ISO6429's direction specifying sequence. */
549 #define CODING_ISO_FLAG_DIRECTION 0x0100
551 /* If set, assume designation states are reset at beginning of line on
553 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
555 /* If set, designation sequence should be placed at beginning of line
557 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
559 /* If set, do not encode unsafe characters on output. */
560 #define CODING_ISO_FLAG_SAFE 0x0800
562 /* If set, extra latin codes (128..159) are accepted as a valid code
564 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
566 #define CODING_ISO_FLAG_COMPOSITION 0x2000
568 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
570 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
572 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
574 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
576 /* A character to be produced on output if encoding of the original
577 character is prohibited by CODING_ISO_FLAG_SAFE. */
578 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
582 #define CODING_UTF_16_BOM(coding) \
583 ((coding)->spec.utf_16.bom)
585 #define CODING_UTF_16_ENDIAN(coding) \
586 ((coding)->spec.utf_16.endian)
588 #define CODING_UTF_16_SURROGATE(coding) \
589 ((coding)->spec.utf_16.surrogate)
593 #define CODING_CCL_DECODER(coding) \
594 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
595 #define CODING_CCL_ENCODER(coding) \
596 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
597 #define CODING_CCL_VALIDS(coding) \
598 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
600 /* Index for each coding category in `coding_categories' */
604 coding_category_iso_7
,
605 coding_category_iso_7_tight
,
606 coding_category_iso_8_1
,
607 coding_category_iso_8_2
,
608 coding_category_iso_7_else
,
609 coding_category_iso_8_else
,
610 coding_category_utf_8
,
611 coding_category_utf_16_auto
,
612 coding_category_utf_16_be
,
613 coding_category_utf_16_le
,
614 coding_category_utf_16_be_nosig
,
615 coding_category_utf_16_le_nosig
,
616 coding_category_charset
,
617 coding_category_sjis
,
618 coding_category_big5
,
620 coding_category_emacs_mule
,
621 /* All above are targets of code detection. */
622 coding_category_raw_text
,
623 coding_category_undecided
,
627 /* Definitions of flag bits used in detect_coding_XXXX. */
628 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
629 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
630 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
631 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
632 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
633 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
634 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
635 #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
636 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
637 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
638 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
639 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
640 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
641 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
642 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
643 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
644 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
645 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
647 /* This value is returned if detect_coding_mask () finds nothing other
648 than ASCII characters. */
649 #define CATEGORY_MASK_ANY \
650 (CATEGORY_MASK_ISO_7 \
651 | CATEGORY_MASK_ISO_7_TIGHT \
652 | CATEGORY_MASK_ISO_8_1 \
653 | CATEGORY_MASK_ISO_8_2 \
654 | CATEGORY_MASK_ISO_7_ELSE \
655 | CATEGORY_MASK_ISO_8_ELSE \
656 | CATEGORY_MASK_UTF_8 \
657 | CATEGORY_MASK_UTF_16_BE \
658 | CATEGORY_MASK_UTF_16_LE \
659 | CATEGORY_MASK_UTF_16_BE_NOSIG \
660 | CATEGORY_MASK_UTF_16_LE_NOSIG \
661 | CATEGORY_MASK_CHARSET \
662 | CATEGORY_MASK_SJIS \
663 | CATEGORY_MASK_BIG5 \
664 | CATEGORY_MASK_CCL \
665 | CATEGORY_MASK_EMACS_MULE)
668 #define CATEGORY_MASK_ISO_7BIT \
669 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
671 #define CATEGORY_MASK_ISO_8BIT \
672 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
674 #define CATEGORY_MASK_ISO_ELSE \
675 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
677 #define CATEGORY_MASK_ISO_ESCAPE \
678 (CATEGORY_MASK_ISO_7 \
679 | CATEGORY_MASK_ISO_7_TIGHT \
680 | CATEGORY_MASK_ISO_7_ELSE \
681 | CATEGORY_MASK_ISO_8_ELSE)
683 #define CATEGORY_MASK_ISO \
684 ( CATEGORY_MASK_ISO_7BIT \
685 | CATEGORY_MASK_ISO_8BIT \
686 | CATEGORY_MASK_ISO_ELSE)
688 #define CATEGORY_MASK_UTF_16 \
689 (CATEGORY_MASK_UTF_16_BE \
690 | CATEGORY_MASK_UTF_16_LE \
691 | CATEGORY_MASK_UTF_16_BE_NOSIG \
692 | CATEGORY_MASK_UTF_16_LE_NOSIG)
695 /* List of symbols `coding-category-xxx' ordered by priority. This
696 variable is exposed to Emacs Lisp. */
697 static Lisp_Object Vcoding_category_list
;
699 /* Table of coding categories (Lisp symbols). This variable is for
701 static Lisp_Object Vcoding_category_table
;
703 /* Table of coding-categories ordered by priority. */
704 static enum coding_category coding_priorities
[coding_category_max
];
706 /* Nth element is a coding context for the coding system bound to the
707 Nth coding category. */
708 static struct coding_system coding_categories
[coding_category_max
];
710 /*** Commonly used macros and functions ***/
713 #define min(a, b) ((a) < (b) ? (a) : (b))
716 #define max(a, b) ((a) > (b) ? (a) : (b))
719 #define CODING_GET_INFO(coding, attrs, charset_list) \
721 (attrs) = CODING_ID_ATTRS ((coding)->id); \
722 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
726 /* Safely get one byte from the source text pointed by SRC which ends
727 at SRC_END, and set C to that byte. If there are not enough bytes
728 in the source, it jumps to `no_more_source'. If multibytep is
729 nonzero, and a multibyte character is found at SRC, set C to the
730 negative value of the character code. The caller should declare
731 and set these variables appropriately in advance:
732 src, src_end, multibytep */
734 #define ONE_MORE_BYTE(c) \
736 if (src == src_end) \
738 if (src_base < src) \
739 record_conversion_result \
740 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
741 goto no_more_source; \
744 if (multibytep && (c & 0x80)) \
746 if ((c & 0xFE) == 0xC0) \
747 c = ((c & 1) << 6) | *src++; \
750 c = - string_char (--src, &src, NULL); \
751 record_conversion_result \
752 (coding, CODING_RESULT_INVALID_SRC); \
759 #define ONE_MORE_BYTE_NO_CHECK(c) \
762 if (multibytep && (c & 0x80)) \
764 if ((c & 0xFE) == 0xC0) \
765 c = ((c & 1) << 6) | *src++; \
768 c = - string_char (--src, &src, NULL); \
769 record_conversion_result \
770 (coding, CODING_RESULT_INVALID_SRC); \
777 /* Store a byte C in the place pointed by DST and increment DST to the
778 next free point, and increment PRODUCED_CHARS. The caller should
779 assure that C is 0..127, and declare and set the variable `dst'
780 appropriately in advance.
784 #define EMIT_ONE_ASCII_BYTE(c) \
791 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
793 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
795 produced_chars += 2; \
796 *dst++ = (c1), *dst++ = (c2); \
800 /* Store a byte C in the place pointed by DST and increment DST to the
801 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
802 nonzero, store in an appropriate multibyte form. The caller should
803 declare and set the variables `dst' and `multibytep' appropriately
806 #define EMIT_ONE_BYTE(c) \
813 ch = BYTE8_TO_CHAR (ch); \
814 CHAR_STRING_ADVANCE (ch, dst); \
821 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
823 #define EMIT_TWO_BYTES(c1, c2) \
825 produced_chars += 2; \
832 ch = BYTE8_TO_CHAR (ch); \
833 CHAR_STRING_ADVANCE (ch, dst); \
836 ch = BYTE8_TO_CHAR (ch); \
837 CHAR_STRING_ADVANCE (ch, dst); \
847 #define EMIT_THREE_BYTES(c1, c2, c3) \
849 EMIT_ONE_BYTE (c1); \
850 EMIT_TWO_BYTES (c2, c3); \
854 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
856 EMIT_TWO_BYTES (c1, c2); \
857 EMIT_TWO_BYTES (c3, c4); \
862 record_conversion_result (struct coding_system
*coding
,
863 enum coding_result_code result
)
865 coding
->result
= result
;
868 case CODING_RESULT_INSUFFICIENT_SRC
:
869 Vlast_code_conversion_error
= Qinsufficient_source
;
871 case CODING_RESULT_INCONSISTENT_EOL
:
872 Vlast_code_conversion_error
= Qinconsistent_eol
;
874 case CODING_RESULT_INVALID_SRC
:
875 Vlast_code_conversion_error
= Qinvalid_source
;
877 case CODING_RESULT_INTERRUPT
:
878 Vlast_code_conversion_error
= Qinterrupted
;
880 case CODING_RESULT_INSUFFICIENT_MEM
:
881 Vlast_code_conversion_error
= Qinsufficient_memory
;
886 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
888 charset_map_loaded = 0; \
889 c = DECODE_CHAR (charset, code); \
890 if (charset_map_loaded) \
892 const unsigned char *orig = coding->source; \
895 coding_set_source (coding); \
896 offset = coding->source - orig; \
898 src_base += offset; \
904 #define ASSURE_DESTINATION(bytes) \
906 if (dst + (bytes) >= dst_end) \
908 int more_bytes = charbuf_end - charbuf + (bytes); \
910 dst = alloc_destination (coding, more_bytes, dst); \
911 dst_end = coding->destination + coding->dst_bytes; \
918 coding_set_source (coding
)
919 struct coding_system
*coding
;
921 if (BUFFERP (coding
->src_object
))
923 struct buffer
*buf
= XBUFFER (coding
->src_object
);
925 if (coding
->src_pos
< 0)
926 coding
->source
= BUF_GAP_END_ADDR (buf
) + coding
->src_pos_byte
;
928 coding
->source
= BUF_BYTE_ADDRESS (buf
, coding
->src_pos_byte
);
930 else if (STRINGP (coding
->src_object
))
932 coding
->source
= SDATA (coding
->src_object
) + coding
->src_pos_byte
;
935 /* Otherwise, the source is C string and is never relocated
936 automatically. Thus we don't have to update anything. */
941 coding_set_destination (coding
)
942 struct coding_system
*coding
;
944 if (BUFFERP (coding
->dst_object
))
946 if (coding
->src_pos
< 0)
948 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
949 coding
->dst_bytes
= (GAP_END_ADDR
950 - (coding
->src_bytes
- coding
->consumed
)
951 - coding
->destination
);
955 /* We are sure that coding->dst_pos_byte is before the gap
957 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
958 + coding
->dst_pos_byte
- 1);
959 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
960 - coding
->destination
);
964 /* Otherwise, the destination is C string and is never relocated
965 automatically. Thus we don't have to update anything. */
971 coding_alloc_by_realloc (coding
, bytes
)
972 struct coding_system
*coding
;
975 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
976 coding
->dst_bytes
+ bytes
);
977 coding
->dst_bytes
+= bytes
;
981 coding_alloc_by_making_gap (coding
, bytes
)
982 struct coding_system
*coding
;
985 if (BUFFERP (coding
->dst_object
)
986 && EQ (coding
->src_object
, coding
->dst_object
))
988 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
990 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
992 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
996 Lisp_Object this_buffer
;
998 this_buffer
= Fcurrent_buffer ();
999 set_buffer_internal (XBUFFER (coding
->dst_object
));
1001 set_buffer_internal (XBUFFER (this_buffer
));
1006 static unsigned char *
1007 alloc_destination (coding
, nbytes
, dst
)
1008 struct coding_system
*coding
;
1012 EMACS_INT offset
= dst
- coding
->destination
;
1014 if (BUFFERP (coding
->dst_object
))
1015 coding_alloc_by_making_gap (coding
, nbytes
);
1017 coding_alloc_by_realloc (coding
, nbytes
);
1018 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1019 coding_set_destination (coding
);
1020 dst
= coding
->destination
+ offset
;
1024 /** Macros for annotations. */
1026 /* Maximum length of annotation data (sum of annotations for
1027 composition and charset). */
1028 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1030 /* An annotation data is stored in the array coding->charbuf in this
1032 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1033 LENGTH is the number of elements in the annotation.
1034 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1035 NCHARS is the number of characters in the text annotated.
1037 The format of the following elements depend on ANNOTATION_MASK.
1039 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1041 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1042 METHOD is one of enum composition_method.
1043 Optional COMPOSITION-COMPONENTS are characters and composition
1046 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1049 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
1051 *(buf)++ = -(len); \
1052 *(buf)++ = (mask); \
1053 *(buf)++ = (nchars); \
1054 coding->annotated = 1; \
1057 #define ADD_COMPOSITION_DATA(buf, nchars, method) \
1059 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
/* Start a charset annotation at BUF: the common header is written with
   length 4 and CODING_ANNOTATE_CHARSET_MASK for NCHARS characters.
   The statement that consumes ID is not visible in this listing.  */
1064 #define ADD_CHARSET_DATA(buf, nchars, id) \
1066 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1071 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1078 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1079 Check if a text is encoded in UTF-8. If it is, return 1, else
/* Classify one UTF-8 octet C by its high-order bits:
     0xxxxxxx  single-octet character,
     10xxxxxx  extra (continuation) octet,
     110xxxxx  leading octet of a 2-octet sequence,
     1110xxxx  leading octet of a 3-octet sequence,
     11110xxx  leading octet of a 4-octet sequence,
     111110xx  leading octet of a 5-octet sequence.
   UTF_8_1_OCTET_P is a plain `<' comparison, so it is also true for a
   negative C; the detector below tests `c < 0' alongside it.  */
1082 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1083 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1084 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1085 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1086 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1087 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
/* Detector for the UTF-8 category.  Marks CATEGORY_MASK_UTF_8 as
   checked in DETECT_INFO, skips the first coding->head_ascii bytes and
   then examines multi-octet sequences: a leading octet C is followed
   by extra octets C1..C4, each of which must satisfy
   UTF_8_EXTRA_OCTET_P, and a sequence whose leading octet matches the
   2-, 3-, 4- or 5-octet pattern sets FOUND to CATEGORY_MASK_UTF_8.
   The category is added to detect_info->rejected on the failure path
   and also when, in the last block, SRC has advanced past SRC_BASE;
   FOUND is finally merged into detect_info->found.  The loop
   structure, the braces and the return statements are missing from
   this listing.  */
1090 detect_coding_utf_8 (coding
, detect_info
)
1091 struct coding_system
*coding
;
1092 struct coding_detection_info
*detect_info
;
1094 const unsigned char *src
= coding
->source
, *src_base
;
1095 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1096 int multibytep
= coding
->src_multibyte
;
1097 int consumed_chars
= 0;
1100 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1101 /* A coding system of this category is always ASCII compatible. */
1102 src
+= coding
->head_ascii
;
1106 int c
, c1
, c2
, c3
, c4
;
1110 if (c
< 0 || UTF_8_1_OCTET_P (c
))
1113 if (c1
< 0 || ! UTF_8_EXTRA_OCTET_P (c1
))
1115 if (UTF_8_2_OCTET_LEADING_P (c
))
1117 found
= CATEGORY_MASK_UTF_8
;
1121 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1123 if (UTF_8_3_OCTET_LEADING_P (c
))
1125 found
= CATEGORY_MASK_UTF_8
;
1129 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1131 if (UTF_8_4_OCTET_LEADING_P (c
))
1133 found
= CATEGORY_MASK_UTF_8
;
1137 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1139 if (UTF_8_5_OCTET_LEADING_P (c
))
1141 found
= CATEGORY_MASK_UTF_8
;
1146 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
/* Presumably SRC_BASE marks the start of the sequence being read, so
   this catches a sequence cut off at the end of the last block --
   confirm against the missing lines.  */
1150 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1152 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1155 detect_info
->found
|= found
;
/* Decode UTF-8 bytes of CODING into code points in coding->charbuf.
   Reading resumes at coding->source + coding->consumed and output is
   appended at coding->charbuf + coding->charbuf_used.  A code point is
   assembled from the payload bits of the leading octet (mask 0x1F,
   0xF, 0x7 or 0x3 for 2, 3, 4 or 5 octets) and six bits from each
   extra octet.  Visible validity checks: every extra octet must
   satisfy UTF_8_EXTRA_OCTET_P, a 3-octet result in 0xD800..0xDFFF
   (surrogates) is refused, and a 5-octet result is refused when it is
   above MAX_CHAR or below 0x200000.  On the recovery path
   CONSUMED_CHARS is rewound to CONSUMED_CHARS_BASE and a byte is
   stored as itself if ASCII_BYTE_P, otherwise as BYTE8_TO_CHAR.  On
   exit coding->consumed_char, coding->consumed and
   coding->charbuf_used are updated.  Several lines of the body
   (loop, braces, byte reads, labels) are missing from this listing.  */
1161 decode_coding_utf_8 (coding
)
1162 struct coding_system
*coding
;
1164 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1165 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1166 const unsigned char *src_base
;
1167 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
1168 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
1169 int consumed_chars
= 0, consumed_chars_base
;
1170 int multibytep
= coding
->src_multibyte
;
1171 Lisp_Object attr
, charset_list
;
1173 CODING_GET_INFO (coding
, attr
, charset_list
);
1177 int c
, c1
, c2
, c3
, c4
, c5
;
1180 consumed_chars_base
= consumed_chars
;
1182 if (charbuf
>= charbuf_end
)
1190 else if (UTF_8_1_OCTET_P(c1
))
1197 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1199 if (UTF_8_2_OCTET_LEADING_P (c1
))
1201 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1202 /* Reject overlong sequences here and below. Encoders
1203 producing them are incorrect, they can be misleading,
1204 and they mess up read/write invariance. */
1211 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1213 if (UTF_8_3_OCTET_LEADING_P (c1
))
1215 c
= (((c1
& 0xF) << 12)
1216 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1218 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1224 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1226 if (UTF_8_4_OCTET_LEADING_P (c1
))
1228 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1229 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1236 if (c5
< 0 || ! UTF_8_EXTRA_OCTET_P (c5
))
1238 if (UTF_8_5_OCTET_LEADING_P (c1
))
1240 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1241 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1243 if ((c
> MAX_CHAR
) || (c
< 0x200000))
1258 consumed_chars
= consumed_chars_base
;
1260 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1265 coding
->consumed_char
+= consumed_chars_base
;
1266 coding
->consumed
= src_base
- coding
->source
;
1267 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the code points held in coding->charbuf (charbuf_used
   entries) as UTF-8, appending at coding->destination +
   coding->produced.  Two loops are visible.  The first reserves
   2 * MAX_MULTIBYTE_LENGTH bytes per character, converts a
   CHAR_BYTE8_P character back with CHAR_TO_BYTE8, and builds other
   characters in the scratch array STR with CHAR_STRING_ADVANCE before
   copying the bytes out.  The second reserves MAX_MULTIBYTE_LENGTH
   bytes and writes straight to DST with CHAR_STRING.  The condition
   selecting between the loops is not visible in this listing
   (presumably MULTIBYTEP -- confirm).  Finally CODING_RESULT_SUCCESS
   is recorded and coding->produced_char and coding->produced are
   updated.  */
1272 encode_coding_utf_8 (coding
)
1273 struct coding_system
*coding
;
1275 int multibytep
= coding
->dst_multibyte
;
1276 int *charbuf
= coding
->charbuf
;
1277 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1278 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1279 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1280 int produced_chars
= 0;
1285 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1287 while (charbuf
< charbuf_end
)
1289 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1291 ASSURE_DESTINATION (safe_room
);
1293 if (CHAR_BYTE8_P (c
))
1295 c
= CHAR_TO_BYTE8 (c
);
1300 CHAR_STRING_ADVANCE (c
, pend
);
1301 for (p
= str
; p
< pend
; p
++)
1308 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1310 while (charbuf
< charbuf_end
)
1312 ASSURE_DESTINATION (safe_room
);
1314 dst
+= CHAR_STRING (c
, dst
);
1318 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1319 coding
->produced_char
+= produced_chars
;
1320 coding
->produced
= dst
- coding
->destination
;
1325 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1326 Check if a text is encoded in one of UTF-16 based coding systems.
1327 If it is, return 1, else return 0. */
/* Tests on a 16-bit UTF-16 code unit VAL.  Masking with 0xFC00 keeps
   the top six bits, so a high surrogate is any value in
   0xD800..0xDBFF and a low surrogate any value in 0xDC00..0xDFFF.
   UTF_16_INVALID_P holds for 0xFFFE, 0xFFFF and every low
   surrogate.  */
1329 #define UTF_16_HIGH_SURROGATE_P(val) \
1330 (((val) & 0xFC00) == 0xD800)
1332 #define UTF_16_LOW_SURROGATE_P(val) \
1333 (((val) & 0xFC00) == 0xDC00)
1335 #define UTF_16_INVALID_P(val) \
1336 (((val) == 0xFFFE) \
1337 || ((val) == 0xFFFF) \
1338 || UTF_16_LOW_SURROGATE_P (val))
/* Detector for the UTF-16 categories.  Marks CATEGORY_MASK_UTF_16 as
   checked and rejects it when the last block has an odd
   coding->src_chars.  Otherwise the first two bytes C1, C2 decide:
     FF FE -> UTF_16_LE and UTF_16_AUTO are found; UTF_16_BE and both
              NOSIG categories are rejected;
     FE FF -> UTF_16_BE and UTF_16_AUTO are found; UTF_16_LE and both
              NOSIG categories are rejected;
     else  -> if both bytes were read (c1 >= 0 && c2 >= 0), up to 256
              further byte pairs are scanned using the tables B1 and
              B2, BE_NOSIG is found when B1_VARIANTS < B2_VARIANTS and
              LE_NOSIG otherwise (the `else' is not visible), and
              UTF_16_BE and UTF_16_LE are rejected.
   Byte reads, braces and return statements are missing from this
   listing.  */
1342 detect_coding_utf_16 (coding
, detect_info
)
1343 struct coding_system
*coding
;
1344 struct coding_detection_info
*detect_info
;
1346 const unsigned char *src
= coding
->source
, *src_base
= src
;
1347 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1348 int multibytep
= coding
->src_multibyte
;
1349 int consumed_chars
= 0;
1352 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
1353 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1354 && (coding
->src_chars
& 1))
1356 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
1362 if ((c1
== 0xFF) && (c2
== 0xFE))
1364 detect_info
->found
|= (CATEGORY_MASK_UTF_16_LE
1365 | CATEGORY_MASK_UTF_16_AUTO
);
1366 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_BE
1367 | CATEGORY_MASK_UTF_16_BE_NOSIG
1368 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1370 else if ((c1
== 0xFE) && (c2
== 0xFF))
1372 detect_info
->found
|= (CATEGORY_MASK_UTF_16_BE
1373 | CATEGORY_MASK_UTF_16_AUTO
);
1374 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_LE
1375 | CATEGORY_MASK_UTF_16_BE_NOSIG
1376 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1378 else if (c1
>= 0 && c2
>= 0)
1380 unsigned char b1
[256], b2
[256];
1381 int b1_variants
= 1, b2_variants
= 1;
1384 bzero (b1
, 256), bzero (b2
, 256);
1386 for (n
= 0; n
< 256 && src
< src_end
; n
++)
1391 if (c1
< 0 || c2
< 0)
/* NOTE(review): `b1[c1++]' and `b2[c2++]' post-increment the index
   variables, not the table entries.  Nothing visible in this listing
   stores into B1 or B2 after the bzero, so both tests would always
   succeed and the two counters would advance together.  Possibly
   `b1[c1]++' / `b2[c2]++' was intended -- confirm against the
   missing lines.  */
1393 if (! b1
[c1
++]) b1_variants
++;
1394 if (! b2
[c2
++]) b2_variants
++;
1396 if (b1_variants
< b2_variants
)
1397 detect_info
->found
|= CATEGORY_MASK_UTF_16_BE_NOSIG
;
1399 detect_info
->found
|= CATEGORY_MASK_UTF_16_LE_NOSIG
;
1400 detect_info
->rejected
1401 |= (CATEGORY_MASK_UTF_16_BE
| CATEGORY_MASK_UTF_16_LE
);
/* Decode UTF-16 bytes of CODING into code points in coding->charbuf.
   BOM handling comes first.  With utf_16_with_bom the leading code
   unit C is compared with the value expected for ENDIAN (0xFEFF for
   utf_16_big_endian, 0xFFFE otherwise); if it is not a BOM its bytes
   are treated as a normal character.  With utf_16_detect_bom
   detection is taken to have failed already.  In both visible cases
   CODING_UTF_16_BOM (coding) is set to utf_16_without_bom.

   In the main part two bytes C1, C2 are combined according to ENDIAN.
   The pending high surrogate is loaded from CODING_UTF_16_SURROGATE
   (coding) on entry and written back whenever it changes.  A low
   surrogate following it yields
   0x10000 + (((surrogate - 0xD800) << 10) | (c - 0xDC00));
   when the following unit is not a low surrogate, the bytes of the
   pending surrogate are split back into C1, C2 in ENDIAN order.  The
   loop stops producing when fewer than three slots remain in CHARBUF
   (charbuf + 2 >= charbuf_end).  On exit coding->consumed_char,
   coding->consumed and coding->charbuf_used are updated.  Parts of
   the body are missing from this listing.  */
1408 decode_coding_utf_16 (coding
)
1409 struct coding_system
*coding
;
1411 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1412 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1413 const unsigned char *src_base
;
1414 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
1415 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
1416 int consumed_chars
= 0, consumed_chars_base
;
1417 int multibytep
= coding
->src_multibyte
;
1418 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1419 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1420 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1421 Lisp_Object attr
, charset_list
;
1423 CODING_GET_INFO (coding
, attr
, charset_list
);
1425 if (bom
== utf_16_with_bom
)
1434 if (endian
== utf_16_big_endian
1435 ? c
!= 0xFEFF : c
!= 0xFFFE)
1437 /* The first two bytes are not BOM. Treat them as bytes
1438 for a normal character. */
1442 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1444 else if (bom
== utf_16_detect_bom
)
1446 /* We have already tried to detect BOM and failed in
1448 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1456 consumed_chars_base
= consumed_chars
;
1458 if (charbuf
+ 2 >= charbuf_end
)
1470 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
1474 c
= (endian
== utf_16_big_endian
1475 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1478 if (! UTF_16_LOW_SURROGATE_P (c
))
1480 if (endian
== utf_16_big_endian
)
1481 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1483 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1487 if (UTF_16_HIGH_SURROGATE_P (c
))
1488 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1494 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1495 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1496 *charbuf
++ = 0x10000 + c
;
1501 if (UTF_16_HIGH_SURROGATE_P (c
))
1502 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1509 coding
->consumed_char
+= consumed_chars_base
;
1510 coding
->consumed
= src_base
- coding
->source
;
1511 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the code points held in coding->charbuf as UTF-16, appending
   at coding->destination + coding->produced.  Unless the BOM type is
   utf_16_without_bom, a signature is emitted first (FE FF or FF FE;
   the test choosing between them is not visible in this listing) and
   CODING_UTF_16_BOM (coding) is then set to utf_16_without_bom.  Each
   character is written either as one code unit (two bytes) or as the
   surrogate pair C1 = (c >> 10) + 0xD800, C2 = (c & 0x3FF) + 0xDC00
   (four bytes); each emit call appears twice, once per byte order.
   Any adjustment of C before the pair is computed is not visible
   here.  Finally CODING_RESULT_SUCCESS is recorded and
   coding->produced and coding->produced_char are updated.  */
1515 encode_coding_utf_16 (coding
)
1516 struct coding_system
*coding
;
1518 int multibytep
= coding
->dst_multibyte
;
1519 int *charbuf
= coding
->charbuf
;
1520 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1521 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1522 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1524 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1525 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1526 int produced_chars
= 0;
1527 Lisp_Object attrs
, charset_list
;
1530 CODING_GET_INFO (coding
, attrs
, charset_list
);
1532 if (bom
!= utf_16_without_bom
)
1534 ASSURE_DESTINATION (safe_room
);
1536 EMIT_TWO_BYTES (0xFE, 0xFF);
1538 EMIT_TWO_BYTES (0xFF, 0xFE);
1539 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1542 while (charbuf
< charbuf_end
)
1544 ASSURE_DESTINATION (safe_room
);
/* Characters outside the encodable range are replaced by
   coding->default_char.
   NOTE(review): if MAX_UNICODE_CHAR is the largest valid code point
   (its definition is not visible here), `>=' also replaces that
   character itself; `>' may have been intended -- confirm.  */
1546 if (c
>= MAX_UNICODE_CHAR
)
1547 c
= coding
->default_char
;
1552 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1554 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1561 c1
= (c
>> 10) + 0xD800;
1562 c2
= (c
& 0x3FF) + 0xDC00;
1564 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1566 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1569 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1570 coding
->produced
= dst
- coding
->destination
;
1571 coding
->produced_char
+= produced_chars
;
1576 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1578 /* Emacs' internal format for representation of multiple character
1579 sets is a kind of multi-byte encoding, i.e. characters are
1580 represented by variable-length sequences of one-byte codes.
1582 ASCII characters and control characters (e.g. `tab', `newline') are
1583 represented by one-byte sequences which are their ASCII codes, in
1584 the range 0x00 through 0x7F.
1586 8-bit characters of the range 0x80..0x9F are represented by
1587 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1590 8-bit characters of the range 0xA0..0xFF are represented by
1591 one-byte sequences which are their 8-bit code.
1593 The other characters are represented by a sequence of `base
1594 leading-code', optional `extended leading-code', and one or two
1595 `position-code's. The length of the sequence is determined by the
1596 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1597 whereas extended leading-code and position-code take the range 0xA0
1598 through 0xFF. See `charset.h' for more details about leading-code
1601 --- CODE RANGE of Emacs' internal format ---
1605 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1606 eight-bit-graphic 0xA0..0xBF
1607 ELSE 0x81..0x9D + [0xA0..0xFF]+
1608 ---------------------------------------------
1610 As this is the internal character representation, the format is
1611 usually not used externally (i.e. in a file or in a data sent to a
1612 process). But, it is possible to have a text externally in this
1613 format (i.e. by encoding by the coding system `emacs-mule').
1615 In that case, a sequence of one-byte codes has a slightly different
1618 At first, all characters in eight-bit-control are represented by
1619 one-byte sequences which are their 8-bit code.
1621 Next, character composition data are represented by the byte
1622 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1624 METHOD is 0xF2 plus one of composition method (enum
1625 composition_method),
1627 BYTES is 0xA0 plus a byte length of this composition data,
1629 CHARS is 0xA0 plus a number of characters composed by this
1632 COMPONENTs are characters of multibyte form or composition
1633 rules encoded by two-byte of ASCII codes.
1635 In addition, for backward compatibility, the following formats are
1636 also recognized as composition data on decoding.
1639 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1642 MSEQ is a multibyte form but in these special format:
1643 ASCII: 0xA0 ASCII_CODE+0x80,
1644 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1645 RULE is a one byte code of the range 0xA0..0xF0 that
1646 represents a composition rule.
/* Table indexed by a byte value.  The code in this file switches on
   emacs_mule_bytes[c] when decoding a character, and uses
   emacs_mule_bytes[lead] - 1 as the number of bytes expected after a
   leading byte.  Where the table is filled in is not visible here.  */
1649 char emacs_mule_bytes
[256];
/* Decode one emacs-mule character starting at SRC.  The leading byte C
   selects a case through emacs_mule_bytes[c].  The charset is looked
   up in emacs_mule_charset[], indexed either by C itself or, after a
   private leading code (EMACS_MULE_LEADING_CODE_PRIVATE_11 or _12), by
   a further byte; each lookup is tested for a missing entry.  In the
   two-position cases the first position byte contributes
   (c & 0x7F) << 8 to CODE.  One case selects charset_ascii or
   charset_eight_bit according to ASCII_BYTE_P (code).  The character
   is then obtained with DECODE_CHAR (charset, code).  *NBYTES receives
   the number of bytes read (src - src_base) and *NCHARS the value of
   CONSUMED_CHARS.  The use of ID, the byte reads, the error exits and
   the return statements are not visible in this listing.  */
1652 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1653 struct coding_system
*coding
;
1654 const unsigned char *src
;
1655 int *nbytes
, *nchars
, *id
;
1657 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1658 const unsigned char *src_base
= src
;
1659 int multibytep
= coding
->src_multibyte
;
1660 struct charset
*charset
;
1663 int consumed_chars
= 0;
1669 charset
= emacs_mule_charset
[0];
1673 switch (emacs_mule_bytes
[c
])
1676 if (! (charset
= emacs_mule_charset
[c
]))
1685 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1686 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1689 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1698 if (! (charset
= emacs_mule_charset
[c
]))
1703 code
= (c
& 0x7F) << 8;
1713 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1718 code
= (c
& 0x7F) << 8;
1727 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1728 ? charset_ascii
: charset_eight_bit
);
1734 c
= DECODE_CHAR (charset
, code
);
1738 *nbytes
= src
- src_base
;
1739 *nchars
= consumed_chars
;
1752 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1753 Check if a text is encoded in `emacs-mule'. If it is, return 1,
/* Detector for the emacs-mule category.  Marks
   CATEGORY_MASK_EMACS_MULE as checked in DETECT_INFO and skips the
   first coding->head_ascii bytes.  Visible checks: a composition
   sequence must span more than 4 bytes; the bytes ISO_CODE_ESC,
   ISO_CODE_SI and ISO_CODE_SO are tested for explicitly; and a leading
   byte must be followed by emacs_mule_bytes[lead] - 1 further bytes
   (MORE_BYTES must reach 0).  Successful cases set FOUND to
   CATEGORY_MASK_EMACS_MULE; the category is added to
   detect_info->rejected on the failure path and also when, in the last
   block, SRC has advanced past SRC_BASE.  FOUND is finally merged into
   detect_info->found.

   Note that SRC_BASE is declared twice: once with SRC at the top and
   again next to the composition check, where the second declaration
   presumably shadows the first inside an inner block (braces are not
   visible in this listing).  */
1757 detect_coding_emacs_mule (coding
, detect_info
)
1758 struct coding_system
*coding
;
1759 struct coding_detection_info
*detect_info
;
1761 const unsigned char *src
= coding
->source
, *src_base
;
1762 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1763 int multibytep
= coding
->src_multibyte
;
1764 int consumed_chars
= 0;
1768 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1769 /* A coding system of this category is always ASCII compatible. */
1770 src
+= coding
->head_ascii
;
1780 /* Perhaps the start of a composite character. We simply skip
1781 it because analyzing it is too heavy for detecting. But,
1782 at least, we check that the composite character
1783 consists of more than 4 bytes. */
1784 const unsigned char *src_base
;
1794 if (src
- src_base
<= 4)
1796 found
= CATEGORY_MASK_EMACS_MULE
;
1804 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1809 int more_bytes
= emacs_mule_bytes
[*src_base
] - 1;
1811 while (more_bytes
> 0)
1816 src
--; /* Unread the last byte. */
1821 if (more_bytes
!= 0)
1823 found
= CATEGORY_MASK_EMACS_MULE
;
1826 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1830 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1832 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1835 detect_info
->found
|= found
;
1840 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1842 /* Decode a character represented as a component of composition
1843 sequence of Emacs 20/21 style at SRC. Set C to that character and
1844 update SRC to the head of next character (or an encoded composition
1845 rule). If SRC doesn't point to a composition component, set C to -1.
1846 If SRC points to an invalid byte sequence, global exit by a return
/* Expansion context: this macro uses the variables `src', `src_end',
   `coding', `c' and `consumed_chars' and the label `invalid_code' of
   the function it is expanded in.  The character is decoded with
   emacs_mule_char (with a null ID argument) and CONSUMED_CHARS is
   advanced by the NCHARS it reports.  How BUF and SRC are updated is
   not visible in this listing.  */
1849 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1853 int nbytes, nchars; \
1855 if (src == src_end) \
1857 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1862 goto invalid_code; \
1866 consumed_chars += nchars; \
1871 /* Decode a composition rule represented as a component of composition
1872 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1873 and increment BUF. If SRC points an invalid byte sequence, set C
/* Emacs 20 rule: a single byte is read into a macro-local C that
   packs both reference points as c = gref * 9 + nref, so the accepted
   range is 0..80.  Control jumps to `invalid_code' when SRC is
   exhausted or C is out of range.  Any adjustment of C between the
   read and the range check is not visible in this listing.  */
1876 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1878 int c, gref, nref; \
1880 if (src >= src_end) \
1881 goto invalid_code; \
1882 ONE_MORE_BYTE_NO_CHECK (c); \
1884 if (c < 0 || c >= 81) \
1885 goto invalid_code; \
1887 gref = c / 9, nref = c % 9; \
1888 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1892 /* Decode a composition rule represented as a component of composition
1893 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1894 and increment BUF. If SRC points an invalid byte sequence, set C
/* Emacs 21 rule: two bytes are read, GREF first and then NREF, so at
   least two bytes must remain at SRC.  Control jumps to `invalid_code'
   when they do not, or when either value falls outside 0..80.  Any
   adjustment of GREF and NREF between the reads and the range check is
   not visible in this listing.  */
1897 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1901 if (src + 1>= src_end) \
1902 goto invalid_code; \
1903 ONE_MORE_BYTE_NO_CHECK (gref); \
1905 ONE_MORE_BYTE_NO_CHECK (nref); \
1907 if (gref < 0 || gref >= 81 \
1908 || nref < 0 || nref >= 81) \
1909 goto invalid_code; \
1910 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
/* Decode an Emacs 21 style composition whose method byte is C.  The
   method is c - 0xF2; the next two bytes give NBYTES and NCHARS, each
   biased by 0xA0.  A composition annotation is started in CHARBUF.
   For methods other than COMPOSITION_RELATIVE, components are decoded
   while CONSUMED_CHARS is below CONSUMED_CHARS_BASE + NBYTES: an odd
   position I is a rule (RULE_21) unless the method is
   COMPOSITION_WITH_ALTCHARS, every other position is a character.
   Finally the stored length word charbuf_base[0], which is negative,
   is decreased by the component count I.  The macro relies on
   `charbuf', `consumed_chars', `consumed_chars_base' and the label
   `invalid_code' of the enclosing function.  */
1914 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1916 /* Emacs 21 style format. The first three bytes at SRC are \
1917 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1918 the byte length of this composition information, CHARS is the \
1919 number of characters composed by this composition. */ \
1920 enum composition_method method = c - 0xF2; \
1921 int *charbuf_base = charbuf; \
1922 int consumed_chars_limit; \
1923 int nbytes, nchars; \
1925 ONE_MORE_BYTE (c); \
1927 goto invalid_code; \
1928 nbytes = c - 0xA0; \
1930 goto invalid_code; \
1931 ONE_MORE_BYTE (c); \
1933 goto invalid_code; \
1934 nchars = c - 0xA0; \
1935 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
1936 consumed_chars_limit = consumed_chars_base + nbytes; \
1937 if (method != COMPOSITION_RELATIVE) \
1940 while (consumed_chars < consumed_chars_limit) \
1942 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1943 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1945 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1948 if (consumed_chars < consumed_chars_limit) \
1949 goto invalid_code; \
1950 charbuf_base[0] -= i; \
/* Decode an Emacs 20 style relative composition.  After the 0x80 byte
   is skipped, up to MAX_COMPOSITION_COMPONENTS characters are decoded
   into the local array COMPONENTS.  A COMPOSITION_RELATIVE annotation
   for I characters is then added to CHARBUF and the I components are
   copied after it.  The loop's exit test and the condition guarding
   `goto invalid_code' are not visible in this listing.  */
1955 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1957 /* Emacs 20 style format for relative composition. */ \
1958 /* Store multibyte form of characters to be composed. */ \
1959 enum composition_method method = COMPOSITION_RELATIVE; \
1960 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1961 int *buf = components; \
1965 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1966 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1967 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1969 goto invalid_code; \
1970 ADD_COMPOSITION_DATA (charbuf, i, method); \
1971 for (j = 0; j < i; j++) \
1972 *charbuf++ = components[j]; \
/* Decode an Emacs 20 style rule-base composition.  A first character
   and then up to MAX_COMPOSITION_COMPONENTS (rule, character) pairs
   are decoded into the local array COMPONENTS through BUF.  The
   sequence is invalid when I < 1 or when an even number of elements
   was stored.  A COMPOSITION_WITH_RULE annotation is then added and
   the components are copied to CHARBUF, followed by every second
   component.

   NOTE(review): two statements look wrong and should be confirmed:
   - `charbuf + i + (i / 2) + 1 < charbuf_end' jumps to
     `no_more_source' when the output still has room; the comparison
     appears to be inverted (`>=' expected).
   - `ADD_COMPOSITION_DATA (buf, i, method)' writes the annotation
     header through BUF, i.e. into the scratch array COMPONENTS, while
     the relative-composition macro passes CHARBUF.  */
1976 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1978 /* Emacs 20 style format for rule-base composition. */ \
1979 /* Store multibyte form of characters to be composed. */ \
1980 enum composition_method method = COMPOSITION_WITH_RULE; \
1981 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1982 int *buf = components; \
1985 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1986 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1988 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1989 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1991 if (i < 1 || (buf - components) % 2 == 0) \
1992 goto invalid_code; \
1993 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1994 goto no_more_source; \
1995 ADD_COMPOSITION_DATA (buf, i, method); \
1996 for (j = 0; j < i; j++) \
1997 *charbuf++ = components[j]; \
1998 for (j = 0; j < i; j += 2) \
1999 *charbuf++ = components[j]; \
/* Decode emacs-mule bytes of CODING into code points in
   coding->charbuf.  Reading resumes at coding->source +
   coding->consumed; output is appended at coding->charbuf +
   coding->charbuf_used.  The output limit is set
   MAX_ANNOTATION_LENGTH entries before the end of the buffer.

   Visible dispatch: a byte C with
   COMPOSITION_RELATIVE <= c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
   starts an Emacs 21 style composition; the two Emacs 20 styles are
   handled by their own macros; a byte below 0xA0 with
   emacs_mule_bytes[c] > 1 starts a multibyte character decoded by
   emacs_mule_char, which also reports its charset ID.  When the
   previous run of characters was not ASCII, a charset annotation
   covering char_offset - last_offset characters is emitted and
   LAST_OFFSET is moved up; the same is done once more before
   returning.  On the recovery path CONSUMED_CHARS is rewound and a
   byte is stored as itself if ASCII_BYTE_P, otherwise as
   BYTE8_TO_CHAR.  On exit coding->consumed_char, coding->consumed and
   coding->charbuf_used are updated.  Many lines of the body are
   missing from this listing.  */
2004 decode_coding_emacs_mule (coding
)
2005 struct coding_system
*coding
;
2007 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2008 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2009 const unsigned char *src_base
;
2010 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
2012 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
2013 int consumed_chars
= 0, consumed_chars_base
;
2014 int multibytep
= coding
->src_multibyte
;
2015 Lisp_Object attrs
, charset_list
;
2016 int char_offset
= coding
->produced_char
;
2017 int last_offset
= char_offset
;
2018 int last_id
= charset_ascii
;
2020 CODING_GET_INFO (coding
, attrs
, charset_list
);
2027 consumed_chars_base
= consumed_chars
;
2029 if (charbuf
>= charbuf_end
)
2048 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2049 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2050 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2052 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2054 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2058 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2064 consumed_chars
= consumed_chars_base
;
2065 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2074 if (last_id
!= charset_ascii
)
2075 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
2077 last_offset
= char_offset
;
2081 consumed_chars
+= nchars
;
2088 consumed_chars
= consumed_chars_base
;
2090 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
2096 if (last_id
!= charset_ascii
)
2097 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
2098 coding
->consumed_char
+= consumed_chars_base
;
2099 coding
->consumed
= src_base
- coding
->source
;
2100 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Store in CODES the leading code(s) for the emacs-mule charset ID.
   In the first case (its condition is not visible in this listing) ID
   itself is the only leading code and codes[1] is 0.  Otherwise
   codes[1] is ID and codes[0] is 0x9A for ID < 0xE0, 0x9B for
   ID < 0xF0, 0x9C for ID < 0xF5 and 0x9D for the rest.  ID is
   evaluated more than once.  */
2104 #define EMACS_MULE_LEADING_CODES(id, codes) \
2107 codes[0] = id, codes[1] = 0; \
2108 else if (id < 0xE0) \
2109 codes[0] = 0x9A, codes[1] = id; \
2110 else if (id < 0xF0) \
2111 codes[0] = 0x9B, codes[1] = id; \
2112 else if (id < 0xF5) \
2113 codes[0] = 0x9C, codes[1] = id; \
2115 codes[0] = 0x9D, codes[1] = id; \
/* Encode the code points held in coding->charbuf in emacs-mule
   format, appending at coding->destination + coding->produced.  If
   the charset list of CODING is not Vemacs_mule_charset_list, both the
   coding attribute and the local CHARSET_LIST are replaced by it.

   Annotations in CHARBUF are handled first: composition annotations
   are not implemented yet, and a charset annotation supplies
   PREFERRED_CHARSET_ID from charbuf[3], which is reset to -1 when an
   Fmemq lookup fails.  For ordinary characters, ASCII is emitted as
   one byte and a CHAR_BYTE8_P character is converted with
   CHAR_TO_BYTE8.  Any other character is mapped to a charset (the
   preferred one when it contains the character); in a fallback branch
   the character is replaced by coding->default_char.  The charset's
   leading code(s) from EMACS_MULE_LEADING_CODES are emitted, followed
   by one position byte (code | 0x80) or two (code >> 8, code & 0xFF);
   the test on DIMENSION choosing between them is not visible in this
   listing.  Finally CODING_RESULT_SUCCESS is recorded and
   coding->produced_char and coding->produced are updated.  */
2120 encode_coding_emacs_mule (coding
)
2121 struct coding_system
*coding
;
2123 int multibytep
= coding
->dst_multibyte
;
2124 int *charbuf
= coding
->charbuf
;
2125 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2126 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2127 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2129 int produced_chars
= 0;
2130 Lisp_Object attrs
, charset_list
;
2132 int preferred_charset_id
= -1;
2134 CODING_GET_INFO (coding
, attrs
, charset_list
);
2135 if (! EQ (charset_list
, Vemacs_mule_charset_list
))
2137 CODING_ATTR_CHARSET_LIST (attrs
)
2138 = charset_list
= Vemacs_mule_charset_list
;
2141 while (charbuf
< charbuf_end
)
2143 ASSURE_DESTINATION (safe_room
);
2148 /* Handle an annotation. */
2151 case CODING_ANNOTATE_COMPOSITION_MASK
:
2152 /* Not yet implemented. */
2154 case CODING_ANNOTATE_CHARSET_MASK
:
2155 preferred_charset_id
= charbuf
[3];
2156 if (preferred_charset_id
>= 0
2157 && NILP (Fmemq (make_number (preferred_charset_id
),
2159 preferred_charset_id
= -1;
2168 if (ASCII_CHAR_P (c
))
2169 EMIT_ONE_ASCII_BYTE (c
);
2170 else if (CHAR_BYTE8_P (c
))
2172 c
= CHAR_TO_BYTE8 (c
);
2177 struct charset
*charset
;
2181 unsigned char leading_codes
[2];
2183 if (preferred_charset_id
>= 0)
2185 charset
= CHARSET_FROM_ID (preferred_charset_id
);
/* NOTE(review): this char_charset call passes NULL where the other
   calls pass &code, and nothing visible in this listing computes CODE
   when the preferred charset does contain C.  CODE may therefore be
   used unset on this path -- confirm against the missing lines.  */
2186 if (! CHAR_CHARSET_P (c
, charset
))
2187 charset
= char_charset (c
, charset_list
, NULL
);
2190 charset
= char_charset (c
, charset_list
, &code
);
2193 c
= coding
->default_char
;
2194 if (ASCII_CHAR_P (c
))
2196 EMIT_ONE_ASCII_BYTE (c
);
2199 charset
= char_charset (c
, charset_list
, &code
);
2201 dimension
= CHARSET_DIMENSION (charset
);
2202 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2203 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2204 EMIT_ONE_BYTE (leading_codes
[0]);
2205 if (leading_codes
[1])
2206 EMIT_ONE_BYTE (leading_codes
[1]);
2208 EMIT_ONE_BYTE (code
| 0x80);
2212 EMIT_ONE_BYTE (code
>> 8);
2213 EMIT_ONE_BYTE (code
& 0xFF);
2217 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
2218 coding
->produced_char
+= produced_chars
;
2219 coding
->produced
= dst
- coding
->destination
;
2224 /*** 7. ISO2022 handlers ***/
2226 /* The following note describes the coding system ISO2022 briefly.
2227 Since the intention of this note is to help understand the
2228 functions in this file, some parts are NOT ACCURATE or are OVERLY
2229 SIMPLIFIED. For thorough understanding, please refer to the
2230 original document of ISO2022. This is equivalent to the standard
2231 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2233 ISO2022 provides many mechanisms to encode several character sets
2234 in 7-bit and 8-bit environments. For 7-bit environments, all text
2235 is encoded using bytes less than 128. This may make the encoded
2236 text a little bit longer, but the text passes more easily through
2237 several types of gateway, some of which strip off the MSB (Most
2240 There are two kinds of character sets: control character sets and
2241 graphic character sets. The former contain control characters such
2242 as `newline' and `escape' to provide control functions (control
2243 functions are also provided by escape sequences). The latter
2244 contain graphic characters such as 'A' and '-'. Emacs recognizes
2245 two control character sets and many graphic character sets.
2247 Graphic character sets are classified into one of the following
2248 four classes, according to the number of bytes (DIMENSION) and
2249 number of characters in one dimension (CHARS) of the set:
2250 - DIMENSION1_CHARS94
2251 - DIMENSION1_CHARS96
2252 - DIMENSION2_CHARS94
2253 - DIMENSION2_CHARS96
2255 In addition, each character set is assigned an identification tag,
2256 unique for each set, called the "final character" (denoted as <F>
2257 hereafter). The <F> of each character set is decided by ECMA(*)
2258 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2259 (0x30..0x3F are for private use only).
2261 Note (*): ECMA = European Computer Manufacturers Association
2263 Here are examples of graphic character sets [NAME(<F>)]:
2264 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2265 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2266 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2267 o DIMENSION2_CHARS96 -- none for the moment
2269 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2270 C0 [0x00..0x1F] -- control character plane 0
2271 GL [0x20..0x7F] -- graphic character plane 0
2272 C1 [0x80..0x9F] -- control character plane 1
2273 GR [0xA0..0xFF] -- graphic character plane 1
2275 A control character set is directly designated and invoked to C0 or
2276 C1 by an escape sequence. The most common case is that:
2277 - ISO646's control character set is designated/invoked to C0, and
2278 - ISO6429's control character set is designated/invoked to C1,
2279 and usually these designations/invocations are omitted in encoded
2280 text. In a 7-bit environment, only C0 can be used, and a control
2281 character for C1 is encoded by an appropriate escape sequence to
2282 fit into the environment. All control characters for C1 are
2283 defined to have corresponding escape sequences.
2285 A graphic character set is at first designated to one of four
2286 graphic registers (G0 through G3), then these graphic registers are
2287 invoked to GL or GR. These designations and invocations can be
2288 done independently. The most common case is that G0 is invoked to
2289 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2290 these invocations and designations are omitted in encoded text.
2291 In a 7-bit environment, only GL can be used.
2293 When a graphic character set of CHARS94 is invoked to GL, codes
2294 0x20 and 0x7F of the GL area work as control characters SPACE and
2295 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2298 There are two ways of invocation: locking-shift and single-shift.
2299 With locking-shift, the invocation lasts until the next different
2300 invocation, whereas with single-shift, the invocation affects the
2301 following character only and doesn't affect the locking-shift
2302 state. Invocations are done by the following control characters or
2305 ----------------------------------------------------------------------
2306 abbrev function cntrl escape seq description
2307 ----------------------------------------------------------------------
2308 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2309 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2310 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2311 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2312 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2313 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2314 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2315 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2316 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2317 ----------------------------------------------------------------------
2318 (*) These are not used by any known coding system.
2320 Control characters for these functions are defined by macros
2321 ISO_CODE_XXX in `coding.h'.
2323 Designations are done by the following escape sequences:
2324 ----------------------------------------------------------------------
2325 escape sequence description
2326 ----------------------------------------------------------------------
2327 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2328 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2329 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2330 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2331 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2332 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2333 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2334 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2335 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2336 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2337 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2338 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2339 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2340 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2341 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2342 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2343 ----------------------------------------------------------------------
2345 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2346 of dimension 1, chars 94, and final character <F>, etc...
2348 Note (*): Although these designations are not allowed in ISO2022,
2349 Emacs accepts them on decoding, and produces them on encoding
2350 CHARS96 character sets in a coding system which is characterized as
2351 7-bit environment, non-locking-shift, and non-single-shift.
2353 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2354 '(' must be omitted. We refer to this as "short-form" hereafter.
2356 Now you may notice that there are a lot of ways of encoding the
2357 same multilingual text in ISO2022. Actually, there exist many
2358 coding systems such as Compound Text (used in X11's inter client
2359 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2360 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2361 localized platforms), and all of these are variants of ISO2022.
2363 In addition to the above, Emacs handles two more kinds of escape
2364 sequences: ISO6429's direction specification and Emacs' private
2365 sequence for specifying character composition.
2367 ISO6429's direction specification takes the following form:
2368 o CSI ']' -- end of the current direction
2369 o CSI '0' ']' -- end of the current direction
2370 o CSI '1' ']' -- start of left-to-right text
2371 o CSI '2' ']' -- start of right-to-left text
2372 The control character CSI (0x9B: control sequence introducer) is
2373 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2375 Character composition specification takes the following form:
2376 o ESC '0' -- start relative composition
2377 o ESC '1' -- end composition
2378 o ESC '2' -- start rule-base composition (*)
2379 o ESC '3' -- start relative composition with alternate chars (**)
2380 o ESC '4' -- start rule-base composition with alternate chars (**)
2381 Since these are not standard escape sequences of any ISO standard,
2382 the use of them with these meanings is restricted to Emacs only.
2384 (*) This form is used only in Emacs 20.7 and older versions,
2385 but newer versions can safely decode it.
2386 (**) This form is used only in Emacs 21.1 and newer versions,
2387 and older versions can't decode it.
2389 Here's a list of example usages of these composition escape
2390 sequences (categorized by `enum composition_method').
2392 COMPOSITION_RELATIVE:
2393 ESC 0 CHAR [ CHAR ] ESC 1
2394 COMPOSITION_WITH_RULE:
2395 ESC 2 CHAR [ RULE CHAR ] ESC 1
2396 COMPOSITION_WITH_ALTCHARS:
2397 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2398 COMPOSITION_WITH_RULE_ALTCHARS:
2399 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table with one `enum iso_code_class_type' entry per possible byte
   value (256 entries).  decode_coding_iso_2022 dispatches on
   iso_code_class[c1] to decide how to treat the byte C1.  */
2401 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if charset ID is covered by CODING's safe_charsets table
   (ID <= CODING->max_charset_id) and its entry there is non-negative.
   CODING is a pointer to a struct coding_system.  ID is evaluated
   twice, so it must not have side effects.
   NOTE(review): safe_charsets is assigned from `(char *) SDATA (...)'
   elsewhere in this file; if plain `char' is unsigned on the target,
   the `>= 0' test can never fail -- confirm the intended sentinel.  */
2403 #define SAFE_CHARSET_P(coding, id) \
2404 ((id) <= (coding)->max_charset_id \
2405 && (coding)->safe_charsets[id] >= 0)
/* Nonzero if the coding system stored in coding_categories[CATEGORY]
   has a non-negative initial designation for graphic register 1.
   NOTE(review): presumably this means a charset is available in G1
   for shift-out to invoke -- confirm against the callers.  */
2408 #define SHIFT_OUT_OK(category) \
2409 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2412 setup_iso_safe_charsets (attrs
)
2415 Lisp_Object charset_list
, safe_charsets
;
2416 Lisp_Object request
;
2417 Lisp_Object reg_usage
;
2420 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2423 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2424 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2425 && ! EQ (charset_list
, Viso_2022_charset_list
))
2427 CODING_ATTR_CHARSET_LIST (attrs
)
2428 = charset_list
= Viso_2022_charset_list
;
2429 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2432 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2436 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2438 int id
= XINT (XCAR (tail
));
2439 if (max_charset_id
< id
)
2440 max_charset_id
= id
;
2443 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2445 request
= AREF (attrs
, coding_attr_iso_request
);
2446 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2447 reg94
= XINT (XCAR (reg_usage
));
2448 reg96
= XINT (XCDR (reg_usage
));
2450 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2454 struct charset
*charset
;
2457 charset
= CHARSET_FROM_ID (XINT (id
));
2458 reg
= Fcdr (Fassq (id
, request
));
2460 SSET (safe_charsets
, XINT (id
), XINT (reg
));
2461 else if (charset
->iso_chars_96
)
2464 SSET (safe_charsets
, XINT (id
), reg96
);
2469 SSET (safe_charsets
, XINT (id
), reg94
);
2472 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2476 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2477 Check if a text is encoded in one of ISO-2022 based coding systems.
2478 If it is, return 1, else return 0. */
2481 detect_coding_iso_2022 (coding
, detect_info
)
2482 struct coding_system
*coding
;
2483 struct coding_detection_info
*detect_info
;
2485 const unsigned char *src
= coding
->source
, *src_base
= src
;
2486 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2487 int multibytep
= coding
->src_multibyte
;
2488 int single_shifting
= 0;
2491 int consumed_chars
= 0;
2496 detect_info
->checked
|= CATEGORY_MASK_ISO
;
2498 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2500 struct coding_system
*this = &(coding_categories
[i
]);
2501 Lisp_Object attrs
, val
;
2503 attrs
= CODING_ID_ATTRS (this->id
);
2504 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2505 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2506 setup_iso_safe_charsets (attrs
);
2507 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2508 this->max_charset_id
= SCHARS (val
) - 1;
2509 this->safe_charsets
= (char *) SDATA (val
);
2512 /* A coding system of this category is always ASCII compatible. */
2513 src
+= coding
->head_ascii
;
2515 while (rejected
!= CATEGORY_MASK_ISO
)
2522 if (inhibit_iso_escape_detection
)
2524 single_shifting
= 0;
2526 if (c
>= '(' && c
<= '/')
2528 /* Designation sequence for a charset of dimension 1. */
2530 if (c1
< ' ' || c1
>= 0x80
2531 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2532 /* Invalid designation sequence. Just ignore. */
2537 /* Designation sequence for a charset of dimension 2. */
2539 if (c
>= '@' && c
<= 'B')
2540 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2541 id
= iso_charset_table
[1][0][c
];
2542 else if (c
>= '(' && c
<= '/')
2545 if (c1
< ' ' || c1
>= 0x80
2546 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2547 /* Invalid designation sequence. Just ignore. */
2551 /* Invalid designation sequence. Just ignore it. */
2554 else if (c
== 'N' || c
== 'O')
2556 /* ESC <Fe> for SS2 or SS3. */
2557 single_shifting
= 1;
2558 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2561 else if (c
>= '0' && c
<= '4')
2563 /* ESC <Fp> for start/end composition. */
2564 found
|= CATEGORY_MASK_ISO
;
2569 /* Invalid escape sequence. Just ignore it. */
2573 /* We found a valid designation sequence for CHARSET. */
2574 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2575 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2577 found
|= CATEGORY_MASK_ISO_7
;
2579 rejected
|= CATEGORY_MASK_ISO_7
;
2580 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2582 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2584 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2585 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2587 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2589 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2590 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2592 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2594 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2599 /* Locking shift out/in. */
2600 if (inhibit_iso_escape_detection
)
2602 single_shifting
= 0;
2603 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2604 found
|= CATEGORY_MASK_ISO_ELSE
;
2608 /* Control sequence introducer. */
2609 single_shifting
= 0;
2610 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2611 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2612 goto check_extra_latin
;
2617 if (inhibit_iso_escape_detection
)
2619 single_shifting
= 0;
2620 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2621 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2622 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2623 found
|= CATEGORY_MASK_ISO_8_1
, single_shifting
= 1;
2624 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2625 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2626 found
|= CATEGORY_MASK_ISO_8_2
, single_shifting
= 1;
2627 if (single_shifting
)
2629 goto check_extra_latin
;
2636 single_shifting
= 0;
2641 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2642 found
|= CATEGORY_MASK_ISO_8_1
;
2643 /* Check the length of succeeding codes of the range
2644 0xA0..0xFF. If the byte length is even, we include
2645 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2646 only when we are not single shifting. */
2647 if (! single_shifting
2648 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2651 while (src
< src_end
)
2659 if (i
& 1 && src
< src_end
)
2660 rejected
|= CATEGORY_MASK_ISO_8_2
;
2662 found
|= CATEGORY_MASK_ISO_8_2
;
2667 single_shifting
= 0;
2668 if (! VECTORP (Vlatin_extra_code_table
)
2669 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2671 rejected
= CATEGORY_MASK_ISO
;
2674 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2675 & CODING_ISO_FLAG_LATIN_EXTRA
)
2676 found
|= CATEGORY_MASK_ISO_8_1
;
2678 rejected
|= CATEGORY_MASK_ISO_8_1
;
2679 rejected
|= CATEGORY_MASK_ISO_8_2
;
2682 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
2686 detect_info
->rejected
|= rejected
;
2687 detect_info
->found
|= (found
& ~rejected
);
2692 /* Set designation state into CODING. */
2693 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2697 if (final < '0' || final >= 128 \
2698 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2699 || !SAFE_CHARSET_P (coding, id)) \
2701 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2702 goto invalid_code; \
2704 prev = CODING_ISO_DESIGNATION (coding, reg); \
2705 if (id == charset_jisx0201_roman) \
2707 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2708 id = charset_ascii; \
2710 else if (id == charset_jisx0208_1978) \
2712 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2713 id = charset_jisx0208; \
2715 CODING_ISO_DESIGNATION (coding, reg) = id; \
2716 /* If there was an invalid designation to REG previously, and this \
2717 designation is ASCII to REG, we should keep this designation \
2719 if (prev == -2 && id == charset_ascii) \
2720 goto invalid_code; \
2724 #define MAYBE_FINISH_COMPOSITION() \
2727 if (composition_state == COMPOSING_NO) \
2729 /* It is assured that we have enough room for producing \
2730 characters stored in the table `components'. */ \
2731 if (charbuf + component_idx > charbuf_end) \
2732 goto no_more_source; \
2733 composition_state = COMPOSING_NO; \
2734 if (method == COMPOSITION_RELATIVE \
2735 || method == COMPOSITION_WITH_ALTCHARS) \
2737 for (i = 0; i < component_idx; i++) \
2738 *charbuf++ = components[i]; \
2739 char_offset += component_idx; \
2743 for (i = 0; i < component_idx; i += 2) \
2744 *charbuf++ = components[i]; \
2745 char_offset += (component_idx / 2) + 1; \
2750 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2751 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2752 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2753 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2754 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2757 #define DECODE_COMPOSITION_START(c1) \
2760 && composition_state == COMPOSING_COMPONENT_RULE) \
2762 component_len = component_idx; \
2763 composition_state = COMPOSING_CHAR; \
2767 const unsigned char *p; \
2769 MAYBE_FINISH_COMPOSITION (); \
2770 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2771 goto no_more_source; \
2772 for (p = src; p < src_end - 1; p++) \
2773 if (*p == ISO_CODE_ESC && p[1] == '1') \
2775 if (p == src_end - 1) \
2777 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2778 goto invalid_code; \
2779 goto no_more_source; \
2782 /* This is surely the start of a composition. */ \
2783 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2784 : c1 == '2' ? COMPOSITION_WITH_RULE \
2785 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2786 : COMPOSITION_WITH_RULE_ALTCHARS); \
2787 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2788 : COMPOSING_COMPONENT_CHAR); \
2789 component_idx = component_len = 0; \
2794 /* Handle composition end sequence ESC 1. */
2796 #define DECODE_COMPOSITION_END() \
2798 int nchars = (component_len > 0 ? component_idx - component_len \
2799 : method == COMPOSITION_RELATIVE ? component_idx \
2800 : (component_idx + 1) / 2); \
2802 int *saved_charbuf = charbuf; \
2804 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
2805 if (method != COMPOSITION_RELATIVE) \
2807 if (component_len == 0) \
2808 for (i = 0; i < component_idx; i++) \
2809 *charbuf++ = components[i]; \
2811 for (i = 0; i < component_len; i++) \
2812 *charbuf++ = components[i]; \
2813 *saved_charbuf = saved_charbuf - charbuf; \
2815 if (method == COMPOSITION_WITH_RULE) \
2816 for (i = 0; i < component_idx; i += 2, char_offset++) \
2817 *charbuf++ = components[i]; \
2819 for (i = component_len; i < component_idx; i++, char_offset++) \
2820 *charbuf++ = components[i]; \
2821 coding->annotated = 1; \
2822 composition_state = COMPOSING_NO; \
2826 /* Decode a composition rule from the byte C1 (and maybe one more byte
2827 from SRC) and store one encoded composition rule in
2828 coding->cmp_data. */
2830 #define DECODE_COMPOSITION_RULE(c1) \
2833 if (c1 < 81) /* old format (before ver.21) */ \
2835 int gref = (c1) / 9; \
2836 int nref = (c1) % 9; \
2837 if (gref == 4) gref = 10; \
2838 if (nref == 4) nref = 10; \
2839 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2841 else if (c1 < 93) /* new format (after ver.21) */ \
2843 ONE_MORE_BYTE (c2); \
2844 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2851 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2854 decode_coding_iso_2022 (coding
)
2855 struct coding_system
*coding
;
2857 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2858 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2859 const unsigned char *src_base
;
2860 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
2862 = coding
->charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2863 int consumed_chars
= 0, consumed_chars_base
;
2864 int multibytep
= coding
->src_multibyte
;
2865 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2866 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2867 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2868 struct charset
*charset
;
2870 /* For handling composition sequence. */
2871 #define COMPOSING_NO 0
2872 #define COMPOSING_CHAR 1
2873 #define COMPOSING_RULE 2
2874 #define COMPOSING_COMPONENT_CHAR 3
2875 #define COMPOSING_COMPONENT_RULE 4
2877 int composition_state
= COMPOSING_NO
;
2878 enum composition_method method
;
2879 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2882 Lisp_Object attrs
, charset_list
;
2883 int char_offset
= coding
->produced_char
;
2884 int last_offset
= char_offset
;
2885 int last_id
= charset_ascii
;
2887 CODING_GET_INFO (coding
, attrs
, charset_list
);
2888 setup_iso_safe_charsets (attrs
);
2895 consumed_chars_base
= consumed_chars
;
2897 if (charbuf
>= charbuf_end
)
2904 /* We produce at most one character. */
2905 switch (iso_code_class
[c1
])
2907 case ISO_0x20_or_0x7F
:
2908 if (composition_state
!= COMPOSING_NO
)
2910 if (composition_state
== COMPOSING_RULE
2911 || composition_state
== COMPOSING_COMPONENT_RULE
)
2913 DECODE_COMPOSITION_RULE (c1
);
2914 components
[component_idx
++] = c1
;
2915 composition_state
--;
2919 if (charset_id_0
< 0
2920 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2921 /* This is SPACE or DEL. */
2922 charset
= CHARSET_FROM_ID (charset_ascii
);
2924 charset
= CHARSET_FROM_ID (charset_id_0
);
2927 case ISO_graphic_plane_0
:
2928 if (composition_state
!= COMPOSING_NO
)
2930 if (composition_state
== COMPOSING_RULE
2931 || composition_state
== COMPOSING_COMPONENT_RULE
)
2933 DECODE_COMPOSITION_RULE (c1
);
2934 components
[component_idx
++] = c1
;
2935 composition_state
--;
2939 charset
= CHARSET_FROM_ID (charset_id_0
);
2942 case ISO_0xA0_or_0xFF
:
2943 if (charset_id_1
< 0
2944 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2945 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2947 /* This is a graphic character, we fall down ... */
2949 case ISO_graphic_plane_1
:
2950 if (charset_id_1
< 0)
2952 charset
= CHARSET_FROM_ID (charset_id_1
);
2956 MAYBE_FINISH_COMPOSITION ();
2957 charset
= CHARSET_FROM_ID (charset_ascii
);
2961 MAYBE_FINISH_COMPOSITION ();
2965 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2966 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2968 CODING_ISO_INVOCATION (coding
, 0) = 1;
2969 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2973 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2975 CODING_ISO_INVOCATION (coding
, 0) = 0;
2976 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2979 case ISO_single_shift_2_7
:
2980 case ISO_single_shift_2
:
2981 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2983 /* SS2 is handled as an escape sequence of ESC 'N' */
2985 goto label_escape_sequence
;
2987 case ISO_single_shift_3
:
2988 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2990 /* SS3 is handled as an escape sequence of ESC 'O' */
2992 goto label_escape_sequence
;
2994 case ISO_control_sequence_introducer
:
2995 /* CSI is handled as an escape sequence of ESC '[' ... */
2997 goto label_escape_sequence
;
3001 label_escape_sequence
:
3002 /* Escape sequences handled here are invocation,
3003 designation, direction specification, and character
3004 composition specification. */
3007 case '&': /* revision of following character set */
3009 if (!(c1
>= '@' && c1
<= '~'))
3012 if (c1
!= ISO_CODE_ESC
)
3015 goto label_escape_sequence
;
3017 case '$': /* designation of 2-byte character set */
3018 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3021 if (c1
>= '@' && c1
<= 'B')
3022 { /* designation of JISX0208.1978, GB2312.1980,
3024 DECODE_DESIGNATION (0, 2, 0, c1
);
3026 else if (c1
>= 0x28 && c1
<= 0x2B)
3027 { /* designation of DIMENSION2_CHARS94 character set */
3029 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
3031 else if (c1
>= 0x2C && c1
<= 0x2F)
3032 { /* designation of DIMENSION2_CHARS96 character set */
3034 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
3038 /* We must update these variables now. */
3039 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3040 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3043 case 'n': /* invocation of locking-shift-2 */
3044 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3045 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3047 CODING_ISO_INVOCATION (coding
, 0) = 2;
3048 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3051 case 'o': /* invocation of locking-shift-3 */
3052 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3053 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3055 CODING_ISO_INVOCATION (coding
, 0) = 3;
3056 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3059 case 'N': /* invocation of single-shift-2 */
3060 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3061 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3063 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
3065 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3069 case 'O': /* invocation of single-shift-3 */
3070 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3071 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3073 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
3075 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3079 case '0': case '2': case '3': case '4': /* start composition */
3080 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3082 DECODE_COMPOSITION_START (c1
);
3085 case '1': /* end composition */
3086 if (composition_state
== COMPOSING_NO
)
3088 DECODE_COMPOSITION_END ();
3091 case '[': /* specification of direction */
3092 if (! CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
)
3094 /* For the moment, nested direction is not supported.
3095 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3096 left-to-right, and nonzero means right-to-left. */
3100 case ']': /* end of the current direction */
3101 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3103 case '0': /* end of the current direction */
3104 case '1': /* start of left-to-right direction */
3107 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3112 case '2': /* start of right-to-left direction */
3115 coding
->mode
|= CODING_MODE_DIRECTION
;
3129 /* CTEXT extended segment:
3130 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3131 We keep these bytes as is for the moment.
3132 They may be decoded by post-read-conversion. */
3136 ONE_MORE_BYTE (dim
);
3139 size
= ((M
- 128) * 128) + (L
- 128);
3140 if (charbuf
+ 8 + size
> charbuf_end
)
3142 *charbuf
++ = ISO_CODE_ESC
;
3146 *charbuf
++ = BYTE8_TO_CHAR (M
);
3147 *charbuf
++ = BYTE8_TO_CHAR (L
);
3151 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3156 /* XFree86 extension for embedding UTF-8 in CTEXT:
3157 ESC % G --UTF-8-BYTES-- ESC % @
3158 We keep these bytes as is for the moment.
3159 They may be decoded by post-read-conversion. */
3162 if (p
+ 6 > charbuf_end
)
3164 *p
++ = ISO_CODE_ESC
;
3167 while (p
< charbuf_end
)
3170 if (c1
== ISO_CODE_ESC
3171 && src
+ 1 < src_end
3175 *p
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3177 if (p
+ 3 > charbuf_end
)
3179 *p
++ = ISO_CODE_ESC
;
3190 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3192 if (c1
>= 0x28 && c1
<= 0x2B)
3193 { /* designation of DIMENSION1_CHARS94 character set */
3195 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
3197 else if (c1
>= 0x2C && c1
<= 0x2F)
3198 { /* designation of DIMENSION1_CHARS96 character set */
3200 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
3204 /* We must update these variables now. */
3205 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3206 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3211 if (charset
->id
!= charset_ascii
3212 && last_id
!= charset
->id
)
3214 if (last_id
!= charset_ascii
)
3215 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
3216 last_id
= charset
->id
;
3217 last_offset
= char_offset
;
3220 /* Now we know CHARSET and 1st position code C1 of a character.
3221 Produce a decoded character while getting 2nd position code
3224 if (CHARSET_DIMENSION (charset
) > 1)
3227 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3228 /* C2 is not in a valid range. */
3230 c1
= (c1
<< 8) | (c2
& 0x7F);
3231 if (CHARSET_DIMENSION (charset
) > 2)
3234 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3235 /* C2 is not in a valid range. */
3237 c1
= (c1
<< 8) | (c2
& 0x7F);
3241 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3244 MAYBE_FINISH_COMPOSITION ();
3245 for (; src_base
< src
; src_base
++, char_offset
++)
3247 if (ASCII_BYTE_P (*src_base
))
3248 *charbuf
++ = *src_base
;
3250 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3253 else if (composition_state
== COMPOSING_NO
)
3260 components
[component_idx
++] = c
;
3261 if (method
== COMPOSITION_WITH_RULE
3262 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3263 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3264 composition_state
++;
3269 MAYBE_FINISH_COMPOSITION ();
3271 consumed_chars
= consumed_chars_base
;
3273 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3283 if (last_id
!= charset_ascii
)
3284 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
3285 coding
->consumed_char
+= consumed_chars_base
;
3286 coding
->consumed
= src_base
- coding
->source
;
3287 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3291 /* ISO2022 encoding stuff. */
3294 It is not enough to say just "ISO2022" on encoding, we have to
3295 specify more details. In Emacs, each coding system of ISO2022
3296 variant has the following specifications:
3297 1. Initial designation to G0 thru G3.
3298 2. Allows short-form designation?
3299 3. ASCII should be designated to G0 before control characters?
3300 4. ASCII should be designated to G0 at end of line?
3301 5. 7-bit environment or 8-bit environment?
3302 6. Use locking-shift?
3303 7. Use Single-shift?
3304 And the following two are only for Japanese:
3305 8. Use ASCII in place of JIS0201-1976-Roman?
3306 9. Use JISX0208-1983 in place of JISX0208-1978?
3307 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3308 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3312 /* Produce codes (escape sequence) for designating CHARSET to graphic
3313 register REG at DST, and increment DST. If <final-char> of CHARSET is
3314 '@', 'A', or 'B' and the coding system CODING allows, produce
3315 designation sequence of short-form. */
3317 #define ENCODE_DESIGNATION(charset, reg, coding) \
3319 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3320 char *intermediate_char_94 = "()*+"; \
3321 char *intermediate_char_96 = ",-./"; \
3322 int revision = -1; \
3325 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3326 revision = CHARSET_ISO_REVISION (charset); \
3328 if (revision >= 0) \
3330 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3331 EMIT_ONE_BYTE ('@' + revision); \
3333 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3334 if (CHARSET_DIMENSION (charset) == 1) \
3336 if (! CHARSET_ISO_CHARS_96 (charset)) \
3337 c = intermediate_char_94[reg]; \
3339 c = intermediate_char_96[reg]; \
3340 EMIT_ONE_ASCII_BYTE (c); \
3344 EMIT_ONE_ASCII_BYTE ('$'); \
3345 if (! CHARSET_ISO_CHARS_96 (charset)) \
3347 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3349 || final_char < '@' || final_char > 'B') \
3350 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3353 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3355 EMIT_ONE_ASCII_BYTE (final_char); \
3357 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3361 /* The following two macros produce codes (control character or escape
3362 sequence) for ISO2022 single-shift functions (single-shift-2 and
3365 #define ENCODE_SINGLE_SHIFT_2 \
3367 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3368 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3370 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3371 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3375 #define ENCODE_SINGLE_SHIFT_3 \
3377 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3378 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3380 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3381 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3385 /* The following four macros produce codes (control character or
3386 escape sequence) for ISO2022 locking-shift functions (shift-in,
3387 shift-out, locking-shift-2, and locking-shift-3). */
3389 #define ENCODE_SHIFT_IN \
3391 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3392 CODING_ISO_INVOCATION (coding, 0) = 0; \
3396 #define ENCODE_SHIFT_OUT \
3398 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3399 CODING_ISO_INVOCATION (coding, 0) = 1; \
3403 #define ENCODE_LOCKING_SHIFT_2 \
3405 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3406 CODING_ISO_INVOCATION (coding, 0) = 2; \
/* Emit the locking-shift-3 escape sequence ESC 'o' (ESC 'n' is
   locking-shift-2) and record that graphic register 3 is now invoked
   to graphic plane 0.  Expands in a scope that provides `coding' and
   whatever output state EMIT_TWO_ASCII_BYTES needs.  Wrapped in
   do/while so it behaves as one statement inside if/else.  */
#define ENCODE_LOCKING_SHIFT_3 \
  do { \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o'); \
    CODING_ISO_INVOCATION (coding, 0) = 3; \
  } while (0)
3417 /* Produce codes for a DIMENSION1 character whose character set is
3418 CHARSET and whose position-code is C1. Designation and invocation
3419 sequences are also produced in advance if necessary. */
3421 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3423 int id = CHARSET_ID (charset); \
3425 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3426 && id == charset_ascii) \
3428 id = charset_jisx0201_roman; \
3429 charset = CHARSET_FROM_ID (id); \
3432 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3434 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3435 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3437 EMIT_ONE_BYTE (c1 | 0x80); \
3438 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3441 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3443 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3446 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3448 EMIT_ONE_BYTE (c1 | 0x80); \
3452 /* Since CHARSET is not yet invoked to any graphic planes, we \
3453 must invoke it, or, at first, designate it to some graphic \
3454 register. Then repeat the loop to actually produce the \
3456 dst = encode_invocation_designation (charset, coding, dst, \
3461 /* Produce codes for a DIMENSION2 character whose character set is
3462 CHARSET and whose position-codes are C1 and C2. Designation and
3463 invocation codes are also produced in advance if necessary. */
3465 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3467 int id = CHARSET_ID (charset); \
3469 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3470 && id == charset_jisx0208) \
3472 id = charset_jisx0208_1978; \
3473 charset = CHARSET_FROM_ID (id); \
3476 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3478 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3479 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3481 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3482 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3485 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3487 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3490 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3492 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3496 /* Since CHARSET is not yet invoked to any graphic planes, we \
3497 must invoke it, or, at first, designate it to some graphic \
3498 register. Then repeat the loop to actually produce the \
3500 dst = encode_invocation_designation (charset, coding, dst, \
3505 #define ENCODE_ISO_CHARACTER(charset, c) \
3507 int code = ENCODE_CHAR ((charset),(c)); \
3509 if (CHARSET_DIMENSION (charset) == 1) \
3510 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3512 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3516 /* Produce designation and invocation codes at a place pointed by DST
3517 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3521 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3522 struct charset
*charset
;
3523 struct coding_system
*coding
;
3527 int multibytep
= coding
->dst_multibyte
;
3528 int produced_chars
= *p_nchars
;
3529 int reg
; /* graphic register number */
3530 int id
= CHARSET_ID (charset
);
3532 /* At first, check designations. */
3533 for (reg
= 0; reg
< 4; reg
++)
3534 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3539 /* CHARSET is not yet designated to any graphic registers. */
3540 /* At first check the requested designation. */
3541 reg
= CODING_ISO_REQUEST (coding
, id
);
3543 /* Since CHARSET requests no special designation, designate it
3544 to graphic register 0. */
3547 ENCODE_DESIGNATION (charset
, reg
, coding
);
3550 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3551 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3553 /* Since the graphic register REG is not invoked to any graphic
3554 planes, invoke it to graphic plane 0. */
3557 case 0: /* graphic register 0 */
3561 case 1: /* graphic register 1 */
3565 case 2: /* graphic register 2 */
3566 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3567 ENCODE_SINGLE_SHIFT_2
;
3569 ENCODE_LOCKING_SHIFT_2
;
3572 case 3: /* graphic register 3 */
3573 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3574 ENCODE_SINGLE_SHIFT_3
;
3576 ENCODE_LOCKING_SHIFT_3
;
3581 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating text
   direction.  */

/* Emit a control sequence introducer: the two-byte form ESC '[' when
   the coding system has CODING_ISO_FLAG_SEVEN_BITS set, otherwise the
   single byte ISO_CODE_CSI.  The flag is tested as a bit, so other
   flags set on the coding system do not affect the choice.  Expands in
   a scope that provides `coding' and the EMIT_* output state.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
  do { \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
    else \
      EMIT_ONE_BYTE (ISO_CODE_CSI); \
  } while (0)
/* Emit the direction specification that starts right-to-left text:
   a control sequence introducer followed by '2' ']'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it is
   used without an argument list.  */
#define ENCODE_DIRECTION_R2L() \
  do { \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER; \
    EMIT_TWO_ASCII_BYTES ('2', ']'); \
  } while (0)
/* Emit the direction specification that returns to left-to-right
   text: a control sequence introducer followed by '0' ']' (the "end
   of the current direction" form; the decoder in this file clears
   CODING_MODE_DIRECTION for it).  ENCODE_CONTROL_SEQUENCE_INTRODUCER
   is an object-like macro, so it is used without an argument list.  */
#define ENCODE_DIRECTION_L2R() \
  do { \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER; \
    EMIT_TWO_ASCII_BYTES ('0', ']'); \
  } while (0)
3610 /* Produce codes for designation and invocation to reset the graphic
3611 planes and registers to initial state. */
3612 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3615 struct charset *charset; \
3617 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3619 for (reg = 0; reg < 4; reg++) \
3620 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3621 && (CODING_ISO_DESIGNATION (coding, reg) \
3622 != CODING_ISO_INITIAL (coding, reg))) \
3624 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3625 ENCODE_DESIGNATION (charset, reg, coding); \
3630 /* Produce designation sequences of charsets in the line started from
3631 SRC to a place pointed by DST, and return updated DST.
3633 If the current block ends before any end-of-line, we may fail to
3634 find all the necessary designations. */
3636 static unsigned char *
3637 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3638 struct coding_system
*coding
;
3639 int *charbuf
, *charbuf_end
;
3642 struct charset
*charset
;
3643 /* Table of charsets to be designated to each graphic register. */
3645 int c
, found
= 0, reg
;
3646 int produced_chars
= 0;
3647 int multibytep
= coding
->dst_multibyte
;
3649 Lisp_Object charset_list
;
3651 attrs
= CODING_ID_ATTRS (coding
->id
);
3652 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3653 if (EQ (charset_list
, Qiso_2022
))
3654 charset_list
= Viso_2022_charset_list
;
3656 for (reg
= 0; reg
< 4; reg
++)
3666 charset
= char_charset (c
, charset_list
, NULL
);
3667 id
= CHARSET_ID (charset
);
3668 reg
= CODING_ISO_REQUEST (coding
, id
);
3669 if (reg
>= 0 && r
[reg
] < 0)
3678 for (reg
= 0; reg
< 4; reg
++)
3680 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3681 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3687 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3690 encode_coding_iso_2022 (coding
)
3691 struct coding_system
*coding
;
3693 int multibytep
= coding
->dst_multibyte
;
3694 int *charbuf
= coding
->charbuf
;
3695 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3696 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3697 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3700 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3701 && CODING_ISO_BOL (coding
));
3702 int produced_chars
= 0;
3703 Lisp_Object attrs
, eol_type
, charset_list
;
3704 int ascii_compatible
;
3706 int preferred_charset_id
= -1;
3708 CODING_GET_INFO (coding
, attrs
, charset_list
);
3709 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
3710 if (VECTORP (eol_type
))
3713 setup_iso_safe_charsets (attrs
);
3714 /* Charset list may have been changed. */
3715 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
); \
3716 coding
->safe_charsets
= (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs
));
3718 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3720 while (charbuf
< charbuf_end
)
3722 ASSURE_DESTINATION (safe_room
);
3724 if (bol_designation
)
3726 unsigned char *dst_prev
= dst
;
3728 /* We have to produce designation sequences if any now. */
3729 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3730 bol_designation
= 0;
3731 /* We are sure that designation sequences are all ASCII bytes. */
3732 produced_chars
+= dst
- dst_prev
;
3739 /* Handle an annotation. */
3742 case CODING_ANNOTATE_COMPOSITION_MASK
:
3743 /* Not yet implemented. */
3745 case CODING_ANNOTATE_CHARSET_MASK
:
3746 preferred_charset_id
= charbuf
[3];
3747 if (preferred_charset_id
>= 0
3748 && NILP (Fmemq (make_number (preferred_charset_id
),
3750 preferred_charset_id
= -1;
3759 /* Now encode the character C. */
3760 if (c
< 0x20 || c
== 0x7F)
3763 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3765 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3766 ENCODE_RESET_PLANE_AND_REGISTER ();
3767 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3771 for (i
= 0; i
< 4; i
++)
3772 CODING_ISO_DESIGNATION (coding
, i
)
3773 = CODING_ISO_INITIAL (coding
, i
);
3776 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3778 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3779 ENCODE_RESET_PLANE_AND_REGISTER ();
3780 EMIT_ONE_ASCII_BYTE (c
);
3782 else if (ASCII_CHAR_P (c
))
3784 if (ascii_compatible
)
3785 EMIT_ONE_ASCII_BYTE (c
);
3788 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3789 ENCODE_ISO_CHARACTER (charset
, c
);
3792 else if (CHAR_BYTE8_P (c
))
3794 c
= CHAR_TO_BYTE8 (c
);
3799 struct charset
*charset
;
3801 if (preferred_charset_id
>= 0)
3803 charset
= CHARSET_FROM_ID (preferred_charset_id
);
3804 if (! CHAR_CHARSET_P (c
, charset
))
3805 charset
= char_charset (c
, charset_list
, NULL
);
3808 charset
= char_charset (c
, charset_list
, NULL
);
3811 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3813 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3814 charset
= CHARSET_FROM_ID (charset_ascii
);
3818 c
= coding
->default_char
;
3819 charset
= char_charset (c
, charset_list
, NULL
);
3822 ENCODE_ISO_CHARACTER (charset
, c
);
3826 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3827 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3829 ASSURE_DESTINATION (safe_room
);
3830 ENCODE_RESET_PLANE_AND_REGISTER ();
3832 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
3833 CODING_ISO_BOL (coding
) = bol_designation
;
3834 coding
->produced_char
+= produced_chars
;
3835 coding
->produced
= dst
- coding
->destination
;
3840 /*** 8. Shift-JIS and BIG5 handlers ***/
3842 /* Although SJIS and BIG5 are not ISO coding systems, they are used
3843 quite widely. So, for the moment, Emacs supports them in the bare
3844 C code. But, in the future, they may be supported only by CCL. */
3846 /* SJIS is a coding system encoding three character sets: ASCII, right
3847 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3848 as is. A character of charset katakana-jisx0201 is encoded by
3849 "position-code + 0x80". A character of charset japanese-jisx0208
3850 is encoded in two bytes, but the two position-codes are divided and shifted
3851 so that they fit in the ranges below.
3853 --- CODE RANGE of SJIS ---
3854 (character set) (range)
3856 KATAKANA-JISX0201 0xA0 .. 0xDF
3857 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3858 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3859 -------------------------------
3863 /* BIG5 is a coding system encoding two character sets: ASCII and
3864 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3865 character set and is encoded in two bytes.
3867 --- CODE RANGE of BIG5 ---
3868 (character set) (range)
3870 Big5 (1st byte) 0xA1 .. 0xFE
3871 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3872 --------------------------
3876 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3877 Check if a text is encoded in SJIS. If it is, return
3878 CATEGORY_MASK_SJIS, else return 0. */
3881 detect_coding_sjis (coding
, detect_info
)
3882 struct coding_system
*coding
;
3883 struct coding_detection_info
*detect_info
;
3885 const unsigned char *src
= coding
->source
, *src_base
;
3886 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3887 int multibytep
= coding
->src_multibyte
;
3888 int consumed_chars
= 0;
3892 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3893 /* A coding system of this category is always ASCII compatible. */
3894 src
+= coding
->head_ascii
;
3902 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3905 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3907 found
= CATEGORY_MASK_SJIS
;
3909 else if (c
>= 0xA0 && c
< 0xE0)
3910 found
= CATEGORY_MASK_SJIS
;
3914 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3918 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3920 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3923 detect_info
->found
|= found
;
3927 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3928 Check if a text is encoded in BIG5. If it is, return
3929 CATEGORY_MASK_BIG5, else return 0. */
3932 detect_coding_big5 (coding
, detect_info
)
3933 struct coding_system
*coding
;
3934 struct coding_detection_info
*detect_info
;
3936 const unsigned char *src
= coding
->source
, *src_base
;
3937 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3938 int multibytep
= coding
->src_multibyte
;
3939 int consumed_chars
= 0;
3943 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
3944 /* A coding system of this category is always ASCII compatible. */
3945 src
+= coding
->head_ascii
;
3956 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3958 found
= CATEGORY_MASK_BIG5
;
3963 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3967 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3969 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
3972 detect_info
->found
|= found
;
3976 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3977 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
3980 decode_coding_sjis (coding
)
3981 struct coding_system
*coding
;
3983 const unsigned char *src
= coding
->source
+ coding
->consumed
;
3984 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3985 const unsigned char *src_base
;
3986 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
3988 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
3989 int consumed_chars
= 0, consumed_chars_base
;
3990 int multibytep
= coding
->src_multibyte
;
3991 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3992 struct charset
*charset_kanji2
;
3993 Lisp_Object attrs
, charset_list
, val
;
3994 int char_offset
= coding
->produced_char
;
3995 int last_offset
= char_offset
;
3996 int last_id
= charset_ascii
;
3998 CODING_GET_INFO (coding
, attrs
, charset_list
);
4001 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4002 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4003 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4004 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4009 struct charset
*charset
;
4012 consumed_chars_base
= consumed_chars
;
4014 if (charbuf
>= charbuf_end
)
4021 charset
= charset_roman
;
4022 else if (c
== 0x80 || c
== 0xA0)
4024 else if (c
>= 0xA1 && c
<= 0xDF)
4026 /* SJIS -> JISX0201-Kana */
4028 charset
= charset_kana
;
4032 /* SJIS -> JISX0208 */
4034 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4038 charset
= charset_kanji
;
4040 else if (c
<= 0xFC && charset_kanji2
)
4042 /* SJIS -> JISX0213-2 */
4044 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4048 charset
= charset_kanji2
;
4052 if (charset
->id
!= charset_ascii
4053 && last_id
!= charset
->id
)
4055 if (last_id
!= charset_ascii
)
4056 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4057 last_id
= charset
->id
;
4058 last_offset
= char_offset
;
4060 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4067 consumed_chars
= consumed_chars_base
;
4069 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4075 if (last_id
!= charset_ascii
)
4076 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4077 coding
->consumed_char
+= consumed_chars_base
;
4078 coding
->consumed
= src_base
- coding
->source
;
4079 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4083 decode_coding_big5 (coding
)
4084 struct coding_system
*coding
;
4086 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4087 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4088 const unsigned char *src_base
;
4089 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4091 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4092 int consumed_chars
= 0, consumed_chars_base
;
4093 int multibytep
= coding
->src_multibyte
;
4094 struct charset
*charset_roman
, *charset_big5
;
4095 Lisp_Object attrs
, charset_list
, val
;
4096 int char_offset
= coding
->produced_char
;
4097 int last_offset
= char_offset
;
4098 int last_id
= charset_ascii
;
4100 CODING_GET_INFO (coding
, attrs
, charset_list
);
4102 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4103 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4108 struct charset
*charset
;
4111 consumed_chars_base
= consumed_chars
;
4113 if (charbuf
>= charbuf_end
)
4121 charset
= charset_roman
;
4125 if (c
< 0xA1 || c
> 0xFE)
4128 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4131 charset
= charset_big5
;
4133 if (charset
->id
!= charset_ascii
4134 && last_id
!= charset
->id
)
4136 if (last_id
!= charset_ascii
)
4137 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4138 last_id
= charset
->id
;
4139 last_offset
= char_offset
;
4141 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4148 consumed_chars
= consumed_chars_base
;
4150 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4156 if (last_id
!= charset_ascii
)
4157 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4158 coding
->consumed_char
+= consumed_chars_base
;
4159 coding
->consumed
= src_base
- coding
->source
;
4160 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4163 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4164 This function can encode charsets `ascii', `katakana-jisx0201',
4165 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4166 are sure that all these charsets are registered as official charset
4167 (i.e. do not have extended leading-codes). Characters of other
4168 charsets are produced without any encoding. If SJIS_P is 1, encode
4169 SJIS text, else encode BIG5 text. */
4172 encode_coding_sjis (coding
)
4173 struct coding_system
*coding
;
4175 int multibytep
= coding
->dst_multibyte
;
4176 int *charbuf
= coding
->charbuf
;
4177 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4178 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4179 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4181 int produced_chars
= 0;
4182 Lisp_Object attrs
, charset_list
, val
;
4183 int ascii_compatible
;
4184 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4185 struct charset
*charset_kanji2
;
4188 CODING_GET_INFO (coding
, attrs
, charset_list
);
4190 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4191 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4192 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4193 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4195 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4197 while (charbuf
< charbuf_end
)
4199 ASSURE_DESTINATION (safe_room
);
4201 /* Now encode the character C. */
4202 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4203 EMIT_ONE_ASCII_BYTE (c
);
4204 else if (CHAR_BYTE8_P (c
))
4206 c
= CHAR_TO_BYTE8 (c
);
4212 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4216 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4218 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4219 charset
= CHARSET_FROM_ID (charset_ascii
);
4223 c
= coding
->default_char
;
4224 charset
= char_charset (c
, charset_list
, &code
);
4227 if (code
== CHARSET_INVALID_CODE (charset
))
4229 if (charset
== charset_kanji
)
4233 c1
= code
>> 8, c2
= code
& 0xFF;
4234 EMIT_TWO_BYTES (c1
, c2
);
4236 else if (charset
== charset_kana
)
4237 EMIT_ONE_BYTE (code
| 0x80);
4238 else if (charset_kanji2
&& charset
== charset_kanji2
)
4243 if (c1
== 0x21 || (c1
>= 0x23 && c1
< 0x25)
4244 || (c1
>= 0x2C && c1
<= 0x2F) || c1
>= 0x6E)
4246 JIS_TO_SJIS2 (code
);
4247 c1
= code
>> 8, c2
= code
& 0xFF;
4248 EMIT_TWO_BYTES (c1
, c2
);
4251 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4254 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4257 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4258 coding
->produced_char
+= produced_chars
;
4259 coding
->produced
= dst
- coding
->destination
;
4264 encode_coding_big5 (coding
)
4265 struct coding_system
*coding
;
4267 int multibytep
= coding
->dst_multibyte
;
4268 int *charbuf
= coding
->charbuf
;
4269 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4270 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4271 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4273 int produced_chars
= 0;
4274 Lisp_Object attrs
, charset_list
, val
;
4275 int ascii_compatible
;
4276 struct charset
*charset_roman
, *charset_big5
;
4279 CODING_GET_INFO (coding
, attrs
, charset_list
);
4281 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4282 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4283 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4285 while (charbuf
< charbuf_end
)
4287 ASSURE_DESTINATION (safe_room
);
4289 /* Now encode the character C. */
4290 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4291 EMIT_ONE_ASCII_BYTE (c
);
4292 else if (CHAR_BYTE8_P (c
))
4294 c
= CHAR_TO_BYTE8 (c
);
4300 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4304 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4306 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4307 charset
= CHARSET_FROM_ID (charset_ascii
);
4311 c
= coding
->default_char
;
4312 charset
= char_charset (c
, charset_list
, &code
);
4315 if (code
== CHARSET_INVALID_CODE (charset
))
4317 if (charset
== charset_big5
)
4321 c1
= code
>> 8, c2
= code
& 0xFF;
4322 EMIT_TWO_BYTES (c1
, c2
);
4325 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4328 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4329 coding
->produced_char
+= produced_chars
;
4330 coding
->produced
= dst
- coding
->destination
;
4335 /*** 9. CCL handlers ***/
4337 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4338 Check if a text is encoded in a coding system of which
4339 encoder/decoder are written in CCL program. If it is, return
4340 CATEGORY_MASK_CCL, else return 0. */
4343 detect_coding_ccl (coding
, detect_info
)
4344 struct coding_system
*coding
;
4345 struct coding_detection_info
*detect_info
;
4347 const unsigned char *src
= coding
->source
, *src_base
;
4348 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4349 int multibytep
= coding
->src_multibyte
;
4350 int consumed_chars
= 0;
4352 unsigned char *valids
;
4353 int head_ascii
= coding
->head_ascii
;
4356 detect_info
->checked
|= CATEGORY_MASK_CCL
;
4358 coding
= &coding_categories
[coding_category_ccl
];
4359 valids
= CODING_CCL_VALIDS (coding
);
4360 attrs
= CODING_ID_ATTRS (coding
->id
);
4361 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4370 if (c
< 0 || ! valids
[c
])
4372 if ((valids
[c
] > 1))
4373 found
= CATEGORY_MASK_CCL
;
4375 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4379 detect_info
->found
|= found
;
4384 decode_coding_ccl (coding
)
4385 struct coding_system
*coding
;
4387 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4388 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4389 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4390 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
4391 int consumed_chars
= 0;
4392 int multibytep
= coding
->src_multibyte
;
4393 struct ccl_program ccl
;
4394 int source_charbuf
[1024];
4395 int source_byteidx
[1024];
4396 Lisp_Object attrs
, charset_list
;
4398 CODING_GET_INFO (coding
, attrs
, charset_list
);
4399 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4401 while (src
< src_end
)
4403 const unsigned char *p
= src
;
4404 int *source
, *source_end
;
4408 while (i
< 1024 && p
< src_end
)
4410 source_byteidx
[i
] = p
- src
;
4411 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4414 while (i
< 1024 && p
< src_end
)
4415 source_charbuf
[i
++] = *p
++;
4417 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4420 source
= source_charbuf
;
4421 source_end
= source
+ i
;
4422 while (source
< source_end
)
4424 ccl_driver (&ccl
, source
, charbuf
,
4425 source_end
- source
, charbuf_end
- charbuf
,
4427 source
+= ccl
.consumed
;
4428 charbuf
+= ccl
.produced
;
4429 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
4432 if (source
< source_end
)
4433 src
+= source_byteidx
[source
- source_charbuf
];
4436 consumed_chars
+= source
- source_charbuf
;
4438 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4439 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4445 case CCL_STAT_SUSPEND_BY_SRC
:
4446 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4448 case CCL_STAT_SUSPEND_BY_DST
:
4451 case CCL_STAT_INVALID_CMD
:
4452 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4455 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4458 coding
->consumed_char
+= consumed_chars
;
4459 coding
->consumed
= src
- coding
->source
;
4460 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4464 encode_coding_ccl (coding
)
4465 struct coding_system
*coding
;
4467 struct ccl_program ccl
;
4468 int multibytep
= coding
->dst_multibyte
;
4469 int *charbuf
= coding
->charbuf
;
4470 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4471 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4472 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4473 unsigned char *adjusted_dst_end
= dst_end
- 1;
4474 int destination_charbuf
[1024];
4475 int i
, produced_chars
= 0;
4476 Lisp_Object attrs
, charset_list
;
4478 CODING_GET_INFO (coding
, attrs
, charset_list
);
4479 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4481 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4482 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4484 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
4486 int dst_bytes
= dst_end
- dst
;
4487 if (dst_bytes
> 1024)
4490 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4491 charbuf_end
- charbuf
, dst_bytes
, charset_list
);
4492 charbuf
+= ccl
.consumed
;
4494 for (i
= 0; i
< ccl
.produced
; i
++)
4495 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4498 for (i
= 0; i
< ccl
.produced
; i
++)
4499 *dst
++ = destination_charbuf
[i
] & 0xFF;
4500 produced_chars
+= ccl
.produced
;
4506 case CCL_STAT_SUSPEND_BY_SRC
:
4507 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4509 case CCL_STAT_SUSPEND_BY_DST
:
4510 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_DST
);
4513 case CCL_STAT_INVALID_CMD
:
4514 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4517 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4521 coding
->produced_char
+= produced_chars
;
4522 coding
->produced
= dst
- coding
->destination
;
4528 /*** 10, 11. no-conversion handlers ***/
4530 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4533 decode_coding_raw_text (coding
)
4534 struct coding_system
*coding
;
4536 coding
->chars_at_source
= 1;
4537 coding
->consumed_char
= 0;
4538 coding
->consumed
= 0;
4539 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
/* Encode the characters in CODING->charbuf as raw text.  Output bytes
   are written starting at CODING->destination + CODING->produced, and
   CODING->produced_char and CODING->produced are updated at the end.  */
4543 encode_coding_raw_text (coding
)
4544 struct coding_system
*coding
;
4546 int multibytep
= coding
->dst_multibyte
;
4547 int *charbuf
= coding
->charbuf
;
4548 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4549 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4550 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4551 int produced_chars
= 0;
4556 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4558 if (coding
->src_multibyte
)
4559 while (charbuf
< charbuf_end
)
4561 ASSURE_DESTINATION (safe_room
);
4563 if (ASCII_CHAR_P (c
))
4564 EMIT_ONE_ASCII_BYTE (c
);
4565 else if (CHAR_BYTE8_P (c
))
4567 c
= CHAR_TO_BYTE8 (c
);
4572 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4574 CHAR_STRING_ADVANCE (c
, p1
);
4577 EMIT_ONE_BYTE (*p0
);
4583 while (charbuf
< charbuf_end
)
4585 ASSURE_DESTINATION (safe_room
);
4592 if (coding
->src_multibyte
)
4594 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4596 while (charbuf
< charbuf_end
)
4598 ASSURE_DESTINATION (safe_room
);
4600 if (ASCII_CHAR_P (c
))
4602 else if (CHAR_BYTE8_P (c
))
4603 *dst
++ = CHAR_TO_BYTE8 (c
);
4605 CHAR_STRING_ADVANCE (c
, dst
);
4611 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4612 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4613 *dst
++ = *charbuf
++;
/* NOTE(review): DST starts at coding->destination + coding->produced
   and the copy loop above keeps DST below DST_END, which is
   coding->destination + coding->dst_bytes.  Subtracting
   coding->dst_bytes here therefore looks like it yields a
   non-positive count; the number of bytes just written would be
   dst - (coding->destination + coding->produced).  Confirm against
   ASSURE_DESTINATION (defined outside this view) before changing.  */
4614 produced_chars
= dst
- (coding
->destination
+ coding
->dst_bytes
);
4617 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4618 coding
->produced_char
+= produced_chars
;
4619 coding
->produced
= dst
- coding
->destination
;
4623 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4624 Check if a text is encoded in a charset-based coding system. If it
4625 is, return 1, else return 0. */
4628 detect_coding_charset (coding
, detect_info
)
4629 struct coding_system
*coding
;
4630 struct coding_detection_info
*detect_info
;
4632 const unsigned char *src
= coding
->source
, *src_base
;
4633 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4634 int multibytep
= coding
->src_multibyte
;
4635 int consumed_chars
= 0;
4636 Lisp_Object attrs
, valids
;
4639 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
4641 coding
= &coding_categories
[coding_category_charset
];
4642 attrs
= CODING_ID_ATTRS (coding
->id
);
4643 valids
= AREF (attrs
, coding_attr_charset_valids
);
4645 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4646 src
+= coding
->head_ascii
;
4656 if (NILP (AREF (valids
, c
)))
4659 found
= CATEGORY_MASK_CHARSET
;
4661 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4665 detect_info
->found
|= found
;
4670 decode_coding_charset (coding
)
4671 struct coding_system
*coding
;
4673 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4674 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4675 const unsigned char *src_base
;
4676 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4678 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4679 int consumed_chars
= 0, consumed_chars_base
;
4680 int multibytep
= coding
->src_multibyte
;
4681 Lisp_Object attrs
, charset_list
, valids
;
4682 int char_offset
= coding
->produced_char
;
4683 int last_offset
= char_offset
;
4684 int last_id
= charset_ascii
;
4686 CODING_GET_INFO (coding
, attrs
, charset_list
);
4687 valids
= AREF (attrs
, coding_attr_charset_valids
);
4693 struct charset
*charset
;
4699 consumed_chars_base
= consumed_chars
;
4701 if (charbuf
>= charbuf_end
)
4709 val
= AREF (valids
, c
);
4714 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4715 dim
= CHARSET_DIMENSION (charset
);
4719 code
= (code
<< 8) | c
;
4722 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4727 /* VAL is a list of charset IDs. It is assured that the
4728 list is sorted by charset dimensions (smaller one
4732 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4733 dim
= CHARSET_DIMENSION (charset
);
4737 code
= (code
<< 8) | c
;
4740 CODING_DECODE_CHAR (coding
, src
, src_base
,
4741 src_end
, charset
, code
, c
);
4749 if (charset
->id
!= charset_ascii
4750 && last_id
!= charset
->id
)
4752 if (last_id
!= charset_ascii
)
4753 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4754 last_id
= charset
->id
;
4755 last_offset
= char_offset
;
4764 consumed_chars
= consumed_chars_base
;
4766 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4772 if (last_id
!= charset_ascii
)
4773 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4774 coding
->consumed_char
+= consumed_chars_base
;
4775 coding
->consumed
= src_base
- coding
->source
;
4776 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4780 encode_coding_charset (coding
)
4781 struct coding_system
*coding
;
4783 int multibytep
= coding
->dst_multibyte
;
4784 int *charbuf
= coding
->charbuf
;
4785 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4786 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4787 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4788 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4789 int produced_chars
= 0;
4790 Lisp_Object attrs
, charset_list
;
4791 int ascii_compatible
;
4794 CODING_GET_INFO (coding
, attrs
, charset_list
);
4795 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4797 while (charbuf
< charbuf_end
)
4799 struct charset
*charset
;
4802 ASSURE_DESTINATION (safe_room
);
4804 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4805 EMIT_ONE_ASCII_BYTE (c
);
4806 else if (CHAR_BYTE8_P (c
))
4808 c
= CHAR_TO_BYTE8 (c
);
4813 charset
= char_charset (c
, charset_list
, &code
);
4816 if (CHARSET_DIMENSION (charset
) == 1)
4817 EMIT_ONE_BYTE (code
);
4818 else if (CHARSET_DIMENSION (charset
) == 2)
4819 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4820 else if (CHARSET_DIMENSION (charset
) == 3)
4821 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4823 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4824 (code
>> 8) & 0xFF, code
& 0xFF);
4828 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4829 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4831 c
= coding
->default_char
;
4837 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4838 coding
->produced_char
+= produced_chars
;
4839 coding
->produced
= dst
- coding
->destination
;
4844 /*** 10. C library functions ***/
4846 /* Setup coding context CODING from information about CODING_SYSTEM.
4847 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4848 CODING_SYSTEM is invalid, signal an error. */
4851 setup_coding_system (coding_system
, coding
)
4852 Lisp_Object coding_system
;
4853 struct coding_system
*coding
;
4856 Lisp_Object eol_type
;
4857 Lisp_Object coding_type
;
4860 if (NILP (coding_system
))
4861 coding_system
= Qno_conversion
;
4863 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4865 attrs
= CODING_ID_ATTRS (coding
->id
);
4866 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4869 coding
->head_ascii
= -1;
4870 coding
->common_flags
4871 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4872 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
4873 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
4874 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
4875 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
4876 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs
)))
4877 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4879 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4880 coding
->max_charset_id
= SCHARS (val
) - 1;
4881 coding
->safe_charsets
= (char *) SDATA (val
);
4882 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4884 coding_type
= CODING_ATTR_TYPE (attrs
);
4885 if (EQ (coding_type
, Qundecided
))
4887 coding
->detector
= NULL
;
4888 coding
->decoder
= decode_coding_raw_text
;
4889 coding
->encoder
= encode_coding_raw_text
;
4890 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4892 else if (EQ (coding_type
, Qiso_2022
))
4895 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4897 /* Invoke graphic register 0 to plane 0. */
4898 CODING_ISO_INVOCATION (coding
, 0) = 0;
4899 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4900 CODING_ISO_INVOCATION (coding
, 1)
4901 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4902 /* Setup the initial status of designation. */
4903 for (i
= 0; i
< 4; i
++)
4904 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4905 /* Not single shifting initially. */
4906 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4907 /* Beginning of buffer should also be regarded as bol. */
4908 CODING_ISO_BOL (coding
) = 1;
4909 coding
->detector
= detect_coding_iso_2022
;
4910 coding
->decoder
= decode_coding_iso_2022
;
4911 coding
->encoder
= encode_coding_iso_2022
;
4912 if (flags
& CODING_ISO_FLAG_SAFE
)
4913 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4914 coding
->common_flags
4915 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4916 | CODING_REQUIRE_FLUSHING_MASK
);
4917 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4918 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4919 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
4920 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
4921 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4923 setup_iso_safe_charsets (attrs
);
4924 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4925 coding
->max_charset_id
= SCHARS (val
) - 1;
4926 coding
->safe_charsets
= (char *) SDATA (val
);
4928 CODING_ISO_FLAGS (coding
) = flags
;
4930 else if (EQ (coding_type
, Qcharset
))
4932 coding
->detector
= detect_coding_charset
;
4933 coding
->decoder
= decode_coding_charset
;
4934 coding
->encoder
= encode_coding_charset
;
4935 coding
->common_flags
4936 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4938 else if (EQ (coding_type
, Qutf_8
))
4940 coding
->detector
= detect_coding_utf_8
;
4941 coding
->decoder
= decode_coding_utf_8
;
4942 coding
->encoder
= encode_coding_utf_8
;
4943 coding
->common_flags
4944 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4946 else if (EQ (coding_type
, Qutf_16
))
4948 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4949 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4950 : EQ (val
, Qt
) ? utf_16_with_bom
4951 : utf_16_without_bom
);
4952 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4953 CODING_UTF_16_ENDIAN (coding
) = (EQ (val
, Qbig
) ? utf_16_big_endian
4954 : utf_16_little_endian
);
4955 CODING_UTF_16_SURROGATE (coding
) = 0;
4956 coding
->detector
= detect_coding_utf_16
;
4957 coding
->decoder
= decode_coding_utf_16
;
4958 coding
->encoder
= encode_coding_utf_16
;
4959 coding
->common_flags
4960 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4961 if (CODING_UTF_16_BOM (coding
) == utf_16_detect_bom
)
4962 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4964 else if (EQ (coding_type
, Qccl
))
4966 coding
->detector
= detect_coding_ccl
;
4967 coding
->decoder
= decode_coding_ccl
;
4968 coding
->encoder
= encode_coding_ccl
;
4969 coding
->common_flags
4970 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4971 | CODING_REQUIRE_FLUSHING_MASK
);
4973 else if (EQ (coding_type
, Qemacs_mule
))
4975 coding
->detector
= detect_coding_emacs_mule
;
4976 coding
->decoder
= decode_coding_emacs_mule
;
4977 coding
->encoder
= encode_coding_emacs_mule
;
4978 coding
->common_flags
4979 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4980 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4981 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4983 Lisp_Object tail
, safe_charsets
;
4984 int max_charset_id
= 0;
4986 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4988 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4989 max_charset_id
= XFASTINT (XCAR (tail
));
4990 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
4992 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4994 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
4995 coding
->max_charset_id
= max_charset_id
;
4996 coding
->safe_charsets
= (char *) SDATA (safe_charsets
);
4999 else if (EQ (coding_type
, Qshift_jis
))
5001 coding
->detector
= detect_coding_sjis
;
5002 coding
->decoder
= decode_coding_sjis
;
5003 coding
->encoder
= encode_coding_sjis
;
5004 coding
->common_flags
5005 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5007 else if (EQ (coding_type
, Qbig5
))
5009 coding
->detector
= detect_coding_big5
;
5010 coding
->decoder
= decode_coding_big5
;
5011 coding
->encoder
= encode_coding_big5
;
5012 coding
->common_flags
5013 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5015 else /* EQ (coding_type, Qraw_text) */
5017 coding
->detector
= NULL
;
5018 coding
->decoder
= decode_coding_raw_text
;
5019 coding
->encoder
= encode_coding_raw_text
;
5025 /* Return raw-text or one of its subsidiaries that has the same
5026 eol_type as CODING-SYSTEM. */
5029 raw_text_coding_system (coding_system
)
5030 Lisp_Object coding_system
;
/* SPEC holds a coding-system spec vector: slot 0 is read below as the
   attribute vector and slot 2 as the eol type.  */
5032 Lisp_Object spec
, attrs
;
5033 Lisp_Object eol_type
, raw_text_eol_type
;
/* NOTE(review): no statement is visible under this nil test --
   confirm what is returned for a nil CODING_SYSTEM.  */
5035 if (NILP (coding_system
))
5037 spec
= CODING_SYSTEM_SPEC (coding_system
);
5038 attrs
= AREF (spec
, 0);
/* A coding system whose type is already raw-text is returned as is.  */
5040 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
5041 return coding_system
;
5043 eol_type
= AREF (spec
, 2);
/* NOTE(review): no statement is visible under this vector test --
   confirm the result when the eol type is a vector.  */
5044 if (VECTORP (eol_type
))
/* Look up raw-text's own spec; its slot 2 is indexed as a vector whose
   element 0 is chosen for Qunix, element 1 for Qdos, and element 2
   for any other eol type.  */
5046 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
5047 raw_text_eol_type
= AREF (spec
, 2);
5048 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
5049 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
5050 : AREF (raw_text_eol_type
, 2));
5054 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5055 does, return the subsidiary that has the same eol-spec as
5056 PARENT. Otherwise, return CODING_SYSTEM. */
5059 coding_inherit_eol_type (coding_system
, parent
)
5060 Lisp_Object coding_system
, parent
;
5062 Lisp_Object spec
, eol_type
;
/* A nil CODING_SYSTEM is treated as raw-text.  */
5064 if (NILP (coding_system
))
5065 coding_system
= Qraw_text
;
5066 spec
= CODING_SYSTEM_SPEC (coding_system
);
/* Slot 2 of the spec is the eol type.  The code that follows indexes
   it as a vector (elements 0, 1 and 2).  */
5067 eol_type
= AREF (spec
, 2);
5068 if (VECTORP (eol_type
)
5071 Lisp_Object parent_spec
;
5072 Lisp_Object parent_eol_type
;
/* NOTE(review): the spec consulted here comes from
   buffer_defaults.buffer_file_coding_system; no use of PARENT is
   visible in this function -- confirm that this matches the contract
   stated for PARENT.  */
5075 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
5076 parent_eol_type
= AREF (parent_spec
, 2);
/* Replace CODING_SYSTEM by element 0, 1 or 2 of EOL_TYPE when the
   consulted eol type is Qunix, Qdos or Qmac respectively.  No branch
   is visible for any other value, which leaves CODING_SYSTEM as it
   was.  */
5077 if (EQ (parent_eol_type
, Qunix
))
5078 coding_system
= AREF (eol_type
, 0);
5079 else if (EQ (parent_eol_type
, Qdos
))
5080 coding_system
= AREF (eol_type
, 1);
5081 else if (EQ (parent_eol_type
, Qmac
))
5082 coding_system
= AREF (eol_type
, 2);
5084 return coding_system
;
5087 /* Emacs has a mechanism to automatically detect a coding system if it
5088 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5089 it's impossible to distinguish some coding systems accurately
5090 because they use the same range of codes. So, at first, coding
5091 systems are categorized into the following categories:
5093 o coding-category-emacs-mule
5095 The category for a coding system which has the same code range
5096 as Emacs' internal format. Assigned the coding-system (Lisp
5097 symbol) `emacs-mule' by default.
5099 o coding-category-sjis
5101 The category for a coding system which has the same code range
5102 as SJIS. Assigned the coding-system (Lisp
5103 symbol) `japanese-shift-jis' by default.
5105 o coding-category-iso-7
5107 The category for a coding system which has the same code range
5108 as ISO2022 of 7-bit environment. This doesn't use any locking
5109 shift and single shift functions. This can encode/decode all
5110 charsets. Assigned the coding-system (Lisp symbol)
5111 `iso-2022-7bit' by default.
5113 o coding-category-iso-7-tight
5115 Same as coding-category-iso-7 except that this can
5116 encode/decode only the specified charsets.
5118 o coding-category-iso-8-1
5120 The category for a coding system which has the same code range
5121 as ISO2022 of 8-bit environment and graphic plane 1 used only
5122 for DIMENSION1 charset. This doesn't use any locking shift
5123 and single shift functions. Assigned the coding-system (Lisp
5124 symbol) `iso-latin-1' by default.
5126 o coding-category-iso-8-2
5128 The category for a coding system which has the same code range
5129 as ISO2022 of 8-bit environment and graphic plane 1 used only
5130 for DIMENSION2 charset. This doesn't use any locking shift
5131 and single shift functions. Assigned the coding-system (Lisp
5132 symbol) `japanese-iso-8bit' by default.
5134 o coding-category-iso-7-else
5136 The category for a coding system which has the same code range
5137 as ISO2022 of 7-bit environment but uses locking shift or
5138 single shift functions. Assigned the coding-system (Lisp
5139 symbol) `iso-2022-7bit-lock' by default.
5141 o coding-category-iso-8-else
5143 The category for a coding system which has the same code range
5144 as ISO2022 of 8-bit environemnt but uses locking shift or
5145 single shift functions. Assigned the coding-system (Lisp
5146 symbol) `iso-2022-8bit-ss2' by default.
5148 o coding-category-big5
5150 The category for a coding system which has the same code range
5151 as BIG5. Assigned the coding-system (Lisp symbol)
5152 `cn-big5' by default.
5154 o coding-category-utf-8
5156 The category for a coding system which has the same code range
5157 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
5158 symbol) `utf-8' by default.
5160 o coding-category-utf-16-be
5162 The category for a coding system in which a text has a
5163 Unicode signature (cf. Unicode Standard) in the order of BIG
5164 endian at the head. Assigned the coding-system (Lisp symbol)
5165 `utf-16-be' by default.
5167 o coding-category-utf-16-le
5169 The category for a coding system in which a text has a
5170 Unicode signature (cf. Unicode Standard) in the order of
5171 LITTLE endian at the head. Assigned the coding-system (Lisp
5172 symbol) `utf-16-le' by default.
5174 o coding-category-ccl
5176 The category for a coding system of which encoder/decoder is
5177 written in CCL programs. The default value is nil, i.e., no
5178 coding system is assigned.
5180 o coding-category-binary
5182 The category for a coding system not categorized in any of the
5183 above. Assigned the coding-system (Lisp symbol)
5184 `no-conversion' by default.
5186 Each of them is a Lisp symbol and the value is an actual
5187 `coding-system' (this is also a Lisp symbol) assigned by a user.
5188 What Emacs does actually is to detect a category of coding system.
5189 Then, it uses a `coding-system' assigned to it. If Emacs can't
5190 decide only one possible category, it selects a category of the
5191 highest priority. Priorities of categories are also specified by a
5192 user in a Lisp variable `coding-category-list'.
5196 #define EOL_SEEN_NONE 0
5197 #define EOL_SEEN_LF 1
5198 #define EOL_SEEN_CR 2
5199 #define EOL_SEEN_CRLF 4
5201 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5202 SOURCE is encoded. If CATEGORY is one of
5203 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5204 two-byte, else they are encoded by one-byte.
5206 Return one of EOL_SEEN_XXX. */
5208 #define MAX_EOL_CHECK_COUNT 3
5211 detect_eol (source
, src_bytes
, category
)
5212 unsigned char *source
;
5213 EMACS_INT src_bytes
;
5214 enum coding_category category
;
/* SRC scans the half-open range [SOURCE, SOURCE + SRC_BYTES).  */
5216 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5219 int eol_seen
= EOL_SEEN_NONE
;
/* UTF-16 categories take the first loop, which needs at least two
   bytes left and inspects the bytes at offsets MSB and LSB; every
   other category takes the second, bytewise loop.  */
5221 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
/* NOTE(review): CATEGORY is compared with the bitwise OR of two enum
   constants rather than with each constant in turn -- confirm that
   this is the intended test.  */
5225 msb
= category
== (coding_category_utf_16_le
5226 | coding_category_utf_16_le_nosig
);
5229 while (src
+ 1 < src_end
)
/* A line end is a unit whose byte at offset MSB is zero and whose
   character C is LF or CR.  */
5232 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5237 this_eol
= EOL_SEEN_LF
;
/* A CR is reported alone unless a following unit exists whose byte at
   MSB + 2 is zero and whose byte at LSB + 2 is LF.  */
5238 else if (src
+ 3 >= src_end
5239 || src
[msb
+ 2] != 0
5240 || src
[lsb
+ 2] != '\n')
5241 this_eol
= EOL_SEEN_CR
;
5243 this_eol
= EOL_SEEN_CRLF
;
5245 if (eol_seen
== EOL_SEEN_NONE
)
5246 /* This is the first end-of-line. */
5247 eol_seen
= this_eol
;
5248 else if (eol_seen
!= this_eol
)
5250 /* The type found differs from the one found before; mixed
   conventions are reported as EOL_SEEN_LF. */
5251 eol_seen
= EOL_SEEN_LF
;
/* Count this line end and compare against MAX_EOL_CHECK_COUNT.  */
5254 if (++total
== MAX_EOL_CHECK_COUNT
)
/* Bytewise scan for all other categories.  */
5262 while (src
< src_end
)
5265 if (c
== '\n' || c
== '\r')
5270 this_eol
= EOL_SEEN_LF
;
/* A CR not followed by LF is reported alone; otherwise the LF is
   consumed too (SRC is advanced) and CRLF is reported.  */
5271 else if (src
>= src_end
|| *src
!= '\n')
5272 this_eol
= EOL_SEEN_CR
;
5274 this_eol
= EOL_SEEN_CRLF
, src
++;
5276 if (eol_seen
== EOL_SEEN_NONE
)
5277 /* This is the first end-of-line. */
5278 eol_seen
= this_eol
;
5279 else if (eol_seen
!= this_eol
)
5281 /* The type found differs from the one found before; mixed
   conventions are reported as EOL_SEEN_LF. */
5282 eol_seen
= EOL_SEEN_LF
;
/* Count this line end and compare against MAX_EOL_CHECK_COUNT.  */
5285 if (++total
== MAX_EOL_CHECK_COUNT
)
5295 adjust_coding_eol_type (coding
, eol_seen
)
5296 struct coding_system
*coding
;
/* Replace CODING->id by the id of element 0, 1 or 2 of the eol-type
   value of the current id, according to which EOL_SEEN_* bit is set
   in EOL_SEEN.  The bits are tested in the order LF, CRLF, CR, so LF
   takes precedence when several bits are set.
   NOTE(review): the AREF calls assume the eol-type value is a vector
   with at least three elements -- confirm that callers guarantee
   this.  */
5299 Lisp_Object eol_type
;
5301 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5302 if (eol_seen
& EOL_SEEN_LF
)
5304 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
5307 else if (eol_seen
& EOL_SEEN_CRLF
)
5309 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
5312 else if (eol_seen
& EOL_SEEN_CR
)
5314 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5320 /* Detect how a text specified in CODING is encoded. If a coding
5321 system is detected, update fields of CODING by the detected coding
5325 detect_coding (coding
)
5326 struct coding_system
*coding
;
5328 const unsigned char *src
, *src_end
;
5329 Lisp_Object attrs
, coding_type
;
5331 coding
->consumed
= coding
->consumed_char
= 0;
5332 coding
->produced
= coding
->produced_char
= 0;
5333 coding_set_source (coding
);
5335 src_end
= coding
->source
+ coding
->src_bytes
;
5337 /* If we have not yet decided the text encoding type, detect it
5339 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
5343 for (i
= 0, src
= coding
->source
; src
< src_end
; i
++, src
++)
5346 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
5348 || c
== ISO_CODE_SO
)))
5351 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5353 if (coding
->head_ascii
< coding
->src_bytes
)
5355 struct coding_detection_info detect_info
;
5356 enum coding_category category
;
5357 struct coding_system
*this;
5359 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
5360 for (i
= 0; i
< coding_category_raw_text
; i
++)
5362 category
= coding_priorities
[i
];
5363 this = coding_categories
+ category
;
5366 /* No coding system of this category is defined. */
5367 detect_info
.rejected
|= (1 << category
);
5369 else if (category
>= coding_category_raw_text
)
5371 else if (detect_info
.checked
& (1 << category
))
5373 if (detect_info
.found
& (1 << category
))
5376 else if ((*(this->detector
)) (coding
, &detect_info
)
5377 && detect_info
.found
& (1 << category
))
5379 if (category
== coding_category_utf_16_auto
)
5381 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5382 category
= coding_category_utf_16_le
;
5384 category
= coding_category_utf_16_be
;
5389 if (i
< coding_category_raw_text
)
5390 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5391 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5392 setup_coding_system (Qraw_text
, coding
);
5393 else if (detect_info
.rejected
)
5394 for (i
= 0; i
< coding_category_raw_text
; i
++)
5395 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5397 this = coding_categories
+ coding_priorities
[i
];
5398 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5403 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding
->id
)))
5404 == coding_category_utf_16_auto
)
5406 Lisp_Object coding_systems
;
5407 struct coding_detection_info detect_info
;
5410 = AREF (CODING_ID_ATTRS (coding
->id
), coding_attr_utf_16_bom
);
5411 detect_info
.found
= detect_info
.rejected
= 0;
5412 if (CONSP (coding_systems
)
5413 && detect_coding_utf_16 (coding
, &detect_info
))
5415 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5416 setup_coding_system (XCAR (coding_systems
), coding
);
5417 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
5418 setup_coding_system (XCDR (coding_systems
), coding
);
5426 struct coding_system
*coding
;
5428 Lisp_Object eol_type
;
5429 unsigned char *p
, *pbeg
, *pend
;
5431 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5432 if (EQ (eol_type
, Qunix
))
5435 if (NILP (coding
->dst_object
))
5436 pbeg
= coding
->destination
;
5438 pbeg
= BYTE_POS_ADDR (coding
->dst_pos_byte
);
5439 pend
= pbeg
+ coding
->produced
;
5441 if (VECTORP (eol_type
))
5443 int eol_seen
= EOL_SEEN_NONE
;
5445 for (p
= pbeg
; p
< pend
; p
++)
5448 eol_seen
|= EOL_SEEN_LF
;
5449 else if (*p
== '\r')
5451 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5453 eol_seen
|= EOL_SEEN_CRLF
;
5457 eol_seen
|= EOL_SEEN_CR
;
5460 if (eol_seen
!= EOL_SEEN_NONE
5461 && eol_seen
!= EOL_SEEN_LF
5462 && eol_seen
!= EOL_SEEN_CRLF
5463 && eol_seen
!= EOL_SEEN_CR
)
5464 eol_seen
= EOL_SEEN_LF
;
5465 if (eol_seen
!= EOL_SEEN_NONE
)
5466 eol_type
= adjust_coding_eol_type (coding
, eol_seen
);
5469 if (EQ (eol_type
, Qmac
))
5471 for (p
= pbeg
; p
< pend
; p
++)
5475 else if (EQ (eol_type
, Qdos
))
5479 if (NILP (coding
->dst_object
))
5481 for (p
= pend
- 2; p
>= pbeg
; p
--)
5484 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
-- - p
- 1);
5490 for (p
= pend
- 2; p
>= pbeg
; p
--)
5493 int pos_byte
= coding
->dst_pos_byte
+ (p
- pbeg
);
5494 int pos
= BYTE_TO_CHAR (pos_byte
);
5496 del_range_2 (pos
, pos_byte
, pos
+ 1, pos_byte
+ 1, 0);
5500 coding
->produced
-= n
;
5501 coding
->produced_char
-= n
;
5506 /* Return a translation table (or list of them) from coding system
5507 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5508 decoding (ENCODEP is zero). */
5511 get_translation_table (attrs
, encodep
)
5513 Lisp_Object standard
, translation_table
;
/* Two table/standard pairs follow: the first reads the encode table of
   ATTRS with Vstandard_translation_table_for_encode, the second the
   decode table with Vstandard_translation_table_for_decode.
   NOTE(review): the test on ENCODEP that selects between them is not
   visible here -- confirm.  */
5516 translation_table
= CODING_ATTR_ENCODE_TBL (attrs
),
5517 standard
= Vstandard_translation_table_for_encode
;
5519 translation_table
= CODING_ATTR_DECODE_TBL (attrs
),
5520 standard
= Vstandard_translation_table_for_decode
;
5521 if (NILP (translation_table
))
/* A symbol is replaced by the value of its Qtranslation_table
   property.  */
5523 if (SYMBOLP (translation_table
))
5524 translation_table
= Fget (translation_table
, Qtranslation_table
);
5525 else if (CONSP (translation_table
))
/* For a list, copy it first; the loop then overwrites each symbol
   element of the copy with that symbol's Qtranslation_table
   property.  */
5529 translation_table
= Fcopy_sequence (translation_table
);
5530 for (val
= translation_table
; CONSP (val
); val
= XCDR (val
))
5531 if (SYMBOLP (XCAR (val
)))
5532 XSETCAR (val
, Fget (XCAR (val
), Qtranslation_table
));
/* A non-nil standard table is added as the last element: appended
   with nconc2 when TRANSLATION_TABLE is already a list, otherwise by
   building a two-element list.  */
5534 if (! NILP (standard
))
5536 if (CONSP (translation_table
))
5537 translation_table
= nconc2 (translation_table
, Fcons (standard
, Qnil
));
5539 translation_table
= Fcons (translation_table
, Fcons (standard
, Qnil
));
5541 return translation_table
;
5546 get_translation (val
, buf
, buf_end
, last_block
, from_nchars
, to_nchars
)
/* Walks VAL as a list; for each entry the inner loop compares element
   I of FROM against BUF[I] and checks whether BUF + I has reached
   BUF_END.  In the last visible statement *BUF receives element 0 of
   VAL and *TO_NCHARS receives ASIZE (VAL).
   NOTE(review): how LAST_BLOCK and *FROM_NCHARS are used is not
   visible here -- confirm against the callers.  */
5550 int *from_nchars
, *to_nchars
;
5552 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5556 Lisp_Object from
, tail
;
5559 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
5564 for (i
= 0; i
< len
; i
++)
5566 if (buf
+ i
== buf_end
)
5572 if (XINT (AREF (from
, i
)) != buf
[i
])
5586 *buf
= XINT (AREF (val
, 0)), *to_nchars
= ASIZE (val
);
5594 produce_chars (coding
, translation_table
, last_block
)
5595 struct coding_system
*coding
;
5596 Lisp_Object translation_table
;
5599 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5600 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5602 int produced_chars
= 0;
5605 if (! coding
->chars_at_source
)
5607 /* Characters are in coding->charbuf. */
5608 int *buf
= coding
->charbuf
;
5609 int *buf_end
= buf
+ coding
->charbuf_used
;
5611 if (BUFFERP (coding
->src_object
)
5612 && EQ (coding
->src_object
, coding
->dst_object
))
5613 dst_end
= ((unsigned char *) coding
->source
) + coding
->consumed
;
5615 while (buf
< buf_end
)
5621 int from_nchars
= 1, to_nchars
= 1;
5622 Lisp_Object trans
= Qnil
;
5624 if (! NILP (translation_table
)
5625 && ! NILP (trans
= CHAR_TABLE_REF (translation_table
, c
)))
5627 trans
= get_translation (trans
, buf
, buf_end
, last_block
,
5628 &from_nchars
, &to_nchars
);
5634 if (dst
+ MAX_MULTIBYTE_LENGTH
* to_nchars
> dst_end
)
5636 dst
= alloc_destination (coding
,
5638 + MAX_MULTIBYTE_LENGTH
* to_nchars
,
5640 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5643 for (i
= 0; i
< to_nchars
; i
++)
5646 c
= XINT (AREF (trans
, i
));
5647 if (coding
->dst_multibyte
5648 || ! CHAR_BYTE8_P (c
))
5649 CHAR_STRING_ADVANCE (c
, dst
);
5651 *dst
++ = CHAR_TO_BYTE8 (c
);
5653 produced_chars
+= to_nchars
;
5655 while (--from_nchars
> 0)
5659 /* This is an annotation datum. (-C) is the length. */
5662 carryover
= buf_end
- buf
;
5666 const unsigned char *src
= coding
->source
;
5667 const unsigned char *src_end
= src
+ coding
->src_bytes
;
5668 Lisp_Object eol_type
;
5670 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5672 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5674 if (coding
->src_multibyte
)
5681 const unsigned char *src_base
= src
;
5687 if (EQ (eol_type
, Qdos
))
5691 record_conversion_result
5692 (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
5693 goto no_more_source
;
5698 else if (EQ (eol_type
, Qmac
))
5703 coding
->consumed
= src
- coding
->source
;
5705 if (EQ (coding
->src_object
, coding
->dst_object
))
5706 dst_end
= (unsigned char *) src
;
5709 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5711 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5712 coding_set_source (coding
);
5713 src
= coding
->source
+ coding
->consumed
;
5714 src_end
= coding
->source
+ coding
->src_bytes
;
5724 while (src
< src_end
)
5731 if (EQ (eol_type
, Qdos
))
5737 else if (EQ (eol_type
, Qmac
))
5740 if (dst
>= dst_end
- 1)
5742 coding
->consumed
= src
- coding
->source
;
5744 if (EQ (coding
->src_object
, coding
->dst_object
))
5745 dst_end
= (unsigned char *) src
;
5746 if (dst
>= dst_end
- 1)
5748 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5750 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5751 coding_set_source (coding
);
5752 src
= coding
->source
+ coding
->consumed
;
5753 src_end
= coding
->source
+ coding
->src_bytes
;
5761 if (!EQ (coding
->src_object
, coding
->dst_object
))
5763 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5767 EMACS_INT offset
= src
- coding
->source
;
5769 dst
= alloc_destination (coding
, require
, dst
);
5770 coding_set_source (coding
);
5771 src
= coding
->source
+ offset
;
5772 src_end
= coding
->source
+ coding
->src_bytes
;
5775 produced_chars
= coding
->src_chars
;
5776 while (src
< src_end
)
5782 if (EQ (eol_type
, Qdos
))
5789 else if (EQ (eol_type
, Qmac
))
5795 coding
->consumed
= coding
->src_bytes
;
5796 coding
->consumed_char
= coding
->src_chars
;
5799 produced
= dst
- (coding
->destination
+ coding
->produced
);
5800 if (BUFFERP (coding
->dst_object
))
5801 insert_from_gap (produced_chars
, produced
);
5802 coding
->produced
+= produced
;
5803 coding
->produced_char
+= produced_chars
;
5807 /* Compose text in CODING->object according to the annotation data at
5808 CHARBUF. CHARBUF is an array:
5809 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5813 produce_composition (coding
, charbuf
, pos
)
5814 struct coding_system
*coding
;
5820 enum composition_method method
;
5821 Lisp_Object components
;
/* TO is POS plus charbuf[2]; the composition method is read from
   charbuf[3].
   NOTE(review): the record layout documented for CHARBUF lists
   FROM TO METHOD after ANNOTATION_MASK, which does not obviously
   match these two indices -- confirm which one is current.  */
5824 to
= pos
+ charbuf
[2];
5825 method
= (enum composition_method
) (charbuf
[3]);
/* NOTE(review): the statement taken for COMPOSITION_RELATIVE is not
   visible here.  For the other methods ARGS has room for
   MAX_COMPOSITION_COMPONENTS * 2 - 1 entries.  */
5827 if (method
== COMPOSITION_RELATIVE
)
5831 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
/* Copy LEN integers from CHARBUF into ARGS as Lisp numbers.  */
5836 for (i
= 0; i
< len
; i
++)
5837 args
[i
] = make_number (charbuf
[i
]);
/* COMPOSITION_WITH_ALTCHARS gets a string built from ARGS; any other
   method reaching this point gets a vector.  */
5838 components
= (method
== COMPOSITION_WITH_ALTCHARS
5839 ? Fstring (len
, args
) : Fvector (len
, args
));
/* Hand POS, TO and COMPONENTS to compose_text together with
   CODING->dst_object.  */
5841 compose_text (pos
, to
, components
, Qnil
, coding
->dst_object
);
5845 /* Put `charset' property on text in CODING->object according to
5846 the annotation data at CHARBUF. CHARBUF is an array:
5847 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
5851 produce_charset (coding
, charbuf
, pos
)
5852 struct coding_system
*coding
;
/* The annotated range ends at POS and starts charbuf[2] positions
   before it; charbuf[3] is the charset id.  */
5856 EMACS_INT from
= pos
- charbuf
[2];
5857 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[3]);
/* Put property Qcharset, with the charset's name as value, on
   FROM..POS of CODING->dst_object.  */
5859 Fput_text_property (make_number (from
), make_number (pos
),
5860 Qcharset
, CHARSET_NAME (charset
),
5861 coding
->dst_object
);
/* ALLOC_CONVERSION_WORK_AREA sets CODING->charbuf to an alloca'ed
   array of int, starting from CHARBUF_SIZE elements and looping while
   the size is above 1024, and stores the size used in
   CODING->charbuf_size.  If CODING->charbuf is still null afterwards
   it records CODING_RESULT_INSUFFICIENT_MEM and executes `return
   coding->result', i.e. it returns from the function that uses the
   macro.  The argument is expanded unparenthesized, so pass a plain
   identifier.
   NOTE(review): the declaration of `size' ends in a doubled
   semicolon (an empty statement), and the null check relies on
   alloca reporting failure by returning NULL -- confirm both.  */
5865 #define CHARBUF_SIZE 0x4000
5867 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5869 int size = CHARBUF_SIZE;; \
5871 coding->charbuf = NULL; \
5872 while (size > 1024) \
5874 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5875 if (coding->charbuf) \
5879 if (! coding->charbuf) \
5881 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5882 return coding->result; \
5884 coding->charbuf_size = size; \
5889 produce_annotation (coding
, pos
)
5890 struct coding_system
*coding
;
/* Walk the used part of CODING->charbuf, from its first entry up to
   charbuf_used entries.  */
5893 int *charbuf
= coding
->charbuf
;
5894 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
/* NOTE(review): the action taken for a nil destination object is not
   visible here -- presumably nothing is annotated then; confirm.  */
5896 if (NILP (coding
->dst_object
))
5899 while (charbuf
< charbuf_end
)
/* LEN is the negation of the current entry; the annotation record
   layouts documented in this file begin with -LENGTH.  */
5905 int len
= -*charbuf
;
/* Each annotation kind is passed to its producer together with the
   record start and POS.  */
5908 case CODING_ANNOTATE_COMPOSITION_MASK
:
5909 produce_composition (coding
, charbuf
, pos
);
5911 case CODING_ANNOTATE_CHARSET_MASK
:
5912 produce_charset (coding
, charbuf
, pos
);
5922 /* Decode the data at CODING->src_object into CODING->dst_object.
5923 CODING->src_object is a buffer, a string, or nil.
5924 CODING->dst_object is a buffer.
5926 If CODING->src_object is a buffer, it must be the current buffer.
5927 In this case, if CODING->src_pos is positive, it is a position of
5928 the source text in the buffer, otherwise, the source text is in the
5929 gap area of the buffer, and CODING->src_pos specifies the offset of
5930 the text from GPT (which must be the same as PT). If this is the
5931 same buffer as CODING->dst_object, CODING->src_pos must be
5934 If CODING->src_object is a string, CODING->src_pos is an index to
5937 If CODING->src_object is nil, CODING->source must already point to
5938 the non-relocatable memory area. In this case, CODING->src_pos is
5939 an offset from CODING->source.
5941 The decoded data is inserted at the current point of the buffer
5946 decode_coding (coding
)
5947 struct coding_system
*coding
;
/* UNDO_LIST keeps the destination buffer's undo list while this
   function runs with current_buffer->undo_list set to Qt; it is put
   back before record_insert near the end.  */
5950 Lisp_Object undo_list
;
5951 Lisp_Object translation_table
;
/* When the source is a buffer range that starts before the gap and
   ends after it, move the gap to the start of the source.  */
5955 if (BUFFERP (coding
->src_object
)
5956 && coding
->src_pos
> 0
5957 && coding
->src_pos
< GPT
5958 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5959 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
/* Make the destination buffer current and bring the gap to point.  */
5962 if (BUFFERP (coding
->dst_object
))
5964 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5965 set_buffer_internal (XBUFFER (coding
->dst_object
));
5967 move_gap_both (PT
, PT_BYTE
);
5968 undo_list
= current_buffer
->undo_list
;
5969 current_buffer
->undo_list
= Qt
;
/* Reset the progress counters and the result code.  */
5972 coding
->consumed
= coding
->consumed_char
= 0;
5973 coding
->produced
= coding
->produced_char
= 0;
5974 coding
->chars_at_source
= 0;
5975 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
/* The macro contains a `return coding->result' for the case where no
   work area could be obtained, so control may leave here.  */
5978 ALLOC_CONVERSION_WORK_AREA (coding
);
5980 attrs
= CODING_ID_ATTRS (coding
->id
);
/* 0 asks for the table used for decoding.  */
5981 translation_table
= get_translation_table (attrs
, 0);
/* Main loop: run the decoder into charbuf, emit the characters with
   LAST_BLOCK = 0, apply annotations at POS, then copy the last
   CARRYOVER charbuf entries (the destination of that copy is not
   visible here; presumably the start of charbuf) for the next
   round.  The loop repeats while source bytes remain and no result
   code has been recorded.  */
5986 EMACS_INT pos
= coding
->dst_pos
+ coding
->produced_char
;
5988 coding_set_source (coding
);
5989 coding
->annotated
= 0;
5990 coding
->charbuf_used
= carryover
;
5991 (*(coding
->decoder
)) (coding
);
5992 coding_set_destination (coding
);
5993 carryover
= produce_chars (coding
, translation_table
, 0);
5994 if (coding
->annotated
)
5995 produce_annotation (coding
, pos
);
5996 for (i
= 0; i
< carryover
; i
++)
5998 = coding
->charbuf
[coding
->charbuf_used
- carryover
+ i
];
6000 while (coding
->consumed
< coding
->src_bytes
6001 && ! coding
->result
);
/* Emit whatever was carried over, this time with LAST_BLOCK = 1.  */
6005 coding_set_destination (coding
);
6006 coding
->charbuf_used
= carryover
;
6007 produce_chars (coding
, translation_table
, 1);
/* Deal with source bytes the decoder did not consume.  */
6010 coding
->carryover_bytes
= 0;
6011 if (coding
->consumed
< coding
->src_bytes
)
6013 int nbytes
= coding
->src_bytes
- coding
->consumed
;
6014 const unsigned char *src
;
6016 coding_set_source (coding
);
6017 coding_set_destination (coding
);
6018 src
= coding
->source
+ coding
->consumed
;
6020 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
6022 /* Flush out unprocessed data as binary chars. We are sure
6023 that the number of data is less than the size of
coding->charbuf. */
6025 coding
->charbuf_used
= 0;
6026 while (nbytes
-- > 0)
/* NOTE(review): bytes with the high bit set are stored negated, while
   produce_chars describes negative entries as annotation data --
   confirm that such bytes are really emitted.  */
6030 coding
->charbuf
[coding
->charbuf_used
++] = (c
& 0x80 ? - c
: c
);
/* produce_chars is defined with three parameters, so pass all of
   them: no translation table for raw bytes, and LAST_BLOCK = 1
   because this branch runs only for the last block.  */
6032 produce_chars (coding
, Qnil
, 1);
6036 /* Record unprocessed bytes in coding->carryover. We are
6037 sure that the number of data is less than the size of
6038 coding->carryover. */
6039 unsigned char *p
= coding
->carryover
;
6041 coding
->carryover_bytes
= nbytes
;
6042 while (nbytes
-- > 0)
/* Either way the whole source now counts as consumed.  */
6045 coding
->consumed
= coding
->src_bytes
;
/* Put back the undo list saved above and record the insertion.  */
6048 if (BUFFERP (coding
->dst_object
))
6050 current_buffer
->undo_list
= undo_list
;
6051 record_insert (coding
->dst_pos
, coding
->produced_char
);
/* Any eol type other than Qunix is passed on to decode_eol.  */
6053 if (! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
6054 decode_eol (coding
);
6055 return coding
->result
;
6059 /* Extract an annotation datum from a composition starting at POS and
6060 ending before LIMIT of CODING->src_object (buffer or string), store
6061 the data in BUF, set *STOP to a starting position of the next
6062 composition (if any) or to LIMIT, and return the address of the
6063 next element of BUF.
6065 If such an annotation is not found, set *STOP to a starting
6066 position of a composition after POS (if any) or to LIMIT, and
6070 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
6071 EMACS_INT pos
, limit
;
6072 struct coding_system
*coding
;
6076 EMACS_INT start
, end
;
6079 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
6082 else if (start
> pos
)
6088 /* We found a composition. Store the corresponding
6089 annotation data in BUF. */
6091 enum composition_method method
= COMPOSITION_METHOD (prop
);
6092 int nchars
= COMPOSITION_LENGTH (prop
);
6094 ADD_COMPOSITION_DATA (buf
, nchars
, method
);
6095 if (method
!= COMPOSITION_RELATIVE
)
6097 Lisp_Object components
;
6100 components
= COMPOSITION_COMPONENTS (prop
);
6101 if (VECTORP (components
))
6103 len
= XVECTOR (components
)->size
;
6104 for (i
= 0; i
< len
; i
++)
6105 *buf
++ = XINT (AREF (components
, i
));
6107 else if (STRINGP (components
))
6109 len
= SCHARS (components
);
6113 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
6117 else if (INTEGERP (components
))
6120 *buf
++ = XINT (components
);
6122 else if (CONSP (components
))
6124 for (len
= 0; CONSP (components
);
6125 len
++, components
= XCDR (components
))
6126 *buf
++ = XINT (XCAR (components
));
6134 if (find_composition (end
, limit
, &start
, &end
, &prop
,
6145 /* Extract an annotation datum from a text property `charset' at POS of
6146 CODING->src_object (buffer or string), store the data in BUF, set
6147 *STOP to the position where the value of `charset' property changes
6148 (limiting by LIMIT), and return the address of the next element of
6151 If the property value is nil, set *STOP to the position where the
6152 property value is non-nil (limiting by LIMIT), and return BUF. */
6155 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
6156 EMACS_INT pos
, limit
;
6157 struct coding_system
*coding
;
6161 Lisp_Object val
, next
;
/* Read text property Qcharset at POS of the source object.  */
6164 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
/* Only a non-nil value that satisfies CHARSETP yields an id here.
   NOTE(review): the value ID has otherwise is not visible -- confirm.  */
6165 if (! NILP (val
) && CHARSETP (val
))
6166 id
= XINT (CHARSET_SYMBOL_ID (val
));
/* Append a charset annotation for ID to BUF; the second argument is
   passed as 0 (presumably the NCHARS field, filled in later -- TODO
   confirm).  */
6169 ADD_CHARSET_DATA (buf
, 0, id
);
/* *STOP becomes the next position at which the Qcharset property
   changes, with LIMIT passed as the bound of the search.  */
6170 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
6172 make_number (limit
));
6173 *stop
= XINT (next
);
6179 consume_chars (coding
, translation_table
)
6180 struct coding_system
*coding
;
6181 Lisp_Object translation_table
;
6183 int *buf
= coding
->charbuf
;
6184 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
6185 const unsigned char *src
= coding
->source
+ coding
->consumed
;
6186 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
6187 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
6188 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
6189 int multibytep
= coding
->src_multibyte
;
6190 Lisp_Object eol_type
;
6192 EMACS_INT stop
, stop_composition
, stop_charset
;
6193 int max_lookup
= 0, *lookup_buf
= NULL
;
6195 if (! NILP (translation_table
))
6197 max_lookup
= XINT (XCHAR_TABLE (translation_table
)->extras
[1]);
6198 lookup_buf
= alloca (sizeof (int) * max_lookup
);
6201 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
6202 if (VECTORP (eol_type
))
6205 /* Note: composition handling is not yet implemented. */
6206 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6208 if (NILP (coding
->src_object
))
6209 stop
= stop_composition
= stop_charset
= end_pos
;
6212 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
6213 stop
= stop_composition
= pos
;
6215 stop
= stop_composition
= end_pos
;
6216 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
6217 stop
= stop_charset
= pos
;
6219 stop_charset
= end_pos
;
6222 /* Compensate for CRLF and conversion. */
6223 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
6224 while (buf
< buf_end
)
6232 if (pos
== stop_composition
)
6233 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
6234 buf
, &stop_composition
);
6235 if (pos
== stop_charset
)
6236 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
6237 buf
, &stop_charset
);
6238 stop
= (stop_composition
< stop_charset
6239 ? stop_composition
: stop_charset
);
6246 if (! CODING_FOR_UNIBYTE (coding
)
6247 && (bytes
= MULTIBYTE_LENGTH (src
, src_end
)) > 0)
6248 c
= STRING_CHAR_ADVANCE (src
), pos
+= bytes
;
6253 c
= STRING_CHAR_ADVANCE (src
), pos
++;
6254 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
6256 if (! EQ (eol_type
, Qunix
))
6260 if (EQ (eol_type
, Qdos
))
6267 if (NILP (translation_table
)
6268 || NILP (trans
= CHAR_TABLE_REF (translation_table
, c
)))
6272 int from_nchars
= 1, to_nchars
= 1;
6273 int *lookup_buf_end
;
6274 const unsigned char *p
= src
;
6278 for (i
= 1; i
< max_lookup
&& p
< src_end
; i
++)
6279 lookup_buf
[i
] = STRING_CHAR_ADVANCE (p
);
6280 lookup_buf_end
= lookup_buf
+ i
;
6281 trans
= get_translation (trans
, lookup_buf
, lookup_buf_end
, 1,
6282 &from_nchars
, &to_nchars
);
6284 || buf
+ to_nchars
> buf_end
)
6286 *buf
++ = *lookup_buf
;
6287 for (i
= 1; i
< to_nchars
; i
++)
6288 *buf
++ = XINT (AREF (trans
, i
));
6289 for (i
= 1; i
< from_nchars
; i
++, pos
++)
6290 src
+= MULTIBYTE_LENGTH_NO_CHECK (src
);
6294 coding
->consumed
= src
- coding
->source
;
6295 coding
->consumed_char
= pos
- coding
->src_pos
;
6296 coding
->charbuf_used
= buf
- coding
->charbuf
;
6297 coding
->chars_at_source
= 0;
6301 /* Encode the text at CODING->src_object into CODING->dst_object.
6302 CODING->src_object is a buffer or a string.
6303 CODING->dst_object is a buffer or nil.
6305 If CODING->src_object is a buffer, it must be the current buffer.
6306 In this case, if CODING->src_pos is positive, it is a position of
6307 the source text in the buffer, otherwise, the source text is in the
6308 gap area of the buffer, and coding->src_pos specifies the offset of
6309 the text from GPT (which must be the same as PT). If this is the
6310 same buffer as CODING->dst_object, CODING->src_pos must be
6311 negative and CODING should not have `pre-write-conversion'.
6313 If CODING->src_object is a string, CODING should not have
6314 `pre-write-conversion'.
6316 If CODING->dst_object is a buffer, the encoded data is inserted at
6317 the current point of that buffer.
6319 If CODING->dst_object is nil, the encoded data is placed at the
6320 memory area specified by CODING->destination. */
6323 encode_coding (coding
)
6324 struct coding_system
*coding
;
6327 Lisp_Object translation_table
;
6329 attrs
= CODING_ID_ATTRS (coding
->id
);
6330 translation_table
= get_translation_table (attrs
, 1);
6332 if (BUFFERP (coding
->dst_object
))
6334 set_buffer_internal (XBUFFER (coding
->dst_object
));
6335 coding
->dst_multibyte
6336 = ! NILP (current_buffer
->enable_multibyte_characters
);
6339 coding
->consumed
= coding
->consumed_char
= 0;
6340 coding
->produced
= coding
->produced_char
= 0;
6341 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
6344 ALLOC_CONVERSION_WORK_AREA (coding
);
6347 coding_set_source (coding
);
6348 consume_chars (coding
, translation_table
);
6349 coding_set_destination (coding
);
6350 (*(coding
->encoder
)) (coding
);
6351 } while (coding
->consumed_char
< coding
->src_chars
);
6353 if (BUFFERP (coding
->dst_object
))
6354 insert_from_gap (coding
->produced_char
, coding
->produced
);
6356 return (coding
->result
);
6360 /* Name (or base name) of work buffer for code conversion. */
6361 static Lisp_Object Vcode_conversion_workbuf_name
;
6363 /* A working buffer used by the top level conversion. Once it is
6364 created, it is never destroyed. It has the name
6365 Vcode_conversion_workbuf_name. The other working buffers are
6366 destroyed after the use is finished, and their names are modified
6367 versions of Vcode_conversion_workbuf_name. */
6368 static Lisp_Object Vcode_conversion_reused_workbuf
;
6370 /* 1 iff Vcode_conversion_reused_workbuf is already in use. */
6371 static int reused_workbuf_in_use
;
6374 /* Return a working buffer for code conversion. MULTIBYTE specifies the
6375 multibyteness of the returned buffer. */
6378 make_conversion_work_buffer (multibyte
)
6380 Lisp_Object name
, workbuf
;
6381 struct buffer
*current
;
6383 if (reused_workbuf_in_use
++)
6385 name
= Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name
, Qnil
);
6386 workbuf
= Fget_buffer_create (name
);
6390 name
= Vcode_conversion_workbuf_name
;
6391 workbuf
= Fget_buffer_create (name
);
6392 if (NILP (Vcode_conversion_reused_workbuf
))
6393 Vcode_conversion_reused_workbuf
= workbuf
;
6395 current
= current_buffer
;
6396 set_buffer_internal (XBUFFER (workbuf
));
6398 current_buffer
->undo_list
= Qt
;
6399 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
6400 set_buffer_internal (current
);
/* Restore the state saved by code_conversion_save.  ARG is a cons
   whose car is the buffer to make current again and whose cdr is a
   work buffer or nil -- the pair that code_conversion_save hands to
   record_unwind_protect.  The reused work buffer is only marked as
   free again; any other work buffer that is still live is killed.
   NOTE(review): the braces of this function are not visible in this
   excerpt; the nesting described here is inferred from the statement
   order -- confirm against the full file.  */
6406 code_conversion_restore (arg
)
6409 Lisp_Object current
, workbuf
;
6411 current
= XCAR (arg
);
6412 workbuf
= XCDR (arg
);
6413 if (! NILP (workbuf
))
/* The shared work buffer is kept for later conversions; just release it.  */
6415 if (EQ (workbuf
, Vcode_conversion_reused_workbuf
))
6416 reused_workbuf_in_use
= 0;
6417 else if (! NILP (Fbuffer_live_p (workbuf
)))
6418 Fkill_buffer (workbuf
);
/* Go back to the buffer recorded in the car of ARG.  */
6420 set_buffer_internal (XBUFFER (current
));
/* Arrange for code_conversion_restore to run on unwind with the cons
   (CURRENT-BUFFER . WORKBUF).  WORKBUF starts as nil; the visible code
   sets it to a work buffer made by make_conversion_work_buffer with
   the requested MULTIBYTE-ness.
   NOTE(review): the test of WITH_WORK_BUF and the return statement are
   not visible in this excerpt; presumably the work buffer is created
   only when WITH_WORK_BUF is nonzero and WORKBUF is returned (a caller
   in this file assigns the result to coding->dst_object) -- confirm.  */
6425 code_conversion_save (with_work_buf
, multibyte
)
6426 int with_work_buf
, multibyte
;
6428 Lisp_Object workbuf
= Qnil
;
6431 workbuf
= make_conversion_work_buffer (multibyte
);
6432 record_unwind_protect (code_conversion_restore
,
6433 Fcons (Fcurrent_buffer (), workbuf
));
/* Decode, with CODING, CHARS characters (BYTES bytes) of source text,
   using the current buffer as both source and destination.  The
   source is addressed by the negative positions -CHARS / -BYTES (see
   the comment on encode_coding for that convention: the text lies in
   the gap area -- presumably the same holds for decoding; confirm).
   The decoded text is placed at point (dst_pos is PT).  If the coding
   system has a post-read function (CODING_ATTR_POST_READ), it is
   called with the number of produced characters, and the produced
   counts are then adjusted by the change in buffer size.
   Return CODING->result.  */
6438 decode_coding_gap (coding
, chars
, bytes
)
6439 struct coding_system
*coding
;
6440 EMACS_INT chars
, bytes
;
6442 int count
= specpdl_ptr
- specpdl
;
/* Record the current buffer for restoration on unwind; both
   arguments are zero, so no work buffer is requested.  */
6445 code_conversion_save (0, 0);
6447 coding
->src_object
= Fcurrent_buffer ();
6448 coding
->src_chars
= chars
;
6449 coding
->src_bytes
= bytes
;
/* Negative source positions: an offset rather than a buffer position.  */
6450 coding
->src_pos
= -chars
;
6451 coding
->src_pos_byte
= -bytes
;
6452 coding
->src_multibyte
= chars
< bytes
;
6453 coding
->dst_object
= coding
->src_object
;
6454 coding
->dst_pos
= PT
;
6455 coding
->dst_pos_byte
= PT_BYTE
;
6456 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
/* Mark this input as the last block.  */
6457 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6459 if (CODING_REQUIRE_DETECTION (coding
))
6460 detect_coding (coding
);
6462 decode_coding (coding
);
6464 attrs
= CODING_ID_ATTRS (coding
->id
);
6465 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
/* Remember the buffer size so that changes made by the post-read
   function can be added to the produced counts.  */
6467 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6470 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6471 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6472 make_number (coding
->produced_char
));
6474 coding
->produced_char
+= Z
- prev_Z
;
6475 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6478 unbind_to (count
, Qnil
);
6479 return coding
->result
;
/* Encode, with CODING, CHARS characters (BYTES bytes) of source text,
   using the current buffer as both source and destination.  The
   source is addressed by the negative positions -CHARS / -BYTES,
   which per the comment on encode_coding means the text is in the gap
   area of the buffer.  The destination position is point.  The work
   is done by encode_coding; the state recorded by
   code_conversion_save is unwound before returning.
   Return CODING->result.  */
6483 encode_coding_gap (coding
, chars
, bytes
)
6484 struct coding_system
*coding
;
6485 EMACS_INT chars
, bytes
;
6487 int count
= specpdl_ptr
- specpdl
;
/* Record the current buffer for restoration on unwind; both
   arguments are zero, so no work buffer is requested.  */
6489 code_conversion_save (0, 0);
6491 coding
->src_object
= Fcurrent_buffer ();
6492 coding
->src_chars
= chars
;
6493 coding
->src_bytes
= bytes
;
/* Negative source positions: the text is in the gap area.  */
6494 coding
->src_pos
= -chars
;
6495 coding
->src_pos_byte
= -bytes
;
6496 coding
->src_multibyte
= chars
< bytes
;
6497 coding
->dst_object
= coding
->src_object
;
6498 coding
->dst_pos
= PT
;
6499 coding
->dst_pos_byte
= PT_BYTE
;
6501 encode_coding (coding
);
6503 unbind_to (count
, Qnil
);
6504 return coding
->result
;
6508 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6509 SRC_OBJECT into DST_OBJECT by coding context CODING.
6511 SRC_OBJECT is a buffer, a string, or Qnil.
6513 If it is a buffer, the text is at point of the buffer. FROM and TO
6514 are positions in the buffer.
6516 If it is a string, the text is at the beginning of the string.
6517 FROM and TO are indices to the string.
6519 If it is nil, the text is at coding->source. FROM and TO are
6520 indices to coding->source.
6522 DST_OBJECT is a buffer, Qt, or Qnil.
6524 If it is a buffer, the decoded text is inserted at point of the
6525 buffer. If the buffer is the same as SRC_OBJECT, the source text
6528 If it is Qt, a string is made from the decoded text, and
6529 set in CODING->dst_object.
6531 If it is Qnil, the decoded text is stored at CODING->destination.
6532 The caller must allocate CODING->dst_bytes bytes at
6533 CODING->destination by xmalloc. If the decoded text is longer than
6534 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6538 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6540 struct coding_system
*coding
;
6541 Lisp_Object src_object
;
6542 EMACS_INT from
, from_byte
, to
, to_byte
;
6543 Lisp_Object dst_object
;
6545 int count
= specpdl_ptr
- specpdl
;
6546 unsigned char *destination
;
6547 EMACS_INT dst_bytes
;
6548 EMACS_INT chars
= to
- from
;
6549 EMACS_INT bytes
= to_byte
- from_byte
;
6552 int saved_pt
= -1, saved_pt_byte
;
6554 buffer
= Fcurrent_buffer ();
6556 if (NILP (dst_object
))
6558 destination
= coding
->destination
;
6559 dst_bytes
= coding
->dst_bytes
;
6562 coding
->src_object
= src_object
;
6563 coding
->src_chars
= chars
;
6564 coding
->src_bytes
= bytes
;
6565 coding
->src_multibyte
= chars
< bytes
;
6567 if (STRINGP (src_object
))
6569 coding
->src_pos
= from
;
6570 coding
->src_pos_byte
= from_byte
;
6572 else if (BUFFERP (src_object
))
6574 set_buffer_internal (XBUFFER (src_object
));
6576 move_gap_both (from
, from_byte
);
6577 if (EQ (src_object
, dst_object
))
6579 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6580 TEMP_SET_PT_BOTH (from
, from_byte
);
6581 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6582 coding
->src_pos
= -chars
;
6583 coding
->src_pos_byte
= -bytes
;
6587 coding
->src_pos
= from
;
6588 coding
->src_pos_byte
= from_byte
;
6592 if (CODING_REQUIRE_DETECTION (coding
))
6593 detect_coding (coding
);
6594 attrs
= CODING_ID_ATTRS (coding
->id
);
6596 if (EQ (dst_object
, Qt
)
6597 || (! NILP (CODING_ATTR_POST_READ (attrs
))
6598 && NILP (dst_object
)))
6600 coding
->dst_object
= code_conversion_save (1, 1);
6601 coding
->dst_pos
= BEG
;
6602 coding
->dst_pos_byte
= BEG_BYTE
;
6603 coding
->dst_multibyte
= 1;
6605 else if (BUFFERP (dst_object
))
6607 code_conversion_save (0, 0);
6608 coding
->dst_object
= dst_object
;
6609 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6610 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6611 coding
->dst_multibyte
6612 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6616 code_conversion_save (0, 0);
6617 coding
->dst_object
= Qnil
;
6618 coding
->dst_multibyte
= 1;
6621 decode_coding (coding
);
6623 if (BUFFERP (coding
->dst_object
))
6624 set_buffer_internal (XBUFFER (coding
->dst_object
));
6626 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6628 struct gcpro gcpro1
, gcpro2
;
6629 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6632 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6633 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6634 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6635 make_number (coding
->produced_char
));
6638 coding
->produced_char
+= Z
- prev_Z
;
6639 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6642 if (EQ (dst_object
, Qt
))
6644 coding
->dst_object
= Fbuffer_string ();
6646 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6648 set_buffer_internal (XBUFFER (coding
->dst_object
));
6649 if (dst_bytes
< coding
->produced
)
6652 = (unsigned char *) xrealloc (destination
, coding
->produced
);
6655 record_conversion_result (coding
,
6656 CODING_RESULT_INSUFFICIENT_DST
);
6657 unbind_to (count
, Qnil
);
6660 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6661 move_gap_both (BEGV
, BEGV_BYTE
);
6662 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6663 coding
->destination
= destination
;
6669 /* This is the case of:
6670 (BUFFERP (src_object) && EQ (src_object, dst_object))
6671 As we have moved PT while replacing the original buffer
6672 contents, we must recover it now. */
6673 set_buffer_internal (XBUFFER (src_object
));
6674 if (saved_pt
< from
)
6675 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6676 else if (saved_pt
< from
+ chars
)
6677 TEMP_SET_PT_BOTH (from
, from_byte
);
6678 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6679 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6680 saved_pt_byte
+ (coding
->produced
- bytes
));
6682 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6683 saved_pt_byte
+ (coding
->produced
- bytes
));
6686 unbind_to (count
, coding
->dst_object
);
/* Encode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
   SRC_OBJECT into DST_OBJECT by coding context CODING.

   SRC_OBJECT is tested for being a string or a buffer; in the
   remaining case the pre-write path reads the text from
   CODING->source + FROM.

   If CODING has a `pre-write-conversion' function
   (CODING_ATTR_PRE_WRITE), the source text is first copied into a work
   buffer, the function is called there with BEG and Z, and the buffer
   that is current afterwards becomes the source.

   If DST_OBJECT is a buffer, the encoded text goes to point of that
   buffer, or to FROM when it is the same buffer as SRC_OBJECT; in the
   latter case the original range is deleted first and point is
   re-established at the end.

   If DST_OBJECT is Qt, the encoded bytes are collected in memory
   obtained with xmalloc, turned into a unibyte string (presumably
   stored in CODING->dst_object; the assignment target is not visible
   here), and the memory is released with xfree.

   NOTE(review): several lines of this function (braces, `else',
   short declarations) are not visible in this excerpt; the summary
   above is inferred from the visible statements -- confirm against
   the full file.  */
6691 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6693 struct coding_system
*coding
;
6694 Lisp_Object src_object
;
6695 EMACS_INT from
, from_byte
, to
, to_byte
;
6696 Lisp_Object dst_object
;
6698 int count
= specpdl_ptr
- specpdl
;
6699 EMACS_INT chars
= to
- from
;
6700 EMACS_INT bytes
= to_byte
- from_byte
;
6703 int saved_pt
= -1, saved_pt_byte
;
6705 buffer
= Fcurrent_buffer ();
6707 coding
->src_object
= src_object
;
6708 coding
->src_chars
= chars
;
6709 coding
->src_bytes
= bytes
;
6710 coding
->src_multibyte
= chars
< bytes
;
6712 attrs
= CODING_ID_ATTRS (coding
->id
);
6714 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
/* Run the pre-write function on a copy of the text in a work buffer.  */
6716 coding
->src_object
= code_conversion_save (1, coding
->src_multibyte
);
6717 set_buffer_internal (XBUFFER (coding
->src_object
));
6718 if (STRINGP (src_object
))
6719 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6720 else if (BUFFERP (src_object
))
6721 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6723 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
6725 if (EQ (src_object
, dst_object
))
6727 set_buffer_internal (XBUFFER (src_object
));
6728 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6729 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6730 set_buffer_internal (XBUFFER (coding
->src_object
));
6733 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6734 make_number (BEG
), make_number (Z
));
/* Take whichever buffer is current after the call as the source.  */
6735 coding
->src_object
= Fcurrent_buffer ();
6737 move_gap_both (BEG
, BEG_BYTE
);
6738 coding
->src_chars
= Z
- BEG
;
6739 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6740 coding
->src_pos
= BEG
;
6741 coding
->src_pos_byte
= BEG_BYTE
;
6742 coding
->src_multibyte
= Z
< Z_BYTE
;
6744 else if (STRINGP (src_object
))
6746 code_conversion_save (0, 0);
6747 coding
->src_pos
= from
;
6748 coding
->src_pos_byte
= from_byte
;
6750 else if (BUFFERP (src_object
))
6752 code_conversion_save (0, 0);
6753 set_buffer_internal (XBUFFER (src_object
));
6754 if (EQ (src_object
, dst_object
))
6756 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6757 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
6758 coding
->src_pos
= 0;
6759 coding
->src_pos_byte
= 0;
6763 if (from
< GPT
&& to
>= GPT
)
6764 move_gap_both (from
, from_byte
);
6765 coding
->src_pos
= from
;
6766 coding
->src_pos_byte
= from_byte
;
6770 code_conversion_save (0, 0);
6772 if (BUFFERP (dst_object
))
6774 coding
->dst_object
= dst_object
;
6775 if (EQ (src_object
, dst_object
))
6777 coding
->dst_pos
= from
;
6778 coding
->dst_pos_byte
= from_byte
;
6782 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6783 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6785 coding
->dst_multibyte
6786 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6788 else if (EQ (dst_object
, Qt
))
6790 coding
->dst_object
= Qnil
;
/* Start with one byte per source character, and at least one byte.  */
6791 coding
->dst_bytes
= coding
->src_chars
;
6792 if (coding
->dst_bytes
== 0)
6793 coding
->dst_bytes
= 1;
6794 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6795 coding
->dst_multibyte
= 0;
6799 coding
->dst_object
= Qnil
;
6800 coding
->dst_multibyte
= 0;
6803 encode_coding (coding
);
6805 if (EQ (dst_object
, Qt
))
6807 if (BUFFERP (coding
->dst_object
))
6808 coding
->dst_object
= Fbuffer_string ();
6812 = make_unibyte_string ((char *) coding
->destination
,
6814 xfree (coding
->destination
);
6820 /* This is the case of:
6821 (BUFFERP (src_object) && EQ (src_object, dst_object))
6822 As we have moved PT while replacing the original buffer
6823 contents, we must recover it now. */
6824 set_buffer_internal (XBUFFER (src_object
));
6825 if (saved_pt
< from
)
6826 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6827 else if (saved_pt
< from
+ chars
)
6828 TEMP_SET_PT_BOTH (from
, from_byte
);
6829 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6830 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6831 saved_pt_byte
+ (coding
->produced
- bytes
));
6833 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6834 saved_pt_byte
+ (coding
->produced
- bytes
));
6837 unbind_to (count
, Qnil
);
/* Return the name (CODING_ID_NAME) of the coding system assigned to
   the coding category listed first in coding_priorities.  */
6842 preferred_coding_system ()
6844 int id
= coding_categories
[coding_priorities
[0]].id
;
6846 return CODING_ID_NAME (id
);
6851 /*** 8. Emacs Lisp library functions ***/
6853 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6854 doc
: /* Return t if OBJECT is nil or a coding-system.
6855 See the documentation of `define-coding-system' for information
6856 about coding-system objects. */)
6860 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
6863 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6864 Sread_non_nil_coding_system
, 1, 1, 0,
6865 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6872 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6873 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6875 while (SCHARS (val
) == 0);
6876 return (Fintern (val
, Qnil
));
6879 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6880 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6881 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6882 (prompt
, default_coding_system
)
6883 Lisp_Object prompt
, default_coding_system
;
6886 if (SYMBOLP (default_coding_system
))
6887 XSETSTRING (default_coding_system
, XPNTR (SYMBOL_NAME (default_coding_system
)));
6888 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6889 Qt
, Qnil
, Qcoding_system_history
,
6890 default_coding_system
, Qnil
);
6891 return (SCHARS (val
) == 0 ? Qnil
: Fintern (val
, Qnil
));
6894 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6896 doc
: /* Check validity of CODING-SYSTEM.
6897 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6899 Lisp_Object coding_system
;
6901 CHECK_SYMBOL (coding_system
);
6902 if (!NILP (Fcoding_system_p (coding_system
)))
6903 return coding_system
;
6905 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6909 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
6910 HIGHEST is nonzero, return the coding system of the highest
6911 priority among the detected coding systems. Otherwise return a
6912 list of detected coding systems sorted by their priorities. If
6913 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6914 multibyte form but contain only ASCII and eight-bit chars.
6915 Otherwise, the bytes are raw bytes.
6917 CODING-SYSTEM controls the detection as below:
6919 If it is nil, detect both text-format and eol-format. If the
6920 text-format part of CODING-SYSTEM is already specified
6921 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
6922 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6923 detect only text-format. */
6926 detect_coding_system (src
, src_chars
, src_bytes
, highest
, multibytep
,
6928 const unsigned char *src
;
6929 int src_chars
, src_bytes
, highest
;
6931 Lisp_Object coding_system
;
6933 const unsigned char *src_end
= src
+ src_bytes
;
6934 Lisp_Object attrs
, eol_type
;
6936 struct coding_system coding
;
6938 struct coding_detection_info detect_info
;
6939 enum coding_category base_category
;
6941 if (NILP (coding_system
))
6942 coding_system
= Qundecided
;
6943 setup_coding_system (coding_system
, &coding
);
6944 attrs
= CODING_ID_ATTRS (coding
.id
);
6945 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6946 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
6948 coding
.source
= src
;
6949 coding
.src_chars
= src_chars
;
6950 coding
.src_bytes
= src_bytes
;
6951 coding
.src_multibyte
= multibytep
;
6952 coding
.consumed
= 0;
6953 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6955 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
6957 /* At first, detect text-format if necessary. */
6958 base_category
= XINT (CODING_ATTR_CATEGORY (attrs
));
6959 if (base_category
== coding_category_undecided
)
6961 enum coding_category category
;
6962 struct coding_system
*this;
6965 /* Skip all ASCII bytes except for a few ISO2022 controls. */
6966 for (i
= 0; src
< src_end
; i
++, src
++)
6969 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
6971 || c
== ISO_CODE_SO
)))
6974 coding
.head_ascii
= src
- coding
.source
;
6977 for (i
= 0; i
< coding_category_raw_text
; i
++)
6979 category
= coding_priorities
[i
];
6980 this = coding_categories
+ category
;
6984 /* No coding system of this category is defined. */
6985 detect_info
.rejected
|= (1 << category
);
6987 else if (category
>= coding_category_raw_text
)
6989 else if (detect_info
.checked
& (1 << category
))
6992 && (detect_info
.found
& (1 << category
)))
6997 if ((*(this->detector
)) (&coding
, &detect_info
)
6999 && (detect_info
.found
& (1 << category
)))
7001 if (category
== coding_category_utf_16_auto
)
7003 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
7004 category
= coding_category_utf_16_le
;
7006 category
= coding_category_utf_16_be
;
7013 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
7015 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
7016 id
= coding_categories
[coding_category_raw_text
].id
;
7017 val
= Fcons (make_number (id
), Qnil
);
7019 else if (! detect_info
.rejected
&& ! detect_info
.found
)
7021 detect_info
.found
= CATEGORY_MASK_ANY
;
7022 id
= coding_categories
[coding_category_undecided
].id
;
7023 val
= Fcons (make_number (id
), Qnil
);
7027 if (detect_info
.found
)
7029 detect_info
.found
= 1 << category
;
7030 val
= Fcons (make_number (this->id
), Qnil
);
7033 for (i
= 0; i
< coding_category_raw_text
; i
++)
7034 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
7036 detect_info
.found
= 1 << coding_priorities
[i
];
7037 id
= coding_categories
[coding_priorities
[i
]].id
;
7038 val
= Fcons (make_number (id
), Qnil
);
7044 int mask
= detect_info
.rejected
| detect_info
.found
;
7048 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
7050 category
= coding_priorities
[i
];
7051 if (! (mask
& (1 << category
)))
7053 found
|= 1 << category
;
7054 id
= coding_categories
[category
].id
;
7055 val
= Fcons (make_number (id
), val
);
7058 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
7060 category
= coding_priorities
[i
];
7061 if (detect_info
.found
& (1 << category
))
7063 id
= coding_categories
[category
].id
;
7064 val
= Fcons (make_number (id
), val
);
7067 detect_info
.found
|= found
;
7070 else if (base_category
== coding_category_utf_16_auto
)
7072 if (detect_coding_utf_16 (&coding
, &detect_info
))
7074 enum coding_category category
;
7075 struct coding_system
*this;
7077 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
7078 this = coding_categories
+ coding_category_utf_16_le
;
7079 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
7080 this = coding_categories
+ coding_category_utf_16_be
;
7081 else if (detect_info
.rejected
& CATEGORY_MASK_UTF_16_LE_NOSIG
)
7082 this = coding_categories
+ coding_category_utf_16_be_nosig
;
7084 this = coding_categories
+ coding_category_utf_16_le_nosig
;
7085 val
= Fcons (make_number (this->id
), Qnil
);
7090 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
7091 val
= Fcons (make_number (coding
.id
), Qnil
);
7094 /* Then, detect eol-format if necessary. */
7096 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
;
7099 if (VECTORP (eol_type
))
7101 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
7102 normal_eol
= detect_eol (coding
.source
, src_bytes
,
7103 coding_category_raw_text
);
7104 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
7105 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
7106 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
7107 coding_category_utf_16_be
);
7108 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
7109 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
7110 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
7111 coding_category_utf_16_le
);
7115 if (EQ (eol_type
, Qunix
))
7116 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
7117 else if (EQ (eol_type
, Qdos
))
7118 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
7120 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
7123 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
7125 enum coding_category category
;
7128 id
= XINT (XCAR (tail
));
7129 attrs
= CODING_ID_ATTRS (id
);
7130 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7131 eol_type
= CODING_ID_EOL_TYPE (id
);
7132 if (VECTORP (eol_type
))
7134 if (category
== coding_category_utf_16_be
7135 || category
== coding_category_utf_16_be_nosig
)
7136 this_eol
= utf_16_be_eol
;
7137 else if (category
== coding_category_utf_16_le
7138 || category
== coding_category_utf_16_le_nosig
)
7139 this_eol
= utf_16_le_eol
;
7141 this_eol
= normal_eol
;
7143 if (this_eol
== EOL_SEEN_LF
)
7144 XSETCAR (tail
, AREF (eol_type
, 0));
7145 else if (this_eol
== EOL_SEEN_CRLF
)
7146 XSETCAR (tail
, AREF (eol_type
, 1));
7147 else if (this_eol
== EOL_SEEN_CR
)
7148 XSETCAR (tail
, AREF (eol_type
, 2));
7150 XSETCAR (tail
, CODING_ID_NAME (id
));
7153 XSETCAR (tail
, CODING_ID_NAME (id
));
7157 return (highest
? XCAR (val
) : val
);
7161 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
7163 doc
: /* Detect coding system of the text in the region between START and END.
7164 Return a list of possible coding systems ordered by priority.
7166 If only ASCII characters are found, it returns a list of single element
7167 `undecided' or its subsidiary coding system according to a detected
7170 If optional argument HIGHEST is non-nil, return the coding system of
7171 highest priority. */)
7172 (start
, end
, highest
)
7173 Lisp_Object start
, end
, highest
;
7176 int from_byte
, to_byte
;
7178 CHECK_NUMBER_COERCE_MARKER (start
);
7179 CHECK_NUMBER_COERCE_MARKER (end
);
7181 validate_region (&start
, &end
);
7182 from
= XINT (start
), to
= XINT (end
);
7183 from_byte
= CHAR_TO_BYTE (from
);
7184 to_byte
= CHAR_TO_BYTE (to
);
7186 if (from
< GPT
&& to
>= GPT
)
7187 move_gap_both (to
, to_byte
);
7189 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
7190 to
- from
, to_byte
- from_byte
,
7192 !NILP (current_buffer
7193 ->enable_multibyte_characters
),
7197 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
7199 doc
: /* Detect coding system of the text in STRING.
7200 Return a list of possible coding systems ordered by priority.
7202 If only ASCII characters are found, it returns a list of single element
7203 `undecided' or its subsidiary coding system according to a detected
7206 If optional argument HIGHEST is non-nil, return the coding system of
7207 highest priority. */)
7209 Lisp_Object string
, highest
;
7211 CHECK_STRING (string
);
7213 return detect_coding_system (SDATA (string
),
7214 SCHARS (string
), SBYTES (string
),
7215 !NILP (highest
), STRING_MULTIBYTE (string
),
/* Check whether character C belongs to one of the charsets in the
   charset list of the coding-system attributes ATTRS.  C is first
   mapped through ATTRS' translation table when that table is non-nil.
   The value is nonzero iff the scan stopped before the end of the
   list.  NOTE(review): the statement that leaves the loop on a match
   is not visible in this excerpt -- presumably a `break'; confirm.  */
7221 char_encodable_p (c
, attrs
)
7226 struct charset
*charset
;
7227 Lisp_Object translation_table
;
7229 translation_table
= CODING_ATTR_TRANS_TBL (attrs
);
7230 if (! NILP (translation_table
))
7231 c
= translate_char (translation_table
, c
);
/* Each element of the charset list is a charset ID.  */
7232 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
7233 CONSP (tail
); tail
= XCDR (tail
))
7235 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7236 if (CHAR_CHARSET_P (c
, charset
))
7239 return (! NILP (tail
));
7243 /* Return a list of coding systems that safely encode the text between
7244 START and END. If EXCLUDE is non-nil, it is a list of coding
7245 systems not to check. The returned list doesn't contain any such
7246 coding systems. In any case, if the text contains only ASCII or is
7247 unibyte, return t. */
7249 DEFUN ("find-coding-systems-region-internal",
7250 Ffind_coding_systems_region_internal
,
7251 Sfind_coding_systems_region_internal
, 2, 3, 0,
7252 doc
: /* Internal use only. */)
7253 (start
, end
, exclude
)
7254 Lisp_Object start
, end
, exclude
;
7256 Lisp_Object coding_attrs_list
, safe_codings
;
7257 EMACS_INT start_byte
, end_byte
;
7258 const unsigned char *p
, *pbeg
, *pend
;
7260 Lisp_Object tail
, elt
;
7262 if (STRINGP (start
))
7264 if (!STRING_MULTIBYTE (start
)
7265 || SCHARS (start
) == SBYTES (start
))
7268 end_byte
= SBYTES (start
);
7272 CHECK_NUMBER_COERCE_MARKER (start
);
7273 CHECK_NUMBER_COERCE_MARKER (end
);
7274 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7275 args_out_of_range (start
, end
);
7276 if (NILP (current_buffer
->enable_multibyte_characters
))
7278 start_byte
= CHAR_TO_BYTE (XINT (start
));
7279 end_byte
= CHAR_TO_BYTE (XINT (end
));
7280 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
7283 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7285 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7286 move_gap_both (XINT (start
), start_byte
);
7288 move_gap_both (XINT (end
), end_byte
);
7292 coding_attrs_list
= Qnil
;
7293 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7295 || NILP (Fmemq (XCAR (tail
), exclude
)))
7299 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
7300 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
7301 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
7303 ASET (attrs
, coding_attr_trans_tbl
,
7304 get_translation_table (attrs
, 1));
7305 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
7309 if (STRINGP (start
))
7310 p
= pbeg
= SDATA (start
);
7312 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7313 pend
= p
+ (end_byte
- start_byte
);
7315 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
7316 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7320 if (ASCII_BYTE_P (*p
))
7324 c
= STRING_CHAR_ADVANCE (p
);
7326 charset_map_loaded
= 0;
7327 for (tail
= coding_attrs_list
; CONSP (tail
);)
7332 else if (char_encodable_p (c
, elt
))
7334 else if (CONSP (XCDR (tail
)))
7336 XSETCAR (tail
, XCAR (XCDR (tail
)));
7337 XSETCDR (tail
, XCDR (XCDR (tail
)));
7341 XSETCAR (tail
, Qnil
);
7345 if (charset_map_loaded
)
7347 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7349 if (STRINGP (start
))
7350 pbeg
= SDATA (start
);
7352 pbeg
= BYTE_POS_ADDR (start_byte
);
7353 p
= pbeg
+ p_offset
;
7354 pend
= pbeg
+ pend_offset
;
7359 safe_codings
= Qnil
;
7360 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
7361 if (! NILP (XCAR (tail
)))
7362 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
7364 return safe_codings
;
7368 DEFUN ("unencodable-char-position", Funencodable_char_position
,
7369 Sunencodable_char_position
, 3, 5, 0,
7371 Return position of first un-encodable character in a region.
7372 START and END specify the region and CODING-SYSTEM specifies the
7373 encoding to check. Return nil if CODING-SYSTEM does encode the region.
7375 If optional 4th argument COUNT is non-nil, it specifies at most how
7376 many un-encodable characters to search. In this case, the value is a
7379 If optional 5th argument STRING is non-nil, it is a string to search
7380 for un-encodable characters. In that case, START and END are indexes
7382 (start
, end
, coding_system
, count
, string
)
7383 Lisp_Object start
, end
, coding_system
, count
, string
;
7386 struct coding_system coding
;
7387 Lisp_Object attrs
, charset_list
, translation_table
;
7388 Lisp_Object positions
;
7390 const unsigned char *p
, *stop
, *pend
;
7391 int ascii_compatible
;
7393 setup_coding_system (Fcheck_coding_system (coding_system
), &coding
);
7394 attrs
= CODING_ID_ATTRS (coding
.id
);
7395 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
7397 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
7398 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7399 translation_table
= get_translation_table (attrs
, 1);
7403 validate_region (&start
, &end
);
7404 from
= XINT (start
);
7406 if (NILP (current_buffer
->enable_multibyte_characters
)
7407 || (ascii_compatible
7408 && (to
- from
) == (CHAR_TO_BYTE (to
) - (CHAR_TO_BYTE (from
)))))
7410 p
= CHAR_POS_ADDR (from
);
7411 pend
= CHAR_POS_ADDR (to
);
7412 if (from
< GPT
&& to
>= GPT
)
7419 CHECK_STRING (string
);
7420 CHECK_NATNUM (start
);
7422 from
= XINT (start
);
7425 || to
> SCHARS (string
))
7426 args_out_of_range_3 (string
, start
, end
);
7427 if (! STRING_MULTIBYTE (string
))
7429 p
= SDATA (string
) + string_char_to_byte (string
, from
);
7430 stop
= pend
= SDATA (string
) + string_char_to_byte (string
, to
);
7431 if (ascii_compatible
&& (to
- from
) == (pend
- p
))
7439 CHECK_NATNUM (count
);
7448 if (ascii_compatible
)
7449 while (p
< stop
&& ASCII_BYTE_P (*p
))
7459 c
= STRING_CHAR_ADVANCE (p
);
7460 if (! (ASCII_CHAR_P (c
) && ascii_compatible
)
7461 && ! char_charset (translate_char (translation_table
, c
),
7462 charset_list
, NULL
))
7464 positions
= Fcons (make_number (from
), positions
);
7473 return (NILP (count
) ? Fcar (positions
) : Fnreverse (positions
));
7477 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
7478 Scheck_coding_systems_region
, 3, 3, 0,
7479 doc
: /* Check if the region is encodable by coding systems.
7481 START and END are buffer positions specifying the region.
7482 CODING-SYSTEM-LIST is a list of coding systems to check.
7484 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7485 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
7486 whole region, POS0, POS1, ... are buffer positions where non-encodable
7487 characters are found.
7489 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7492 START may be a string. In that case, check if the string is
7493 encodable, and the value contains indices to the string instead of
7494 buffer positions. END is ignored. */)
7495 (start
, end
, coding_system_list
)
7496 Lisp_Object start
, end
, coding_system_list
;
7499 EMACS_INT start_byte
, end_byte
;
7501 const unsigned char *p
, *pbeg
, *pend
;
7503 Lisp_Object tail
, elt
, attrs
;
7505 if (STRINGP (start
))
7507 if (!STRING_MULTIBYTE (start
)
7508 && SCHARS (start
) != SBYTES (start
))
7511 end_byte
= SBYTES (start
);
7516 CHECK_NUMBER_COERCE_MARKER (start
);
7517 CHECK_NUMBER_COERCE_MARKER (end
);
7518 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7519 args_out_of_range (start
, end
);
7520 if (NILP (current_buffer
->enable_multibyte_characters
))
7522 start_byte
= CHAR_TO_BYTE (XINT (start
));
7523 end_byte
= CHAR_TO_BYTE (XINT (end
));
7524 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
7527 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7529 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7530 move_gap_both (XINT (start
), start_byte
);
7532 move_gap_both (XINT (end
), end_byte
);
7538 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7541 attrs
= AREF (CODING_SYSTEM_SPEC (elt
), 0);
7542 ASET (attrs
, coding_attr_trans_tbl
, get_translation_table (attrs
, 1));
7543 list
= Fcons (Fcons (elt
, Fcons (attrs
, Qnil
)), list
);
7546 if (STRINGP (start
))
7547 p
= pbeg
= SDATA (start
);
7549 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7550 pend
= p
+ (end_byte
- start_byte
);
7552 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7553 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7557 if (ASCII_BYTE_P (*p
))
7561 c
= STRING_CHAR_ADVANCE (p
);
7563 charset_map_loaded
= 0;
7564 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
7566 elt
= XCDR (XCAR (tail
));
7567 if (! char_encodable_p (c
, XCAR (elt
)))
7568 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
7570 if (charset_map_loaded
)
7572 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7574 if (STRINGP (start
))
7575 pbeg
= SDATA (start
);
7577 pbeg
= BYTE_POS_ADDR (start_byte
);
7578 p
= pbeg
+ p_offset
;
7579 pend
= pbeg
+ pend_offset
;
7587 for (; CONSP (tail
); tail
= XCDR (tail
))
7590 if (CONSP (XCDR (XCDR (elt
))))
7591 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
7600 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7601 Lisp_Object start
, end
, coding_system
, dst_object
;
7602 int encodep
, norecord
;
7604 struct coding_system coding
;
7605 EMACS_INT from
, from_byte
, to
, to_byte
;
7606 Lisp_Object src_object
;
7608 CHECK_NUMBER_COERCE_MARKER (start
);
7609 CHECK_NUMBER_COERCE_MARKER (end
);
7610 if (NILP (coding_system
))
7611 coding_system
= Qno_conversion
;
7613 CHECK_CODING_SYSTEM (coding_system
);
7614 src_object
= Fcurrent_buffer ();
7615 if (NILP (dst_object
))
7616 dst_object
= src_object
;
7617 else if (! EQ (dst_object
, Qt
))
7618 CHECK_BUFFER (dst_object
);
7620 validate_region (&start
, &end
);
7621 from
= XFASTINT (start
);
7622 from_byte
= CHAR_TO_BYTE (from
);
7623 to
= XFASTINT (end
);
7624 to_byte
= CHAR_TO_BYTE (to
);
7626 setup_coding_system (coding_system
, &coding
);
7627 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7630 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7633 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7636 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7638 return (BUFFERP (dst_object
)
7639 ? make_number (coding
.produced_char
)
7640 : coding
.dst_object
);
7644 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7645 3, 4, "r\nzCoding system: ",
7646 doc
: /* Decode the current region from the specified coding system.
7647 When called from a program, takes four arguments:
7648 START, END, CODING-SYSTEM, and DESTINATION.
7649 START and END are buffer positions.
7651 Optional 4th argument DESTINATION specifies where the decoded text goes.
7652 If nil, the region between START and END is replaced by the decoded text.
7653 If buffer, the decoded text is inserted in the buffer.
7654 If t, the decoded text is returned.
7656 This function sets `last-coding-system-used' to the precise coding system
7657 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7658 not fully specified.)
7659 It returns the length of the decoded text. */)
7660 (start
, end
, coding_system
, destination
)
7661 Lisp_Object start
, end
, coding_system
, destination
;
7663 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
7666 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7667 3, 4, "r\nzCoding system: ",
7668 doc
: /* Encode the current region by specified coding system.
7669 When called from a program, takes three arguments:
7670 START, END, and CODING-SYSTEM. START and END are buffer positions.
7672 Optional 4th argument DESTINATION specifies where the encoded text goes.
7673 If nil, the region between START and END is replaced by the encoded text.
7674 If buffer, the encoded text is inserted in the buffer.
7675 If t, the encoded text is returned.
7677 This function sets `last-coding-system-used' to the precise coding system
7678 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7679 not fully specified.)
7680 It returns the length of the encoded text. */)
7681 (start
, end
, coding_system
, destination
)
7682 Lisp_Object start
, end
, coding_system
, destination
;
7684 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
7688 code_convert_string (string
, coding_system
, dst_object
,
7689 encodep
, nocopy
, norecord
)
7690 Lisp_Object string
, coding_system
, dst_object
;
7691 int encodep
, nocopy
, norecord
;
7693 struct coding_system coding
;
7694 EMACS_INT chars
, bytes
;
7696 CHECK_STRING (string
);
7697 if (NILP (coding_system
))
7700 Vlast_coding_system_used
= Qno_conversion
;
7701 if (NILP (dst_object
))
7702 return (nocopy
? Fcopy_sequence (string
) : string
);
7705 if (NILP (coding_system
))
7706 coding_system
= Qno_conversion
;
7708 CHECK_CODING_SYSTEM (coding_system
);
7709 if (NILP (dst_object
))
7711 else if (! EQ (dst_object
, Qt
))
7712 CHECK_BUFFER (dst_object
);
7714 setup_coding_system (coding_system
, &coding
);
7715 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7716 chars
= SCHARS (string
);
7717 bytes
= SBYTES (string
);
7719 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7721 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7723 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7725 return (BUFFERP (dst_object
)
7726 ? make_number (coding
.produced_char
)
7727 : coding
.dst_object
);
7731 /* Encode or decode STRING according to CODING_SYSTEM.
7732 Do not set Vlast_coding_system_used.
7734 This function is called only from macros DECODE_FILE and
7735 ENCODE_FILE, thus we ignore character composition. */
7738 code_convert_string_norecord (string
, coding_system
, encodep
)
7739 Lisp_Object string
, coding_system
;
7742 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
7746 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
7748 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7750 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7751 if the decoding operation is trivial.
7753 Optional fourth arg BUFFER non-nil means that the decoded text is
7754 inserted in BUFFER instead of returned as a string. In this case,
7755 the return value is the length of the decoded text.
7757 This function sets `last-coding-system-used' to the precise coding system
7758 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7759 not fully specified.) */)
7760 (string
, coding_system
, nocopy
, buffer
)
7761 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7763 return code_convert_string (string
, coding_system
, buffer
,
7764 0, ! NILP (nocopy
), 0);
7767 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
7769 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
7771 Optional third arg NOCOPY non-nil means it is OK to return STRING
7772 itself if the encoding operation is trivial.
7774 Optional fourth arg BUFFER non-nil meant that the encoded text is
7775 inserted in BUFFER instead of returned as a string. In this case,
7776 the return value is BUFFER.
7778 This function sets `last-coding-system-used' to the precise coding system
7779 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7780 not fully specified.) */)
7781 (string
, coding_system
, nocopy
, buffer
)
7782 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7784 return code_convert_string (string
, coding_system
, buffer
,
7785 1, ! NILP (nocopy
), 1);
7789 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
7790 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
7791 Return the corresponding character. */)
7795 Lisp_Object spec
, attrs
, val
;
7796 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
7799 CHECK_NATNUM (code
);
7800 c
= XFASTINT (code
);
7801 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7802 attrs
= AREF (spec
, 0);
7804 if (ASCII_BYTE_P (c
)
7805 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7808 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7809 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7810 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7811 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7814 charset
= charset_roman
;
7815 else if (c
>= 0xA0 && c
< 0xDF)
7817 charset
= charset_kana
;
7822 int s1
= c
>> 8, s2
= c
& 0xFF;
7824 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
7825 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
7826 error ("Invalid code: %d", code
);
7828 charset
= charset_kanji
;
7830 c
= DECODE_CHAR (charset
, c
);
7832 error ("Invalid code: %d", code
);
7833 return make_number (c
);
7837 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
7838 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
7839 Return the corresponding code in SJIS. */)
7843 Lisp_Object spec
, attrs
, charset_list
;
7845 struct charset
*charset
;
7848 CHECK_CHARACTER (ch
);
7850 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7851 attrs
= AREF (spec
, 0);
7853 if (ASCII_CHAR_P (c
)
7854 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7857 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7858 charset
= char_charset (c
, charset_list
, &code
);
7859 if (code
== CHARSET_INVALID_CODE (charset
))
7860 error ("Can't encode by shift_jis encoding: %d", c
);
7863 return make_number (code
);
7866 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
7867 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
7868 Return the corresponding character. */)
7872 Lisp_Object spec
, attrs
, val
;
7873 struct charset
*charset_roman
, *charset_big5
, *charset
;
7876 CHECK_NATNUM (code
);
7877 c
= XFASTINT (code
);
7878 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7879 attrs
= AREF (spec
, 0);
7881 if (ASCII_BYTE_P (c
)
7882 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7885 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7886 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7887 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7890 charset
= charset_roman
;
7893 int b1
= c
>> 8, b2
= c
& 0x7F;
7894 if (b1
< 0xA1 || b1
> 0xFE
7895 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
7896 error ("Invalid code: %d", code
);
7897 charset
= charset_big5
;
7899 c
= DECODE_CHAR (charset
, (unsigned )c
);
7901 error ("Invalid code: %d", code
);
7902 return make_number (c
);
7905 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
7906 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
7907 Return the corresponding character code in Big5. */)
7911 Lisp_Object spec
, attrs
, charset_list
;
7912 struct charset
*charset
;
7916 CHECK_CHARACTER (ch
);
7918 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7919 attrs
= AREF (spec
, 0);
7920 if (ASCII_CHAR_P (c
)
7921 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7924 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7925 charset
= char_charset (c
, charset_list
, &code
);
7926 if (code
== CHARSET_INVALID_CODE (charset
))
7927 error ("Can't encode by Big5 encoding: %d", c
);
7929 return make_number (code
);
7933 DEFUN ("set-terminal-coding-system-internal",
7934 Fset_terminal_coding_system_internal
,
7935 Sset_terminal_coding_system_internal
, 1, 1, 0,
7936 doc
: /* Internal use only. */)
7938 Lisp_Object coding_system
;
7940 CHECK_SYMBOL (coding_system
);
7941 setup_coding_system (Fcheck_coding_system (coding_system
),
7944 /* We had better not send unsafe characters to terminal. */
7945 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
7946 /* Character composition should be disabled. */
7947 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7948 terminal_coding
.src_multibyte
= 1;
7949 terminal_coding
.dst_multibyte
= 0;
7953 DEFUN ("set-safe-terminal-coding-system-internal",
7954 Fset_safe_terminal_coding_system_internal
,
7955 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
7956 doc
: /* Internal use only. */)
7958 Lisp_Object coding_system
;
7960 CHECK_SYMBOL (coding_system
);
7961 setup_coding_system (Fcheck_coding_system (coding_system
),
7962 &safe_terminal_coding
);
7963 /* Character composition should be disabled. */
7964 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7965 safe_terminal_coding
.src_multibyte
= 1;
7966 safe_terminal_coding
.dst_multibyte
= 0;
7970 DEFUN ("terminal-coding-system",
7971 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
7972 doc
: /* Return coding system specified for terminal output. */)
7975 return CODING_ID_NAME (terminal_coding
.id
);
7978 DEFUN ("set-keyboard-coding-system-internal",
7979 Fset_keyboard_coding_system_internal
,
7980 Sset_keyboard_coding_system_internal
, 1, 1, 0,
7981 doc
: /* Internal use only. */)
7983 Lisp_Object coding_system
;
7985 CHECK_SYMBOL (coding_system
);
7986 setup_coding_system (Fcheck_coding_system (coding_system
),
7988 /* Character composition should be disabled. */
7989 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7993 DEFUN ("keyboard-coding-system",
7994 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
7995 doc
: /* Return coding system specified for decoding keyboard input. */)
7998 return CODING_ID_NAME (keyboard_coding
.id
);
8002 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
8003 Sfind_operation_coding_system
, 1, MANY
, 0,
8004 doc
: /* Choose a coding system for an operation based on the target name.
8005 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8006 DECODING-SYSTEM is the coding system to use for decoding
8007 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8008 for encoding (in case OPERATION does encoding).
8010 The first argument OPERATION specifies an I/O primitive:
8011 For file I/O, `insert-file-contents' or `write-region'.
8012 For process I/O, `call-process', `call-process-region', or `start-process'.
8013 For network I/O, `open-network-stream'.
8015 The remaining arguments should be the same arguments that were passed
8016 to the primitive. Depending on which primitive, one of those arguments
8017 is selected as the TARGET. For example, if OPERATION does file I/O,
8018 whichever argument specifies the file name is TARGET.
8020 TARGET has a meaning which depends on OPERATION:
8021 For file I/O, TARGET is a file name.
8022 For process I/O, TARGET is a process name.
8023 For network I/O, TARGET is a service name or a port number
8025 This function looks up what specified for TARGET in,
8026 `file-coding-system-alist', `process-coding-system-alist',
8027 or `network-coding-system-alist' depending on OPERATION.
8028 They may specify a coding system, a cons of coding systems,
8029 or a function symbol to call.
8030 In the last case, we call the function with one argument,
8031 which is a list of all the arguments given to this function.
8033 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
8038 Lisp_Object operation
, target_idx
, target
, val
;
8039 register Lisp_Object chain
;
8042 error ("Too few arguments");
8043 operation
= args
[0];
8044 if (!SYMBOLP (operation
)
8045 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
8046 error ("Invalid first arguement");
8047 if (nargs
< 1 + XINT (target_idx
))
8048 error ("Too few arguments for operation: %s",
8049 SDATA (SYMBOL_NAME (operation
)));
8050 target
= args
[XINT (target_idx
) + 1];
8051 if (!(STRINGP (target
)
8052 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
8053 error ("Invalid %dth argument", XINT (target_idx
) + 1);
8055 chain
= ((EQ (operation
, Qinsert_file_contents
)
8056 || EQ (operation
, Qwrite_region
))
8057 ? Vfile_coding_system_alist
8058 : (EQ (operation
, Qopen_network_stream
)
8059 ? Vnetwork_coding_system_alist
8060 : Vprocess_coding_system_alist
));
8064 for (; CONSP (chain
); chain
= XCDR (chain
))
8070 && ((STRINGP (target
)
8071 && STRINGP (XCAR (elt
))
8072 && fast_string_match (XCAR (elt
), target
) >= 0)
8073 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
8076 /* Here, if VAL is both a valid coding system and a valid
8077 function symbol, we return VAL as a coding system. */
8080 if (! SYMBOLP (val
))
8082 if (! NILP (Fcoding_system_p (val
)))
8083 return Fcons (val
, val
);
8084 if (! NILP (Ffboundp (val
)))
8086 val
= call1 (val
, Flist (nargs
, args
));
8089 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
8090 return Fcons (val
, val
);
8098 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
8099 Sset_coding_system_priority
, 0, MANY
, 0,
8100 doc
: /* Assign higher priority to the coding systems given as arguments.
8101 If multiple coding systems belong to the same category,
8102 all but the first one are ignored.
8104 usage: (set-coding-system-priority ...) */)
8110 int changed
[coding_category_max
];
8111 enum coding_category priorities
[coding_category_max
];
8113 bzero (changed
, sizeof changed
);
8115 for (i
= j
= 0; i
< nargs
; i
++)
8117 enum coding_category category
;
8118 Lisp_Object spec
, attrs
;
8120 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
8121 attrs
= AREF (spec
, 0);
8122 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
8123 if (changed
[category
])
8124 /* Ignore this coding system because a coding system of the
8125 same category already had a higher priority. */
8127 changed
[category
] = 1;
8128 priorities
[j
++] = category
;
8129 if (coding_categories
[category
].id
>= 0
8130 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
8131 setup_coding_system (args
[i
], &coding_categories
[category
]);
8132 Fset (AREF (Vcoding_category_table
, category
), args
[i
]);
8135 /* Now we have decided top J priorities. Reflect the order of the
8136 original priorities to the remaining priorities. */
8138 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
8140 while (j
< coding_category_max
8141 && changed
[coding_priorities
[j
]])
8143 if (j
== coding_category_max
)
8145 priorities
[i
] = coding_priorities
[j
];
8148 bcopy (priorities
, coding_priorities
, sizeof priorities
);
8150 /* Update `coding-category-list'. */
8151 Vcoding_category_list
= Qnil
;
8152 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8153 Vcoding_category_list
8154 = Fcons (AREF (Vcoding_category_table
, priorities
[i
]),
8155 Vcoding_category_list
);
8160 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
8161 Scoding_system_priority_list
, 0, 1, 0,
8162 doc
: /* Return a list of coding systems ordered by their priorities.
8163 HIGHESTP non-nil means just return the highest priority one. */)
8165 Lisp_Object highestp
;
8170 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
8172 enum coding_category category
= coding_priorities
[i
];
8173 int id
= coding_categories
[category
].id
;
8178 attrs
= CODING_ID_ATTRS (id
);
8179 if (! NILP (highestp
))
8180 return CODING_ATTR_BASE_NAME (attrs
);
8181 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
8183 return Fnreverse (val
);
8186 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
8189 make_subsidiaries (base
)
8192 Lisp_Object subsidiaries
;
8193 int base_name_len
= SBYTES (SYMBOL_NAME (base
));
8194 char *buf
= (char *) alloca (base_name_len
+ 6);
8197 bcopy (SDATA (SYMBOL_NAME (base
)), buf
, base_name_len
);
8198 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
8199 for (i
= 0; i
< 3; i
++)
8201 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
8202 ASET (subsidiaries
, i
, intern (buf
));
8204 return subsidiaries
;
8208 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
8209 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
8210 doc
: /* For internal use only.
8211 usage: (define-coding-system-internal ...) */)
8217 Lisp_Object spec_vec
; /* [ ATTRS ALIASE EOL_TYPE ] */
8218 Lisp_Object attrs
; /* Vector of attributes. */
8219 Lisp_Object eol_type
;
8220 Lisp_Object aliases
;
8221 Lisp_Object coding_type
, charset_list
, safe_charsets
;
8222 enum coding_category category
;
8223 Lisp_Object tail
, val
;
8224 int max_charset_id
= 0;
8227 if (nargs
< coding_arg_max
)
8230 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
8232 name
= args
[coding_arg_name
];
8233 CHECK_SYMBOL (name
);
8234 CODING_ATTR_BASE_NAME (attrs
) = name
;
8236 val
= args
[coding_arg_mnemonic
];
8237 if (! STRINGP (val
))
8238 CHECK_CHARACTER (val
);
8239 CODING_ATTR_MNEMONIC (attrs
) = val
;
8241 coding_type
= args
[coding_arg_coding_type
];
8242 CHECK_SYMBOL (coding_type
);
8243 CODING_ATTR_TYPE (attrs
) = coding_type
;
8245 charset_list
= args
[coding_arg_charset_list
];
8246 if (SYMBOLP (charset_list
))
8248 if (EQ (charset_list
, Qiso_2022
))
8250 if (! EQ (coding_type
, Qiso_2022
))
8251 error ("Invalid charset-list");
8252 charset_list
= Viso_2022_charset_list
;
8254 else if (EQ (charset_list
, Qemacs_mule
))
8256 if (! EQ (coding_type
, Qemacs_mule
))
8257 error ("Invalid charset-list");
8258 charset_list
= Vemacs_mule_charset_list
;
8260 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8261 if (max_charset_id
< XFASTINT (XCAR (tail
)))
8262 max_charset_id
= XFASTINT (XCAR (tail
));
8266 charset_list
= Fcopy_sequence (charset_list
);
8267 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
8269 struct charset
*charset
;
8272 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8273 if (EQ (coding_type
, Qiso_2022
)
8274 ? CHARSET_ISO_FINAL (charset
) < 0
8275 : EQ (coding_type
, Qemacs_mule
)
8276 ? CHARSET_EMACS_MULE_ID (charset
) < 0
8278 error ("Can't handle charset `%s'",
8279 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8281 XSETCAR (tail
, make_number (charset
->id
));
8282 if (max_charset_id
< charset
->id
)
8283 max_charset_id
= charset
->id
;
8286 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
8288 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
8290 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8291 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
8292 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
8294 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
8296 val
= args
[coding_arg_decode_translation_table
];
8297 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8299 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8301 val
= args
[coding_arg_encode_translation_table
];
8302 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8304 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8306 val
= args
[coding_arg_post_read_conversion
];
8308 CODING_ATTR_POST_READ (attrs
) = val
;
8310 val
= args
[coding_arg_pre_write_conversion
];
8312 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8314 val
= args
[coding_arg_default_char
];
8316 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
8319 CHECK_CHARACTER (val
);
8320 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8323 val
= args
[coding_arg_for_unibyte
];
8324 CODING_ATTR_FOR_UNIBYTE (attrs
) = NILP (val
) ? Qnil
: Qt
;
8326 val
= args
[coding_arg_plist
];
8328 CODING_ATTR_PLIST (attrs
) = val
;
8330 if (EQ (coding_type
, Qcharset
))
8332 /* Generate a lisp vector of 256 elements. Each element is nil,
8333 integer, or a list of charset IDs.
8335 If Nth element is nil, the byte code N is invalid in this
8338 If Nth element is a number NUM, N is the first byte of a
8339 charset whose ID is NUM.
8341 If Nth element is a list of charset IDs, N is the first byte
8342 of one of them. The list is sorted by dimensions of the
8343 charsets. A charset of smaller dimension comes first. */
8344 val
= Fmake_vector (make_number (256), Qnil
);
8346 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8348 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
8349 int dim
= CHARSET_DIMENSION (charset
);
8350 int idx
= (dim
- 1) * 4;
8352 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8353 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8355 for (i
= charset
->code_space
[idx
];
8356 i
<= charset
->code_space
[idx
+ 1]; i
++)
8358 Lisp_Object tmp
, tmp2
;
8361 tmp
= AREF (val
, i
);
8364 else if (NUMBERP (tmp
))
8366 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
8368 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
8370 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
8374 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
8376 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
8381 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
8384 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
8385 XSETCAR (tmp2
, XCAR (tail
));
8391 ASET (attrs
, coding_attr_charset_valids
, val
);
8392 category
= coding_category_charset
;
8394 else if (EQ (coding_type
, Qccl
))
8398 if (nargs
< coding_arg_ccl_max
)
8401 val
= args
[coding_arg_ccl_decoder
];
8402 CHECK_CCL_PROGRAM (val
);
8404 val
= Fcopy_sequence (val
);
8405 ASET (attrs
, coding_attr_ccl_decoder
, val
);
8407 val
= args
[coding_arg_ccl_encoder
];
8408 CHECK_CCL_PROGRAM (val
);
8410 val
= Fcopy_sequence (val
);
8411 ASET (attrs
, coding_attr_ccl_encoder
, val
);
8413 val
= args
[coding_arg_ccl_valids
];
8414 valids
= Fmake_string (make_number (256), make_number (0));
8415 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
8422 from
= to
= XINT (val
);
8423 if (from
< 0 || from
> 255)
8424 args_out_of_range_3 (val
, make_number (0), make_number (255));
8429 CHECK_NATNUM_CAR (val
);
8430 CHECK_NATNUM_CDR (val
);
8431 from
= XINT (XCAR (val
));
8433 args_out_of_range_3 (XCAR (val
),
8434 make_number (0), make_number (255));
8435 to
= XINT (XCDR (val
));
8436 if (to
< from
|| to
> 255)
8437 args_out_of_range_3 (XCDR (val
),
8438 XCAR (val
), make_number (255));
8440 for (i
= from
; i
<= to
; i
++)
8441 SSET (valids
, i
, 1);
8443 ASET (attrs
, coding_attr_ccl_valids
, valids
);
8445 category
= coding_category_ccl
;
8447 else if (EQ (coding_type
, Qutf_16
))
8449 Lisp_Object bom
, endian
;
8451 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8453 if (nargs
< coding_arg_utf16_max
)
8456 bom
= args
[coding_arg_utf16_bom
];
8457 if (! NILP (bom
) && ! EQ (bom
, Qt
))
8461 CHECK_CODING_SYSTEM (val
);
8463 CHECK_CODING_SYSTEM (val
);
8465 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
8467 endian
= args
[coding_arg_utf16_endian
];
8468 CHECK_SYMBOL (endian
);
8471 else if (! EQ (endian
, Qbig
) && ! EQ (endian
, Qlittle
))
8472 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian
)));
8473 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
8475 category
= (CONSP (bom
)
8476 ? coding_category_utf_16_auto
8478 ? (EQ (endian
, Qbig
)
8479 ? coding_category_utf_16_be_nosig
8480 : coding_category_utf_16_le_nosig
)
8481 : (EQ (endian
, Qbig
)
8482 ? coding_category_utf_16_be
8483 : coding_category_utf_16_le
));
8485 else if (EQ (coding_type
, Qiso_2022
))
8487 Lisp_Object initial
, reg_usage
, request
, flags
;
8490 if (nargs
< coding_arg_iso2022_max
)
8493 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
8494 CHECK_VECTOR (initial
);
8495 for (i
= 0; i
< 4; i
++)
8497 val
= Faref (initial
, make_number (i
));
8500 struct charset
*charset
;
8502 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8503 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
8504 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
8505 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8508 ASET (initial
, i
, make_number (-1));
8511 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
8512 CHECK_CONS (reg_usage
);
8513 CHECK_NUMBER_CAR (reg_usage
);
8514 CHECK_NUMBER_CDR (reg_usage
);
8516 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8517 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8525 CHECK_CHARSET_GET_ID (tmp
, id
);
8526 CHECK_NATNUM_CDR (val
);
8527 if (XINT (XCDR (val
)) >= 4)
8528 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8529 XSETCAR (val
, make_number (id
));
8532 flags
= args
[coding_arg_iso2022_flags
];
8533 CHECK_NATNUM (flags
);
8535 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8536 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8538 ASET (attrs
, coding_attr_iso_initial
, initial
);
8539 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8540 ASET (attrs
, coding_attr_iso_request
, request
);
8541 ASET (attrs
, coding_attr_iso_flags
, flags
);
8542 setup_iso_safe_charsets (attrs
);
8544 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8545 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8546 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8547 ? coding_category_iso_7_else
8548 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8549 ? coding_category_iso_7
8550 : coding_category_iso_7_tight
);
8553 int id
= XINT (AREF (initial
, 1));
8555 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8556 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8558 ? coding_category_iso_8_else
8559 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8560 ? coding_category_iso_8_1
8561 : coding_category_iso_8_2
);
8563 if (category
!= coding_category_iso_8_1
8564 && category
!= coding_category_iso_8_2
)
8565 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8567 else if (EQ (coding_type
, Qemacs_mule
))
8569 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8570 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8571 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8572 category
= coding_category_emacs_mule
;
8574 else if (EQ (coding_type
, Qshift_jis
))
8577 struct charset
*charset
;
8579 if (XINT (Flength (charset_list
)) != 3
8580 && XINT (Flength (charset_list
)) != 4)
8581 error ("There should be three or four charsets");
8583 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8584 if (CHARSET_DIMENSION (charset
) != 1)
8585 error ("Dimension of charset %s is not one",
8586 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8587 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8588 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8590 charset_list
= XCDR (charset_list
);
8591 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8592 if (CHARSET_DIMENSION (charset
) != 1)
8593 error ("Dimension of charset %s is not one",
8594 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8596 charset_list
= XCDR (charset_list
);
8597 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8598 if (CHARSET_DIMENSION (charset
) != 2)
8599 error ("Dimension of charset %s is not two",
8600 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8602 charset_list
= XCDR (charset_list
);
8603 if (! NILP (charset_list
))
8605 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8606 if (CHARSET_DIMENSION (charset
) != 2)
8607 error ("Dimension of charset %s is not two",
8608 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8611 category
= coding_category_sjis
;
8612 Vsjis_coding_system
= name
;
8614 else if (EQ (coding_type
, Qbig5
))
8616 struct charset
*charset
;
8618 if (XINT (Flength (charset_list
)) != 2)
8619 error ("There should be just two charsets");
8621 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8622 if (CHARSET_DIMENSION (charset
) != 1)
8623 error ("Dimension of charset %s is not one",
8624 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8625 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8626 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8628 charset_list
= XCDR (charset_list
);
8629 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8630 if (CHARSET_DIMENSION (charset
) != 2)
8631 error ("Dimension of charset %s is not two",
8632 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8634 category
= coding_category_big5
;
8635 Vbig5_coding_system
= name
;
8637 else if (EQ (coding_type
, Qraw_text
))
8639 category
= coding_category_raw_text
;
8640 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8642 else if (EQ (coding_type
, Qutf_8
))
8644 category
= coding_category_utf_8
;
8645 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8647 else if (EQ (coding_type
, Qundecided
))
8648 category
= coding_category_undecided
;
8650 error ("Invalid coding system type: %s",
8651 SDATA (SYMBOL_NAME (coding_type
)));
8653 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8654 CODING_ATTR_PLIST (attrs
)
8655 = Fcons (QCcategory
, Fcons (AREF (Vcoding_category_table
, category
),
8656 CODING_ATTR_PLIST (attrs
)));
8658 eol_type
= args
[coding_arg_eol_type
];
8659 if (! NILP (eol_type
)
8660 && ! EQ (eol_type
, Qunix
)
8661 && ! EQ (eol_type
, Qdos
)
8662 && ! EQ (eol_type
, Qmac
))
8663 error ("Invalid eol-type");
8665 aliases
= Fcons (name
, Qnil
);
8667 if (NILP (eol_type
))
8669 eol_type
= make_subsidiaries (name
);
8670 for (i
= 0; i
< 3; i
++)
8672 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8674 this_name
= AREF (eol_type
, i
);
8675 this_aliases
= Fcons (this_name
, Qnil
);
8676 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
8677 this_spec
= Fmake_vector (make_number (3), attrs
);
8678 ASET (this_spec
, 1, this_aliases
);
8679 ASET (this_spec
, 2, this_eol_type
);
8680 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8681 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8682 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8683 Vcoding_system_alist
);
8687 spec_vec
= Fmake_vector (make_number (3), attrs
);
8688 ASET (spec_vec
, 1, aliases
);
8689 ASET (spec_vec
, 2, eol_type
);
8691 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
8692 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
8693 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
8694 Vcoding_system_alist
);
8697 int id
= coding_categories
[category
].id
;
8699 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
8700 setup_coding_system (name
, &coding_categories
[category
]);
8706 return Fsignal (Qwrong_number_of_arguments
,
8707 Fcons (intern ("define-coding-system-internal"),
8708 make_number (nargs
)));
8712 DEFUN ("coding-system-put", Fcoding_system_put
, Scoding_system_put
,
8714 doc
: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
8715 (coding_system
, prop
, val
)
8716 Lisp_Object coding_system
, prop
, val
;
8718 Lisp_Object spec
, attrs
, plist
;
8720 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8721 attrs
= AREF (spec
, 0);
8722 if (EQ (prop
, QCmnemonic
))
8724 if (! STRINGP (val
))
8725 CHECK_CHARACTER (val
);
8726 CODING_ATTR_MNEMONIC (attrs
) = val
;
8728 else if (EQ (prop
, QCdefalut_char
))
8731 val
= make_number (' ');
8733 CHECK_CHARACTER (val
);
8734 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8736 else if (EQ (prop
, QCdecode_translation_table
))
8738 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8740 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8742 else if (EQ (prop
, QCencode_translation_table
))
8744 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8746 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8748 else if (EQ (prop
, QCpost_read_conversion
))
8751 CODING_ATTR_POST_READ (attrs
) = val
;
8753 else if (EQ (prop
, QCpre_write_conversion
))
8756 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8759 CODING_ATTR_PLIST (attrs
)
8760 = Fplist_put (CODING_ATTR_PLIST (attrs
), prop
, val
);
8765 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
8766 Sdefine_coding_system_alias
, 2, 2, 0,
8767 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8768 (alias
, coding_system
)
8769 Lisp_Object alias
, coding_system
;
8771 Lisp_Object spec
, aliases
, eol_type
;
8773 CHECK_SYMBOL (alias
);
8774 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8775 aliases
= AREF (spec
, 1);
8776 /* ALIASES should be a list of length more than zero, and the first
8777 element is a base coding system. Append ALIAS at the tail of the
8779 while (!NILP (XCDR (aliases
)))
8780 aliases
= XCDR (aliases
);
8781 XSETCDR (aliases
, Fcons (alias
, Qnil
));
8783 eol_type
= AREF (spec
, 2);
8784 if (VECTORP (eol_type
))
8786 Lisp_Object subsidiaries
;
8789 subsidiaries
= make_subsidiaries (alias
);
8790 for (i
= 0; i
< 3; i
++)
8791 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
8792 AREF (eol_type
, i
));
8795 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
8796 Vcoding_system_list
= Fcons (alias
, Vcoding_system_list
);
8797 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
8798 Vcoding_system_alist
);
8803 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
8805 doc
: /* Return the base of CODING-SYSTEM.
8806 Any alias or subsidiary coding system is not a base coding system. */)
8808 Lisp_Object coding_system
;
8810 Lisp_Object spec
, attrs
;
8812 if (NILP (coding_system
))
8813 return (Qno_conversion
);
8814 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8815 attrs
= AREF (spec
, 0);
8816 return CODING_ATTR_BASE_NAME (attrs
);
8819 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
8821 doc
: "Return the property list of CODING-SYSTEM.")
8823 Lisp_Object coding_system
;
8825 Lisp_Object spec
, attrs
;
8827 if (NILP (coding_system
))
8828 coding_system
= Qno_conversion
;
8829 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8830 attrs
= AREF (spec
, 0);
8831 return CODING_ATTR_PLIST (attrs
);
8835 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
8837 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
8839 Lisp_Object coding_system
;
8843 if (NILP (coding_system
))
8844 coding_system
= Qno_conversion
;
8845 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8846 return AREF (spec
, 1);
8849 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
8850 Scoding_system_eol_type
, 1, 1, 0,
8851 doc
: /* Return eol-type of CODING-SYSTEM.
8852 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8854 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8855 and CR respectively.
8857 A vector value indicates that a format of end-of-line should be
8858 detected automatically. Nth element of the vector is the subsidiary
8859 coding system whose eol-type is N. */)
8861 Lisp_Object coding_system
;
8863 Lisp_Object spec
, eol_type
;
8866 if (NILP (coding_system
))
8867 coding_system
= Qno_conversion
;
8868 if (! CODING_SYSTEM_P (coding_system
))
8870 spec
= CODING_SYSTEM_SPEC (coding_system
);
8871 eol_type
= AREF (spec
, 2);
8872 if (VECTORP (eol_type
))
8873 return Fcopy_sequence (eol_type
);
8874 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
8875 return make_number (n
);
8881 /*** 9. Post-amble ***/
8888 for (i
= 0; i
< coding_category_max
; i
++)
8890 coding_categories
[i
].id
= -1;
8891 coding_priorities
[i
] = i
;
8894 /* ISO2022 specific initialize routine. */
8895 for (i
= 0; i
< 0x20; i
++)
8896 iso_code_class
[i
] = ISO_control_0
;
8897 for (i
= 0x21; i
< 0x7F; i
++)
8898 iso_code_class
[i
] = ISO_graphic_plane_0
;
8899 for (i
= 0x80; i
< 0xA0; i
++)
8900 iso_code_class
[i
] = ISO_control_1
;
8901 for (i
= 0xA1; i
< 0xFF; i
++)
8902 iso_code_class
[i
] = ISO_graphic_plane_1
;
8903 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
8904 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
8905 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
8906 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
8907 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
8908 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
8909 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
8910 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
8911 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
8913 for (i
= 0; i
< 256; i
++)
8915 emacs_mule_bytes
[i
] = 1;
8917 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
8918 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
8919 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
8920 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
8928 staticpro (&Vcoding_system_hash_table
);
8930 Lisp_Object args
[2];
8933 Vcoding_system_hash_table
= Fmake_hash_table (2, args
);
8936 staticpro (&Vsjis_coding_system
);
8937 Vsjis_coding_system
= Qnil
;
8939 staticpro (&Vbig5_coding_system
);
8940 Vbig5_coding_system
= Qnil
;
8942 staticpro (&Vcode_conversion_reused_workbuf
);
8943 Vcode_conversion_reused_workbuf
= Qnil
;
8945 staticpro (&Vcode_conversion_workbuf_name
);
8946 Vcode_conversion_workbuf_name
= build_string (" *code-conversion-work*");
8948 reused_workbuf_in_use
= 0;
8950 DEFSYM (Qcharset
, "charset");
8951 DEFSYM (Qtarget_idx
, "target-idx");
8952 DEFSYM (Qcoding_system_history
, "coding-system-history");
8953 Fset (Qcoding_system_history
, Qnil
);
8955 /* Target FILENAME is the first argument. */
8956 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
8957 /* Target FILENAME is the third argument. */
8958 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
8960 DEFSYM (Qcall_process
, "call-process");
8961 /* Target PROGRAM is the first argument. */
8962 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
8964 DEFSYM (Qcall_process_region
, "call-process-region");
8965 /* Target PROGRAM is the third argument. */
8966 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
8968 DEFSYM (Qstart_process
, "start-process");
8969 /* Target PROGRAM is the third argument. */
8970 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
8972 DEFSYM (Qopen_network_stream
, "open-network-stream");
8973 /* Target SERVICE is the fourth argument. */
8974 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
8976 DEFSYM (Qcoding_system
, "coding-system");
8977 DEFSYM (Qcoding_aliases
, "coding-aliases");
8979 DEFSYM (Qeol_type
, "eol-type");
8980 DEFSYM (Qunix
, "unix");
8981 DEFSYM (Qdos
, "dos");
8983 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
8984 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
8985 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
8986 DEFSYM (Qdefault_char
, "default-char");
8987 DEFSYM (Qundecided
, "undecided");
8988 DEFSYM (Qno_conversion
, "no-conversion");
8989 DEFSYM (Qraw_text
, "raw-text");
8991 DEFSYM (Qiso_2022
, "iso-2022");
8993 DEFSYM (Qutf_8
, "utf-8");
8994 DEFSYM (Qutf_8_emacs
, "utf-8-emacs");
8996 DEFSYM (Qutf_16
, "utf-16");
8997 DEFSYM (Qbig
, "big");
8998 DEFSYM (Qlittle
, "little");
9000 DEFSYM (Qshift_jis
, "shift-jis");
9001 DEFSYM (Qbig5
, "big5");
9003 DEFSYM (Qcoding_system_p
, "coding-system-p");
9005 DEFSYM (Qcoding_system_error
, "coding-system-error");
9006 Fput (Qcoding_system_error
, Qerror_conditions
,
9007 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
9008 Fput (Qcoding_system_error
, Qerror_message
,
9009 build_string ("Invalid coding system"));
9011 /* Intern this now in case it isn't already done.
9012 Setting this variable twice is harmless.
9013 But don't staticpro it here--that is done in alloc.c. */
9014 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
9016 DEFSYM (Qtranslation_table
, "translation-table");
9017 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (2));
9018 DEFSYM (Qtranslation_table_id
, "translation-table-id");
9019 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
9020 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
9022 DEFSYM (Qvalid_codes
, "valid-codes");
9024 DEFSYM (Qemacs_mule
, "emacs-mule");
9026 DEFSYM (QCcategory
, ":category");
9027 DEFSYM (QCmnemonic
, ":mnemonic");
9028 DEFSYM (QCdefalut_char
, ":default-char");
9029 DEFSYM (QCdecode_translation_table
, ":decode-translation-table");
9030 DEFSYM (QCencode_translation_table
, ":encode-translation-table");
9031 DEFSYM (QCpost_read_conversion
, ":post-read-conversion");
9032 DEFSYM (QCpre_write_conversion
, ":pre-write-conversion");
9034 Vcoding_category_table
9035 = Fmake_vector (make_number (coding_category_max
), Qnil
);
9036 staticpro (&Vcoding_category_table
);
9037 /* The following are targets of code detection. */
9038 ASET (Vcoding_category_table
, coding_category_iso_7
,
9039 intern ("coding-category-iso-7"));
9040 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
9041 intern ("coding-category-iso-7-tight"));
9042 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
9043 intern ("coding-category-iso-8-1"));
9044 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
9045 intern ("coding-category-iso-8-2"));
9046 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
9047 intern ("coding-category-iso-7-else"));
9048 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
9049 intern ("coding-category-iso-8-else"));
9050 ASET (Vcoding_category_table
, coding_category_utf_8
,
9051 intern ("coding-category-utf-8"));
9052 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
9053 intern ("coding-category-utf-16-be"));
9054 ASET (Vcoding_category_table
, coding_category_utf_16_auto
,
9055 intern ("coding-category-utf-16-auto"));
9056 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
9057 intern ("coding-category-utf-16-le"));
9058 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
9059 intern ("coding-category-utf-16-be-nosig"));
9060 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
9061 intern ("coding-category-utf-16-le-nosig"));
9062 ASET (Vcoding_category_table
, coding_category_charset
,
9063 intern ("coding-category-charset"));
9064 ASET (Vcoding_category_table
, coding_category_sjis
,
9065 intern ("coding-category-sjis"));
9066 ASET (Vcoding_category_table
, coding_category_big5
,
9067 intern ("coding-category-big5"));
9068 ASET (Vcoding_category_table
, coding_category_ccl
,
9069 intern ("coding-category-ccl"));
9070 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
9071 intern ("coding-category-emacs-mule"));
9072 /* The following are NOT targets of code detection. */
9073 ASET (Vcoding_category_table
, coding_category_raw_text
,
9074 intern ("coding-category-raw-text"));
9075 ASET (Vcoding_category_table
, coding_category_undecided
,
9076 intern ("coding-category-undecided"));
9078 DEFSYM (Qinsufficient_source
, "insufficient-source");
9079 DEFSYM (Qinconsistent_eol
, "inconsistent-eol");
9080 DEFSYM (Qinvalid_source
, "invalid-source");
9081 DEFSYM (Qinterrupted
, "interrupted");
9082 DEFSYM (Qinsufficient_memory
, "insufficient-memory");
9084 defsubr (&Scoding_system_p
);
9085 defsubr (&Sread_coding_system
);
9086 defsubr (&Sread_non_nil_coding_system
);
9087 defsubr (&Scheck_coding_system
);
9088 defsubr (&Sdetect_coding_region
);
9089 defsubr (&Sdetect_coding_string
);
9090 defsubr (&Sfind_coding_systems_region_internal
);
9091 defsubr (&Sunencodable_char_position
);
9092 defsubr (&Scheck_coding_systems_region
);
9093 defsubr (&Sdecode_coding_region
);
9094 defsubr (&Sencode_coding_region
);
9095 defsubr (&Sdecode_coding_string
);
9096 defsubr (&Sencode_coding_string
);
9097 defsubr (&Sdecode_sjis_char
);
9098 defsubr (&Sencode_sjis_char
);
9099 defsubr (&Sdecode_big5_char
);
9100 defsubr (&Sencode_big5_char
);
9101 defsubr (&Sset_terminal_coding_system_internal
);
9102 defsubr (&Sset_safe_terminal_coding_system_internal
);
9103 defsubr (&Sterminal_coding_system
);
9104 defsubr (&Sset_keyboard_coding_system_internal
);
9105 defsubr (&Skeyboard_coding_system
);
9106 defsubr (&Sfind_operation_coding_system
);
9107 defsubr (&Sset_coding_system_priority
);
9108 defsubr (&Sdefine_coding_system_internal
);
9109 defsubr (&Sdefine_coding_system_alias
);
9110 defsubr (&Scoding_system_put
);
9111 defsubr (&Scoding_system_base
);
9112 defsubr (&Scoding_system_plist
);
9113 defsubr (&Scoding_system_aliases
);
9114 defsubr (&Scoding_system_eol_type
);
9115 defsubr (&Scoding_system_priority_list
);
9117 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
9118 doc
: /* List of coding systems.
9120 Do not alter the value of this variable manually. This variable should be
9121 updated by the functions `define-coding-system' and
9122 `define-coding-system-alias'. */);
9123 Vcoding_system_list
= Qnil
;
9125 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
9126 doc
: /* Alist of coding system names.
9127 Each element is a one-element list holding a coding system name string.
9128 This variable is given to `completing-read' as TABLE argument.
9130 Do not alter the value of this variable manually. This variable should be
9131 updated by the functions `define-coding-system' and
9132 `define-coding-system-alias'. */);
9133 Vcoding_system_alist
= Qnil
;
9135 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
9136 doc
: /* List of coding-categories (symbols) ordered by priority.
9138 On detecting a coding system, Emacs tries code detection algorithms
9139 associated with each coding-category one by one in this order. When
9140 one algorithm agrees with a byte sequence of source text, the coding
9141 system bound to the corresponding coding-category is selected. */);
9145 Vcoding_category_list
= Qnil
;
9146 for (i
= coding_category_max
- 1; i
>= 0; i
--)
9147 Vcoding_category_list
9148 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
9149 Vcoding_category_list
);
9152 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
9153 doc
: /* Specify the coding system for read operations.
9154 It is useful to bind this variable with `let', but do not set it globally.
9155 If the value is a coding system, it is used for decoding on read operation.
9156 If not, an appropriate element is used from one of the coding system alists:
9157 There are three such tables, `file-coding-system-alist',
9158 `process-coding-system-alist', and `network-coding-system-alist'. */);
9159 Vcoding_system_for_read
= Qnil
;
9161 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
9162 doc
: /* Specify the coding system for write operations.
9163 Programs bind this variable with `let', but you should not set it globally.
9164 If the value is a coding system, it is used for encoding of output,
9165 when writing it to a file and when sending it to a file or subprocess.
9167 If this does not specify a coding system, an appropriate element
9168 is used from one of the coding system alists:
9169 There are three such tables, `file-coding-system-alist',
9170 `process-coding-system-alist', and `network-coding-system-alist'.
9171 For output to files, if the above procedure does not specify a coding system,
9172 the value of `buffer-file-coding-system' is used. */);
9173 Vcoding_system_for_write
= Qnil
;
9175 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
9177 Coding system used in the latest file or process I/O. */);
9178 Vlast_coding_system_used
= Qnil
;
9180 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error
,
9182 Error status of the last code conversion.
9184 When an error was detected in the last code conversion, this variable
9185 is set to one of the following symbols.
9186 `insufficient-source'
9190 `insufficient-memory'
9191 When no error was detected, the value doesn't change. So, to check
9192 the error status of a code conversion by this variable, you must
9193 explicitly set this variable to nil before performing code
9195 Vlast_code_conversion_error
= Qnil
;
9197 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
9199 *Non-nil means always inhibit code conversion of end-of-line format.
9200 See info node `Coding Systems' and info node `Text and Binary' concerning
9201 such conversion. */);
9202 inhibit_eol_conversion
= 0;
9204 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
9206 Non-nil means process buffer inherits coding system of process output.
9207 Bind it to t if the process output is to be treated as if it were a file
9208 read from some filesystem. */);
9209 inherit_process_coding_system
= 0;
9211 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
9213 Alist to decide a coding system to use for a file I/O operation.
9214 The format is ((PATTERN . VAL) ...),
9215 where PATTERN is a regular expression matching a file name,
9216 VAL is a coding system, a cons of coding systems, or a function symbol.
9217 If VAL is a coding system, it is used for both decoding and encoding
9219 If VAL is a cons of coding systems, the car part is used for decoding,
9220 and the cdr part is used for encoding.
9221 If VAL is a function symbol, the function must return a coding system
9222 or a cons of coding systems which are used as above. The function gets
9223 the arguments with which `find-operation-coding-system' was called.
9225 See also the function `find-operation-coding-system'
9226 and the variable `auto-coding-alist'. */);
9227 Vfile_coding_system_alist
= Qnil
;
9229 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
9231 Alist to decide a coding system to use for a process I/O operation.
9232 The format is ((PATTERN . VAL) ...),
9233 where PATTERN is a regular expression matching a program name,
9234 VAL is a coding system, a cons of coding systems, or a function symbol.
9235 If VAL is a coding system, it is used for both decoding what is received
9236 from the program and encoding what is sent to the program.
9237 If VAL is a cons of coding systems, the car part is used for decoding,
9238 and the cdr part is used for encoding.
9239 If VAL is a function symbol, the function must return a coding system
9240 or a cons of coding systems which are used as above.
9242 See also the function `find-operation-coding-system'. */);
9243 Vprocess_coding_system_alist
= Qnil
;
9245 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
9247 Alist to decide a coding system to use for a network I/O operation.
9248 The format is ((PATTERN . VAL) ...),
9249 where PATTERN is a regular expression matching a network service name
9250 or is a port number to connect to,
9251 VAL is a coding system, a cons of coding systems, or a function symbol.
9252 If VAL is a coding system, it is used for both decoding what is received
9253 from the network stream and encoding what is sent to the network stream.
9254 If VAL is a cons of coding systems, the car part is used for decoding,
9255 and the cdr part is used for encoding.
9256 If VAL is a function symbol, the function must return a coding system
9257 or a cons of coding systems which are used as above.
9259 See also the function `find-operation-coding-system'. */);
9260 Vnetwork_coding_system_alist
= Qnil
;
9262 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
9263 doc
: /* Coding system to use with system messages.
9264 Also used for decoding keyboard input on X Window system. */);
9265 Vlocale_coding_system
= Qnil
;
9267 /* The eol mnemonics are reset in startup.el system-dependently. */
9268 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
9270 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
9271 eol_mnemonic_unix
= build_string (":");
9273 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
9275 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
9276 eol_mnemonic_dos
= build_string ("\\");
9278 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
9280 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
9281 eol_mnemonic_mac
= build_string ("/");
9283 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
9285 *String displayed in mode line when end-of-line format is not yet determined. */);
9286 eol_mnemonic_undecided
= build_string (":");
9288 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
9290 *Non-nil enables character translation while encoding and decoding. */);
9291 Venable_character_translation
= Qt
;
9293 DEFVAR_LISP ("standard-translation-table-for-decode",
9294 &Vstandard_translation_table_for_decode
,
9295 doc
: /* Table for translating characters while decoding. */);
9296 Vstandard_translation_table_for_decode
= Qnil
;
9298 DEFVAR_LISP ("standard-translation-table-for-encode",
9299 &Vstandard_translation_table_for_encode
,
9300 doc
: /* Table for translating characters while encoding. */);
9301 Vstandard_translation_table_for_encode
= Qnil
;
9303 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
9304 doc
: /* Alist of charsets vs revision numbers.
9305 While encoding, if a charset (car part of an element) is found,
9306 designate it with the escape sequence identifying revision (cdr part
9307 of the element). */);
9308 Vcharset_revision_table
= Qnil
;
9310 DEFVAR_LISP ("default-process-coding-system",
9311 &Vdefault_process_coding_system
,
9312 doc
: /* Cons of coding systems used for process I/O by default.
9313 The car part is used for decoding a process output,
9314 the cdr part is used for encoding a text to be sent to a process. */);
9315 Vdefault_process_coding_system
= Qnil
;
9317 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
9319 Table of extra Latin codes in the range 128..159 (inclusive).
9320 This is a vector of length 256.
9321 If Nth element is non-nil, the existence of code N in a file
9322 \(or output of subprocess) does not prevent it from being detected as
9323 a coding system of ISO 2022 variant which has a flag
9324 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9325 or reading output of a subprocess.
9326 Only the 128th through 159th elements have a meaning. */);
9327 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
9329 DEFVAR_LISP ("select-safe-coding-system-function",
9330 &Vselect_safe_coding_system_function
,
9332 Function to call to select safe coding system for encoding a text.
9334 If set, this function is called to force a user to select a proper
9335 coding system which can encode the text in the case that a default
9336 coding system used in each operation can't encode the text.
9338 The default value is `select-safe-coding-system' (which see). */);
9339 Vselect_safe_coding_system_function
= Qnil
;
9341 DEFVAR_BOOL ("coding-system-require-warning",
9342 &coding_system_require_warning
,
9343 doc
: /* Internal use only.
9344 If non-nil, on writing a file, `select-safe-coding-system-function' is
9345 called even if `coding-system-for-write' is non-nil. The command
9346 `universal-coding-system-argument' binds this variable to t temporarily. */);
9347 coding_system_require_warning
= 0;
9350 DEFVAR_BOOL ("inhibit-iso-escape-detection",
9351 &inhibit_iso_escape_detection
,
9353 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9355 By default, on reading a file, Emacs tries to detect how the text is
9356 encoded. This code detection is sensitive to escape sequences. If
9357 the sequence is valid as ISO2022, the code is determined as one of
9358 the ISO2022 encodings, and the file is decoded by the corresponding
9359 coding system (e.g. `iso-2022-7bit').
9361 However, there may be a case that you want to read escape sequences in
9362 a file as is. In such a case, you can set this variable to non-nil.
9363 Then, as the code detection ignores any escape sequences, no file is
9364 detected as encoded in some ISO2022 encoding. The result is that all
9365 escape sequences become visible in a buffer.
9367 The default value is nil, and it is strongly recommended not to change
9368 it. That is because many Emacs Lisp source files that contain
9369 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9370 in Emacs's distribution, and they won't be decoded correctly on
9371 reading if you suppress escape sequence detection.
9373 The other way to read escape sequences in a file without decoding is
9374 to explicitly specify some coding system that doesn't use ISO2022's
9375 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
9376 inhibit_iso_escape_detection
= 0;
9378 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input
,
9379 doc
: /* Char table for translating self-inserting characters.
9380 This is applied to the result of input methods, not their input. See also
9381 `keyboard-translate-table'. */);
9382 Vtranslation_table_for_input
= Qnil
;
9385 Lisp_Object args
[coding_arg_max
];
9386 Lisp_Object plist
[16];
9389 for (i
= 0; i
< coding_arg_max
; i
++)
9392 plist
[0] = intern (":name");
9393 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
9394 plist
[2] = intern (":mnemonic");
9395 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
9396 plist
[4] = intern (":coding-type");
9397 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
9398 plist
[6] = intern (":ascii-compatible-p");
9399 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
9400 plist
[8] = intern (":default-char");
9401 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
9402 plist
[10] = intern (":for-unibyte");
9403 plist
[11] = args
[coding_arg_for_unibyte
] = Qt
;
9404 plist
[12] = intern (":docstring");
9405 plist
[13] = build_string ("Do no conversion.\n\
9407 When you visit a file with this coding, the file is read into a\n\
9408 unibyte buffer as is, thus each byte of a file is treated as a\n\
9410 plist
[14] = intern (":eol-type");
9411 plist
[15] = args
[coding_arg_eol_type
] = Qunix
;
9412 args
[coding_arg_plist
] = Flist (16, plist
);
9413 Fdefine_coding_system_internal (coding_arg_max
, args
);
9416 setup_coding_system (Qno_conversion
, &keyboard_coding
);
9417 setup_coding_system (Qno_conversion
, &terminal_coding
);
9418 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
9423 for (i
= 0; i
< coding_category_max
; i
++)
9424 Fset (AREF (Vcoding_category_table
, i
), Qno_conversion
);
9429 emacs_strerror (error_number
)
9434 synchronize_system_messages_locale ();
9435 str
= strerror (error_number
);
9437 if (! NILP (Vlocale_coding_system
))
9439 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
9440 Vlocale_coding_system
,
9442 str
= (char *) SDATA (dec
);