1 /* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequence to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion from a
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the mechanism of encoding. Here's a brief description of each type.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by code converter defined for each
77 o Old Emacs' internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode a text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for a text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How end-of-line of a text is encoded depends on a system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text characters encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwise (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
152 It also resets some bits of an integer pointed to by MASK. The macros
153 CATEGORY_MASK_XXX specify each bit of this integer.
155 Below is the template of these functions. */
/* Template only: XXX is a placeholder for a concrete coding category.
   The mask bits are named CATEGORY_MASK_XXX, matching the notes above
   and the real detectors in this file. */
159 detect_coding_XXX (coding
, mask
)
160 struct coding_system
*coding
;
163 unsigned char *src
= coding
->source
;
164 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
165 int multibytep
= coding
->src_multibyte
;
172 /* Get one byte from the source. If the source is exhausted, jump
173 to no_more_source:. */
175 /* Check if it conforms to XXX. If not, break the loop. */
177 /* As the data is invalid for XXX, reset the proper bits. */
178 *mask
&= ~CATEGORY_MASK_XXX
;
181 /* The source is exhausted. */
183 /* ASCII characters only. */
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask
&= CATEGORY_MASK_XXX
;
191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
203 Below is the template of these functions. */
/* Template only: XXXX is a placeholder for a concrete coding system.
   The loop must stop once CHARBUF is full, i.e. when
   charbuf >= charbuf_end. */
207 decode_coding_XXXX (coding
)
208 struct coding_system
*coding
;
210 unsigned char *src
= coding
->source
+ coding
->consumed
;
211 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base
;
216 /* A buffer to produce decoded characters. */
217 int *charbuf
= coding
->charbuf
;
218 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
219 int multibytep
= coding
->src_multibyte
;
224 if (charbuf
>= charbuf_end
)
225 /* No more room to produce a decoded character. */
232 if (src_base
< src_end
233 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base
< src_end
&& charbuf
< charbuf_end
)
237 *charbuf
++ = *src_base
++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
241 /* Remember how many characters we produced. */
242 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce an encoded text until it
260 reaches the head of not-yet-encoded source text.
262 Below is a template of these functions. */
/* Template only: XXX is a placeholder for a concrete coding system.
   CHARBUF_END is CHARBUF plus the number of characters in use;
   CHARBUF is a plain int pointer, so it has no members to access. */
265 encode_coding_XXX (coding
)
266 struct coding_system
*coding
;
268 int multibytep
= coding
->dst_multibyte
;
269 int *charbuf
= coding
->charbuf
;
270 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
271 unsigned char *dst
= coding
->destination
+ coding
->produced
;
272 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
273 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
274 int produced_chars
= 0;
276 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
279 /* Encode C into DST, and increment DST. */
281 label_no_more_destination
:
282 /* How many chars and bytes we produced. */
283 coding
->produced_char
+= produced_chars
;
284 coding
->produced
= dst
- coding
->destination
;
289 /*** 1. Preamble ***/
296 #include "character.h"
299 #include "composite.h"
303 Lisp_Object Vcoding_system_hash_table
;
305 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
306 Lisp_Object Qunix
, Qdos
, Qmac
;
307 Lisp_Object Qbuffer_file_coding_system
;
308 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
309 Lisp_Object Qdefault_char
;
310 Lisp_Object Qno_conversion
, Qundecided
;
311 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
312 Lisp_Object Qutf_16_be_nosig
, Qutf_16_be
, Qutf_16_le_nosig
, Qutf_16_le
;
313 Lisp_Object Qsignature
, Qendian
, Qbig
, Qlittle
;
314 Lisp_Object Qcoding_system_history
;
315 Lisp_Object Qvalid_codes
;
317 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
318 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
319 Lisp_Object Qstart_process
, Qopen_network_stream
;
320 Lisp_Object Qtarget_idx
;
322 Lisp_Object Vselect_safe_coding_system_function
;
324 /* Mnemonic string for each format of end-of-line. */
325 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
326 /* Mnemonic string to indicate format of end-of-line is not yet
328 Lisp_Object eol_mnemonic_undecided
;
332 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
334 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
336 /* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338 Lisp_Object Qemacs_mule
, Qraw_text
;
340 /* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342 /* Coding-system for reading files and receiving data from process. */
343 Lisp_Object Vcoding_system_for_read
;
344 /* Coding-system for writing files and sending data to process. */
345 Lisp_Object Vcoding_system_for_write
;
346 /* Coding-system actually used in the latest I/O. */
347 Lisp_Object Vlast_coding_system_used
;
349 /* A vector of length 256 which contains information about special
350 Latin codes (especially for dealing with Microsoft codes). */
351 Lisp_Object Vlatin_extra_code_table
;
353 /* Flag to inhibit code conversion of end-of-line format. */
354 int inhibit_eol_conversion
;
356 /* Flag to inhibit ISO2022 escape sequence detection. */
357 int inhibit_iso_escape_detection
;
359 /* Flag to make buffer-file-coding-system inherit from process-coding. */
360 int inherit_process_coding_system
;
362 /* Coding system to be used to encode text for terminal display. */
363 struct coding_system terminal_coding
;
365 /* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367 struct coding_system safe_terminal_coding
;
369 /* Coding system of what is sent from terminal keyboard. */
370 struct coding_system keyboard_coding
;
372 Lisp_Object Vfile_coding_system_alist
;
373 Lisp_Object Vprocess_coding_system_alist
;
374 Lisp_Object Vnetwork_coding_system_alist
;
376 Lisp_Object Vlocale_coding_system
;
380 /* Flag to tell if we look up translation table on character code
382 Lisp_Object Venable_character_translation
;
383 /* Standard translation table to look up on decoding (reading). */
384 Lisp_Object Vstandard_translation_table_for_decode
;
385 /* Standard translation table to look up on encoding (writing). */
386 Lisp_Object Vstandard_translation_table_for_encode
;
388 Lisp_Object Qtranslation_table
;
389 Lisp_Object Qtranslation_table_id
;
390 Lisp_Object Qtranslation_table_for_decode
;
391 Lisp_Object Qtranslation_table_for_encode
;
393 /* Alist of charsets vs revision number. */
394 static Lisp_Object Vcharset_revision_table
;
396 /* Default coding systems used for process I/O. */
397 Lisp_Object Vdefault_process_coding_system
;
399 /* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403 static int inhibit_pre_post_conversion
;
405 /* Char-table containing safe coding systems of each character. */
406 Lisp_Object Vchar_coding_system_table
;
407 Lisp_Object Qchar_coding_system
;
409 /* Two special coding systems. */
410 Lisp_Object Vsjis_coding_system
;
411 Lisp_Object Vbig5_coding_system
;
414 static int detect_coding_utf_8
P_ ((struct coding_system
*, int *));
415 static void decode_coding_utf_8
P_ ((struct coding_system
*));
416 static int encode_coding_utf_8
P_ ((struct coding_system
*));
418 static int detect_coding_utf_16
P_ ((struct coding_system
*, int *));
419 static void decode_coding_utf_16
P_ ((struct coding_system
*));
420 static int encode_coding_utf_16
P_ ((struct coding_system
*));
422 static int detect_coding_iso_2022
P_ ((struct coding_system
*, int *));
423 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
424 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
426 static int detect_coding_emacs_mule
P_ ((struct coding_system
*, int *));
427 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
428 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
430 static int detect_coding_sjis
P_ ((struct coding_system
*, int *));
431 static void decode_coding_sjis
P_ ((struct coding_system
*));
432 static int encode_coding_sjis
P_ ((struct coding_system
*));
434 static int detect_coding_big5
P_ ((struct coding_system
*, int *));
435 static void decode_coding_big5
P_ ((struct coding_system
*));
436 static int encode_coding_big5
P_ ((struct coding_system
*));
438 static int detect_coding_ccl
P_ ((struct coding_system
*, int *));
439 static void decode_coding_ccl
P_ ((struct coding_system
*));
440 static int encode_coding_ccl
P_ ((struct coding_system
*));
442 static void decode_coding_raw_text
P_ ((struct coding_system
*));
443 static int encode_coding_raw_text
P_ ((struct coding_system
*));
446 /* ISO2022 section */
448 #define CODING_ISO_INITIAL(coding, reg) \
449 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
450 coding_attr_iso_initial), \
454 #define CODING_ISO_REQUEST(coding, charset_id) \
455 ((charset_id <= (coding)->max_charset_id \
456 ? (coding)->safe_charsets[charset_id] \
460 #define CODING_ISO_FLAGS(coding) \
461 ((coding)->spec.iso_2022.flags)
462 #define CODING_ISO_DESIGNATION(coding, reg) \
463 ((coding)->spec.iso_2022.current_designation[reg])
464 #define CODING_ISO_INVOCATION(coding, plane) \
465 ((coding)->spec.iso_2022.current_invocation[plane])
466 #define CODING_ISO_SINGLE_SHIFTING(coding) \
467 ((coding)->spec.iso_2022.single_shifting)
468 #define CODING_ISO_BOL(coding) \
469 ((coding)->spec.iso_2022.bol)
470 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
471 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
473 /* Control characters of ISO2022. */
474 /* code */ /* function */
475 #define ISO_CODE_LF 0x0A /* line-feed */
476 #define ISO_CODE_CR 0x0D /* carriage-return */
477 #define ISO_CODE_SO 0x0E /* shift-out */
478 #define ISO_CODE_SI 0x0F /* shift-in */
479 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
480 #define ISO_CODE_ESC 0x1B /* escape */
481 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
482 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
483 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
485 /* All code (1-byte) of ISO2022 is classified into one of the
487 enum iso_code_class_type
489 ISO_control_0
, /* Control codes in the range
490 0x00..0x1F and 0x7F, except for the
491 following 5 codes. */
492 ISO_carriage_return
, /* ISO_CODE_CR (0x0D) */
493 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
494 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
495 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
496 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
497 ISO_control_1
, /* Control codes in the range
498 0x80..0x9F, except for the
499 following 3 codes. */
500 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
501 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
502 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
503 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
504 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
505 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
506 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
509 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
510 `iso-flags' attribute of an iso2022 coding system. */
512 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
513 instead of the correct short-form sequence (e.g. ESC $ A). */
514 #define CODING_ISO_FLAG_LONG_FORM 0x0001
516 /* If set, reset graphic planes and registers at end-of-line to the
518 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
520 /* If set, reset graphic planes and registers before any control
521 characters to the initial state. */
522 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
524 /* If set, encode by 7-bit environment. */
525 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
527 /* If set, use locking-shift function. */
528 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
530 /* If set, use single-shift function. Overwrite
531 CODING_ISO_FLAG_LOCKING_SHIFT. */
532 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
534 /* If set, use designation escape sequence. */
535 #define CODING_ISO_FLAG_DESIGNATION 0x0040
537 /* If set, produce revision number sequence. */
538 #define CODING_ISO_FLAG_REVISION 0x0080
540 /* If set, produce ISO6429's direction specifying sequence. */
541 #define CODING_ISO_FLAG_DIRECTION 0x0100
543 /* If set, assume designation states are reset at beginning of line on
545 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
547 /* If set, designation sequence should be placed at beginning of line
549 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
551 /* If set, do not encode unsafe characters on output. */
552 #define CODING_ISO_FLAG_SAFE 0x0800
554 /* If set, extra latin codes (128..159) are accepted as a valid code
556 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
558 #define CODING_ISO_FLAG_COMPOSITION 0x2000
560 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
562 #define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
564 /* A character to be produced on output if encoding of the original
565 character is prohibited by CODING_ISO_FLAG_SAFE. */
566 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
570 #define CODING_UTF_16_BOM(coding) \
571 ((coding)->spec.utf_16.bom)
573 #define CODING_UTF_16_ENDIAN(coding) \
574 ((coding)->spec.utf_16.endian)
576 #define CODING_UTF_16_SURROGATE(coding) \
577 ((coding)->spec.utf_16.surrogate)
581 #define CODING_CCL_DECODER(coding) \
582 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
583 #define CODING_CCL_ENCODER(coding) \
584 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
585 #define CODING_CCL_VALIDS(coding) \
586 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
589 /* Index for each coding category in `coding_category_table' */
593 coding_category_iso_7
,
594 coding_category_iso_7_tight
,
595 coding_category_iso_8_1
,
596 coding_category_iso_8_2
,
597 coding_category_iso_7_else
,
598 coding_category_iso_8_else
,
599 coding_category_utf_8
,
600 coding_category_utf_16_auto
,
601 coding_category_utf_16_be
,
602 coding_category_utf_16_le
,
603 coding_category_utf_16_be_nosig
,
604 coding_category_utf_16_le_nosig
,
605 coding_category_charset
,
606 coding_category_sjis
,
607 coding_category_big5
,
609 coding_category_emacs_mule
,
610 /* All above are targets of code detection. */
611 coding_category_raw_text
,
612 coding_category_undecided
,
616 /* Definitions of flag bits used in detect_coding_XXXX. */
617 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
618 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
619 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
620 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
621 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
622 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
623 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
624 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
625 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
626 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
627 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
628 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
629 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
630 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
631 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
632 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
634 /* This value is returned if detect_coding_mask () finds nothing other
635 than ASCII characters. */
636 #define CATEGORY_MASK_ANY \
637 (CATEGORY_MASK_ISO_7 \
638 | CATEGORY_MASK_ISO_7_TIGHT \
639 | CATEGORY_MASK_ISO_8_1 \
640 | CATEGORY_MASK_ISO_8_2 \
641 | CATEGORY_MASK_ISO_7_ELSE \
642 | CATEGORY_MASK_ISO_8_ELSE \
643 | CATEGORY_MASK_UTF_8 \
644 | CATEGORY_MASK_UTF_16_BE \
645 | CATEGORY_MASK_UTF_16_LE \
646 | CATEGORY_MASK_UTF_16_BE_NOSIG \
647 | CATEGORY_MASK_UTF_16_LE_NOSIG \
648 | CATEGORY_MASK_CHARSET \
649 | CATEGORY_MASK_SJIS \
650 | CATEGORY_MASK_BIG5 \
651 | CATEGORY_MASK_CCL \
652 | CATEGORY_MASK_EMACS_MULE)
655 #define CATEGORY_MASK_ISO_7BIT \
656 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
658 #define CATEGORY_MASK_ISO_8BIT \
659 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
661 #define CATEGORY_MASK_ISO_ELSE \
662 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
664 #define CATEGORY_MASK_ISO_ESCAPE \
665 (CATEGORY_MASK_ISO_7 \
666 | CATEGORY_MASK_ISO_7_TIGHT \
667 | CATEGORY_MASK_ISO_7_ELSE \
668 | CATEGORY_MASK_ISO_8_ELSE)
670 #define CATEGORY_MASK_ISO \
671 ( CATEGORY_MASK_ISO_7BIT \
672 | CATEGORY_MASK_ISO_8BIT \
673 | CATEGORY_MASK_ISO_ELSE)
675 #define CATEGORY_MASK_UTF_16 \
676 (CATEGORY_MASK_UTF_16_BE \
677 | CATEGORY_MASK_UTF_16_LE \
678 | CATEGORY_MASK_UTF_16_BE_NOSIG \
679 | CATEGORY_MASK_UTF_16_LE_NOSIG)
682 /* List of symbols `coding-category-xxx' ordered by priority. This
683 variable is exposed to Emacs Lisp. */
684 static Lisp_Object Vcoding_category_list
;
686 /* Table of coding categories (Lisp symbols). This variable is for
688 static Lisp_Object Vcoding_category_table
;
690 /* Table of coding-categories ordered by priority. */
691 static enum coding_category coding_priorities
[coding_category_max
];
693 /* Nth element is a coding context for the coding system bound to the
694 Nth coding category. */
695 static struct coding_system coding_categories
[coding_category_max
];
697 static int detected_mask
[coding_category_raw_text
] =
705 CATEGORY_MASK_UTF_16
,
706 CATEGORY_MASK_UTF_16
,
707 CATEGORY_MASK_UTF_16
,
708 CATEGORY_MASK_UTF_16
,
709 CATEGORY_MASK_UTF_16
,
710 CATEGORY_MASK_CHARSET
,
714 CATEGORY_MASK_EMACS_MULE
717 /*** Commonly used macros and functions ***/
720 #define min(a, b) ((a) < (b) ? (a) : (b))
723 #define max(a, b) ((a) > (b) ? (a) : (b))
726 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
728 attrs = CODING_ID_ATTRS (coding->id); \
729 eol_type = CODING_ID_EOL_TYPE (coding->id); \
730 if (VECTORP (eol_type)) \
732 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
736 /* Safely get one byte from the source text pointed by SRC which ends
737 at SRC_END, and set C to that byte. If there are not enough bytes
738 in the source, it jumps to `no_more_source'. The caller
739 should declare and set these variables appropriately in advance:
740 src, src_end, multibytep
743 #define ONE_MORE_BYTE(c) \
745 if (src == src_end) \
747 if (src_base < src) \
748 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
749 goto no_more_source; \
752 if (multibytep && (c & 0x80)) \
754 if ((c & 0xFE) != 0xC0) \
755 error ("Undecodable char found"); \
756 c = ((c & 1) << 6) | *src++; \
762 #define ONE_MORE_BYTE_NO_CHECK(c) \
765 if (multibytep && (c & 0x80)) \
767 if ((c & 0xFE) != 0xC0) \
768 error ("Undecodable char found"); \
769 c = ((c & 1) << 6) | *src++; \
774 /* Store a byte C in the place pointed by DST and increment DST to the
775 next free point, and increment PRODUCED_CHARS. The caller should
776 assure that C is 0..127, and declare and set the variable `dst'
777 appropriately in advance.
781 #define EMIT_ONE_ASCII_BYTE(c) \
788 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
790 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
792 produced_chars += 2; \
793 *dst++ = (c1), *dst++ = (c2); \
797 /* Store a byte C in the place pointed by DST and increment DST to the
798 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
799 nonzero, store in an appropriate multibyte form. The caller should
800 declare and set the variables `dst' and `multibytep' appropriately
803 #define EMIT_ONE_BYTE(c) \
810 ch = BYTE8_TO_CHAR (ch); \
811 CHAR_STRING_ADVANCE (ch, dst); \
818 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
820 #define EMIT_TWO_BYTES(c1, c2) \
822 produced_chars += 2; \
829 ch = BYTE8_TO_CHAR (ch); \
830 CHAR_STRING_ADVANCE (ch, dst); \
833 ch = BYTE8_TO_CHAR (ch); \
834 CHAR_STRING_ADVANCE (ch, dst); \
844 #define EMIT_THREE_BYTES(c1, c2, c3) \
846 EMIT_ONE_BYTE (c1); \
847 EMIT_TWO_BYTES (c2, c3); \
851 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
853 EMIT_TWO_BYTES (c1, c2); \
854 EMIT_TWO_BYTES (c3, c4); \
858 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
860 charset_map_loaded = 0; \
861 c = DECODE_CHAR (charset, code); \
862 if (charset_map_loaded) \
864 unsigned char *orig = coding->source; \
867 coding_set_source (coding); \
868 offset = coding->source - orig; \
870 src_base += offset; \
876 #define ASSURE_DESTINATION(bytes) \
878 if (dst + (bytes) >= dst_end) \
880 int more_bytes = charbuf_end - charbuf + (bytes); \
882 dst = alloc_destination (coding, more_bytes, dst); \
883 dst_end = coding->destination + coding->dst_bytes; \
/* Recompute CODING->source, the address of the source text, from
   CODING->src_object and CODING->src_pos_byte.  For a buffer source
   the address is derived from the buffer's text start (or from
   GAP_END_ADDR when CODING->src_pos is negative); for a string source
   it is derived from the string's data pointer.  */
890 coding_set_source (coding
)
891 struct coding_system
*coding
;
893 if (BUFFERP (coding
->src_object
))
/* A negative src_pos selects an address relative to GAP_END_ADDR
   instead of one relative to the start of the buffer text.  */
895 if (coding
->src_pos
< 0)
896 coding
->source
= GAP_END_ADDR
+ coding
->src_pos_byte
;
899 struct buffer
*buf
= XBUFFER (coding
->src_object
);
900 EMACS_INT beg_byte
= BUF_BEG_BYTE (buf
);
901 EMACS_INT gpt_byte
= BUF_GPT_BYTE (buf
);
902 unsigned char *beg_addr
= BUF_BEG_ADDR (buf
);
/* NOTE(review): the "- 1" presumably converts a 1-based byte position
   into an offset from BEG_ADDR -- confirm.  A position at or after the
   gap position is then shifted by the gap size.  */
904 coding
->source
= beg_addr
+ coding
->src_pos_byte
- 1;
905 if (coding
->src_pos_byte
>= gpt_byte
)
906 coding
->source
+= BUF_GAP_SIZE (buf
);
909 else if (STRINGP (coding
->src_object
))
911 coding
->source
= (XSTRING (coding
->src_object
)->data
912 + coding
->src_pos_byte
);
915 /* Otherwise, the source is C string and is never relocated
916 automatically. Thus we don't have to update anything. */
/* Recompute CODING->destination and CODING->dst_bytes when the
   destination object is a buffer.  The destination address is the
   buffer's text start plus CODING->dst_pos_byte - 1.  When
   CODING->src_pos is negative, CODING->dst_bytes is measured up to
   GAP_END_ADDR minus the not-yet-consumed source bytes
   (src_bytes - consumed); the other visible assignment measures it up
   to the destination buffer's gap end.  NOTE(review): the line that
   selects between the two assignments is not visible here -- confirm
   it is a plain `else'.  */
921 coding_set_destination (coding
)
922 struct coding_system
*coding
;
924 if (BUFFERP (coding
->dst_object
))
926 /* We are sure that coding->dst_pos_byte is before the gap of the
928 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
929 + coding
->dst_pos_byte
- 1);
930 if (coding
->src_pos
< 0)
931 coding
->dst_bytes
= (GAP_END_ADDR
932 - (coding
->src_bytes
- coding
->consumed
)
933 - coding
->destination
);
935 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
936 - coding
->destination
);
939 /* Otherwise, the destination is C string and is never relocated
940 automatically. Thus we don't have to update anything. */
/* Enlarge the destination area of CODING by BYTES bytes using
   xrealloc, and add BYTES to CODING->dst_bytes.  CODING->destination
   may point to a different address afterwards.  */
946 coding_alloc_by_realloc (coding
, bytes
)
947 struct coding_system
*coding
;
950 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
951 coding
->dst_bytes
+ bytes
);
952 coding
->dst_bytes
+= bytes
;
/* Make room for BYTES more bytes when the destination of CODING is a
   buffer.  NOTE(review): the call that actually enlarges the gap is
   not visible in this view; the comments below describe only the
   bookkeeping around it.  */
956 coding_alloc_by_making_gap (coding
, bytes
)
957 struct coding_system
*coding
;
960 if (BUFFERP (coding
->dst_object
)
961 && EQ (coding
->src_object
, coding
->dst_object
))
/* Source and destination are the same buffer: ADD is the number of
   source bytes not yet consumed.  */
963 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
/* Temporarily count those bytes as buffer text rather than gap ...  */
965 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
/* ... and undo that accounting afterwards.  */
967 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
971 Lisp_Object this_buffer
;
/* Different buffers: make the destination buffer current for the
   duration of the operation, then switch back to the original one.  */
973 this_buffer
= Fcurrent_buffer ();
974 set_buffer_internal (XBUFFER (coding
->dst_object
));
976 set_buffer_internal (XBUFFER (this_buffer
));
/* Obtain NBYTES more bytes of room in the destination of CODING.  DST
   is the current write pointer into the old destination.  A buffer
   destination is grown with coding_alloc_by_making_gap; the other
   visible call is coding_alloc_by_realloc.  CODING->result is set to
   CODING_RESULT_SUCCESS and DST is rebased onto the refreshed
   CODING->destination.  NOTE(review): the return statement is not
   visible here; presumably the rebased DST is returned -- confirm.  */
981 static unsigned char *
982 alloc_destination (coding
, nbytes
, dst
)
983 struct coding_system
*coding
;
/* Remember DST as an offset, because CODING->destination is
   reassigned below.  */
987 EMACS_INT offset
= dst
- coding
->destination
;
989 if (BUFFERP (coding
->dst_object
))
990 coding_alloc_by_making_gap (coding
, nbytes
);
992 coding_alloc_by_realloc (coding
, nbytes
);
993 coding
->result
= CODING_RESULT_SUCCESS
;
/* Refresh CODING->destination, then rebase DST onto it.  */
994 coding_set_destination (coding
);
995 dst
= coding
->destination
+ offset
;
1000 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1007 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1008 Check if a text is encoded in UTF-8. If it is, return
1009 CATEGORY_MASK_UTF_8, else return 0. */
1011 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1012 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1013 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1014 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1015 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1016 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1019 detect_coding_utf_8 (coding
, mask
)
1020 struct coding_system
*coding
;
1023 unsigned char *src
= coding
->source
, *src_base
= src
;
1024 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1025 int multibytep
= coding
->src_multibyte
;
1026 int consumed_chars
= 0;
1029 /* A coding system of this category is always ASCII compatible. */
1030 src
+= coding
->head_ascii
;
1034 int c
, c1
, c2
, c3
, c4
;
1037 if (UTF_8_1_OCTET_P (c
))
1040 if (! UTF_8_EXTRA_OCTET_P (c1
))
1042 if (UTF_8_2_OCTET_LEADING_P (c
))
1048 if (! UTF_8_EXTRA_OCTET_P (c2
))
1050 if (UTF_8_3_OCTET_LEADING_P (c
))
1056 if (! UTF_8_EXTRA_OCTET_P (c3
))
1058 if (UTF_8_4_OCTET_LEADING_P (c
))
1064 if (! UTF_8_EXTRA_OCTET_P (c4
))
1066 if (UTF_8_5_OCTET_LEADING_P (c
))
1073 *mask
&= ~CATEGORY_MASK_UTF_8
;
1079 *mask
&= CATEGORY_MASK_UTF_8
;
1085 decode_coding_utf_8 (coding
)
1086 struct coding_system
*coding
;
1088 unsigned char *src
= coding
->source
+ coding
->consumed
;
1089 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1090 unsigned char *src_base
;
1091 int *charbuf
= coding
->charbuf
;
1092 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1093 int consumed_chars
= 0, consumed_chars_base
;
1094 int multibytep
= coding
->src_multibyte
;
1095 Lisp_Object attr
, eol_type
, charset_list
;
1097 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1101 int c
, c1
, c2
, c3
, c4
, c5
;
1104 consumed_chars_base
= consumed_chars
;
1106 if (charbuf
>= charbuf_end
)
1110 if (UTF_8_1_OCTET_P(c1
))
1115 if (EQ (eol_type
, Qdos
))
1118 goto no_more_source
;
1122 else if (EQ (eol_type
, Qmac
))
1129 if (! UTF_8_EXTRA_OCTET_P (c2
))
1131 if (UTF_8_2_OCTET_LEADING_P (c1
))
1132 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1136 if (! UTF_8_EXTRA_OCTET_P (c3
))
1138 if (UTF_8_3_OCTET_LEADING_P (c1
))
1139 c
= (((c1
& 0xF) << 12)
1140 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1144 if (! UTF_8_EXTRA_OCTET_P (c4
))
1146 if (UTF_8_4_OCTET_LEADING_P (c1
))
1147 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1148 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1152 if (! UTF_8_EXTRA_OCTET_P (c5
))
1154 if (UTF_8_5_OCTET_LEADING_P (c1
))
1156 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1157 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1174 consumed_chars
= consumed_chars_base
;
1176 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1181 coding
->consumed_char
+= consumed_chars_base
;
1182 coding
->consumed
= src_base
- coding
->source
;
1183 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1188 encode_coding_utf_8 (coding
)
1189 struct coding_system
*coding
;
1191 int multibytep
= coding
->dst_multibyte
;
1192 int *charbuf
= coding
->charbuf
;
1193 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1194 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1195 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1196 int produced_chars
= 0;
1201 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1203 while (charbuf
< charbuf_end
)
1205 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1207 ASSURE_DESTINATION (safe_room
);
1209 CHAR_STRING_ADVANCE (c
, pend
);
1210 for (p
= str
; p
< pend
; p
++)
1216 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1218 while (charbuf
< charbuf_end
)
1220 ASSURE_DESTINATION (safe_room
);
1222 dst
+= CHAR_STRING (c
, dst
);
1226 coding
->result
= CODING_RESULT_SUCCESS
;
1227 coding
->produced_char
+= produced_chars
;
1228 coding
->produced
= dst
- coding
->destination
;
1233 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1234 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1235 Little Endian (otherwise). If it is, return
1236 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
/* Nonzero if bits 10..15 of VAL are 110110; for a 16-bit VAL that is
   the range 0xD800..0xDBFF (first half of a surrogate pair).  */
1239 #define UTF_16_HIGH_SURROGATE_P(val) \
1240 (((val) & 0xFC00) == 0xD800)
/* Nonzero if bits 10..15 of VAL are 110111; for a 16-bit VAL that is
   the range 0xDC00..0xDFFF (second half of a surrogate pair).  */
1242 #define UTF_16_LOW_SURROGATE_P(val) \
1243 (((val) & 0xFC00) == 0xDC00)
/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  VAL may be
   evaluated up to three times, so it must have no side effects.  */
1245 #define UTF_16_INVALID_P(val) \
1246 (((val) == 0xFFFE) \
1247 || ((val) == 0xFFFF) \
1248 || UTF_16_LOW_SURROGATE_P (val))
1252 detect_coding_utf_16 (coding
, mask
)
1253 struct coding_system
*coding
;
1256 unsigned char *src
= coding
->source
, *src_base
= src
;
1257 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1258 int multibytep
= coding
->src_multibyte
;
1259 int consumed_chars
= 0;
1265 if ((c1
== 0xFF) && (c2
== 0xFE))
1267 *mask
&= CATEGORY_MASK_UTF_16_LE
;
1270 else if ((c1
== 0xFE) && (c2
== 0xFF))
1272 *mask
&= CATEGORY_MASK_UTF_16_BE
;
1280 decode_coding_utf_16 (coding
)
1281 struct coding_system
*coding
;
1283 unsigned char *src
= coding
->source
+ coding
->consumed
;
1284 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1285 unsigned char *src_base
;
1286 int *charbuf
= coding
->charbuf
;
1287 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1288 int consumed_chars
= 0, consumed_chars_base
;
1289 int multibytep
= coding
->src_multibyte
;
1290 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1291 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1292 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1293 Lisp_Object attr
, eol_type
, charset_list
;
1295 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1297 if (bom
!= utf_16_without_bom
)
1305 if (bom
== utf_16_with_bom
)
1307 if (endian
== utf_16_big_endian
1308 ? c
!= 0xFFFE : c
!= 0xFEFF)
1310 /* We are sure that there's enough room at CHARBUF. */
1319 CODING_UTF_16_ENDIAN (coding
)
1320 = endian
= utf_16_big_endian
;
1321 else if (c
== 0xFEFF)
1322 CODING_UTF_16_ENDIAN (coding
)
1323 = endian
= utf_16_little_endian
;
1326 CODING_UTF_16_ENDIAN (coding
)
1327 = endian
= utf_16_big_endian
;
1331 CODING_UTF_16_BOM (coding
) = utf_16_with_bom
;
1339 consumed_chars_base
= consumed_chars
;
1341 if (charbuf
+ 2 >= charbuf_end
)
1346 c
= (endian
== utf_16_big_endian
1347 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1350 if (! UTF_16_LOW_SURROGATE_P (c
))
1352 if (endian
== utf_16_big_endian
)
1353 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1355 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1359 if (UTF_16_HIGH_SURROGATE_P (c
))
1360 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1366 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1367 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1373 if (UTF_16_HIGH_SURROGATE_P (c
))
1374 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1381 coding
->consumed_char
+= consumed_chars_base
;
1382 coding
->consumed
= src_base
- coding
->source
;
1383 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1387 encode_coding_utf_16 (coding
)
1388 struct coding_system
*coding
;
1390 int multibytep
= coding
->dst_multibyte
;
1391 int *charbuf
= coding
->charbuf
;
1392 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1393 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1394 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1396 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1397 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1398 int produced_chars
= 0;
1399 Lisp_Object attrs
, eol_type
, charset_list
;
1402 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1404 if (bom
== utf_16_with_bom
)
1406 ASSURE_DESTINATION (safe_room
);
1408 EMIT_TWO_BYTES (0xFF, 0xFE);
1410 EMIT_TWO_BYTES (0xFE, 0xFF);
1411 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1414 while (charbuf
< charbuf_end
)
1416 ASSURE_DESTINATION (safe_room
);
1418 if (c
>= MAX_UNICODE_CHAR
)
1419 c
= coding
->default_char
;
1424 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1426 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1433 c1
= (c
>> 10) + 0xD800;
1434 c2
= (c
& 0x3FF) + 0xDC00;
1436 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1438 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1441 coding
->result
= CODING_RESULT_SUCCESS
;
1442 coding
->produced
= dst
- coding
->destination
;
1443 coding
->produced_char
+= produced_chars
;
1448 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1450 /* Emacs' internal format for representation of multiple character
1451 sets is a kind of multi-byte encoding, i.e. characters are
1452 represented by variable-length sequences of one-byte codes.
1454 ASCII characters and control characters (e.g. `tab', `newline') are
1455 represented by one-byte sequences which are their ASCII codes, in
1456 the range 0x00 through 0x7F.
1458 8-bit characters of the range 0x80..0x9F are represented by
1459 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1462 8-bit characters of the range 0xA0..0xFF are represented by
1463 one-byte sequences which are their 8-bit code.
1465 The other characters are represented by a sequence of `base
1466 leading-code', optional `extended leading-code', and one or two
1467 `position-code's. The length of the sequence is determined by the
1468 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1469 whereas extended leading-code and position-code take the range 0xA0
1470 through 0xFF. See `charset.h' for more details about leading-code
1473 --- CODE RANGE of Emacs' internal format ---
1477 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1478 eight-bit-graphic 0xA0..0xBF
1479 ELSE 0x81..0x9D + [0xA0..0xFF]+
1480 ---------------------------------------------
1482 As this is the internal character representation, the format is
1483 usually not used externally (i.e. in a file or in a data sent to a
1484 process). But, it is possible to have a text externally in this
1485 format (i.e. by encoding by the coding system `emacs-mule').
1487 In that case, a sequence of one-byte codes has a slightly different
1490 At first, all characters in eight-bit-control are represented by
1491 one-byte sequences which are their 8-bit code.
1493 Next, character composition data are represented by the byte
1494 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1496 METHOD is 0xF0 plus one of composition method (enum
1497 composition_method),
1499 BYTES is 0xA0 plus a byte length of this composition data,
1501 CHARS is 0xA0 plus a number of characters composed by this
1504 COMPONENTs are characters of multibyte form or composition
1505 rules encoded by two-byte of ASCII codes.
1507 In addition, for backward compatibility, the following formats are
1508 also recognized as composition data on decoding.
1511 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1514 MSEQ is a multibyte form but in this special format:
1515 ASCII: 0xA0 ASCII_CODE+0x80,
1516 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1517 RULE is a one byte code of the range 0xA0..0xF0 that
1518 represents a composition rule.
/* Table indexed by a byte value (0..255).  The code in this file uses
   the entry for the first byte of an emacs-mule sequence as that
   sequence's length in bytes: emacs_mule_char switches on it, and
   detect_coding_emacs_mule compares it with the number of bytes it
   actually consumed.  NOTE(review): the table is not initialized in
   this part of the file -- confirm where it is filled in.  */
1521 char emacs_mule_bytes
[256];
1523 /* Leading-code followed by extended leading-code.  These are the
   four values (0x9A..0x9D) that EMACS_MULE_LEADING_CODES stores, as
   literals, in codes[0] when it also stores the charset id in
   codes[1].  */
1524 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1525 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1526 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1527 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1531 emacs_mule_char (coding
, composition
, nbytes
, nchars
)
1532 struct coding_system
*coding
;
1534 int *nbytes
, *nchars
;
1536 unsigned char *src
= coding
->source
+ coding
->consumed
;
1537 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1538 int multibytep
= coding
->src_multibyte
;
1539 unsigned char *src_base
= src
;
1540 struct charset
*charset
;
1543 int consumed_chars
= 0;
1554 *nbytes
= src
- src_base
;
1555 *nchars
= consumed_chars
;
1560 switch (emacs_mule_bytes
[c
])
1563 if (! (charset
= emacs_mule_charset
[c
]))
1570 if (c
== LEADING_CODE_PRIVATE_11
1571 || c
== LEADING_CODE_PRIVATE_12
)
1574 if (! (charset
= emacs_mule_charset
[c
]))
1581 if (! (charset
= emacs_mule_charset
[c
]))
1584 code
= (c
& 0x7F) << 7;
1591 if (! (charset
= emacs_mule_charset
[c
]))
1594 code
= (c
& 0x7F) << 7;
1601 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
) ? charset_ascii
1602 : code
< 0xA0 ? charset_8_bit_control
1603 : charset_8_bit_graphic
);
1609 c
= DECODE_CHAR (charset
, code
);
1612 *nbytes
= src
- src_base
;
1613 *nchars
= consumed_chars
;
1624 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1625 Check if a text is encoded in `emacs-mule'. */
1628 detect_coding_emacs_mule (coding
, mask
)
1629 struct coding_system
*coding
;
1632 unsigned char *src
= coding
->source
, *src_base
= src
;
1633 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1634 int multibytep
= coding
->src_multibyte
;
1635 int consumed_chars
= 0;
1639 /* A coding system of this category is always ASCII compatible. */
1640 src
+= coding
->head_ascii
;
1648 /* Perhaps the start of a composite character. We simply skip
1649 it because analyzing it is too heavy for detection. But,
1650 at least, we check that the composite character
1651 consists of more than 4 bytes. */
1652 unsigned char *src_base
;
1662 if (src
- src_base
<= 4)
1672 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1677 unsigned char *src_base
= src
- 1;
1684 if (src
- src_base
!= emacs_mule_bytes
[*src_base
])
1689 *mask
&= ~CATEGORY_MASK_EMACS_MULE
;
1695 *mask
&= CATEGORY_MASK_EMACS_MULE
;
1700 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1702 /* Decode a character represented as a component of composition
1703 sequence of Emacs 20/21 style at SRC. Set C to that character and
1704 update SRC to the head of next character (or an encoded composition
1705 rule). If SRC doesn't point to a composition component, set C to -1.
1706 If SRC points to an invalid byte sequence, global exit by a return
1709 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1713 int nbytes, nchars; \
1715 if (src == src_end) \
1717 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \
1722 goto invalid_code; \
1726 consumed_chars += nchars; \
1731 /* Decode a composition rule represented as a component of composition
1732 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC
1733 points to an invalid byte sequence, set C to -1. */
1735 #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \
1737 int c, gref, nref; \
1739 if (src < src_end) \
1740 goto invalid_code; \
1741 ONE_MORE_BYTE_NO_CHECK (c); \
1743 if (c < 0 || c >= 81) \
1744 goto invalid_code; \
1746 gref = c / 9, nref = c % 9; \
1747 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1751 #define ADD_COMPOSITION_DATA(buf, method, nchars) \
1754 *buf++ = coding->produced_char + char_offset; \
1755 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1761 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1763 /* Emacs 21 style format. The first three bytes at SRC are \
1764 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1765 the byte length of this composition information, CHARS is the \
1766 number of characters composed by this composition. */ \
1767 enum composition_method method = c - 0xF0; \
1768 int consumed_chars_limit; \
1769 int nbytes, nchars; \
1771 ONE_MORE_BYTE (c); \
1772 nbytes = c - 0xA0; \
1774 goto invalid_code; \
1775 ONE_MORE_BYTE (c); \
1776 nchars = c - 0xA0; \
1777 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1778 consumed_chars_limit = consumed_chars_base + nbytes; \
1779 if (method != COMPOSITION_RELATIVE) \
1782 while (consumed_chars < consumed_chars_limit) \
1784 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1785 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \
1787 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1789 if (consumed_chars < consumed_chars_limit) \
1790 goto invalid_code; \
1795 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1797 /* Emacs 20 style format for relative composition. */ \
1798 /* Store multibyte form of characters to be composed. */ \
1799 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1800 int *buf = components; \
1804 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1805 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1806 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1808 goto invalid_code; \
1809 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1810 for (j = 0; j < i; j++) \
1811 *charbuf++ = components[j]; \
1815 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1817 /* Emacs 20 style format for rule-base composition. */ \
1818 /* Store multibyte form of characters to be composed. */ \
1819 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1820 int *buf = components; \
1823 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1824 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1826 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \
1827 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1829 if (i < 1 || (buf - components) % 2 == 0) \
1830 goto invalid_code; \
1831 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1832 goto no_more_source; \
1833 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1834 for (j = 0; j < i; j++) \
1835 *charbuf++ = components[j]; \
1836 for (j = 0; j < i; j += 2) \
1837 *charbuf++ = components[j]; \
1842 decode_coding_emacs_mule (coding
)
1843 struct coding_system
*coding
;
1845 unsigned char *src
= coding
->source
+ coding
->consumed
;
1846 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1847 unsigned char *src_base
;
1848 int *charbuf
= coding
->charbuf
;
1849 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1850 int consumed_chars
= 0, consumed_chars_base
;
1851 int char_offset
= 0;
1852 int multibytep
= coding
->src_multibyte
;
1853 Lisp_Object attrs
, eol_type
, charset_list
;
1855 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1862 consumed_chars_base
= consumed_chars
;
1864 if (charbuf
>= charbuf_end
)
1873 if (EQ (eol_type
, Qdos
))
1876 goto no_more_source
;
1880 else if (EQ (eol_type
, Qmac
))
1888 if (charbuf
+ 5 + (MAX_COMPOSITION_COMPONENTS
* 2) - 1 > charbuf_end
)
1891 if (c
- 0xF0 >= COMPOSITION_RELATIVE
1892 && c
- 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS
)
1893 DECODE_EMACS_MULE_21_COMPOSITION (c
);
1895 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
1897 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
1901 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
1905 c
= emacs_mule_char (coding
, 0, &nbytes
, &nchars
);
1919 consumed_chars
= consumed_chars_base
;
1921 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1926 coding
->consumed_char
+= consumed_chars_base
;
1927 coding
->consumed
= src_base
- coding
->source
;
1928 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1932 #define EMACS_MULE_LEADING_CODES(id, codes) \
1935 codes[0] = id, codes[1] = 0; \
1936 else if (id < 0xE0) \
1937 codes[0] = 0x9A, codes[1] = id; \
1938 else if (id < 0xF0) \
1939 codes[0] = 0x9B, codes[1] = id; \
1940 else if (id < 0xF5) \
1941 codes[0] = 0x9C, codes[1] = id; \
1943 codes[0] = 0x9D, codes[1] = id; \
1948 encode_coding_emacs_mule (coding
)
1949 struct coding_system
*coding
;
1951 int multibytep
= coding
->dst_multibyte
;
1952 int *charbuf
= coding
->charbuf
;
1953 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1954 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1955 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1957 int produced_chars
= 0;
1958 Lisp_Object attrs
, eol_type
, charset_list
;
1961 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1963 while (charbuf
< charbuf_end
)
1965 ASSURE_DESTINATION (safe_room
);
1967 if (ASCII_CHAR_P (c
))
1968 EMIT_ONE_ASCII_BYTE (c
);
1971 struct charset
*charset
;
1975 unsigned char leading_codes
[2];
1977 charset
= char_charset (c
, charset_list
, &code
);
1980 c
= coding
->default_char
;
1981 if (ASCII_CHAR_P (c
))
1983 EMIT_ONE_ASCII_BYTE (c
);
1986 charset
= char_charset (c
, charset_list
, &code
);
1988 dimension
= CHARSET_DIMENSION (charset
);
1989 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
1990 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
1991 EMIT_ONE_BYTE (leading_codes
[0]);
1992 if (leading_codes
[1])
1993 EMIT_ONE_BYTE (leading_codes
[1]);
1995 EMIT_ONE_BYTE (code
);
1998 EMIT_ONE_BYTE (code
>> 8);
1999 EMIT_ONE_BYTE (code
& 0xFF);
2003 coding
->result
= CODING_RESULT_SUCCESS
;
2004 coding
->produced_char
+= produced_chars
;
2005 coding
->produced
= dst
- coding
->destination
;
2010 /*** 7. ISO2022 handlers ***/
2012 /* The following note describes the coding system ISO2022 briefly.
2013 Since the intention of this note is to help understand the
2014 functions in this file, some parts are NOT ACCURATE or OVERLY
2015 SIMPLIFIED. For thorough understanding, please refer to the
2016 original document of ISO2022.
2018 ISO2022 provides many mechanisms to encode several character sets
2019 in 7-bit and 8-bit environments. For 7-bit environments, all text
2020 is encoded using bytes less than 128. This may make the encoded
2021 text a little bit longer, but the text passes more easily through
2022 several gateways, some of which strip off the MSB (Most Significant Bit).
2024 There are two kinds of character sets: control character set and
2025 graphic character set. The former contains control characters such
2026 as `newline' and `escape' to provide control functions (control
2027 functions are also provided by escape sequences). The latter
2028 contains graphic characters such as 'A' and '-'. Emacs recognizes
2029 two control character sets and many graphic character sets.
2031 Graphic character sets are classified into one of the following
2032 four classes, according to the number of bytes (DIMENSION) and
2033 number of characters in one dimension (CHARS) of the set:
2034 - DIMENSION1_CHARS94
2035 - DIMENSION1_CHARS96
2036 - DIMENSION2_CHARS94
2037 - DIMENSION2_CHARS96
2039 In addition, each character set is assigned an identification tag,
2040 unique for each set, called "final character" (denoted as <F>
2041 hereafter). The <F> of each character set is decided by ECMA(*)
2042 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2043 (0x30..0x3F are for private use only).
2045 Note (*): ECMA = European Computer Manufacturers Association
2047 Here are examples of graphic character set [NAME(<F>)]:
2048 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2049 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2050 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2051 o DIMENSION2_CHARS96 -- none for the moment
2053 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2054 C0 [0x00..0x1F] -- control character plane 0
2055 GL [0x20..0x7F] -- graphic character plane 0
2056 C1 [0x80..0x9F] -- control character plane 1
2057 GR [0xA0..0xFF] -- graphic character plane 1
2059 A control character set is directly designated and invoked to C0 or
2060 C1 by an escape sequence. The most common case is that:
2061 - ISO646's control character set is designated/invoked to C0, and
2062 - ISO6429's control character set is designated/invoked to C1,
2063 and usually these designations/invocations are omitted in encoded
2064 text. In a 7-bit environment, only C0 can be used, and a control
2065 character for C1 is encoded by an appropriate escape sequence to
2066 fit into the environment. All control characters for C1 are
2067 defined to have corresponding escape sequences.
2069 A graphic character set is at first designated to one of four
2070 graphic registers (G0 through G3), then these graphic registers are
2071 invoked to GL or GR. These designations and invocations can be
2072 done independently. The most common case is that G0 is invoked to
2073 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2074 these invocations and designations are omitted in encoded text.
2075 In a 7-bit environment, only GL can be used.
2077 When a graphic character set of CHARS94 is invoked to GL, codes
2078 0x20 and 0x7F of the GL area work as control characters SPACE and
2079 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2082 There are two ways of invocation: locking-shift and single-shift.
2083 With locking-shift, the invocation lasts until the next different
2084 invocation, whereas with single-shift, the invocation affects the
2085 following character only and doesn't affect the locking-shift
2086 state. Invocations are done by the following control characters or
2089 ----------------------------------------------------------------------
2090 abbrev function cntrl escape seq description
2091 ----------------------------------------------------------------------
2092 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2093 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2094 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2095 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2096 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2097 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2098 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2099 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2100 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2101 ----------------------------------------------------------------------
2102 (*) These are not used by any known coding system.
2104 Control characters for these functions are defined by macros
2105 ISO_CODE_XXX in `coding.h'.
2107 Designations are done by the following escape sequences:
2108 ----------------------------------------------------------------------
2109 escape sequence description
2110 ----------------------------------------------------------------------
2111 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2112 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2113 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2114 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2115 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2116 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2117 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2118 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2119 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2120 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2121 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2122 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2123 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2124 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2125 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2126 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2127 ----------------------------------------------------------------------
2129 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2130 of dimension 1, chars 94, and final character <F>, etc...
2132 Note (*): Although these designations are not allowed in ISO2022,
2133 Emacs accepts them on decoding, and produces them on encoding
2134 CHARS96 character sets in a coding system which is characterized as
2135 7-bit environment, non-locking-shift, and non-single-shift.
2137 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2138 '(' must be omitted. We refer to this as "short-form" hereafter.
2140 Now you may notice that there are a lot of ways for encoding the
2141 same multilingual text in ISO2022. Actually, there exist many
2142 coding systems such as Compound Text (used in X11's inter-client
2143 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2144 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
2145 localized platforms), and all of these are variants of ISO2022.
2147 In addition to the above, Emacs handles two more kinds of escape
2148 sequences: ISO6429's direction specification and Emacs' private
2149 sequence for specifying character composition.
2151 ISO6429's direction specification takes the following form:
2152 o CSI ']' -- end of the current direction
2153 o CSI '0' ']' -- end of the current direction
2154 o CSI '1' ']' -- start of left-to-right text
2155 o CSI '2' ']' -- start of right-to-left text
2156 The control character CSI (0x9B: control sequence introducer) is
2157 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2159 Character composition specification takes the following form:
2160 o ESC '0' -- start relative composition
2161 o ESC '1' -- end composition
2162 o ESC '2' -- start rule-base composition (*)
2163 o ESC '3' -- start relative composition with alternate chars (**)
2164 o ESC '4' -- start rule-base composition with alternate chars (**)
2165 Since these are not standard escape sequences of any ISO standard,
2166 the use of them with these meanings is restricted to Emacs only.
2168 (*) This form is used only in Emacs 20.5 and the older versions,
2169 but the newer versions can safely decode it.
2170 (**) This form is used only in Emacs 21.1 and the newer versions,
2171 and the older versions can't decode it.
2173 Here's a list of example usages of these composition escape
2174 sequences (categorized by `enum composition_method').
2176 COMPOSITION_RELATIVE:
2177 ESC 0 CHAR [ CHAR ] ESC 1
2178 COMPOSITION_WITH_RULE:
2179 ESC 2 CHAR [ RULE CHAR ] ESC 1
2180 COMPOSITION_WITH_ALTCHARS:
2181 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2182 COMPOSITION_WITH_RULE_ALTCHARS:
2183 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table with one `enum iso_code_class_type' entry per byte value
   (0..255).  NOTE(review): presumably maps each byte to its ISO 2022
   code class; neither its initialization nor its use is visible in
   this part of the file -- confirm.  */
2185 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if charset ID is within CODING's safe_charsets table
   (ID <= max_charset_id) and its entry there is non-negative.
   setup_iso_safe_charsets stores a graphic register number in the
   entry of every charset in the coding system's charset list.
   NOTE(review): entries of other charsets presumably keep a fill value
   that reads as negative through `char *' -- the fill value is not
   visible here, confirm.  Both CODING and ID are evaluated twice.  */
2187 #define SAFE_CHARSET_P(coding, id) \
2188 ((id) <= (coding)->max_charset_id \
2189 && (coding)->safe_charsets[id] >= 0)
/* Nonzero if CODING_ISO_INITIAL for index 1 of the coding system
   registered for CATEGORY in coding_categories is non-negative.
   NOTE(review): index 1 presumably denotes graphic register G1, i.e.
   some charset is initially designated there -- confirm against the
   definition of CODING_ISO_INITIAL.  */
2192 #define SHIFT_OUT_OK(category) \
2193 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2196 setup_iso_safe_charsets (Lisp_Object attrs
)
2198 Lisp_Object charset_list
, safe_charsets
;
2199 Lisp_Object request
;
2200 Lisp_Object reg_usage
;
2203 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2206 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2207 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2208 && ! EQ (charset_list
, Viso_2022_charset_list
))
2210 CODING_ATTR_CHARSET_LIST (attrs
)
2211 = charset_list
= Viso_2022_charset_list
;
2212 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2215 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2219 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2221 int id
= XINT (XCAR (tail
));
2222 if (max_charset_id
< id
)
2223 max_charset_id
= id
;
2226 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2228 request
= AREF (attrs
, coding_attr_iso_request
);
2229 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2230 reg94
= XINT (XCAR (reg_usage
));
2231 reg96
= XINT (XCDR (reg_usage
));
2233 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2237 struct charset
*charset
;
2240 charset
= CHARSET_FROM_ID (XINT (id
));
2241 reg
= Fcdr (Fassq (request
, id
));
2243 XSTRING (safe_charsets
)->data
[XINT (id
)] = XINT (reg
);
2244 else if (charset
->iso_chars_96
)
2247 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg96
;
2252 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg94
;
2255 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2259 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2260 Check if a text is encoded in ISO2022. If it is, returns an
2261 integer in which appropriate flag bits any of:
2263 CATEGORY_MASK_ISO_7_TIGHT
2264 CATEGORY_MASK_ISO_8_1
2265 CATEGORY_MASK_ISO_8_2
2266 CATEGORY_MASK_ISO_7_ELSE
2267 CATEGORY_MASK_ISO_8_ELSE
2268 are set. If a code which should never appear in ISO2022 is found,
2272 detect_coding_iso_2022 (coding
, mask
)
2273 struct coding_system
*coding
;
2276 unsigned char *src
= coding
->source
, *src_base
= src
;
2277 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2278 int multibytep
= coding
->src_multibyte
;
2279 int mask_iso
= CATEGORY_MASK_ISO
;
2280 int mask_found
= 0, mask_8bit_found
= 0;
2281 int reg
[4], shift_out
= 0, single_shifting
= 0;
2284 int consumed_chars
= 0;
2287 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2289 struct coding_system
*this = &(coding_categories
[i
]);
2290 Lisp_Object attrs
, val
;
2292 attrs
= CODING_ID_ATTRS (this->id
);
2293 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2294 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2295 setup_iso_safe_charsets (attrs
);
2296 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2297 this->max_charset_id
= XSTRING (val
)->size
- 1;
2298 this->safe_charsets
= (char *) XSTRING (val
)->data
;
2301 /* A coding system of this category is always ASCII compatible. */
2302 src
+= coding
->head_ascii
;
2304 reg
[0] = charset_ascii
, reg
[1] = reg
[2] = reg
[3] = -1;
2305 while (mask_iso
&& src
< src_end
)
2311 if (inhibit_iso_escape_detection
)
2313 single_shifting
= 0;
2315 if (c
>= '(' && c
<= '/')
2317 /* Designation sequence for a charset of dimension 1. */
2319 if (c1
< ' ' || c1
>= 0x80
2320 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2321 /* Invalid designation sequence. Just ignore. */
2323 reg
[(c
- '(') % 4] = id
;
2327 /* Designation sequence for a charset of dimension 2. */
2329 if (c
>= '@' && c
<= 'B')
2330 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2331 reg
[0] = id
= iso_charset_table
[1][0][c
];
2332 else if (c
>= '(' && c
<= '/')
2335 if (c1
< ' ' || c1
>= 0x80
2336 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2337 /* Invalid designation sequence. Just ignore. */
2339 reg
[(c
- '(') % 4] = id
;
2342 /* Invalid designation sequence. Just ignore. */
2345 else if (c
== 'N' || c
== 'O')
2347 /* ESC <Fe> for SS2 or SS3. */
2348 mask_iso
&= CATEGORY_MASK_ISO_7_ELSE
;
2351 else if (c
>= '0' && c
<= '4')
2353 /* ESC <Fp> for start/end composition. */
2354 mask_found
|= CATEGORY_MASK_ISO
;
2359 /* Invalid escape sequence. */
2360 mask_iso
&= ~CATEGORY_MASK_ISO_ESCAPE
;
2364 /* We found a valid designation sequence for CHARSET. */
2365 mask_iso
&= ~CATEGORY_MASK_ISO_8BIT
;
2366 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2368 mask_found
|= CATEGORY_MASK_ISO_7
;
2370 mask_iso
&= ~CATEGORY_MASK_ISO_7
;
2371 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2373 mask_found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2375 mask_iso
&= ~CATEGORY_MASK_ISO_7_TIGHT
;
2376 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2378 mask_found
|= CATEGORY_MASK_ISO_7_ELSE
;
2380 mask_iso
&= ~CATEGORY_MASK_ISO_7_ELSE
;
2381 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2383 mask_found
|= CATEGORY_MASK_ISO_8_ELSE
;
2385 mask_iso
&= ~CATEGORY_MASK_ISO_8_ELSE
;
2389 if (inhibit_iso_escape_detection
)
2391 single_shifting
= 0;
2394 || SHIFT_OUT_OK (coding_category_iso_7_else
)
2395 || SHIFT_OUT_OK (coding_category_iso_8_else
)))
2397 /* Locking shift out. */
2398 mask_iso
&= ~CATEGORY_MASK_ISO_7BIT
;
2399 mask_found
|= CATEGORY_MASK_ISO_ELSE
;
2404 if (inhibit_iso_escape_detection
)
2406 single_shifting
= 0;
2409 /* Locking shift in. */
2410 mask_iso
&= ~CATEGORY_MASK_ISO_7BIT
;
2411 mask_found
|= CATEGORY_MASK_ISO_ELSE
;
2416 single_shifting
= 0;
2420 int newmask
= CATEGORY_MASK_ISO_8_ELSE
;
2422 if (inhibit_iso_escape_detection
)
2424 if (c
!= ISO_CODE_CSI
)
2426 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2427 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2428 newmask
|= CATEGORY_MASK_ISO_8_1
;
2429 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2430 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2431 newmask
|= CATEGORY_MASK_ISO_8_2
;
2432 single_shifting
= 1;
2434 if (VECTORP (Vlatin_extra_code_table
)
2435 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2437 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2438 & CODING_ISO_FLAG_LATIN_EXTRA
)
2439 newmask
|= CATEGORY_MASK_ISO_8_1
;
2440 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2441 & CODING_ISO_FLAG_LATIN_EXTRA
)
2442 newmask
|= CATEGORY_MASK_ISO_8_2
;
2444 mask_iso
&= newmask
;
2445 mask_found
|= newmask
;
2452 single_shifting
= 0;
2457 single_shifting
= 0;
2458 mask_8bit_found
= 1;
2459 if (VECTORP (Vlatin_extra_code_table
)
2460 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2464 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2465 & CODING_ISO_FLAG_LATIN_EXTRA
)
2466 newmask
|= CATEGORY_MASK_ISO_8_1
;
2467 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2468 & CODING_ISO_FLAG_LATIN_EXTRA
)
2469 newmask
|= CATEGORY_MASK_ISO_8_2
;
2470 mask_iso
&= newmask
;
2471 mask_found
|= newmask
;
2478 mask_iso
&= ~(CATEGORY_MASK_ISO_7BIT
2479 | CATEGORY_MASK_ISO_7_ELSE
);
2480 mask_found
|= CATEGORY_MASK_ISO_8_1
;
2481 mask_8bit_found
= 1;
/* Check the length of succeeding codes of the range
   0xA0..0xFF.  If the byte length is odd, we exclude
   CATEGORY_MASK_ISO_8_2.  We can check this only
   when we are not single shifting.  */
2486 if (!single_shifting
2487 && mask_iso
& CATEGORY_MASK_ISO_8_2
)
2490 while (src
< src_end
)
2498 if (i
& 1 && src
< src_end
)
2499 mask_iso
&= ~CATEGORY_MASK_ISO_8_2
;
2501 mask_found
|= CATEGORY_MASK_ISO_8_2
;
2510 *mask
&= ~CATEGORY_MASK_ISO
;
2515 *mask
&= mask_iso
& mask_found
;
2516 if (! mask_8bit_found
)
2517 *mask
&= ~(CATEGORY_MASK_ISO_8BIT
| CATEGORY_MASK_ISO_8_ELSE
);
2522 /* Set designation state into CODING. */
2523 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2527 if (final < '0' || final >= 128 \
2528 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2529 || !SAFE_CHARSET_P (coding, id)) \
2531 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2532 goto invalid_code; \
2534 prev = CODING_ISO_DESIGNATION (coding, reg); \
2535 CODING_ISO_DESIGNATION (coding, reg) = id; \
2536 /* If there was an invalid designation to REG previously, and this \
2537 designation is ASCII to REG, we should keep this designation \
2539 if (prev == -2 && id == charset_ascii) \
2540 goto invalid_code; \
2544 #define MAYBE_FINISH_COMPOSITION() \
2547 if (composition_state == COMPOSING_NO) \
2549 /* It is assured that we have enough room for producing \
2550 characters stored in the table `components'. */ \
2551 if (charbuf + component_idx > charbuf_end) \
2552 goto no_more_source; \
2553 composition_state = COMPOSING_NO; \
2554 if (method == COMPOSITION_RELATIVE \
2555 || method == COMPOSITION_WITH_ALTCHARS) \
2557 for (i = 0; i < component_idx; i++) \
2558 *charbuf++ = components[i]; \
2559 char_offset += component_idx; \
2563 for (i = 0; i < component_idx; i += 2) \
2564 *charbuf++ = components[i]; \
2565 char_offset += (component_idx / 2) + 1; \
2570 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2571 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2572 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2573 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2574 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2577 #define DECODE_COMPOSITION_START(c1) \
2580 && composition_state == COMPOSING_COMPONENT_CHAR) \
2582 component_len = component_idx; \
2583 composition_state = COMPOSING_CHAR; \
2589 MAYBE_FINISH_COMPOSITION (); \
2590 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2591 goto no_more_source; \
2592 for (p = src; p < src_end - 1; p++) \
2593 if (*p == ISO_CODE_ESC && p[1] == '1') \
2595 if (p == src_end - 1) \
2597 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2598 goto invalid_code; \
2599 goto no_more_source; \
2602 /* This is surely the start of a composition. */ \
2603 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2604 : c1 == '2' ? COMPOSITION_WITH_RULE \
2605 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2606 : COMPOSITION_WITH_RULE_ALTCHARS); \
2607 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2608 : COMPOSING_COMPONENT_CHAR); \
2609 component_idx = component_len = 0; \
/* Handle composition end sequence ESC 1.  */
2616 #define DECODE_COMPOSITION_END() \
2618 int nchars = (component_len > 0 ? component_idx - component_len \
2619 : method == COMPOSITION_RELATIVE ? component_idx \
2620 : (component_idx + 1) / 2); \
2622 int *saved_charbuf = charbuf; \
2624 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2625 if (method != COMPOSITION_RELATIVE) \
2627 if (component_len == 0) \
2628 for (i = 0; i < component_idx; i++) \
2629 *charbuf++ = components[i]; \
2631 for (i = 0; i < component_len; i++) \
2632 *charbuf++ = components[i]; \
2633 *saved_charbuf = saved_charbuf - charbuf; \
2635 if (method == COMPOSITION_WITH_RULE) \
2636 for (i = 0; i < component_idx; i += 2, char_offset++) \
2637 *charbuf++ = components[i]; \
2639 for (i = component_len; i < component_idx; i++, char_offset++) \
2640 *charbuf++ = components[i]; \
2641 coding->annotated = 1; \
2642 composition_state = COMPOSING_NO; \
2646 /* Decode a composition rule from the byte C1 (and maybe one more byte
2647 from SRC) and store one encoded composition rule in
2648 coding->cmp_data. */
2650 #define DECODE_COMPOSITION_RULE(c1) \
2653 if (c1 < 81) /* old format (before ver.21) */ \
2655 int gref = (c1) / 9; \
2656 int nref = (c1) % 9; \
2657 if (gref == 4) gref = 10; \
2658 if (nref == 4) nref = 10; \
2659 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2661 else if (c1 < 93) /* new format (after ver.21) */ \
2663 ONE_MORE_BYTE (c2); \
2664 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2671 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2674 decode_coding_iso_2022 (coding
)
2675 struct coding_system
*coding
;
2677 unsigned char *src
= coding
->source
+ coding
->consumed
;
2678 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2679 unsigned char *src_base
;
2680 int *charbuf
= coding
->charbuf
;
2681 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- 4;
2682 int consumed_chars
= 0, consumed_chars_base
;
2683 int char_offset
= 0;
2684 int multibytep
= coding
->src_multibyte
;
2685 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2686 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2687 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2688 struct charset
*charset
;
2690 /* For handling composition sequence. */
2691 #define COMPOSING_NO 0
2692 #define COMPOSING_CHAR 1
2693 #define COMPOSING_RULE 2
2694 #define COMPOSING_COMPONENT_CHAR 3
2695 #define COMPOSING_COMPONENT_RULE 4
2697 int composition_state
= COMPOSING_NO
;
2698 enum composition_method method
;
2699 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2702 Lisp_Object attrs
, eol_type
, charset_list
;
2704 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2705 setup_iso_safe_charsets (attrs
);
2712 consumed_chars_base
= consumed_chars
;
2714 if (charbuf
>= charbuf_end
)
2719 /* We produce no character or one character. */
2720 switch (iso_code_class
[c1
])
2722 case ISO_0x20_or_0x7F
:
2723 if (composition_state
!= COMPOSING_NO
)
2725 if (composition_state
== COMPOSING_RULE
2726 || composition_state
== COMPOSING_COMPONENT_RULE
)
2728 DECODE_COMPOSITION_RULE (c1
);
2729 components
[component_idx
++] = c1
;
2730 composition_state
--;
2733 else if (method
== COMPOSITION_WITH_RULE
)
2734 composition_state
= COMPOSING_RULE
;
2735 else if (method
== COMPOSITION_WITH_RULE_ALTCHARS
2736 && composition_state
== COMPOSING_COMPONENT_CHAR
)
2737 composition_state
= COMPOSING_COMPONENT_CHAR
;
2739 if (charset_id_0
< 0
2740 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2742 /* This is SPACE or DEL. */
2743 charset
= CHARSET_FROM_ID (charset_ascii
);
2746 /* This is a graphic character, we fall down ... */
2748 case ISO_graphic_plane_0
:
2749 if (composition_state
== COMPOSING_RULE
)
2751 DECODE_COMPOSITION_RULE (c1
);
2752 components
[component_idx
++] = c1
;
2753 composition_state
= COMPOSING_CHAR
;
2755 charset
= CHARSET_FROM_ID (charset_id_0
);
2758 case ISO_0xA0_or_0xFF
:
2759 if (charset_id_1
< 0
2760 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2761 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2763 /* This is a graphic character, we fall down ... */
2765 case ISO_graphic_plane_1
:
2766 if (charset_id_1
< 0)
2768 charset
= CHARSET_FROM_ID (charset_id_1
);
2771 case ISO_carriage_return
:
2774 if (EQ (eol_type
, Qdos
))
2777 goto no_more_source
;
2781 else if (EQ (eol_type
, Qmac
))
2787 MAYBE_FINISH_COMPOSITION ();
2788 charset
= CHARSET_FROM_ID (charset_ascii
);
2792 MAYBE_FINISH_COMPOSITION ();
2796 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2797 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2799 CODING_ISO_INVOCATION (coding
, 0) = 1;
2800 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2804 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2806 CODING_ISO_INVOCATION (coding
, 0) = 0;
2807 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2810 case ISO_single_shift_2_7
:
2811 case ISO_single_shift_2
:
2812 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2814 /* SS2 is handled as an escape sequence of ESC 'N' */
2816 goto label_escape_sequence
;
2818 case ISO_single_shift_3
:
2819 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
/* SS3 is handled as an escape sequence of ESC 'O' */
2823 goto label_escape_sequence
;
2825 case ISO_control_sequence_introducer
:
2826 /* CSI is handled as an escape sequence of ESC '[' ... */
2828 goto label_escape_sequence
;
2832 label_escape_sequence
:
2833 /* Escape sequences handled here are invocation,
2834 designation, direction specification, and character
2835 composition specification. */
2838 case '&': /* revision of following character set */
2840 if (!(c1
>= '@' && c1
<= '~'))
2843 if (c1
!= ISO_CODE_ESC
)
2846 goto label_escape_sequence
;
2848 case '$': /* designation of 2-byte character set */
2849 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2852 if (c1
>= '@' && c1
<= 'B')
2853 { /* designation of JISX0208.1978, GB2312.1980,
2855 DECODE_DESIGNATION (0, 2, 0, c1
);
2857 else if (c1
>= 0x28 && c1
<= 0x2B)
2858 { /* designation of DIMENSION2_CHARS94 character set */
2860 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
2862 else if (c1
>= 0x2C && c1
<= 0x2F)
2863 { /* designation of DIMENSION2_CHARS96 character set */
2865 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
2869 /* We must update these variables now. */
2870 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2871 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2874 case 'n': /* invocation of locking-shift-2 */
2875 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2876 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
2878 CODING_ISO_INVOCATION (coding
, 0) = 2;
2879 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2882 case 'o': /* invocation of locking-shift-3 */
2883 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2884 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
2886 CODING_ISO_INVOCATION (coding
, 0) = 3;
2887 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2890 case 'N': /* invocation of single-shift-2 */
2891 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
2892 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
2894 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
2896 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
2900 case 'O': /* invocation of single-shift-3 */
2901 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
2902 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
2904 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
2906 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
2910 case '0': case '2': case '3': case '4': /* start composition */
2911 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
2913 DECODE_COMPOSITION_START (c1
);
2916 case '1': /* end composition */
2917 if (composition_state
== COMPOSING_NO
)
2919 DECODE_COMPOSITION_END ();
2922 case '[': /* specification of direction */
2923 if (! CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
)
/* For the moment, nested direction is not supported.
   So, `coding->mode & CODING_MODE_DIRECTION' zero means
   left-to-right, and nonzero means right-to-left.  */
2931 case ']': /* end of the current direction */
2932 coding
->mode
&= ~CODING_MODE_DIRECTION
;
2934 case '0': /* end of the current direction */
2935 case '1': /* start of left-to-right direction */
2938 coding
->mode
&= ~CODING_MODE_DIRECTION
;
2943 case '2': /* start of right-to-left direction */
2946 coding
->mode
|= CODING_MODE_DIRECTION
;
2957 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2959 if (c1
>= 0x28 && c1
<= 0x2B)
2960 { /* designation of DIMENSION1_CHARS94 character set */
2962 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
2964 else if (c1
>= 0x2C && c1
<= 0x2F)
2965 { /* designation of DIMENSION1_CHARS96 character set */
2967 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
2971 /* We must update these variables now. */
2972 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2973 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2978 /* Now we know CHARSET and 1st position code C1 of a character.
2979 Produce a decoded character while getting 2nd position code
2982 if (CHARSET_DIMENSION (charset
) > 1)
2985 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
2986 /* C2 is not in a valid range. */
2988 c1
= (c1
<< 8) | (c2
& 0x7F);
2989 if (CHARSET_DIMENSION (charset
) > 2)
2992 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
2993 /* C2 is not in a valid range. */
2995 c1
= (c1
<< 8) | (c2
& 0x7F);
2999 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3002 MAYBE_FINISH_COMPOSITION ();
3003 for (; src_base
< src
; src_base
++, char_offset
++)
3005 if (ASCII_BYTE_P (*src_base
))
3006 *charbuf
++ = *src_base
;
3008 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3011 else if (composition_state
== COMPOSING_NO
)
3017 components
[component_idx
++] = c
;
3021 MAYBE_FINISH_COMPOSITION ();
3023 consumed_chars
= consumed_chars_base
;
3025 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3030 coding
->consumed_char
+= consumed_chars_base
;
3031 coding
->consumed
= src_base
- coding
->source
;
3032 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3036 /* ISO2022 encoding stuff. */
3039 It is not enough to say just "ISO2022" on encoding, we have to
3040 specify more details. In Emacs, each coding system of ISO2022
3041 variant has the following specifications:
3042 1. Initial designation to G0 thru G3.
3043 2. Allows short-form designation?
3044 3. ASCII should be designated to G0 before control characters?
3045 4. ASCII should be designated to G0 at end of line?
3046 5. 7-bit environment or 8-bit environment?
3047 6. Use locking-shift?
3048 7. Use Single-shift?
3049 And the following two are only for Japanese:
3050 8. Use ASCII in place of JIS0201-1976-Roman?
3051 9. Use JISX0208-1983 in place of JISX0208-1978?
3052 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3053 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3057 /* Produce codes (escape sequence) for designating CHARSET to graphic
3058 register REG at DST, and increment DST. If <final-char> of CHARSET is
3059 '@', 'A', or 'B' and the coding system CODING allows, produce
3060 designation sequence of short-form. */
3062 #define ENCODE_DESIGNATION(charset, reg, coding) \
3064 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3065 char *intermediate_char_94 = "()*+"; \
3066 char *intermediate_char_96 = ",-./"; \
3067 int revision = -1; \
3070 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3071 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3073 if (revision >= 0) \
3075 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3076 EMIT_ONE_BYTE ('@' + revision); \
3078 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3079 if (CHARSET_DIMENSION (charset) == 1) \
3081 if (! CHARSET_ISO_CHARS_96 (charset)) \
3082 c = intermediate_char_94[reg]; \
3084 c = intermediate_char_96[reg]; \
3085 EMIT_ONE_ASCII_BYTE (c); \
3089 EMIT_ONE_ASCII_BYTE ('$'); \
3090 if (! CHARSET_ISO_CHARS_96 (charset)) \
3092 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3094 || final_char < '@' || final_char > 'B') \
3095 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3098 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3100 EMIT_ONE_ASCII_BYTE (final_char); \
3102 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3106 /* The following two macros produce codes (control character or escape
3107 sequence) for ISO2022 single-shift functions (single-shift-2 and
3110 #define ENCODE_SINGLE_SHIFT_2 \
3112 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3113 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3115 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3116 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3120 #define ENCODE_SINGLE_SHIFT_3 \
3122 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3123 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3125 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3126 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each macro also
   records in CODING the graphic register that is now invoked to
   graphic plane 0.  They expect `coding' to be in scope and rely on
   the EMIT_* macros for output.  */

/* Shift-in: emit ISO_CODE_SI; graphic register 0 is invoked.  */
#define ENCODE_SHIFT_IN				\
  do {						\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);		\
    CODING_ISO_INVOCATION (coding, 0) = 0;	\
  } while (0)

/* Shift-out: emit ISO_CODE_SO; graphic register 1 is invoked.  */
#define ENCODE_SHIFT_OUT			\
  do {						\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);		\
    CODING_ISO_INVOCATION (coding, 0) = 1;	\
  } while (0)

/* Locking-shift-2: emit ESC 'n'; graphic register 2 is invoked.  */
#define ENCODE_LOCKING_SHIFT_2			\
  do {						\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');	\
    CODING_ISO_INVOCATION (coding, 0) = 2;	\
  } while (0)

/* Locking-shift-3: emit ESC 'o'; graphic register 3 is invoked.
   The final byte must be 'o': ESC 'n' is locking-shift-2, and the
   ISO-2022 decoder in this file treats ESC 'o' as the invocation of
   locking-shift-3.  */
#define ENCODE_LOCKING_SHIFT_3			\
  do {						\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');	\
    CODING_ISO_INVOCATION (coding, 0) = 3;	\
  } while (0)
3162 /* Produce codes for a DIMENSION1 character whose character set is
3163 CHARSET and whose position-code is C1. Designation and invocation
3164 sequences are also produced in advance if necessary. */
3166 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3168 int id = CHARSET_ID (charset); \
3169 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3171 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3172 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3174 EMIT_ONE_BYTE (c1 | 0x80); \
3175 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3178 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3180 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3183 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3185 EMIT_ONE_BYTE (c1 | 0x80); \
3189 /* Since CHARSET is not yet invoked to any graphic planes, we \
3190 must invoke it, or, at first, designate it to some graphic \
3191 register. Then repeat the loop to actually produce the \
3193 dst = encode_invocation_designation (charset, coding, dst, \
3198 /* Produce codes for a DIMENSION2 character whose character set is
3199 CHARSET and whose position-codes are C1 and C2. Designation and
3200 invocation codes are also produced in advance if necessary. */
3202 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3204 int id = CHARSET_ID (charset); \
3205 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3207 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3208 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3210 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3211 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3214 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3216 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3219 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3221 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3225 /* Since CHARSET is not yet invoked to any graphic planes, we \
3226 must invoke it, or, at first, designate it to some graphic \
3227 register. Then repeat the loop to actually produce the \
3229 dst = encode_invocation_designation (charset, coding, dst, \
3234 #define ENCODE_ISO_CHARACTER(charset, c) \
3236 int code = ENCODE_CHAR ((charset),(c)); \
3238 if (CHARSET_DIMENSION (charset) == 1) \
3239 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3241 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3245 /* Produce designation and invocation codes at a place pointed by DST
3246 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3250 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3251 struct charset
*charset
;
3252 struct coding_system
*coding
;
3256 int multibytep
= coding
->dst_multibyte
;
3257 int produced_chars
= *p_nchars
;
3258 int reg
; /* graphic register number */
3259 int id
= CHARSET_ID (charset
);
3261 /* At first, check designations. */
3262 for (reg
= 0; reg
< 4; reg
++)
3263 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3268 /* CHARSET is not yet designated to any graphic registers. */
3269 /* At first check the requested designation. */
3270 reg
= CODING_ISO_REQUEST (coding
, id
);
3272 /* Since CHARSET requests no special designation, designate it
3273 to graphic register 0. */
3276 ENCODE_DESIGNATION (charset
, reg
, coding
);
3279 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3280 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3282 /* Since the graphic register REG is not invoked to any graphic
3283 planes, invoke it to graphic plane 0. */
3286 case 0: /* graphic register 0 */
3290 case 1: /* graphic register 1 */
3294 case 2: /* graphic register 2 */
3295 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3296 ENCODE_SINGLE_SHIFT_2
;
3298 ENCODE_LOCKING_SHIFT_2
;
3301 case 3: /* graphic register 3 */
3302 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3303 ENCODE_SINGLE_SHIFT_3
;
3305 ENCODE_LOCKING_SHIFT_3
;
3310 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating the
   direction of text.  They expect `coding' to be in scope and rely on
   the EMIT_* macros for output.  */

/* Emit a control sequence introducer: the two-byte form ESC '[' when
   CODING_ISO_FLAG_SEVEN_BITS is set for the coding system, otherwise
   the single byte ISO_CODE_CSI.  CODING_ISO_FLAGS is a word of flag
   bits, so the flag is tested with `&' rather than compared for
   equality (which would fail whenever any other flag is also set).  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER			\
  do {								\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');			\
    else							\
      EMIT_ONE_BYTE (ISO_CODE_CSI);				\
  } while (0)

/* Emit CSI '2' ']' (start of right-to-left direction).
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list.  */
#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)

/* Emit CSI '0' ']' (end of the current direction).  */
#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
3339 /* Produce codes for designation and invocation to reset the graphic
3340 planes and registers to initial state. */
3341 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3344 struct charset *charset; \
3346 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3348 for (reg = 0; reg < 4; reg++) \
3349 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3350 && (CODING_ISO_DESIGNATION (coding, reg) \
3351 != CODING_ISO_INITIAL (coding, reg))) \
3353 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3354 ENCODE_DESIGNATION (charset, reg, coding); \
3359 /* Produce designation sequences of charsets in the line started from
3360 SRC to a place pointed by DST, and return updated DST.
3362 If the current block ends before any end-of-line, we may fail to
3363 find all the necessary designations. */
3365 static unsigned char *
3366 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3367 struct coding_system
*coding
;
3368 int *charbuf
, *charbuf_end
;
3371 struct charset
*charset
;
3372 /* Table of charsets to be designated to each graphic register. */
3374 int c
, found
= 0, reg
;
3375 int produced_chars
= 0;
3376 int multibytep
= coding
->dst_multibyte
;
3378 Lisp_Object charset_list
;
3380 attrs
= CODING_ID_ATTRS (coding
->id
);
3381 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3382 if (EQ (charset_list
, Qiso_2022
))
3383 charset_list
= Viso_2022_charset_list
;
3385 for (reg
= 0; reg
< 4; reg
++)
3395 charset
= char_charset (c
, charset_list
, NULL
);
3396 id
= CHARSET_ID (charset
);
3397 reg
= CODING_ISO_REQUEST (coding
, id
);
3398 if (reg
>= 0 && r
[reg
] < 0)
3407 for (reg
= 0; reg
< 4; reg
++)
3409 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3410 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3416 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3419 encode_coding_iso_2022 (coding
)
3420 struct coding_system
*coding
;
3422 int multibytep
= coding
->dst_multibyte
;
3423 int *charbuf
= coding
->charbuf
;
3424 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3425 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3426 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3429 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3430 && CODING_ISO_BOL (coding
));
3431 int produced_chars
= 0;
3432 Lisp_Object attrs
, eol_type
, charset_list
;
3433 int ascii_compatible
;
3436 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3438 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3440 while (charbuf
< charbuf_end
)
3442 ASSURE_DESTINATION (safe_room
);
3444 if (bol_designation
)
3446 unsigned char *dst_prev
= dst
;
3448 /* We have to produce designation sequences if any now. */
3449 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3450 bol_designation
= 0;
3451 /* We are sure that designation sequences are all ASCII bytes. */
3452 produced_chars
+= dst
- dst_prev
;
3457 /* Now encode the character C. */
3458 if (c
< 0x20 || c
== 0x7F)
3461 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3463 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3464 ENCODE_RESET_PLANE_AND_REGISTER ();
3465 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3469 for (i
= 0; i
< 4; i
++)
3470 CODING_ISO_DESIGNATION (coding
, i
)
3471 = CODING_ISO_INITIAL (coding
, i
);
3474 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3476 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3477 ENCODE_RESET_PLANE_AND_REGISTER ();
3478 EMIT_ONE_ASCII_BYTE (c
);
3480 else if (ASCII_CHAR_P (c
))
3482 if (ascii_compatible
)
3483 EMIT_ONE_ASCII_BYTE (c
);
3485 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii
), c
);
3489 struct charset
*charset
= char_charset (c
, charset_list
, NULL
);
3493 c
= coding
->default_char
;
3494 charset
= char_charset (c
, charset_list
, NULL
);
3496 ENCODE_ISO_CHARACTER (charset
, c
);
3500 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3501 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3503 ASSURE_DESTINATION (safe_room
);
3504 ENCODE_RESET_PLANE_AND_REGISTER ();
3506 coding
->result
= CODING_RESULT_SUCCESS
;
3507 CODING_ISO_BOL (coding
) = bol_designation
;
3508 coding
->produced_char
+= produced_chars
;
3509 coding
->produced
= dst
- coding
->destination
;
3514 /*** 8,9. SJIS and BIG5 handlers ***/
3516 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3517 quite widely. So, for the moment, Emacs supports them in the bare
3518 C code. But, in the future, they may be supported only by CCL. */
3520 /* SJIS is a coding system encoding three character sets: ASCII, right
3521 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3522 as is. A character of charset katakana-jisx0201 is encoded by
3523 "position-code + 0x80". A character of charset japanese-jisx0208
   is encoded in two bytes, but its two position-codes are divided and
   shifted so that they fit in the ranges below.
3527 --- CODE RANGE of SJIS ---
3528 (character set) (range)
3530 KATAKANA-JISX0201 0xA0 .. 0xDF
3531 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3532 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3533 -------------------------------
3537 /* BIG5 is a coding system encoding two character sets: ASCII and
3538 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3539 character set and is encoded in two-byte.
3541 --- CODE RANGE of BIG5 ---
3542 (character set) (range)
3544 Big5 (1st byte) 0xA1 .. 0xFE
3545 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3546 --------------------------
3550 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3551 Check if a text is encoded in SJIS. If it is, return
3552 CATEGORY_MASK_SJIS, else return 0. */
3555 detect_coding_sjis (coding
, mask
)
3556 struct coding_system
*coding
;
3559 unsigned char *src
= coding
->source
, *src_base
= src
;
3560 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3561 int multibytep
= coding
->src_multibyte
;
3562 int consumed_chars
= 0;
3566 /* A coding system of this category is always ASCII compatible. */
3567 src
+= coding
->head_ascii
;
3574 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3577 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3581 else if (c
>= 0xA0 && c
< 0xE0)
3586 *mask
&= ~CATEGORY_MASK_SJIS
;
3592 *mask
&= CATEGORY_MASK_SJIS
;
3596 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3597 Check if a text is encoded in BIG5. If it is, return
3598 CATEGORY_MASK_BIG5, else return 0. */
3601 detect_coding_big5 (coding
, mask
)
3602 struct coding_system
*coding
;
3605 unsigned char *src
= coding
->source
, *src_base
= src
;
3606 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3607 int multibytep
= coding
->src_multibyte
;
3608 int consumed_chars
= 0;
3612 /* A coding system of this category is always ASCII compatible. */
3613 src
+= coding
->head_ascii
;
3623 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3630 *mask
&= ~CATEGORY_MASK_BIG5
;
3636 *mask
&= CATEGORY_MASK_BIG5
;
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   If SJIS_P is 1, decode SJIS text, else decode BIG5 text.  */
/* Decode the Shift-JIS bytes of CODING->source, starting at offset
   CODING->consumed, into the integer buffer CODING->charbuf.  On exit
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used
   are updated from the last completely handled position (SRC_BASE /
   CONSUMED_CHARS_BASE).

   The three charsets are taken from the coding system's charset list
   in the order roman, kana, kanji.  This is the order
   encode_coding_sjis reads the same list in; reading it as roman,
   kanji, kana here would decode two-byte sequences with the kana
   charset and single-byte kana with the kanji charset.  */
3644 decode_coding_sjis (coding
)
3645 struct coding_system
*coding
;
3647 unsigned char *src
= coding
->source
+ coding
->consumed
;
3648 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3649 unsigned char *src_base
;
3650 int *charbuf
= coding
->charbuf
;
3651 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
3652 int consumed_chars
= 0, consumed_chars_base
;
3653 int multibytep
= coding
->src_multibyte
;
3654 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3655 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3657 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3660 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
/* Second element is the kana charset, third is the kanji charset
   (same order as in encode_coding_sjis).  */
3661 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3662 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3669 consumed_chars_base
= consumed_chars
;
3671 if (charbuf
>= charbuf_end
)
3678 if (EQ (eol_type
, Qdos
))
3681 goto no_more_source
;
3685 else if (EQ (eol_type
, Qmac
))
3690 struct charset
*charset
;
3693 charset
= charset_roman
;
3698 if (c
< 0xA0 || c
>= 0xE0)
3700 /* SJIS -> JISX0208 */
3702 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
3706 charset
= charset_kanji
;
3709 /* SJIS -> JISX0201-Kana */
3710 charset
= charset_kana
;
3712 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
3719 consumed_chars
= consumed_chars_base
;
3721 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3726 coding
->consumed_char
+= consumed_chars_base
;
3727 coding
->consumed
= src_base
- coding
->source
;
3728 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3732 decode_coding_big5 (coding
)
3733 struct coding_system
*coding
;
3735 unsigned char *src
= coding
->source
+ coding
->consumed
;
3736 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3737 unsigned char *src_base
;
3738 int *charbuf
= coding
->charbuf
;
3739 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
3740 int consumed_chars
= 0, consumed_chars_base
;
3741 int multibytep
= coding
->src_multibyte
;
3742 struct charset
*charset_roman
, *charset_big5
;
3743 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3745 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3747 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3748 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3755 consumed_chars_base
= consumed_chars
;
3757 if (charbuf
>= charbuf_end
)
3764 if (EQ (eol_type
, Qdos
))
3767 goto no_more_source
;
3771 else if (EQ (eol_type
, Qmac
))
3776 struct charset
*charset
;
3778 charset
= charset_roman
;
3782 if (c
< 0xA1 || c
> 0xFE)
3785 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
3788 charset
= charset_big5
;
3790 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
3798 consumed_chars
= consumed_chars_base
;
3800 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3805 coding
->consumed_char
+= consumed_chars_base
;
3806 coding
->consumed
= src_base
- coding
->source
;
3807 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3810 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3811 This function can encode charsets `ascii', `katakana-jisx0201',
3812 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3813 are sure that all these charsets are registered as official charset
3814 (i.e. do not have extended leading-codes). Characters of other
3815 charsets are produced without any encoding. If SJIS_P is 1, encode
3816 SJIS text, else encode BIG5 text. */
3819 encode_coding_sjis (coding
)
3820 struct coding_system
*coding
;
3822 int multibytep
= coding
->dst_multibyte
;
3823 int *charbuf
= coding
->charbuf
;
3824 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3825 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3826 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3828 int produced_chars
= 0;
3829 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3830 int ascii_compatible
;
3831 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3834 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3836 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3837 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3838 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3840 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3842 while (charbuf
< charbuf_end
)
3844 ASSURE_DESTINATION (safe_room
);
3846 /* Now encode the character C. */
3847 if (ASCII_CHAR_P (c
) && ascii_compatible
)
3848 EMIT_ONE_ASCII_BYTE (c
);
3852 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
3856 c
= coding
->default_char
;
3857 charset
= char_charset (c
, charset_list
, &code
);
3859 if (code
== CHARSET_INVALID_CODE (charset
))
3861 if (charset
== charset_kanji
)
3865 c1
= code
>> 8, c2
= code
& 0xFF;
3866 EMIT_TWO_BYTES (c1
, c2
);
3868 else if (charset
== charset_kana
)
3869 EMIT_ONE_BYTE (code
| 0x80);
3871 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
3874 coding
->result
= CODING_RESULT_SUCCESS
;
3875 coding
->produced_char
+= produced_chars
;
3876 coding
->produced
= dst
- coding
->destination
;
3881 encode_coding_big5 (coding
)
3882 struct coding_system
*coding
;
3884 int multibytep
= coding
->dst_multibyte
;
3885 int *charbuf
= coding
->charbuf
;
3886 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3887 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3888 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3890 int produced_chars
= 0;
3891 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3892 int ascii_compatible
;
3893 struct charset
*charset_roman
, *charset_big5
;
3896 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3898 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3899 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3900 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3902 while (charbuf
< charbuf_end
)
3904 ASSURE_DESTINATION (safe_room
);
3906 /* Now encode the character C. */
3907 if (ASCII_CHAR_P (c
) && ascii_compatible
)
3908 EMIT_ONE_ASCII_BYTE (c
);
3912 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
3916 c
= coding
->default_char
;
3917 charset
= char_charset (c
, charset_list
, &code
);
3919 if (code
== CHARSET_INVALID_CODE (charset
))
3921 if (charset
== charset_big5
)
3925 c1
= code
>> 8, c2
= code
& 0xFF;
3926 EMIT_TWO_BYTES (c1
, c2
);
3929 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
3932 coding
->result
= CODING_RESULT_SUCCESS
;
3933 coding
->produced_char
+= produced_chars
;
3934 coding
->produced
= dst
- coding
->destination
;
3939 /*** 9. CCL handlers ***/
3941 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3942 Check if a text is encoded in a coding system of which
3943 encoder/decoder are written in CCL program. If it is, return
3944 CATEGORY_MASK_CCL, else return 0. */
3947 detect_coding_ccl (coding
, mask
)
3948 struct coding_system
*coding
;
3951 unsigned char *src
= coding
->source
, *src_base
= src
;
3952 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3953 int multibytep
= coding
->src_multibyte
;
3954 int consumed_chars
= 0;
3956 unsigned char *valids
= CODING_CCL_VALIDS (coding
);
3957 int head_ascii
= coding
->head_ascii
;
3960 coding
= &coding_categories
[coding_category_ccl
];
3961 attrs
= CODING_ID_ATTRS (coding
->id
);
3962 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
3971 if (!found
&& valids
[c
] > 1)
3974 *mask
&= ~CATEGORY_MASK_CCL
;
3980 *mask
&= CATEGORY_MASK_CCL
;
3985 decode_coding_ccl (coding
)
3986 struct coding_system
*coding
;
3988 unsigned char *src
= coding
->source
+ coding
->consumed
;
3989 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3990 int *charbuf
= coding
->charbuf
;
3991 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
3992 int consumed_chars
= 0;
3993 int multibytep
= coding
->src_multibyte
;
3994 struct ccl_program ccl
;
3995 int source_charbuf
[1024];
3996 int source_byteidx
[1024];
3998 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4000 while (src
< src_end
)
4002 unsigned char *p
= src
;
4003 int *source
, *source_end
;
4007 while (i
< 1024 && p
< src_end
)
4009 source_byteidx
[i
] = p
- src
;
4010 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4013 while (i
< 1024 && p
< src_end
)
4014 source_charbuf
[i
++] = *p
++;
4016 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4019 source
= source_charbuf
;
4020 source_end
= source
+ i
;
4021 while (source
< source_end
)
4023 ccl_driver (&ccl
, source
, charbuf
,
4024 source_end
- source
, charbuf_end
- charbuf
);
4025 source
+= ccl
.consumed
;
4026 charbuf
+= ccl
.produced
;
4027 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
4030 if (source
< source_end
)
4031 src
+= source_byteidx
[source
- source_charbuf
];
4034 consumed_chars
+= source
- source_charbuf
;
4036 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4037 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4043 case CCL_STAT_SUSPEND_BY_SRC
:
4044 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4046 case CCL_STAT_SUSPEND_BY_DST
:
4049 case CCL_STAT_INVALID_CMD
:
4050 coding
->result
= CODING_RESULT_INTERRUPT
;
4053 coding
->result
= CODING_RESULT_SUCCESS
;
4056 coding
->consumed_char
+= consumed_chars
;
4057 coding
->consumed
= src
- coding
->source
;
4058 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4062 encode_coding_ccl (coding
)
4063 struct coding_system
*coding
;
4065 struct ccl_program ccl
;
4066 int multibytep
= coding
->dst_multibyte
;
4067 int *charbuf
= coding
->charbuf
;
4068 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4069 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4070 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4071 unsigned char *adjusted_dst_end
= dst_end
- 1;
4072 int destination_charbuf
[1024];
4073 int i
, produced_chars
= 0;
4075 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4077 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4078 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4080 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
4082 int dst_bytes
= dst_end
- dst
;
4083 if (dst_bytes
> 1024)
4086 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4087 charbuf_end
- charbuf
, dst_bytes
);
4088 charbuf
+= ccl
.consumed
;
4090 for (i
= 0; i
< ccl
.produced
; i
++)
4091 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4094 for (i
= 0; i
< ccl
.produced
; i
++)
4095 *dst
++ = destination_charbuf
[i
] & 0xFF;
4096 produced_chars
+= ccl
.produced
;
4102 case CCL_STAT_SUSPEND_BY_SRC
:
4103 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4105 case CCL_STAT_SUSPEND_BY_DST
:
4106 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
4109 case CCL_STAT_INVALID_CMD
:
4110 coding
->result
= CODING_RESULT_INTERRUPT
;
4113 coding
->result
= CODING_RESULT_SUCCESS
;
4117 coding
->produced_char
+= produced_chars
;
4118 coding
->produced
= dst
- coding
->destination
;
4124 /*** 10, 11. no-conversion handlers ***/
4126 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4129 decode_coding_raw_text (coding
)
4130 struct coding_system
*coding
;
4132 coding
->chars_at_source
= 1;
4133 coding
->consumed_char
= 0;
4134 coding
->consumed
= 0;
4135 coding
->result
= CODING_RESULT_SUCCESS
;
/* Store the characters of CODING->charbuf (CODING->charbuf_used
   elements) into CODING->destination, starting at offset
   CODING->produced, without charset conversion.  How each element
   is stored depends on CODING->dst_multibyte and
   CODING->src_multibyte.  On exit CODING->result is
   CODING_RESULT_SUCCESS and CODING->produced_char and
   CODING->produced are updated.  */
4139 encode_coding_raw_text (coding
)
4140 struct coding_system
*coding
;
4142 int multibytep
= coding
->dst_multibyte
;
4143 int *charbuf
= coding
->charbuf
;
4144 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4145 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4146 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4147 int produced_chars
= 0;
4152 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4154 if (coding
->src_multibyte
)
4155 while (charbuf
< charbuf_end
)
4157 ASSURE_DESTINATION (safe_room
);
4159 if (ASCII_CHAR_P (c
))
4160 EMIT_ONE_ASCII_BYTE (c
);
4161 else if (CHAR_BYTE8_P (c
))
4163 c
= CHAR_TO_BYTE8 (c
);
4168 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4170 CHAR_STRING_ADVANCE (c
, p1
);
4172 EMIT_ONE_BYTE (*p0
);
4176 while (charbuf
< charbuf_end
)
4178 ASSURE_DESTINATION (safe_room
);
4185 if (coding
->src_multibyte
)
4187 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4189 while (charbuf
< charbuf_end
)
4191 ASSURE_DESTINATION (safe_room
);
4193 if (ASCII_CHAR_P (c
))
4195 else if (CHAR_BYTE8_P (c
))
4196 *dst
++ = CHAR_TO_BYTE8 (c
);
4198 CHAR_STRING_ADVANCE (c
, dst
);
4204 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4205 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4206 *dst
++ = *charbuf
++;
/* Count the bytes stored by this call: DST started at
   destination + produced.  Subtracting destination + dst_bytes
   (i.e. DST_END) would yield a value that is never positive.  */
4207 produced_chars
= dst
- (coding
->destination
+ coding
->produced
);
4210 coding
->result
= CODING_RESULT_SUCCESS
;
4211 coding
->produced_char
+= produced_chars
;
4212 coding
->produced
= dst
- coding
->destination
;
4217 detect_coding_charset (coding
, mask
)
4218 struct coding_system
*coding
;
4221 unsigned char *src
= coding
->source
, *src_base
= src
;
4222 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4223 int multibytep
= coding
->src_multibyte
;
4224 int consumed_chars
= 0;
4225 Lisp_Object attrs
, valids
;
4227 coding
= &coding_categories
[coding_category_charset
];
4228 attrs
= CODING_ID_ATTRS (coding
->id
);
4229 valids
= AREF (attrs
, coding_attr_charset_valids
);
4231 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4232 src
+= coding
->head_ascii
;
4239 if (NILP (AREF (valids
, c
)))
4242 *mask
&= ~CATEGORY_MASK_CHARSET
;
4246 *mask
&= CATEGORY_MASK_CHARSET
;
4251 decode_coding_charset (coding
)
4252 struct coding_system
*coding
;
4254 unsigned char *src
= coding
->source
+ coding
->consumed
;
4255 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4256 unsigned char *src_base
;
4257 int *charbuf
= coding
->charbuf
;
4258 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4259 int consumed_chars
= 0, consumed_chars_base
;
4260 int multibytep
= coding
->src_multibyte
;
4261 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4263 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4264 valids
= AREF (attrs
, coding_attr_charset_valids
);
4271 consumed_chars_base
= consumed_chars
;
4273 if (charbuf
>= charbuf_end
)
4279 if (EQ (eol_type
, Qdos
))
4285 else if (EQ (eol_type
, Qmac
))
4291 struct charset
*charset
;
4294 val
= AREF (valids
, c
);
4297 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4298 if (CHARSET_DIMENSION (charset
) > 1)
4302 if (CHARSET_DIMENSION (charset
) > 2)
4306 if (CHARSET_DIMENSION (charset
) > 3)
4313 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4322 consumed_chars
= consumed_chars_base
;
4324 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4329 coding
->consumed_char
+= consumed_chars_base
;
4330 coding
->consumed
= src_base
- coding
->source
;
4331 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4335 encode_coding_charset (coding
)
4336 struct coding_system
*coding
;
4338 int multibytep
= coding
->dst_multibyte
;
4339 int *charbuf
= coding
->charbuf
;
4340 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4341 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4342 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4343 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4344 int produced_chars
= 0;
4345 struct charset
*charset
;
4346 Lisp_Object attrs
, eol_type
, charset_list
;
4347 int ascii_compatible
;
4350 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4351 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4353 while (charbuf
< charbuf_end
)
4355 struct charset
*charset
;
4358 ASSURE_DESTINATION (safe_room
);
4360 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4361 EMIT_ONE_ASCII_BYTE (c
);
4364 charset
= char_charset (c
, charset_list
, &code
);
4367 if (CHARSET_DIMENSION (charset
) == 1)
4368 EMIT_ONE_BYTE (code
);
4369 else if (CHARSET_DIMENSION (charset
) == 2)
4370 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4371 else if (CHARSET_DIMENSION (charset
) == 3)
4372 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4374 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4375 (code
>> 8) & 0xFF, code
& 0xFF);
4378 EMIT_ONE_BYTE (coding
->default_char
);
4382 coding
->result
= CODING_RESULT_SUCCESS
;
4383 coding
->produced_char
+= produced_chars
;
4384 coding
->produced
= dst
- coding
->destination
;
4389 /*** 10. C library functions ***/
4391 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
4392 has a property `coding-system'. The value of this property is a
4393 vector of length 5 (called as coding-vector). Among elements of
4394 this vector, the first (element[0]) and the fifth (element[4])
4395 carry important information for decoding/encoding. Before
4396 decoding/encoding, this information should be set in fields of a
4397 structure of type `coding_system'.
4399 A value of property `coding-system' can be a symbol of another
4400 subsidiary coding-system. In that case, Emacs gets coding-vector
4403 `element[0]' contains information to be set in `coding->type'. The
4404 value and its meaning is as follows:
4406 0 -- coding_type_emacs_mule
4407 1 -- coding_type_sjis
4408 2 -- coding_type_iso_2022
4409 3 -- coding_type_big5
4410 4 -- coding_type_ccl encoder/decoder written in CCL
4411 nil -- coding_type_no_conversion
4412 t -- coding_type_undecided (automatic conversion on decoding,
4413 no-conversion on encoding)
4415 `element[4]' contains information to be set in `coding->flags' and
4416 `coding->spec'. The meaning varies by `coding->type'.
4418 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4419 of length 32 (of which the first 13 sub-elements are used now).
4420 Meanings of these sub-elements are:
4422 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4423 If the value is an integer of valid charset, the charset is
4424 assumed to be designated to graphic register N initially.
4426 If the value is minus, it is a minus value of charset which
4427 reserves graphic register N, which means that the charset is
4428 not designated initially but should be designated to graphic
4429 register N just before encoding a character in that charset.
4431 If the value is nil, graphic register N is never used on
4434 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4435 Each value takes t or nil. See the section ISO2022 of
4436 `coding.h' for more information.
4438 If `coding->type' is `coding_type_big5', element[4] is t to denote
4439 BIG5-ETen or nil to denote BIG5-HKU.
4441 If `coding->type' takes the other value, element[4] is ignored.
4443 Emacs Lisp's coding system also carries information about format of
4444 end-of-line in a value of property `eol-type'. If the value is
4445 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4446 it is not integer, it should be a vector of subsidiary coding
4447 systems of which property `eol-type' has one of above values.
4451 /* Setup coding context CODING from information about CODING_SYSTEM.
4452 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4453 CODING_SYSTEM is invalid, signal an error. */
4456 setup_coding_system (coding_system
, coding
)
4457 Lisp_Object coding_system
;
4458 struct coding_system
*coding
;
4461 Lisp_Object eol_type
;
4462 Lisp_Object coding_type
;
4465 if (NILP (coding_system
))
4466 coding_system
= Qno_conversion
;
4468 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4470 attrs
= CODING_ID_ATTRS (coding
->id
);
4471 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4474 coding
->head_ascii
= -1;
4475 coding
->common_flags
4476 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4478 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4479 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4480 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4481 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4483 coding_type
= CODING_ATTR_TYPE (attrs
);
4484 if (EQ (coding_type
, Qundecided
))
4486 coding
->detector
= NULL
;
4487 coding
->decoder
= decode_coding_raw_text
;
4488 coding
->encoder
= encode_coding_raw_text
;
4489 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4491 else if (EQ (coding_type
, Qiso_2022
))
4494 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4496 /* Invoke graphic register 0 to plane 0. */
4497 CODING_ISO_INVOCATION (coding
, 0) = 0;
4498 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4499 CODING_ISO_INVOCATION (coding
, 1)
4500 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4501 /* Setup the initial status of designation. */
4502 for (i
= 0; i
< 4; i
++)
4503 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4504 /* Not single shifting initially. */
4505 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4506 /* Beginning of buffer should also be regarded as bol. */
4507 CODING_ISO_BOL (coding
) = 1;
4508 coding
->detector
= detect_coding_iso_2022
;
4509 coding
->decoder
= decode_coding_iso_2022
;
4510 coding
->encoder
= encode_coding_iso_2022
;
4511 if (flags
& CODING_ISO_FLAG_SAFE
)
4512 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4513 coding
->common_flags
4514 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4515 | CODING_REQUIRE_FLUSHING_MASK
);
4516 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4517 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4518 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4520 setup_iso_safe_charsets (attrs
);
4521 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4522 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4523 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4525 CODING_ISO_FLAGS (coding
) = flags
;
4527 else if (EQ (coding_type
, Qcharset
))
4529 coding
->detector
= detect_coding_charset
;
4530 coding
->decoder
= decode_coding_charset
;
4531 coding
->encoder
= encode_coding_charset
;
4532 coding
->common_flags
4533 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4535 else if (EQ (coding_type
, Qutf_8
))
4537 coding
->detector
= detect_coding_utf_8
;
4538 coding
->decoder
= decode_coding_utf_8
;
4539 coding
->encoder
= encode_coding_utf_8
;
4540 coding
->common_flags
4541 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4543 else if (EQ (coding_type
, Qutf_16
))
4545 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4546 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4547 : EQ (val
, Qt
) ? utf_16_with_bom
4548 : utf_16_without_bom
);
4549 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4550 CODING_UTF_16_ENDIAN (coding
) = (NILP (val
) ? utf_16_big_endian
4551 : utf_16_little_endian
);
4552 CODING_UTF_16_SURROGATE (coding
) = 0;
4553 coding
->detector
= detect_coding_utf_16
;
4554 coding
->decoder
= decode_coding_utf_16
;
4555 coding
->encoder
= encode_coding_utf_16
;
4556 coding
->common_flags
4557 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4559 else if (EQ (coding_type
, Qccl
))
4561 coding
->detector
= detect_coding_ccl
;
4562 coding
->decoder
= decode_coding_ccl
;
4563 coding
->encoder
= encode_coding_ccl
;
4564 coding
->common_flags
4565 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4566 | CODING_REQUIRE_FLUSHING_MASK
);
4568 else if (EQ (coding_type
, Qemacs_mule
))
4570 coding
->detector
= detect_coding_emacs_mule
;
4571 coding
->decoder
= decode_coding_emacs_mule
;
4572 coding
->encoder
= encode_coding_emacs_mule
;
4573 coding
->common_flags
4574 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4575 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4576 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4578 Lisp_Object tail
, safe_charsets
;
4579 int max_charset_id
= 0;
4581 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4583 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4584 max_charset_id
= XFASTINT (XCAR (tail
));
4585 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
4587 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4589 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
4590 coding
->max_charset_id
= max_charset_id
;
4591 coding
->safe_charsets
= (char *) XSTRING (safe_charsets
)->data
;
4594 else if (EQ (coding_type
, Qshift_jis
))
4596 coding
->detector
= detect_coding_sjis
;
4597 coding
->decoder
= decode_coding_sjis
;
4598 coding
->encoder
= encode_coding_sjis
;
4599 coding
->common_flags
4600 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4602 else if (EQ (coding_type
, Qbig5
))
4604 coding
->detector
= detect_coding_big5
;
4605 coding
->decoder
= decode_coding_big5
;
4606 coding
->encoder
= encode_coding_big5
;
4607 coding
->common_flags
4608 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4610 else /* EQ (coding_type, Qraw_text) */
4612 coding
->detector
= NULL
;
4613 coding
->decoder
= decode_coding_raw_text
;
4614 coding
->encoder
= encode_coding_raw_text
;
4615 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4621 /* Return raw-text or one of its subsidiaries that has the same
4622 eol_type as CODING-SYSTEM. */
4625 raw_text_coding_system (coding_system
)
4626 Lisp_Object coding_system
;
4628 Lisp_Object spec
, attrs
;
4629 Lisp_Object eol_type
, raw_text_eol_type
;
4631 spec
= CODING_SYSTEM_SPEC (coding_system
);
4632 attrs
= AREF (spec
, 0);
4634 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
4635 return coding_system
;
4637 eol_type
= AREF (spec
, 2);
4638 if (VECTORP (eol_type
))
4640 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
4641 raw_text_eol_type
= AREF (spec
, 2);
4642 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
4643 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
4644 : AREF (raw_text_eol_type
, 2));
4648 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4649 does, return one of the subsidiary that has the same eol-spec as
4650 PARENT. Otherwise, return CODING_SYSTEM. */
4653 coding_inherit_eol_type (coding_system
, parent
)
4655 Lisp_Object spec
, attrs
, eol_type
;
4657 spec
= CODING_SYSTEM_SPEC (coding_system
);
4658 attrs
= AREF (spec
, 0);
4659 eol_type
= AREF (spec
, 2);
4660 if (VECTORP (eol_type
))
4662 Lisp_Object parent_spec
;
4663 Lisp_Object parent_eol_type
;
4666 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
4667 parent_eol_type
= AREF (parent_spec
, 2);
4668 if (EQ (parent_eol_type
, Qunix
))
4669 coding_system
= AREF (eol_type
, 0);
4670 else if (EQ (parent_eol_type
, Qdos
))
4671 coding_system
= AREF (eol_type
, 1);
4672 else if (EQ (parent_eol_type
, Qmac
))
4673 coding_system
= AREF (eol_type
, 2);
4675 return coding_system
;
4678 /* Emacs has a mechanism to automatically detect a coding system if it
4679 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4680 it's impossible to distinguish some coding systems accurately
4681 because they use the same range of codes. So, at first, coding
4682 systems are categorized as follows:
4684 o coding-category-emacs-mule
4686 The category for a coding system which has the same code range
4687 as Emacs' internal format. Assigned the coding-system (Lisp
4688 symbol) `emacs-mule' by default.
4690 o coding-category-sjis
4692 The category for a coding system which has the same code range
4693 as SJIS. Assigned the coding-system (Lisp
4694 symbol) `japanese-shift-jis' by default.
4696 o coding-category-iso-7
4698 The category for a coding system which has the same code range
4699 as ISO2022 of 7-bit environment. This doesn't use any locking
4700 shift and single shift functions. This can encode/decode all
4701 charsets. Assigned the coding-system (Lisp symbol)
4702 `iso-2022-7bit' by default.
4704 o coding-category-iso-7-tight
4706 Same as coding-category-iso-7 except that this can
4707 encode/decode only the specified charsets.
4709 o coding-category-iso-8-1
4711 The category for a coding system which has the same code range
4712 as ISO2022 of 8-bit environment and graphic plane 1 used only
4713 for DIMENSION1 charset. This doesn't use any locking shift
4714 and single shift functions. Assigned the coding-system (Lisp
4715 symbol) `iso-latin-1' by default.
4717 o coding-category-iso-8-2
4719 The category for a coding system which has the same code range
4720 as ISO2022 of 8-bit environment and graphic plane 1 used only
4721 for DIMENSION2 charset. This doesn't use any locking shift
4722 and single shift functions. Assigned the coding-system (Lisp
4723 symbol) `japanese-iso-8bit' by default.
4725 o coding-category-iso-7-else
4727 The category for a coding system which has the same code range
4728 as ISO2022 of 7-bit environment but uses locking shift or
4729 single shift functions. Assigned the coding-system (Lisp
4730 symbol) `iso-2022-7bit-lock' by default.
4732 o coding-category-iso-8-else
4734 The category for a coding system which has the same code range
4735 as ISO2022 of 8-bit environment but uses locking shift or
4736 single shift functions. Assigned the coding-system (Lisp
4737 symbol) `iso-2022-8bit-ss2' by default.
4739 o coding-category-big5
4741 The category for a coding system which has the same code range
4742 as BIG5. Assigned the coding-system (Lisp symbol)
4743 `cn-big5' by default.
4745 o coding-category-utf-8
4747 The category for a coding system which has the same code range
4748 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4749 symbol) `utf-8' by default.
4751 o coding-category-utf-16-be
4753 The category for a coding system in which a text has an
4754 Unicode signature (cf. Unicode Standard) in the order of BIG
4755 endian at the head. Assigned the coding-system (Lisp symbol)
4756 `utf-16-be' by default.
4758 o coding-category-utf-16-le
4760 The category for a coding system in which a text has an
4761 Unicode signature (cf. Unicode Standard) in the order of
4762 LITTLE endian at the head. Assigned the coding-system (Lisp
4763 symbol) `utf-16-le' by default.
4765 o coding-category-ccl
4767 The category for a coding system of which encoder/decoder is
4768 written in CCL programs. The default value is nil, i.e., no
4769 coding system is assigned.
4771 o coding-category-binary
4773 The category for a coding system not categorized in any of the
4774 above. Assigned the coding-system (Lisp symbol)
4775 `no-conversion' by default.
4777 Each of them is a Lisp symbol and the value is an actual
4778 `coding-system's (this is also a Lisp symbol) assigned by a user.
4779 What Emacs does actually is to detect a category of coding system.
4780 Then, it uses a `coding-system' assigned to it. If Emacs can't
4781 decide only one possible category, it selects a category of the
4782 highest priority. Priorities of categories are also specified by a
4783 user in a Lisp variable `coding-category-list'.
4787 #define EOL_SEEN_NONE 0
4788 #define EOL_SEEN_LF 1
4789 #define EOL_SEEN_CR 2
4790 #define EOL_SEEN_CRLF 4
4792 /* Detect how end-of-line of a text of length CODING->src_bytes
4793 pointed by CODING->source is encoded. Return one of
4796 #define MAX_EOL_CHECK_COUNT 3
/* Scan the SRC_BYTES bytes at SOURCE for end-of-line sequences and
   accumulate the result in EOL_SEEN (EOL_SEEN_NONE, EOL_SEEN_LF,
   EOL_SEEN_CR or EOL_SEEN_CRLF).  When two different formats are
   seen, EOL_SEEN falls back to EOL_SEEN_LF.  Counting stops after
   MAX_EOL_CHECK_COUNT end-of-lines.

   There are two scanning loops.  The first steps through two-byte
   units and reads CODING->spec.utf_16.endian to decide which byte of
   each unit must be zero; it is therefore only meaningful for a
   coding system of type utf-16, and is selected by that type (not by
   the ccl type, which has no utf_16 spec).  The second loop scans
   byte by byte.  */
4799 detect_eol (coding
, source
, src_bytes
)
4800 struct coding_system
*coding
;
4801 unsigned char *source
;
4802 EMACS_INT src_bytes
;
4804 Lisp_Object attrs
, coding_type
;
4805 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
4808 int eol_seen
= EOL_SEEN_NONE
;
4810 attrs
= CODING_ID_ATTRS (coding
->id
);
4811 coding_type
= CODING_ATTR_TYPE (attrs
);
4813 if (EQ (coding_type
, Qutf_16
))
4817 msb
= coding
->spec
.utf_16
.endian
== utf_16_little_endian
;
4820 while (src
+ 1 < src_end
)
4823 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
4828 this_eol
= EOL_SEEN_LF
;
4829 else if (src
+ 3 >= src_end
4830 || src
[msb
+ 2] != 0
4831 || src
[lsb
+ 2] != '\n')
4832 this_eol
= EOL_SEEN_CR
;
4834 this_eol
= EOL_SEEN_CRLF
;
4836 if (eol_seen
== EOL_SEEN_NONE
)
4837 /* This is the first end-of-line. */
4838 eol_seen
= this_eol
;
4839 else if (eol_seen
!= this_eol
)
4841 /* The type found differs from what was found before. */
4842 eol_seen
= EOL_SEEN_LF
;
4845 if (++total
== MAX_EOL_CHECK_COUNT
)
4853 while (src
< src_end
)
4856 if (c
== '\n' || c
== '\r')
4861 this_eol
= EOL_SEEN_LF
;
4862 else if (src
>= src_end
|| *src
!= '\n')
4863 this_eol
= EOL_SEEN_CR
;
4865 this_eol
= EOL_SEEN_CRLF
, src
++;
4867 if (eol_seen
== EOL_SEEN_NONE
)
4868 /* This is the first end-of-line. */
4869 eol_seen
= this_eol
;
4870 else if (eol_seen
!= this_eol
)
4872 /* The type found differs from what was found before. */
4873 eol_seen
= EOL_SEEN_LF
;
4876 if (++total
== MAX_EOL_CHECK_COUNT
)
4886 adjust_coding_eol_type (coding
, eol_seen
)
4887 struct coding_system
*coding
;
4890 Lisp_Object eol_type
;
4892 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4893 if (eol_seen
& EOL_SEEN_LF
)
4894 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
4895 else if (eol_type
& EOL_SEEN_CRLF
)
4896 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
4897 else if (eol_type
& EOL_SEEN_CR
)
4898 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
4901 /* Detect how a text specified in CODING is encoded. If a coding
4902 system is detected, update fields of CODING by the detected coding
4906 detect_coding (coding
)
4907 struct coding_system
*coding
;
4909 unsigned char *src
, *src_end
;
4910 Lisp_Object attrs
, coding_type
;
4912 coding
->consumed
= coding
->consumed_char
= 0;
4913 coding
->produced
= coding
->produced_char
= 0;
4914 coding_set_source (coding
);
4916 src_end
= coding
->source
+ coding
->src_bytes
;
4918 /* If we have not yet decided the text encoding type, detect it
4920 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
4922 int mask
= CATEGORY_MASK_ANY
;
4925 for (src
= coding
->source
; src
< src_end
; src
++)
4928 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
4930 || c
== ISO_CODE_SO
)))
4933 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
4935 if (coding
->head_ascii
< coding
->src_bytes
)
4939 for (i
= 0; i
< coding_category_raw_text
; i
++)
4941 enum coding_category category
= coding_priorities
[i
];
4942 struct coding_system
*this = coding_categories
+ category
;
4944 if (category
>= coding_category_raw_text
4945 || detected
& (1 << category
))
4950 /* No coding system of this category is defined. */
4951 mask
&= ~(1 << category
);
4955 detected
|= detected_mask
[category
];
4956 if ((*(this->detector
)) (coding
, &mask
))
4961 setup_coding_system (Qraw_text
, coding
);
4962 else if (mask
!= CATEGORY_MASK_ANY
)
4963 for (i
= 0; i
< coding_category_raw_text
; i
++)
4965 enum coding_category category
= coding_priorities
[i
];
4966 struct coding_system
*this = coding_categories
+ category
;
4968 if (mask
& (1 << category
))
4970 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
4977 attrs
= CODING_ID_ATTRS (coding
->id
);
4978 coding_type
= CODING_ATTR_TYPE (attrs
);
  /* If we have not yet decided the EOL type, detect it now.  But, the
     detection is impossible for a CCL based coding system, in which
     case, we detect the EOL type after decoding.  */
4983 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
))
4984 && ! EQ (coding_type
, Qccl
))
4986 int eol_seen
= detect_eol (coding
, coding
->source
, coding
->src_bytes
);
4988 if (eol_seen
!= EOL_SEEN_NONE
)
4989 adjust_coding_eol_type (coding
, eol_seen
);
4996 struct coding_system
*coding
;
4998 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
)))
5000 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5001 unsigned char *pend
= p
+ coding
->produced
;
5002 int eol_seen
= EOL_SEEN_NONE
;
5004 for (; p
< pend
; p
++)
5007 eol_seen
|= EOL_SEEN_LF
;
5008 else if (*p
== '\r')
5010 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5012 eol_seen
|= EOL_SEEN_CRLF
;
5016 eol_seen
|= EOL_SEEN_CR
;
5019 if (eol_seen
!= EOL_SEEN_NONE
)
5020 adjust_coding_eol_type (coding
, eol_seen
);
5023 if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qmac
))
5025 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5026 unsigned char *pend
= p
+ coding
->produced
;
5028 for (; p
< pend
; p
++)
5032 else if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qdos
))
5034 unsigned char *p
, *pbeg
, *pend
;
5035 Lisp_Object undo_list
;
5037 move_gap_both (coding
->dst_pos
+ coding
->produced_char
,
5038 coding
->dst_pos_byte
+ coding
->produced
);
5039 undo_list
= current_buffer
->undo_list
;
5040 current_buffer
->undo_list
= Qt
;
5041 del_range_2 (coding
->dst_pos
, coding
->dst_pos_byte
, GPT
, GPT_BYTE
, Qnil
);
5042 current_buffer
->undo_list
= undo_list
;
5044 pend
= pbeg
+ coding
->produced
;
5046 for (p
= pend
- 1; p
>= pbeg
; p
--)
5049 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
- p
- 1);
5052 coding
->produced_char
-= coding
->produced
- (pend
- pbeg
);
5053 coding
->produced
= pend
- pbeg
;
5054 insert_from_gap (coding
->produced_char
, coding
->produced
);
5059 translate_chars (coding
, table
)
5060 struct coding_system
*coding
;
5063 int *charbuf
= coding
->charbuf
;
5064 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5067 if (coding
->chars_at_source
)
5070 while (charbuf
< charbuf_end
)
5076 *charbuf
++ = translate_char (table
, c
);
5081 produce_chars (coding
)
5082 struct coding_system
*coding
;
5084 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5085 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5087 int produced_chars
= 0;
5089 if (! coding
->chars_at_source
)
5091 /* Characters are in coding->charbuf. */
5092 int *buf
= coding
->charbuf
;
5093 int *buf_end
= buf
+ coding
->charbuf_used
;
5094 unsigned char *adjusted_dst_end
;
5096 if (BUFFERP (coding
->src_object
)
5097 && EQ (coding
->src_object
, coding
->dst_object
))
5098 dst_end
= coding
->source
+ coding
->consumed
;
5099 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5101 while (buf
< buf_end
)
5105 if (dst
>= adjusted_dst_end
)
5107 dst
= alloc_destination (coding
,
5108 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5110 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5111 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5115 if (coding
->dst_multibyte
5116 || ! CHAR_BYTE8_P (c
))
5117 CHAR_STRING_ADVANCE (c
, dst
);
5119 *dst
++ = CHAR_TO_BYTE8 (c
);
5123 /* This is an annotation data. */
5129 unsigned char *src
= coding
->source
;
5130 unsigned char *src_end
= src
+ coding
->src_bytes
;
5131 Lisp_Object eol_type
;
5133 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5135 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5137 if (coding
->src_multibyte
)
5144 unsigned char *src_base
= src
;
5150 if (EQ (eol_type
, Qdos
))
5156 else if (EQ (eol_type
, Qmac
))
5161 coding
->consumed
= src
- coding
->source
;
5163 if (EQ (coding
->src_object
, coding
->dst_object
))
5167 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5169 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5170 coding_set_source (coding
);
5171 src
= coding
->source
+ coding
->consumed
;
5172 src_end
= coding
->source
+ coding
->src_bytes
;
5182 while (src
< src_end
)
5189 if (EQ (eol_type
, Qdos
))
5195 else if (EQ (eol_type
, Qmac
))
5198 if (dst
>= dst_end
- 1)
5200 coding
->consumed
= src
- coding
->source
;
5202 if (EQ (coding
->src_object
, coding
->dst_object
))
5204 if (dst
>= dst_end
- 1)
5206 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5208 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5209 coding_set_source (coding
);
5210 src
= coding
->source
+ coding
->consumed
;
5211 src_end
= coding
->source
+ coding
->src_bytes
;
5219 if (!EQ (coding
->src_object
, coding
->dst_object
))
5221 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5225 EMACS_INT offset
= src
- coding
->source
;
5227 dst
= alloc_destination (coding
, require
, dst
);
5228 coding_set_source (coding
);
5229 src
= coding
->source
+ offset
;
5230 src_end
= coding
->source
+ coding
->src_bytes
;
5233 produced_chars
= coding
->src_chars
;
5234 while (src
< src_end
)
5240 if (EQ (eol_type
, Qdos
))
5247 else if (EQ (eol_type
, Qmac
))
5253 coding
->consumed
= coding
->src_bytes
;
5254 coding
->consumed_char
= coding
->src_chars
;
5257 produced
= dst
- (coding
->destination
+ coding
->produced
);
5258 if (BUFFERP (coding
->dst_object
))
5259 insert_from_gap (produced_chars
, produced
);
5260 coding
->produced
+= produced
;
5261 coding
->produced_char
+= produced_chars
;
5262 return produced_chars
;
5265 /* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5267 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5271 produce_composition (coding
, charbuf
)
5272 struct coding_system
*coding
;
5278 enum composition_method method
;
5280 Lisp_Object components
;
5282 buffer
= coding
->dst_object
;
5284 pos
= coding
->dst_pos
+ charbuf
[1];
5285 method
= (enum composition_method
) (charbuf
[3]);
5286 cmp_len
= charbuf
[4];
5288 if (method
== COMPOSITION_RELATIVE
)
5292 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5297 for (i
= 0; i
< len
; i
++)
5298 args
[i
] = make_number (charbuf
[i
]);
5299 components
= (method
== COMPOSITION_WITH_ALTCHARS
5300 ? Fstring (len
, args
) : Fvector (len
, args
));
5302 compose_text (pos
, pos
+ cmp_len
, components
, Qnil
, Qnil
);
5306 save_composition_data (buf
, buf_end
, prop
)
5310 enum composition_method method
= COMPOSITION_METHOD (prop
);
5311 int cmp_len
= COMPOSITION_LENGTH (prop
);
5313 if (buf
+ 4 + (MAX_COMPOSITION_COMPONENTS
* 2 - 1) > buf_end
)
5316 buf
[1] = CODING_ANNOTATE_COMPOSITION_MASK
;
5320 if (method
== COMPOSITION_RELATIVE
)
5324 Lisp_Object components
;
5327 components
= COMPOSITION_COMPONENTS (prop
);
5328 if (VECTORP (components
))
5330 len
= XVECTOR (components
)->size
;
5331 for (i
= 0; i
< len
; i
++)
5332 buf
[4 + i
] = XINT (AREF (components
, i
));
5334 else if (STRINGP (components
))
5338 len
= XSTRING (components
)->size
;
5341 FETCH_STRING_CHAR_ADVANCE (buf
[4 + i
], components
, i
, i_byte
);
5343 else if (INTEGERP (components
))
5346 buf
[4] = XINT (components
);
5348 else if (CONSP (components
))
5350 for (len
= 0; CONSP (components
);
5351 len
++, components
= XCDR (components
))
5352 buf
[4 + len
] = XINT (XCAR (components
));
5358 return (buf
+ buf
[0]);
/* Initial number of `int' elements requested for the conversion work
   area CODING->charbuf.  */
5361 #define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca, i.e. on the stack of the
   function that uses this macro, and record the number of elements
   in CODING->charbuf_size.  The attempt is repeated while the size
   stays above 1024 elements (presumably with a smaller size each
   time -- confirm).  If no area could be obtained, CODING->result is
   set to CODING_RESULT_INSUFFICIENT_MEM and the *enclosing function*
   returns that value, so this macro can only be used in a function
   returning int.  */
5363 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5365 int size = CHARBUF_SIZE;; \
5367 coding->charbuf = NULL; \
5368 while (size > 1024) \
5370 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5371 if (coding->charbuf) \
5375 if (! coding->charbuf) \
5377 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5378 return coding->result; \
5380 coding->charbuf_size = size; \
5385 produce_annotation (coding
)
5386 struct coding_system
*coding
;
5388 int *charbuf
= coding
->charbuf
;
5389 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5391 while (charbuf
< charbuf_end
)
5397 int len
= -*charbuf
;
5400 case CODING_ANNOTATE_COMPOSITION_MASK
:
5401 produce_composition (coding
, charbuf
);
5411 /* Decode the data at CODING->src_object into CODING->dst_object.
5412 CODING->src_object is a buffer, a string, or nil.
5413 CODING->dst_object is a buffer.
5415 If CODING->src_object is a buffer, it must be the current buffer.
5416 In this case, if CODING->src_pos is positive, it is a position of
5417 the source text in the buffer, otherwise, the source text is in the
5418 gap area of the buffer, and CODING->src_pos specifies the offset of
5419 the text from GPT (which must be the same as PT). If this is the
5420 same buffer as CODING->dst_object, CODING->src_pos must be
5423 If CODING->src_object is a string, CODING->src_pos in an index to
5426 If CODING->src_object is nil, CODING->source must already point to
5427 the non-relocatable memory area. In this case, CODING->src_pos is
5428 an offset from CODING->source.
5430 The decoded data is inserted at the current point of the buffer
5435 decode_coding (coding
)
5436 struct coding_system
*coding
;
5440 if (BUFFERP (coding
->src_object
)
5441 && coding
->src_pos
> 0
5442 && coding
->src_pos
< GPT
5443 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5444 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
5446 if (BUFFERP (coding
->dst_object
))
5448 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5449 set_buffer_internal (XBUFFER (coding
->dst_object
));
5451 move_gap_both (PT
, PT_BYTE
);
5454 coding
->consumed
= coding
->consumed_char
= 0;
5455 coding
->produced
= coding
->produced_char
= 0;
5456 coding
->chars_at_source
= 0;
5457 coding
->result
= CODING_RESULT_SUCCESS
;
5460 ALLOC_CONVERSION_WORK_AREA (coding
);
5462 attrs
= CODING_ID_ATTRS (coding
->id
);
5466 coding_set_source (coding
);
5467 coding
->annotated
= 0;
5468 (*(coding
->decoder
)) (coding
);
5469 if (!NILP (CODING_ATTR_DECODE_TBL (attrs
)))
5470 translate_chars (CODING_ATTR_DECODE_TBL (attrs
), coding
);
5471 coding_set_destination (coding
);
5472 produce_chars (coding
);
5473 if (coding
->annotated
)
5474 produce_annotation (coding
);
5476 while (coding
->consumed
< coding
->src_bytes
5477 && ! coding
->result
);
5479 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qccl
)
5480 && SYMBOLP (CODING_ID_EOL_TYPE (coding
->id
))
5481 && ! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5482 decode_eol (coding
);
5484 coding
->carryover_bytes
= 0;
5485 if (coding
->consumed
< coding
->src_bytes
)
5487 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5490 coding_set_source (coding
);
5491 coding_set_destination (coding
);
5492 src
= coding
->source
+ coding
->consumed
;
5494 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5496 /* Flush out unprocessed data as binary chars. We are sure
5497 that the number of data is less than the size of
5499 int *charbuf
= coding
->charbuf
;
5501 while (nbytes
-- > 0)
5504 *charbuf
++ = (c
& 0x80 ? - c
: c
);
5506 produce_chars (coding
);
5510 /* Record unprocessed bytes in coding->carryover. We are
5511 sure that the number of data is less than the size of
5512 coding->carryover. */
5513 unsigned char *p
= coding
->carryover
;
5515 coding
->carryover_bytes
= nbytes
;
5516 while (nbytes
-- > 0)
5519 coding
->consumed
= coding
->src_bytes
;
5522 return coding
->result
;
5526 consume_chars (coding
)
5527 struct coding_system
*coding
;
5529 int *buf
= coding
->charbuf
;
5530 /* -1 is to compensate for CRLF. */
5531 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
- 1;
5532 unsigned char *src
= coding
->source
+ coding
->consumed
;
5533 int pos
= coding
->src_pos
+ coding
->consumed_char
;
5534 int end_pos
= coding
->src_pos
+ coding
->src_chars
;
5535 int multibytep
= coding
->src_multibyte
;
5536 Lisp_Object eol_type
;
5538 int start
, end
, stop
;
5539 Lisp_Object object
, prop
;
5541 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5542 if (VECTORP (eol_type
))
5545 object
= coding
->src_object
;
5547 /* Note: composition handling is not yet implemented. */
5548 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
5550 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
5551 && find_composition (pos
, end_pos
, &start
, &end
, &prop
, object
)
5554 || (find_composition (end
, end_pos
, &start
, &end
, &prop
, object
)
5555 && end
<= end_pos
)))
5560 while (buf
< buf_end
)
5568 p
= save_composition_data (buf
, buf_end
, prop
);
5572 if (find_composition (end
, end_pos
, &start
, &end
, &prop
, object
)
5582 c
= STRING_CHAR_ADVANCE (src
);
5583 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
5585 if (! EQ (eol_type
, Qunix
))
5589 if (EQ (eol_type
, Qdos
))
5599 coding
->consumed
= src
- coding
->source
;
5600 coding
->consumed_char
= pos
- coding
->src_pos
;
5601 coding
->charbuf_used
= buf
- coding
->charbuf
;
5602 coding
->chars_at_source
= 0;
5606 /* Encode the text at CODING->src_object into CODING->dst_object.
5607 CODING->src_object is a buffer or a string.
5608 CODING->dst_object is a buffer or nil.
5610 If CODING->src_object is a buffer, it must be the current buffer.
5611 In this case, if CODING->src_pos is positive, it is a position of
5612 the source text in the buffer, otherwise. the source text is in the
5613 gap area of the buffer, and coding->src_pos specifies the offset of
5614 the text from GPT (which must be the same as PT). If this is the
5615 same buffer as CODING->dst_object, CODING->src_pos must be
5616 negative and CODING should not have `pre-write-conversion'.
5618 If CODING->src_object is a string, CODING should not have
5619 `pre-write-conversion'.
5621 If CODING->dst_object is a buffer, the encoded data is inserted at
5622 the current point of that buffer.
5624 If CODING->dst_object is nil, the encoded data is placed at the
5625 memory area specified by CODING->destination. */
5628 encode_coding (coding
)
5629 struct coding_system
*coding
;
5633 attrs
= CODING_ID_ATTRS (coding
->id
);
5635 if (BUFFERP (coding
->dst_object
))
5637 set_buffer_internal (XBUFFER (coding
->dst_object
));
5638 coding
->dst_multibyte
5639 = ! NILP (current_buffer
->enable_multibyte_characters
);
5642 coding
->consumed
= coding
->consumed_char
= 0;
5643 coding
->produced
= coding
->produced_char
= 0;
5644 coding
->result
= CODING_RESULT_SUCCESS
;
5647 ALLOC_CONVERSION_WORK_AREA (coding
);
5650 coding_set_source (coding
);
5651 consume_chars (coding
);
5653 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs
)))
5654 translate_chars (CODING_ATTR_ENCODE_TBL (attrs
), coding
);
5656 coding_set_destination (coding
);
5657 (*(coding
->encoder
)) (coding
);
5658 } while (coding
->consumed_char
< coding
->src_chars
);
5660 if (BUFFERP (coding
->dst_object
))
5661 insert_from_gap (coding
->produced_char
, coding
->produced
);
5663 return (coding
->result
);
5668 /* List of currently used working buffer. */
5669 Lisp_Object Vcode_conversion_work_buf_list
;
5671 /* A working buffer used by the top level conversion. */
5672 Lisp_Object Vcode_conversion_reused_work_buf
;
5675 /* Return a working buffer that can be freely used by the following
5676 code conversion. MULTIBYTEP specifies the multibyteness of the
5680 make_conversion_work_buffer (multibytep
)
5683 struct buffer
*current
= current_buffer
;
5686 if (NILP (Vcode_conversion_work_buf_list
))
5688 if (NILP (Vcode_conversion_reused_work_buf
))
5689 Vcode_conversion_reused_work_buf
5690 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5691 Vcode_conversion_work_buf_list
5692 = Fcons (Vcode_conversion_reused_work_buf
, Qnil
);
5696 int depth
= Flength (Vcode_conversion_work_buf_list
);
5699 sprintf (str
, " *code-conversion-work*<%d>", depth
);
5700 Vcode_conversion_work_buf_list
5701 = Fcons (Fget_buffer_create (build_string (str
)),
5702 Vcode_conversion_work_buf_list
);
5705 buf
= XCAR (Vcode_conversion_work_buf_list
);
5706 set_buffer_internal (XBUFFER (buf
));
5707 current_buffer
->undo_list
= Qt
;
5709 Fset_buffer_multibyte (multibytep
? Qt
: Qnil
);
5710 set_buffer_internal (current
);
5714 static struct coding_system
*saved_coding
;
5717 code_conversion_restore (info
)
5720 int depth
= Flength (Vcode_conversion_work_buf_list
);
5725 buf
= XCAR (Vcode_conversion_work_buf_list
);
5726 Vcode_conversion_work_buf_list
= XCDR (Vcode_conversion_work_buf_list
);
5727 if (depth
> 1 && !NILP (Fbuffer_live_p (buf
)))
5731 if (saved_coding
->dst_object
== Qt
5732 && saved_coding
->destination
)
5733 xfree (saved_coding
->destination
);
5735 return save_excursion_restore (info
);
5740 decode_coding_gap (coding
, chars
, bytes
)
5741 struct coding_system
*coding
;
5742 EMACS_INT chars
, bytes
;
5744 int count
= specpdl_ptr
- specpdl
;
5746 saved_coding
= coding
;
5747 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5749 coding
->src_object
= Fcurrent_buffer ();
5750 coding
->src_chars
= chars
;
5751 coding
->src_bytes
= bytes
;
5752 coding
->src_pos
= -chars
;
5753 coding
->src_pos_byte
= -bytes
;
5754 coding
->src_multibyte
= chars
< bytes
;
5755 coding
->dst_object
= coding
->src_object
;
5756 coding
->dst_pos
= PT
;
5757 coding
->dst_pos_byte
= PT_BYTE
;
5758 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
5760 if (CODING_REQUIRE_DETECTION (coding
))
5761 detect_coding (coding
);
5763 decode_coding (coding
);
5765 unbind_to (count
, Qnil
);
5766 return coding
->result
;
5770 encode_coding_gap (coding
, chars
, bytes
)
5771 struct coding_system
*coding
;
5772 EMACS_INT chars
, bytes
;
5774 int count
= specpdl_ptr
- specpdl
;
5777 saved_coding
= coding
;
5778 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5780 buffer
= Fcurrent_buffer ();
5781 coding
->src_object
= buffer
;
5782 coding
->src_chars
= chars
;
5783 coding
->src_bytes
= bytes
;
5784 coding
->src_pos
= -chars
;
5785 coding
->src_pos_byte
= -bytes
;
5786 coding
->src_multibyte
= chars
< bytes
;
5787 coding
->dst_object
= coding
->src_object
;
5788 coding
->dst_pos
= PT
;
5789 coding
->dst_pos_byte
= PT_BYTE
;
5791 encode_coding (coding
);
5793 unbind_to (count
, Qnil
);
5794 return coding
->result
;
5798 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5799 SRC_OBJECT into DST_OBJECT by coding context CODING.
5801 SRC_OBJECT is a buffer, a string, or Qnil.
5803 If it is a buffer, the text is at point of the buffer. FROM and TO
5804 are positions in the buffer.
5806 If it is a string, the text is at the beginning of the string.
5807 FROM and TO are indices to the string.
5809 If it is nil, the text is at coding->source. FROM and TO are
5810 indices to coding->source.
5812 DST_OBJECT is a buffer, Qt, or Qnil.
5814 If it is a buffer, the decoded text is inserted at point of the
5815 buffer. If the buffer is the same as SRC_OBJECT, the source text
5818 If it is Qt, a string is made from the decoded text, and
5819 set in CODING->dst_object.
   If it is Qnil, the decoded text is stored at CODING->destination.
   The caller must allocate CODING->dst_bytes bytes at
   CODING->destination by xmalloc.  If the decoded text is longer than
   CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5828 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
5830 struct coding_system
*coding
;
5831 Lisp_Object src_object
;
5832 EMACS_INT from
, from_byte
, to
, to_byte
;
5833 Lisp_Object dst_object
;
5835 int count
= specpdl_ptr
- specpdl
;
5836 unsigned char *destination
;
5837 EMACS_INT dst_bytes
;
5838 EMACS_INT chars
= to
- from
;
5839 EMACS_INT bytes
= to_byte
- from_byte
;
5842 saved_coding
= coding
;
5843 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5845 if (NILP (dst_object
))
5847 destination
= coding
->destination
;
5848 dst_bytes
= coding
->dst_bytes
;
5851 coding
->src_object
= src_object
;
5852 coding
->src_chars
= chars
;
5853 coding
->src_bytes
= bytes
;
5854 coding
->src_multibyte
= chars
< bytes
;
5856 if (STRINGP (src_object
))
5858 coding
->src_pos
= from
;
5859 coding
->src_pos_byte
= from_byte
;
5861 else if (BUFFERP (src_object
))
5863 set_buffer_internal (XBUFFER (src_object
));
5865 move_gap_both (from
, from_byte
);
5866 if (EQ (src_object
, dst_object
))
5868 TEMP_SET_PT_BOTH (from
, from_byte
);
5869 del_range_both (from
, from_byte
, to
, to_byte
, 1);
5870 coding
->src_pos
= -chars
;
5871 coding
->src_pos_byte
= -bytes
;
5875 coding
->src_pos
= from
;
5876 coding
->src_pos_byte
= from_byte
;
5880 if (CODING_REQUIRE_DETECTION (coding
))
5881 detect_coding (coding
);
5882 attrs
= CODING_ID_ATTRS (coding
->id
);
5884 if (! NILP (CODING_ATTR_POST_READ (attrs
))
5885 || EQ (dst_object
, Qt
))
5887 coding
->dst_object
= make_conversion_work_buffer (1);
5888 coding
->dst_pos
= BEG
;
5889 coding
->dst_pos_byte
= BEG_BYTE
;
5890 coding
->dst_multibyte
= 1;
5892 else if (BUFFERP (dst_object
))
5894 coding
->dst_object
= dst_object
;
5895 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
5896 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
5897 coding
->dst_multibyte
5898 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
5902 coding
->dst_object
= Qnil
;
5903 coding
->dst_multibyte
= 1;
5906 decode_coding (coding
);
5908 if (BUFFERP (coding
->dst_object
))
5909 set_buffer_internal (XBUFFER (coding
->dst_object
));
5911 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
5913 struct gcpro gcpro1
, gcpro2
;
5914 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
5917 GCPRO2 (coding
->src_object
, coding
->dst_object
);
5918 val
= call1 (CODING_ATTR_POST_READ (attrs
),
5919 make_number (coding
->produced_char
));
5922 coding
->produced_char
+= Z
- prev_Z
;
5923 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
5926 if (EQ (dst_object
, Qt
))
5928 coding
->dst_object
= Fbuffer_string ();
5930 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
5932 set_buffer_internal (XBUFFER (coding
->dst_object
));
5933 if (dst_bytes
< coding
->produced
)
5936 = (unsigned char *) xrealloc (destination
, coding
->produced
);
5939 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
5940 unbind_to (count
, Qnil
);
5943 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
5944 move_gap_both (BEGV
, BEGV_BYTE
);
5945 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
5946 coding
->destination
= destination
;
5950 unbind_to (count
, Qnil
);
5955 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
5957 struct coding_system
*coding
;
5958 Lisp_Object src_object
;
5959 EMACS_INT from
, from_byte
, to
, to_byte
;
5960 Lisp_Object dst_object
;
5962 int count
= specpdl_ptr
- specpdl
;
5963 EMACS_INT chars
= to
- from
;
5964 EMACS_INT bytes
= to_byte
- from_byte
;
5967 saved_coding
= coding
;
5968 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5970 coding
->src_object
= src_object
;
5971 coding
->src_chars
= chars
;
5972 coding
->src_bytes
= bytes
;
5973 coding
->src_multibyte
= chars
< bytes
;
5975 attrs
= CODING_ID_ATTRS (coding
->id
);
5977 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
5981 coding
->src_object
= make_conversion_work_buffer (coding
->src_multibyte
);
5982 set_buffer_internal (XBUFFER (coding
->src_object
));
5983 if (STRINGP (src_object
))
5984 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
5985 else if (BUFFERP (src_object
))
5986 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
5988 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
5990 if (EQ (src_object
, dst_object
))
5992 set_buffer_internal (XBUFFER (src_object
));
5993 del_range_both (from
, from_byte
, to
, to_byte
, 1);
5994 set_buffer_internal (XBUFFER (coding
->src_object
));
5997 val
= call2 (CODING_ATTR_PRE_WRITE (attrs
),
5998 make_number (1), make_number (chars
));
6001 move_gap_both (BEG
, BEG_BYTE
);
6002 coding
->src_chars
= Z
- BEG
;
6003 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6004 coding
->src_pos
= BEG
;
6005 coding
->src_pos_byte
= BEG_BYTE
;
6006 coding
->src_multibyte
= Z
< Z_BYTE
;
6008 else if (STRINGP (src_object
))
6010 coding
->src_pos
= from
;
6011 coding
->src_pos_byte
= from_byte
;
6013 else if (BUFFERP (src_object
))
6015 set_buffer_internal (XBUFFER (src_object
));
6017 move_gap_both (from
, from_byte
);
6018 if (EQ (src_object
, dst_object
))
6020 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6021 coding
->src_pos
= -chars
;
6022 coding
->src_pos_byte
= -bytes
;
6026 coding
->src_pos
= from
;
6027 coding
->src_pos_byte
= from_byte
;
6031 if (BUFFERP (dst_object
))
6033 coding
->dst_object
= dst_object
;
6034 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6035 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6036 coding
->dst_multibyte
6037 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6039 else if (EQ (dst_object
, Qt
))
6041 coding
->dst_object
= Qnil
;
6042 coding
->destination
= (unsigned char *) xmalloc (coding
->src_chars
);
6043 coding
->dst_bytes
= coding
->src_chars
;
6044 coding
->dst_multibyte
= 0;
6048 coding
->dst_object
= Qnil
;
6049 coding
->dst_multibyte
= 0;
6052 encode_coding (coding
);
6054 if (EQ (dst_object
, Qt
))
6056 if (BUFFERP (coding
->dst_object
))
6057 coding
->dst_object
= Fbuffer_string ();
6061 = make_unibyte_string ((char *) coding
->destination
,
6063 xfree (coding
->destination
);
6067 unbind_to (count
, Qnil
);
6072 preferred_coding_system ()
6074 int id
= coding_categories
[coding_priorities
[0]].id
;
6076 return CODING_ID_NAME (id
);
6081 /*** 8. Emacs Lisp library functions ***/
6083 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6084 doc
: /* Return t if OBJECT is nil or a coding-system.
6085 See the documentation of `define-coding-system' for information
6086 about coding-system objects. */)
6090 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
6093 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6094 Sread_non_nil_coding_system
, 1, 1, 0,
6095 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6102 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6103 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6105 while (XSTRING (val
)->size
== 0);
6106 return (Fintern (val
, Qnil
));
6109 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6110 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6111 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6112 (prompt
, default_coding_system
)
6113 Lisp_Object prompt
, default_coding_system
;
6116 if (SYMBOLP (default_coding_system
))
6117 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
6118 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6119 Qt
, Qnil
, Qcoding_system_history
,
6120 default_coding_system
, Qnil
);
6121 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
6124 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6126 doc
: /* Check validity of CODING-SYSTEM.
6127 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6128 It is valid if it is a symbol with a non-nil `coding-system' property.
6129 The value of property should be a vector of length 5. */)
6131 Lisp_Object coding_system
;
6133 CHECK_SYMBOL (coding_system
);
6134 if (!NILP (Fcoding_system_p (coding_system
)))
6135 return coding_system
;
6137 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6142 detect_coding_system (src
, src_bytes
, highest
, multibytep
, coding_system
)
6144 int src_bytes
, highest
;
6146 Lisp_Object coding_system
;
6148 unsigned char *src_end
= src
+ src_bytes
;
6149 int mask
= CATEGORY_MASK_ANY
;
6152 Lisp_Object attrs
, eol_type
;
6154 struct coding_system coding
;
6156 if (NILP (coding_system
))
6157 coding_system
= Qundecided
;
6158 setup_coding_system (coding_system
, &coding
);
6159 attrs
= CODING_ID_ATTRS (coding
.id
);
6160 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6162 coding
.source
= src
;
6163 coding
.src_bytes
= src_bytes
;
6164 coding
.src_multibyte
= multibytep
;
6165 coding
.consumed
= 0;
6167 if (XINT (CODING_ATTR_CATEGORY (attrs
)) != coding_category_undecided
)
6169 mask
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
6173 coding_system
= Qnil
;
6174 for (; src
< src_end
; src
++)
6177 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
6179 || c
== ISO_CODE_SO
)))
6182 coding
.head_ascii
= src
- coding
.source
;
6185 for (i
= 0; i
< coding_category_raw_text
; i
++)
6187 enum coding_category category
= coding_priorities
[i
];
6188 struct coding_system
*this = coding_categories
+ category
;
6190 if (category
>= coding_category_raw_text
6191 || detected
& (1 << category
))
6196 /* No coding system of this category is defined. */
6197 mask
&= ~(1 << category
);
6201 detected
|= detected_mask
[category
];
6202 if ((*(coding_categories
[category
].detector
)) (&coding
, &mask
)
6205 mask
&= detected_mask
[category
];
6213 val
= Fcons (make_number (coding_category_raw_text
), Qnil
);
6214 else if (mask
== CATEGORY_MASK_ANY
)
6215 val
= Fcons (make_number (coding_category_undecided
), Qnil
);
6218 for (i
= 0; i
< coding_category_raw_text
; i
++)
6219 if (mask
& (1 << coding_priorities
[i
]))
6221 val
= Fcons (make_number (coding_priorities
[i
]), Qnil
);
6228 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6229 if (mask
& (1 << coding_priorities
[i
]))
6230 val
= Fcons (make_number (coding_priorities
[i
]), val
);
6234 int one_byte_eol
= -1, two_byte_eol
= -1;
6237 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
6239 struct coding_system
*this
6240 = (NILP (coding_system
) ? coding_categories
+ XINT (XCAR (tail
))
6244 attrs
= CODING_ID_ATTRS (this->id
);
6245 eol_type
= CODING_ID_EOL_TYPE (this->id
);
6246 XSETCAR (tail
, CODING_ID_NAME (this->id
));
6247 if (VECTORP (eol_type
))
6249 if (EQ (CODING_ATTR_TYPE (attrs
), Qutf_16
))
6251 if (two_byte_eol
< 0)
6252 two_byte_eol
= detect_eol (this, coding
.source
, src_bytes
);
6253 this_eol
= two_byte_eol
;
6257 if (one_byte_eol
< 0)
6258 one_byte_eol
=detect_eol (this, coding
.source
, src_bytes
);
6259 this_eol
= one_byte_eol
;
6261 if (this_eol
== EOL_SEEN_LF
)
6262 XSETCAR (tail
, AREF (eol_type
, 0));
6263 else if (this_eol
== EOL_SEEN_CRLF
)
6264 XSETCAR (tail
, AREF (eol_type
, 1));
6265 else if (this_eol
== EOL_SEEN_CR
)
6266 XSETCAR (tail
, AREF (eol_type
, 2));
6271 return (highest
? XCAR (val
) : val
);
6275 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
6277 doc
: /* Detect coding system of the text in the region between START and END.
6278 Return a list of possible coding systems ordered by priority.
6280 If only ASCII characters are found, it returns a list of single element
6281 `undecided' or its subsidiary coding system according to a detected
6284 If optional argument HIGHEST is non-nil, return the coding system of
6285 highest priority. */)
6286 (start
, end
, highest
)
6287 Lisp_Object start
, end
, highest
;
6290 int from_byte
, to_byte
;
6292 CHECK_NUMBER_COERCE_MARKER (start
);
6293 CHECK_NUMBER_COERCE_MARKER (end
);
6295 validate_region (&start
, &end
);
6296 from
= XINT (start
), to
= XINT (end
);
6297 from_byte
= CHAR_TO_BYTE (from
);
6298 to_byte
= CHAR_TO_BYTE (to
);
6300 if (from
< GPT
&& to
>= GPT
)
6301 move_gap_both (to
, to_byte
);
6303 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
6304 to_byte
- from_byte
,
6306 !NILP (current_buffer
6307 ->enable_multibyte_characters
),
6311 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
6313 doc
: /* Detect coding system of the text in STRING.
6314 Return a list of possible coding systems ordered by priority.
6316 If only ASCII characters are found, it returns a list of single element
6317 `undecided' or its subsidiary coding system according to a detected
6320 If optional argument HIGHEST is non-nil, return the coding system of
6321 highest priority. */)
6323 Lisp_Object string
, highest
;
6325 CHECK_STRING (string
);
6327 return detect_coding_system (XSTRING (string
)->data
,
6328 STRING_BYTES (XSTRING (string
)),
6330 STRING_MULTIBYTE (string
),
6336 char_encodable_p (c
, attrs
)
6341 struct charset
*charset
;
6343 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
6344 CONSP (tail
); tail
= XCDR (tail
))
6346 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
6347 if (CHAR_CHARSET_P (c
, charset
))
6350 return (! NILP (tail
));
6354 /* Return a list of coding systems that safely encode the text between
6355 START and END. If EXCLUDE is non-nil, it is a list of coding
6356 systems not to check. The returned list doesn't contain any such
6357 coding systems. In any case, if the text contains only ASCII or is
6358 unibyte, return t. */
6360 DEFUN ("find-coding-systems-region-internal",
6361 Ffind_coding_systems_region_internal
,
6362 Sfind_coding_systems_region_internal
, 2, 3, 0,
6363 doc
: /* Internal use only. */)
6364 (start
, end
, exclude
)
6365 Lisp_Object start
, end
, exclude
;
6367 Lisp_Object coding_attrs_list
, safe_codings
;
6368 EMACS_INT start_byte
, end_byte
;
6369 unsigned char *p
, *pbeg
, *pend
;
6371 Lisp_Object tail
, elt
;
6373 if (STRINGP (start
))
6375 if (!STRING_MULTIBYTE (start
)
6376 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
6379 end_byte
= STRING_BYTES (XSTRING (start
));
6383 CHECK_NUMBER_COERCE_MARKER (start
);
6384 CHECK_NUMBER_COERCE_MARKER (end
);
6385 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6386 args_out_of_range (start
, end
);
6387 if (NILP (current_buffer
->enable_multibyte_characters
))
6389 start_byte
= CHAR_TO_BYTE (XINT (start
));
6390 end_byte
= CHAR_TO_BYTE (XINT (end
));
6391 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
6394 if (start
< GPT
&& end
> GPT
)
6396 if ((GPT
- start
) < (end
- GPT
))
6397 move_gap_both (start
, start_byte
);
6399 move_gap_both (end
, end_byte
);
6403 coding_attrs_list
= Qnil
;
6404 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6406 || NILP (Fmemq (XCAR (tail
), exclude
)))
6410 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
6411 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
6412 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
6413 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
6416 if (STRINGP (start
))
6417 p
= pbeg
= XSTRING (start
)->data
;
6419 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6420 pend
= p
+ (end_byte
- start_byte
);
6422 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
6423 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6427 if (ASCII_BYTE_P (*p
))
6431 c
= STRING_CHAR_ADVANCE (p
);
6433 charset_map_loaded
= 0;
6434 for (tail
= coding_attrs_list
; CONSP (tail
);)
6439 else if (char_encodable_p (c
, elt
))
6441 else if (CONSP (XCDR (tail
)))
6443 XSETCAR (tail
, XCAR (XCDR (tail
)));
6444 XSETCDR (tail
, XCDR (XCDR (tail
)));
6448 XSETCAR (tail
, Qnil
);
6452 if (charset_map_loaded
)
6454 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6456 if (STRINGP (start
))
6457 pbeg
= XSTRING (start
)->data
;
6459 pbeg
= BYTE_POS_ADDR (start_byte
);
6460 p
= pbeg
+ p_offset
;
6461 pend
= pbeg
+ pend_offset
;
6466 safe_codings
= Qnil
;
6467 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
6468 if (! NILP (XCAR (tail
)))
6469 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
6471 return safe_codings
;
6475 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
6476 Scheck_coding_systems_region
, 3, 3, 0,
6477 doc
: /* Check if the region is encodable by coding systems.
6479 START and END are buffer positions specifying the region.
6480 CODING-SYSTEM-LIST is a list of coding systems to check.
6482 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6483 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
6484 whole region, POS0, POS1, ... are buffer positions where non-encodable
6485 characters are found.
6487 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6490 START may be a string. In that case, check if the string is
6491 encodable, and the value contains indices to the string instead of
6492 buffer positions. END is ignored. */)
6493 (start
, end
, coding_system_list
)
6494 Lisp_Object start
, end
, coding_system_list
;
6497 EMACS_INT start_byte
, end_byte
;
6499 unsigned char *p
, *pbeg
, *pend
;
6501 Lisp_Object tail
, elt
;
6503 if (STRINGP (start
))
6505 if (!STRING_MULTIBYTE (start
)
6506 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
6509 end_byte
= STRING_BYTES (XSTRING (start
));
6514 CHECK_NUMBER_COERCE_MARKER (start
);
6515 CHECK_NUMBER_COERCE_MARKER (end
);
6516 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6517 args_out_of_range (start
, end
);
6518 if (NILP (current_buffer
->enable_multibyte_characters
))
6520 start_byte
= CHAR_TO_BYTE (XINT (start
));
6521 end_byte
= CHAR_TO_BYTE (XINT (end
));
6522 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
6525 if (start
< GPT
&& end
> GPT
)
6527 if ((GPT
- start
) < (end
- GPT
))
6528 move_gap_both (start
, start_byte
);
6530 move_gap_both (end
, end_byte
);
6536 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6539 list
= Fcons (Fcons (elt
, Fcons (AREF (CODING_SYSTEM_SPEC (elt
), 0),
6544 if (STRINGP (start
))
6545 p
= pbeg
= XSTRING (start
)->data
;
6547 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6548 pend
= p
+ (end_byte
- start_byte
);
6550 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
6551 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6555 if (ASCII_BYTE_P (*p
))
6559 c
= STRING_CHAR_ADVANCE (p
);
6561 charset_map_loaded
= 0;
6562 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
6564 elt
= XCDR (XCAR (tail
));
6565 if (! char_encodable_p (c
, XCAR (elt
)))
6566 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
6568 if (charset_map_loaded
)
6570 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6572 if (STRINGP (start
))
6573 pbeg
= XSTRING (start
)->data
;
6575 pbeg
= BYTE_POS_ADDR (start_byte
);
6576 p
= pbeg
+ p_offset
;
6577 pend
= pbeg
+ pend_offset
;
6585 for (; CONSP (tail
); tail
= XCDR (tail
))
6588 if (CONSP (XCDR (XCDR (elt
))))
6589 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
6599 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
6600 Lisp_Object start
, end
, coding_system
, dst_object
;
6601 int encodep
, norecord
;
6603 struct coding_system coding
;
6604 EMACS_INT from
, from_byte
, to
, to_byte
;
6605 Lisp_Object src_object
;
6607 CHECK_NUMBER_COERCE_MARKER (start
);
6608 CHECK_NUMBER_COERCE_MARKER (end
);
6609 if (NILP (coding_system
))
6610 coding_system
= Qno_conversion
;
6612 CHECK_CODING_SYSTEM (coding_system
);
6613 src_object
= Fcurrent_buffer ();
6614 if (NILP (dst_object
))
6615 dst_object
= src_object
;
6616 else if (! EQ (dst_object
, Qt
))
6617 CHECK_BUFFER (dst_object
);
6619 validate_region (&start
, &end
);
6620 from
= XFASTINT (start
);
6621 from_byte
= CHAR_TO_BYTE (from
);
6622 to
= XFASTINT (end
);
6623 to_byte
= CHAR_TO_BYTE (to
);
6625 setup_coding_system (coding_system
, &coding
);
6626 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6629 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
6632 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
6635 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
6637 if (coding
.result
!= CODING_RESULT_SUCCESS
)
6638 error ("Code conversion error: %d", coding
.result
);
6640 return (BUFFERP (dst_object
)
6641 ? make_number (coding
.produced_char
)
6642 : coding
.dst_object
);
6646 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
6647 3, 4, "r\nzCoding system: ",
6648 doc
: /* Decode the current region from the specified coding system.
6649 When called from a program, takes four arguments:
6650 START, END, CODING-SYSTEM, and DESTINATION.
6651 START and END are buffer positions.
6653 Optional 4th arguments DESTINATION specifies where the decoded text goes.
6654 If nil, the region between START and END is replace by the decoded text.
6655 If buffer, the decoded text is inserted in the buffer.
6656 If t, the decoded text is returned.
6658 This function sets `last-coding-system-used' to the precise coding system
6659 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6660 not fully specified.)
6661 It returns the length of the decoded text. */)
6662 (start
, end
, coding_system
, destination
)
6663 Lisp_Object start
, end
, coding_system
, destination
;
6665 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
6668 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
6669 3, 4, "r\nzCoding system: ",
6670 doc
: /* Encode the current region by specified coding system.
6671 When called from a program, takes three arguments:
6672 START, END, and CODING-SYSTEM. START and END are buffer positions.
6674 Optional 4th arguments DESTINATION specifies where the encoded text goes.
6675 If nil, the region between START and END is replace by the encoded text.
6676 If buffer, the encoded text is inserted in the buffer.
6677 If t, the encoded text is returned.
6679 This function sets `last-coding-system-used' to the precise coding system
6680 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6681 not fully specified.)
6682 It returns the length of the encoded text. */)
6683 (start
, end
, coding_system
, destination
)
6684 Lisp_Object start
, end
, coding_system
, destination
;
6686 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
6690 code_convert_string (string
, coding_system
, dst_object
,
6691 encodep
, nocopy
, norecord
)
6692 Lisp_Object string
, coding_system
, dst_object
;
6693 int encodep
, nocopy
, norecord
;
6695 struct coding_system coding
;
6696 EMACS_INT chars
, bytes
;
6698 CHECK_STRING (string
);
6699 if (NILP (coding_system
))
6702 Vlast_coding_system_used
= Qno_conversion
;
6703 if (NILP (dst_object
))
6704 return (nocopy
? Fcopy_sequence (string
) : string
);
6707 if (NILP (coding_system
))
6708 coding_system
= Qno_conversion
;
6710 CHECK_CODING_SYSTEM (coding_system
);
6711 if (NILP (dst_object
))
6713 else if (! EQ (dst_object
, Qt
))
6714 CHECK_BUFFER (dst_object
);
6716 setup_coding_system (coding_system
, &coding
);
6717 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6718 chars
= XSTRING (string
)->size
;
6719 bytes
= STRING_BYTES (XSTRING (string
));
6721 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
6723 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
6725 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
6727 if (coding
.result
!= CODING_RESULT_SUCCESS
)
6728 error ("Code conversion error: %d", coding
.result
);
6730 return (BUFFERP (dst_object
)
6731 ? make_number (coding
.produced_char
)
6732 : coding
.dst_object
);
6736 /* Encode or decode STRING according to CODING_SYSTEM.
6737 Do not set Vlast_coding_system_used.
6739 This function is called only from macros DECODE_FILE and
6740 ENCODE_FILE, thus we ignore character composition. */
6743 code_convert_string_norecord (string
, coding_system
, encodep
)
6744 Lisp_Object string
, coding_system
;
6747 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
6751 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
6753 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6755 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6756 if the decoding operation is trivial.
6758 Optional fourth arg BUFFER non-nil meant that the decoded text is
6759 inserted in BUFFER instead of returned as a astring. In this case,
6760 the return value is BUFFER.
6762 This function sets `last-coding-system-used' to the precise coding system
6763 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6764 not fully specified. */)
6765 (string
, coding_system
, nocopy
, buffer
)
6766 Lisp_Object string
, coding_system
, nocopy
, buffer
;
6768 return code_convert_string (string
, coding_system
, buffer
,
6769 0, ! NILP (nocopy
), 0);
6772 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
6774 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
6776 Optional third arg NOCOPY non-nil means it is OK to return STRING
6777 itself if the encoding operation is trivial.
6779 Optional fourth arg BUFFER non-nil meant that the encoded text is
6780 inserted in BUFFER instead of returned as a astring. In this case,
6781 the return value is BUFFER.
6783 This function sets `last-coding-system-used' to the precise coding system
6784 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6785 not fully specified.) */)
6786 (string
, coding_system
, nocopy
, buffer
)
6787 Lisp_Object string
, coding_system
, nocopy
, buffer
;
6789 return code_convert_string (string
, coding_system
, buffer
,
6790 nocopy
, ! NILP (nocopy
), 1);
6794 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
6795 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
6796 Return the corresponding character. */)
6800 Lisp_Object spec
, attrs
, val
;
6801 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
6804 CHECK_NATNUM (code
);
6805 c
= XFASTINT (code
);
6806 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
6807 attrs
= AREF (spec
, 0);
6809 if (ASCII_BYTE_P (c
)
6810 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6813 val
= CODING_ATTR_CHARSET_LIST (attrs
);
6814 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6815 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6816 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
)));
6819 charset
= charset_roman
;
6820 else if (c
>= 0xA0 && c
< 0xDF)
6822 charset
= charset_kana
;
6827 int s1
= c
>> 8, s2
= c
& 0x7F;
6829 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
6830 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
6831 error ("Invalid code: %d", code
);
6833 charset
= charset_kanji
;
6835 c
= DECODE_CHAR (charset
, c
);
6837 error ("Invalid code: %d", code
);
6838 return make_number (c
);
6842 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
6843 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
6844 Return the corresponding code in SJIS. */)
6848 Lisp_Object spec
, attrs
, charset_list
;
6850 struct charset
*charset
;
6853 CHECK_CHARACTER (ch
);
6855 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
6856 attrs
= AREF (spec
, 0);
6858 if (ASCII_CHAR_P (c
)
6859 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6862 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
6863 charset
= char_charset (c
, charset_list
, &code
);
6864 if (code
== CHARSET_INVALID_CODE (charset
))
6865 error ("Can't encode by shift_jis encoding: %d", c
);
6868 return make_number (code
);
6871 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
6872 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
6873 Return the corresponding character. */)
6877 Lisp_Object spec
, attrs
, val
;
6878 struct charset
*charset_roman
, *charset_big5
, *charset
;
6881 CHECK_NATNUM (code
);
6882 c
= XFASTINT (code
);
6883 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
6884 attrs
= AREF (spec
, 0);
6886 if (ASCII_BYTE_P (c
)
6887 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6890 val
= CODING_ATTR_CHARSET_LIST (attrs
);
6891 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6892 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
6895 charset
= charset_roman
;
6898 int b1
= c
>> 8, b2
= c
& 0x7F;
6899 if (b1
< 0xA1 || b1
> 0xFE
6900 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
6901 error ("Invalid code: %d", code
);
6902 charset
= charset_big5
;
6904 c
= DECODE_CHAR (charset
, (unsigned )c
);
6906 error ("Invalid code: %d", code
);
6907 return make_number (c
);
6910 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
6911 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
6912 Return the corresponding character code in Big5. */)
6916 Lisp_Object spec
, attrs
, charset_list
;
6917 struct charset
*charset
;
6921 CHECK_CHARACTER (ch
);
6923 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
6924 attrs
= AREF (spec
, 0);
6925 if (ASCII_CHAR_P (c
)
6926 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6929 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
6930 charset
= char_charset (c
, charset_list
, &code
);
6931 if (code
== CHARSET_INVALID_CODE (charset
))
6932 error ("Can't encode by Big5 encoding: %d", c
);
6934 return make_number (code
);
6938 DEFUN ("set-terminal-coding-system-internal",
6939 Fset_terminal_coding_system_internal
,
6940 Sset_terminal_coding_system_internal
, 1, 1, 0,
6941 doc
: /* Internal use only. */)
6944 CHECK_SYMBOL (coding_system
);
6945 setup_coding_system (Fcheck_coding_system (coding_system
),
6948 /* We had better not send unsafe characters to terminal. */
6949 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
6950 /* Character composition should be disabled. */
6951 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6952 terminal_coding
.src_multibyte
= 1;
6953 terminal_coding
.dst_multibyte
= 0;
6957 DEFUN ("set-safe-terminal-coding-system-internal",
6958 Fset_safe_terminal_coding_system_internal
,
6959 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
6960 doc
: /* Internal use only. */)
6963 CHECK_SYMBOL (coding_system
);
6964 setup_coding_system (Fcheck_coding_system (coding_system
),
6965 &safe_terminal_coding
);
6966 /* Character composition should be disabled. */
6967 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6968 safe_terminal_coding
.src_multibyte
= 1;
6969 safe_terminal_coding
.dst_multibyte
= 0;
6973 DEFUN ("terminal-coding-system",
6974 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
6975 doc
: /* Return coding system specified for terminal output. */)
6978 return CODING_ID_NAME (terminal_coding
.id
);
6981 DEFUN ("set-keyboard-coding-system-internal",
6982 Fset_keyboard_coding_system_internal
,
6983 Sset_keyboard_coding_system_internal
, 1, 1, 0,
6984 doc
: /* Internal use only. */)
6986 Lisp_Object coding_system
;
6988 CHECK_SYMBOL (coding_system
);
6989 setup_coding_system (Fcheck_coding_system (coding_system
),
6991 /* Character composition should be disabled. */
6992 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6996 DEFUN ("keyboard-coding-system",
6997 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
6998 doc
: /* Return coding system specified for decoding keyboard input. */)
7001 return CODING_ID_NAME (keyboard_coding
.id
);
7005 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7006 Sfind_operation_coding_system
, 1, MANY
, 0,
7007 doc
: /* Choose a coding system for an operation based on the target name.
7008 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7009 DECODING-SYSTEM is the coding system to use for decoding
7010 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7011 for encoding (in case OPERATION does encoding).
7013 The first argument OPERATION specifies an I/O primitive:
7014 For file I/O, `insert-file-contents' or `write-region'.
7015 For process I/O, `call-process', `call-process-region', or `start-process'.
7016 For network I/O, `open-network-stream'.
7018 The remaining arguments should be the same arguments that were passed
7019 to the primitive. Depending on which primitive, one of those arguments
7020 is selected as the TARGET. For example, if OPERATION does file I/O,
7021 whichever argument specifies the file name is TARGET.
7023 TARGET has a meaning which depends on OPERATION:
7024 For file I/O, TARGET is a file name.
7025 For process I/O, TARGET is a process name.
7026 For network I/O, TARGET is a service name or a port number
7028 This function looks up what is specified for TARGET in
7029 `file-coding-system-alist', `process-coding-system-alist',
7030 or `network-coding-system-alist' depending on OPERATION.
7031 They may specify a coding system, a cons of coding systems,
7032 or a function symbol to call.
7033 In the last case, we call the function with one argument,
7034 which is a list of all the arguments given to this function.
7036 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7041 Lisp_Object operation
, target_idx
, target
, val
;
7042 register Lisp_Object chain
;
7045 error ("Too few arguments");
7046 operation
= args
[0];
7047 if (!SYMBOLP (operation
)
7048 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7049 error ("Invalid first arguement");
7050 if (nargs
< 1 + XINT (target_idx
))
7051 error ("Too few arguments for operation: %s",
7052 XSYMBOL (operation
)->name
->data
);
7053 target
= args
[XINT (target_idx
) + 1];
7054 if (!(STRINGP (target
)
7055 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7056 error ("Invalid %dth argument", XINT (target_idx
) + 1);
7058 chain
= ((EQ (operation
, Qinsert_file_contents
)
7059 || EQ (operation
, Qwrite_region
))
7060 ? Vfile_coding_system_alist
7061 : (EQ (operation
, Qopen_network_stream
)
7062 ? Vnetwork_coding_system_alist
7063 : Vprocess_coding_system_alist
));
7067 for (; CONSP (chain
); chain
= XCDR (chain
))
7073 && ((STRINGP (target
)
7074 && STRINGP (XCAR (elt
))
7075 && fast_string_match (XCAR (elt
), target
) >= 0)
7076 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7079 /* Here, if VAL is both a valid coding system and a valid
7080 function symbol, we return VAL as a coding system. */
7083 if (! SYMBOLP (val
))
7085 if (! NILP (Fcoding_system_p (val
)))
7086 return Fcons (val
, val
);
7087 if (! NILP (Ffboundp (val
)))
7089 val
= call1 (val
, Flist (nargs
, args
));
7092 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
7093 return Fcons (val
, val
);
7101 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
7102 Sset_coding_system_priority
, 1, MANY
, 0,
7103 doc
: /* Put higher priority to coding systems of the arguments. */)
7109 int changed
[coding_category_max
];
7110 enum coding_category priorities
[coding_category_max
];
7112 bzero (changed
, sizeof changed
);
7114 for (i
= j
= 0; i
< nargs
; i
++)
7116 enum coding_category category
;
7117 Lisp_Object spec
, attrs
;
7119 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
7120 attrs
= AREF (spec
, 0);
7121 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7122 if (changed
[category
])
7123 /* Ignore this coding system because a coding system of the
7124 same category already had a higher priority. */
7126 changed
[category
] = 1;
7127 priorities
[j
++] = category
;
7128 if (coding_categories
[category
].id
>= 0
7129 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
7130 setup_coding_system (args
[i
], &coding_categories
[category
]);
7133 /* Now we have decided top J priorities. Reflect the order of the
7134 original priorities to the remaining priorities. */
7136 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
7138 while (j
< coding_category_max
7139 && changed
[coding_priorities
[j
]])
7141 if (j
== coding_category_max
)
7143 priorities
[i
] = coding_priorities
[j
];
7146 bcopy (priorities
, coding_priorities
, sizeof priorities
);
7150 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
7151 Scoding_system_priority_list
, 0, 1, 0,
7152 doc
: /* Return a list of coding systems ordered by their priorities. */)
7154 Lisp_Object highestp
;
7159 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
7161 enum coding_category category
= coding_priorities
[i
];
7162 int id
= coding_categories
[category
].id
;
7167 attrs
= CODING_ID_ATTRS (id
);
7168 if (! NILP (highestp
))
7169 return CODING_ATTR_BASE_NAME (attrs
);
7170 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
7172 return Fnreverse (val
);
7176 make_subsidiaries (base
)
7179 Lisp_Object subsidiaries
;
7180 char *suffixes
[] = { "-unix", "-dos", "-mac" };
7181 int base_name_len
= STRING_BYTES (XSYMBOL (base
)->name
);
7182 char *buf
= (char *) alloca (base_name_len
+ 6);
7185 bcopy (XSYMBOL (base
)->name
->data
, buf
, base_name_len
);
7186 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
7187 for (i
= 0; i
< 3; i
++)
7189 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
7190 ASET (subsidiaries
, i
, intern (buf
));
7192 return subsidiaries
;
7196 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
7197 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
7198 doc
: /* For internal use only. */)
7204 Lisp_Object spec_vec
; /* [ ATTRS ALIASES EOL_TYPE ] */
7205 Lisp_Object attrs
; /* Vector of attributes. */
7206 Lisp_Object eol_type
;
7207 Lisp_Object aliases
;
7208 Lisp_Object coding_type
, charset_list
, safe_charsets
;
7209 enum coding_category category
;
7210 Lisp_Object tail
, val
;
7211 int max_charset_id
= 0;
7214 if (nargs
< coding_arg_max
)
7217 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
7219 name
= args
[coding_arg_name
];
7220 CHECK_SYMBOL (name
);
7221 CODING_ATTR_BASE_NAME (attrs
) = name
;
7223 val
= args
[coding_arg_mnemonic
];
7224 if (! STRINGP (val
))
7225 CHECK_CHARACTER (val
);
7226 CODING_ATTR_MNEMONIC (attrs
) = val
;
7228 coding_type
= args
[coding_arg_coding_type
];
7229 CHECK_SYMBOL (coding_type
);
7230 CODING_ATTR_TYPE (attrs
) = coding_type
;
7232 charset_list
= args
[coding_arg_charset_list
];
7233 if (SYMBOLP (charset_list
))
7235 if (EQ (charset_list
, Qiso_2022
))
7237 if (! EQ (coding_type
, Qiso_2022
))
7238 error ("Invalid charset-list");
7239 charset_list
= Viso_2022_charset_list
;
7241 else if (EQ (charset_list
, Qemacs_mule
))
7243 if (! EQ (coding_type
, Qemacs_mule
))
7244 error ("Invalid charset-list");
7245 charset_list
= Vemacs_mule_charset_list
;
7247 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7248 if (max_charset_id
< XFASTINT (XCAR (tail
)))
7249 max_charset_id
= XFASTINT (XCAR (tail
));
7253 charset_list
= Fcopy_sequence (charset_list
);
7254 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
7256 struct charset
*charset
;
7259 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7260 if (EQ (coding_type
, Qiso_2022
)
7261 ? CHARSET_ISO_FINAL (charset
) < 0
7262 : EQ (coding_type
, Qemacs_mule
)
7263 ? CHARSET_EMACS_MULE_ID (charset
) < 0
7265 error ("Can't handle charset `%s'",
7266 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7268 XCAR (tail
) = make_number (charset
->id
);
7269 if (max_charset_id
< charset
->id
)
7270 max_charset_id
= charset
->id
;
7273 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
7275 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
7277 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7278 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
7279 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
7281 val
= args
[coding_arg_decode_translation_table
];
7283 CHECK_CHAR_TABLE (val
);
7284 CODING_ATTR_DECODE_TBL (attrs
) = val
;
7286 val
= args
[coding_arg_encode_translation_table
];
7288 CHECK_CHAR_TABLE (val
);
7289 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
7291 val
= args
[coding_arg_post_read_conversion
];
7293 CODING_ATTR_POST_READ (attrs
) = val
;
7295 val
= args
[coding_arg_pre_write_conversion
];
7297 CODING_ATTR_PRE_WRITE (attrs
) = val
;
7299 val
= args
[coding_arg_default_char
];
7301 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
7304 CHECK_CHARACTER (val
);
7305 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
7308 val
= args
[coding_arg_plist
];
7310 CODING_ATTR_PLIST (attrs
) = val
;
7312 if (EQ (coding_type
, Qcharset
))
7314 val
= Fmake_vector (make_number (256), Qnil
);
7316 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7318 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7320 for (i
= charset
->code_space
[0]; i
<= charset
->code_space
[1]; i
++)
7321 if (NILP (AREF (val
, i
)))
7322 ASET (val
, i
, XCAR (tail
));
7324 ASET (attrs
, coding_attr_charset_valids
, val
);
7325 category
= coding_category_charset
;
7327 else if (EQ (coding_type
, Qccl
))
7331 if (nargs
< coding_arg_ccl_max
)
7334 val
= args
[coding_arg_ccl_decoder
];
7335 CHECK_CCL_PROGRAM (val
);
7337 val
= Fcopy_sequence (val
);
7338 ASET (attrs
, coding_attr_ccl_decoder
, val
);
7340 val
= args
[coding_arg_ccl_encoder
];
7341 CHECK_CCL_PROGRAM (val
);
7343 val
= Fcopy_sequence (val
);
7344 ASET (attrs
, coding_attr_ccl_encoder
, val
);
7346 val
= args
[coding_arg_ccl_valids
];
7347 valids
= Fmake_string (make_number (256), make_number (0));
7348 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
7352 ASET (valids
, XINT (val
), 1);
7358 CHECK_NUMBER (XCAR (val
));
7359 CHECK_NUMBER (XCDR (val
));
7360 from
= XINT (XCAR (val
));
7361 to
= XINT (XCDR (val
));
7362 for (i
= from
; i
<= to
; i
++)
7363 ASET (valids
, i
, 1);
7366 ASET (attrs
, coding_attr_ccl_valids
, valids
);
7368 category
= coding_category_ccl
;
7370 else if (EQ (coding_type
, Qutf_16
))
7372 Lisp_Object bom
, endian
;
7374 if (nargs
< coding_arg_utf16_max
)
7377 bom
= args
[coding_arg_utf16_bom
];
7378 if (! NILP (bom
) && ! EQ (bom
, Qt
))
7381 CHECK_CODING_SYSTEM (XCAR (bom
));
7382 CHECK_CODING_SYSTEM (XCDR (bom
));
7384 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
7386 endian
= args
[coding_arg_utf16_endian
];
7387 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
7389 category
= (CONSP (bom
)
7390 ? coding_category_utf_16_auto
7393 ? coding_category_utf_16_be_nosig
7394 : coding_category_utf_16_le_nosig
)
7396 ? coding_category_utf_16_be
7397 : coding_category_utf_16_le
));
7399 else if (EQ (coding_type
, Qiso_2022
))
7401 Lisp_Object initial
, reg_usage
, request
, flags
;
7402 struct charset
*charset
;
7405 if (nargs
< coding_arg_iso2022_max
)
7408 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
7409 CHECK_VECTOR (initial
);
7410 for (i
= 0; i
< 4; i
++)
7412 val
= Faref (initial
, make_number (i
));
7415 CHECK_CHARSET_GET_ID (val
, id
);
7416 ASET (initial
, i
, make_number (id
));
7419 ASET (initial
, i
, make_number (-1));
7422 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
7423 CHECK_CONS (reg_usage
);
7424 CHECK_NATNUM (XCAR (reg_usage
));
7425 CHECK_NATNUM (XCDR (reg_usage
));
7427 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
7428 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
7434 CHECK_CHARSET_GET_ID (XCAR (val
), id
);
7435 CHECK_NATNUM (XCDR (val
));
7436 if (XINT (XCDR (val
)) >= 4)
7437 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
7438 XCAR (val
) = make_number (id
);
7441 flags
= args
[coding_arg_iso2022_flags
];
7442 CHECK_NATNUM (flags
);
7444 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
7445 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
7447 ASET (attrs
, coding_attr_iso_initial
, initial
);
7448 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
7449 ASET (attrs
, coding_attr_iso_request
, request
);
7450 ASET (attrs
, coding_attr_iso_flags
, flags
);
7451 setup_iso_safe_charsets (attrs
);
7453 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
7454 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
7455 | CODING_ISO_FLAG_SINGLE_SHIFT
))
7456 ? coding_category_iso_7_else
7457 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
7458 ? coding_category_iso_7
7459 : coding_category_iso_7_tight
);
7462 int id
= XINT (AREF (initial
, 1));
7464 category
= (((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
7465 | CODING_ISO_FLAG_SINGLE_SHIFT
))
7466 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
7468 ? coding_category_iso_8_else
7469 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
7470 ? coding_category_iso_8_1
7471 : coding_category_iso_8_2
);
7474 else if (EQ (coding_type
, Qemacs_mule
))
7476 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
7477 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
7479 category
= coding_category_emacs_mule
;
7481 else if (EQ (coding_type
, Qshift_jis
))
7484 struct charset
*charset
;
7486 if (XINT (Flength (charset_list
)) != 3)
7487 error ("There should be just three charsets");
7489 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7490 if (CHARSET_DIMENSION (charset
) != 1)
7491 error ("Dimension of charset %s is not one",
7492 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7494 charset_list
= XCDR (charset_list
);
7495 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7496 if (CHARSET_DIMENSION (charset
) != 1)
7497 error ("Dimension of charset %s is not one",
7498 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7500 charset_list
= XCDR (charset_list
);
7501 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7502 if (CHARSET_DIMENSION (charset
) != 2)
7503 error ("Dimension of charset %s is not two",
7504 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7506 category
= coding_category_sjis
;
7507 Vsjis_coding_system
= name
;
7509 else if (EQ (coding_type
, Qbig5
))
7511 struct charset
*charset
;
7513 if (XINT (Flength (charset_list
)) != 2)
7514 error ("There should be just two charsets");
7516 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7517 if (CHARSET_DIMENSION (charset
) != 1)
7518 error ("Dimension of charset %s is not one",
7519 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7521 charset_list
= XCDR (charset_list
);
7522 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7523 if (CHARSET_DIMENSION (charset
) != 2)
7524 error ("Dimension of charset %s is not two",
7525 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7527 category
= coding_category_big5
;
7528 Vbig5_coding_system
= name
;
7530 else if (EQ (coding_type
, Qraw_text
))
7531 category
= coding_category_raw_text
;
7532 else if (EQ (coding_type
, Qutf_8
))
7533 category
= coding_category_utf_8
;
7534 else if (EQ (coding_type
, Qundecided
))
7535 category
= coding_category_undecided
;
7537 error ("Invalid coding system type: %s",
7538 XSYMBOL (coding_type
)->name
->data
);
7540 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
7542 eol_type
= args
[coding_arg_eol_type
];
7543 if (! NILP (eol_type
)
7544 && ! EQ (eol_type
, Qunix
)
7545 && ! EQ (eol_type
, Qdos
)
7546 && ! EQ (eol_type
, Qmac
))
7547 error ("Invalid eol-type");
7549 aliases
= Fcons (name
, Qnil
);
7551 if (NILP (eol_type
))
7553 eol_type
= make_subsidiaries (name
);
7554 for (i
= 0; i
< 3; i
++)
7556 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
7558 this_name
= AREF (eol_type
, i
);
7559 this_aliases
= Fcons (this_name
, Qnil
);
7560 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
7561 this_spec
= Fmake_vector (make_number (3), attrs
);
7562 ASET (this_spec
, 1, this_aliases
);
7563 ASET (this_spec
, 2, this_eol_type
);
7564 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
7565 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
7566 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
7567 Vcoding_system_alist
);
7571 spec_vec
= Fmake_vector (make_number (3), attrs
);
7572 ASET (spec_vec
, 1, aliases
);
7573 ASET (spec_vec
, 2, eol_type
);
7575 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
7576 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
7577 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
7578 Vcoding_system_alist
);
7581 int id
= coding_categories
[category
].id
;
7583 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
7584 setup_coding_system (name
, &coding_categories
[category
]);
7590 return Fsignal (Qwrong_number_of_arguments
,
7591 Fcons (intern ("define-coding-system-internal"),
7592 make_number (nargs
)));
7595 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
7596 Sdefine_coding_system_alias
, 2, 2, 0,
7597 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7598 (alias
, coding_system
)
7599 Lisp_Object alias
, coding_system
;
7601 Lisp_Object spec
, aliases
, eol_type
;
7603 CHECK_SYMBOL (alias
);
7604 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7605 aliases
= AREF (spec
, 1);
7606 while (!NILP (XCDR (aliases
)))
7607 aliases
= XCDR (aliases
);
7608 XCDR (aliases
) = Fcons (alias
, Qnil
);
7610 eol_type
= AREF (spec
, 2);
7611 if (VECTORP (eol_type
))
7613 Lisp_Object subsidiaries
;
7616 subsidiaries
= make_subsidiaries (alias
);
7617 for (i
= 0; i
< 3; i
++)
7618 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
7619 AREF (eol_type
, i
));
7621 ASET (spec
, 2, subsidiaries
);
7624 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
7625 Vcoding_system_alist
= Fcons (Fcons (alias
, Qnil
), Vcoding_system_alist
);
7630 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
7632 doc
: /* Return the base of CODING-SYSTEM.
7633 Any alias or subsidiary coding systems are not base coding system. */)
7635 Lisp_Object coding_system
;
7637 Lisp_Object spec
, attrs
;
7639 if (NILP (coding_system
))
7640 return (Qno_conversion
);
7641 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7642 attrs
= AREF (spec
, 0);
7643 return CODING_ATTR_BASE_NAME (attrs
);
7646 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
7648 doc
: "Return the property list of CODING-SYSTEM.")
7650 Lisp_Object coding_system
;
7652 Lisp_Object spec
, attrs
;
7654 if (NILP (coding_system
))
7655 coding_system
= Qno_conversion
;
7656 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7657 attrs
= AREF (spec
, 0);
7658 return CODING_ATTR_PLIST (attrs
);
7662 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
7664 doc
: /* Return the list of aliases of CODING-SYSTEM.
7665 A base coding system is what made by `define-coding-system'.
7666 Any alias nor subsidiary coding systems are not base coding system. */)
7668 Lisp_Object coding_system
;
7672 if (NILP (coding_system
))
7673 coding_system
= Qno_conversion
;
7674 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7675 return AREF (spec
, 2);
/* Lisp primitive `coding-system-eol-type': report the end-of-line
   convention recorded in slot 2 of CODING-SYSTEM's spec vector.  */
7678 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
7679 Scoding_system_eol_type
, 1, 1, 0,
7680 doc
: /* Return eol-type of CODING-SYSTEM.
7681 An eol-type is an integer 0, 1, or 2, or a vector of coding systems.
7683 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7684 and CR respectively.
7686 A vector value indicates that a format of end-of-line should be
7687 detected automatically. Nth element of the vector is the subsidiary
7688 coding system whose eol-type is N. */)
7690 Lisp_Object coding_system
;
7692 Lisp_Object spec
, eol_type
;
/* nil is treated as `no-conversion'.  */
7695 if (NILP (coding_system
))
7696 coding_system
= Qno_conversion
;
7697 if (! CODING_SYSTEM_P (coding_system
))
/* NOTE(review): the statement controlled by the check above is not
   visible in this excerpt -- confirm what is returned for an argument
   that is not a coding system.  */
/* Slot 2 of the spec vector holds the eol-type.  */
7699 spec
= CODING_SYSTEM_SPEC (coding_system
);
7700 eol_type
= AREF (spec
, 2);
/* A vector eol-type is returned as a copy made by Fcopy_sequence, not
   as the stored vector itself.  */
7701 if (VECTORP (eol_type
))
7702 return Fcopy_sequence (eol_type
);
/* Otherwise translate the symbol to its integer code: `unix' gives 0,
   `dos' gives 1, and anything else gives 2.  */
7703 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
7704 return make_number (n
);
7710 /*** 9. Post-amble ***/
7717 for (i
= 0; i
< coding_category_max
; i
++)
7719 coding_categories
[i
].id
= -1;
7720 coding_priorities
[i
] = i
;
7723 /* ISO2022 specific initialize routine. */
7724 for (i
= 0; i
< 0x20; i
++)
7725 iso_code_class
[i
] = ISO_control_0
;
7726 for (i
= 0x21; i
< 0x7F; i
++)
7727 iso_code_class
[i
] = ISO_graphic_plane_0
;
7728 for (i
= 0x80; i
< 0xA0; i
++)
7729 iso_code_class
[i
] = ISO_control_1
;
7730 for (i
= 0xA1; i
< 0xFF; i
++)
7731 iso_code_class
[i
] = ISO_graphic_plane_1
;
7732 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
7733 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
7734 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
7735 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
7736 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
7737 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
7738 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
7739 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
7740 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
7741 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
7743 inhibit_pre_post_conversion
= 0;
7745 for (i
= 0; i
< 256; i
++)
7747 emacs_mule_bytes
[i
] = 1;
7756 staticpro (&Vcoding_system_hash_table
);
7757 Vcoding_system_hash_table
= Fmakehash (Qeq
);
7759 staticpro (&Vsjis_coding_system
);
7760 Vsjis_coding_system
= Qnil
;
7762 staticpro (&Vbig5_coding_system
);
7763 Vbig5_coding_system
= Qnil
;
7765 staticpro (&Vcode_conversion_work_buf_list
);
7766 Vcode_conversion_work_buf_list
= Qnil
;
7768 staticpro (&Vcode_conversion_reused_work_buf
);
7769 Vcode_conversion_reused_work_buf
= Qnil
;
7771 DEFSYM (Qcharset
, "charset");
7772 DEFSYM (Qtarget_idx
, "target-idx");
7773 DEFSYM (Qcoding_system_history
, "coding-system-history");
7774 Fset (Qcoding_system_history
, Qnil
);
7776 /* Target FILENAME is the first argument. */
7777 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
7778 /* Target FILENAME is the third argument. */
7779 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
7781 DEFSYM (Qcall_process
, "call-process");
7782 /* Target PROGRAM is the first argument. */
7783 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
7785 DEFSYM (Qcall_process_region
, "call-process-region");
7786 /* Target PROGRAM is the third argument. */
7787 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
7789 DEFSYM (Qstart_process
, "start-process");
7790 /* Target PROGRAM is the third argument. */
7791 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
7793 DEFSYM (Qopen_network_stream
, "open-network-stream");
7794 /* Target SERVICE is the fourth argument. */
7795 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
7797 DEFSYM (Qcoding_system
, "coding-system");
7798 DEFSYM (Qcoding_aliases
, "coding-aliases");
7800 DEFSYM (Qeol_type
, "eol-type");
7801 DEFSYM (Qunix
, "unix");
7802 DEFSYM (Qdos
, "dos");
7803 DEFSYM (Qmac
, "mac");
7805 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
7806 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
7807 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
7808 DEFSYM (Qdefault_char
, "default-char");
7809 DEFSYM (Qundecided
, "undecided");
7810 DEFSYM (Qno_conversion
, "no-conversion");
7811 DEFSYM (Qraw_text
, "raw-text");
7813 DEFSYM (Qiso_2022
, "iso-2022");
7815 DEFSYM (Qutf_8
, "utf-8");
7817 DEFSYM (Qutf_16
, "utf-16");
7818 DEFSYM (Qutf_16_be
, "utf-16-be");
7819 DEFSYM (Qutf_16_be_nosig
, "utf-16-be-nosig");
7820 DEFSYM (Qutf_16_le
, "utf-16-l3");
7821 DEFSYM (Qutf_16_le_nosig
, "utf-16-le-nosig");
7822 DEFSYM (Qsignature
, "signature");
7823 DEFSYM (Qendian
, "endian");
7824 DEFSYM (Qbig
, "big");
7825 DEFSYM (Qlittle
, "little");
7827 DEFSYM (Qshift_jis
, "shift-jis");
7828 DEFSYM (Qbig5
, "big5");
7830 DEFSYM (Qcoding_system_p
, "coding-system-p");
7832 DEFSYM (Qcoding_system_error
, "coding-system-error");
7833 Fput (Qcoding_system_error
, Qerror_conditions
,
7834 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
7835 Fput (Qcoding_system_error
, Qerror_message
,
7836 build_string ("Invalid coding system"));
7838 /* Intern this now in case it isn't already done.
7839 Setting this variable twice is harmless.
7840 But don't staticpro it here--that is done in alloc.c. */
7841 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
7843 DEFSYM (Qtranslation_table
, "translation-table");
7844 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
7845 DEFSYM (Qtranslation_table_id
, "translation-table-id");
7846 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
7847 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
7849 DEFSYM (Qchar_coding_system
, "char-coding-system");
7851 Fput (Qchar_coding_system
, Qchar_table_extra_slots
, make_number (2));
7853 DEFSYM (Qvalid_codes
, "valid-codes");
7855 DEFSYM (Qemacs_mule
, "emacs-mule");
7857 Vcoding_category_table
7858 = Fmake_vector (make_number (coding_category_max
), Qnil
);
7859 staticpro (&Vcoding_category_table
);
7860 /* Followings are target of code detection. */
7861 ASET (Vcoding_category_table
, coding_category_iso_7
,
7862 intern ("coding-category-iso-7"));
7863 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
7864 intern ("coding-category-iso-7-tight"));
7865 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
7866 intern ("coding-category-iso-8-1"));
7867 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
7868 intern ("coding-category-iso-8-2"));
7869 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
7870 intern ("coding-category-iso-7-else"));
7871 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
7872 intern ("coding-category-iso-8-else"));
7873 ASET (Vcoding_category_table
, coding_category_utf_8
,
7874 intern ("coding-category-utf-8"));
7875 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
7876 intern ("coding-category-utf-16-be"));
7877 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
7878 intern ("coding-category-utf-16-le"));
7879 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
7880 intern ("coding-category-utf-16-be-nosig"));
7881 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
7882 intern ("coding-category-utf-16-le-nosig"));
7883 ASET (Vcoding_category_table
, coding_category_charset
,
7884 intern ("coding-category-charset"));
7885 ASET (Vcoding_category_table
, coding_category_sjis
,
7886 intern ("coding-category-sjis"));
7887 ASET (Vcoding_category_table
, coding_category_big5
,
7888 intern ("coding-category-big5"));
7889 ASET (Vcoding_category_table
, coding_category_ccl
,
7890 intern ("coding-category-ccl"));
7891 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
7892 intern ("coding-category-emacs-mule"));
7893 /* Followings are NOT target of code detection. */
7894 ASET (Vcoding_category_table
, coding_category_raw_text
,
7895 intern ("coding-category-raw-text"));
7896 ASET (Vcoding_category_table
, coding_category_undecided
,
7897 intern ("coding-category-undecided"));
7899 defsubr (&Scoding_system_p
);
7900 defsubr (&Sread_coding_system
);
7901 defsubr (&Sread_non_nil_coding_system
);
7902 defsubr (&Scheck_coding_system
);
7903 defsubr (&Sdetect_coding_region
);
7904 defsubr (&Sdetect_coding_string
);
7905 defsubr (&Sfind_coding_systems_region_internal
);
7906 defsubr (&Scheck_coding_systems_region
);
7907 defsubr (&Sdecode_coding_region
);
7908 defsubr (&Sencode_coding_region
);
7909 defsubr (&Sdecode_coding_string
);
7910 defsubr (&Sencode_coding_string
);
7911 defsubr (&Sdecode_sjis_char
);
7912 defsubr (&Sencode_sjis_char
);
7913 defsubr (&Sdecode_big5_char
);
7914 defsubr (&Sencode_big5_char
);
7915 defsubr (&Sset_terminal_coding_system_internal
);
7916 defsubr (&Sset_safe_terminal_coding_system_internal
);
7917 defsubr (&Sterminal_coding_system
);
7918 defsubr (&Sset_keyboard_coding_system_internal
);
7919 defsubr (&Skeyboard_coding_system
);
7920 defsubr (&Sfind_operation_coding_system
);
7921 defsubr (&Sset_coding_system_priority
);
7922 defsubr (&Sdefine_coding_system_internal
);
7923 defsubr (&Sdefine_coding_system_alias
);
7924 defsubr (&Scoding_system_base
);
7925 defsubr (&Scoding_system_plist
);
7926 defsubr (&Scoding_system_aliases
);
7927 defsubr (&Scoding_system_eol_type
);
7928 defsubr (&Scoding_system_priority_list
);
7930 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
7931 doc
: /* List of coding systems.
7933 Do not alter the value of this variable manually. This variable should be
7934 updated by the functions `define-coding-system' and
7935 `define-coding-system-alias'. */);
7936 Vcoding_system_list
= Qnil
;
7938 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
7939 doc
: /* Alist of coding system names.
7940 Each element is one element list of coding system name.
7941 This variable is given to `completing-read' as TABLE argument.
7943 Do not alter the value of this variable manually. This variable should be
7944 updated by the functions `make-coding-system' and
7945 `define-coding-system-alias'. */);
7946 Vcoding_system_alist
= Qnil
;
7948 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
7949 doc
: /* List of coding-categories (symbols) ordered by priority.
7951 On detecting a coding system, Emacs tries code detection algorithms
7952 associated with each coding-category one by one in this order. When
7953 one algorithm agrees with a byte sequence of source text, the coding
7954 system bound to the corresponding coding-category is selected. */);
7958 Vcoding_category_list
= Qnil
;
7959 for (i
= coding_category_max
- 1; i
>= 0; i
--)
7960 Vcoding_category_list
7961 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
7962 Vcoding_category_list
);
7965 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
7966 doc
: /* Specify the coding system for read operations.
7967 It is useful to bind this variable with `let', but do not set it globally.
7968 If the value is a coding system, it is used for decoding on read operation.
7969 If not, an appropriate element is used from one of the coding system alists:
7970 There are three such tables, `file-coding-system-alist',
7971 `process-coding-system-alist', and `network-coding-system-alist'. */);
7972 Vcoding_system_for_read
= Qnil
;
7974 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
7975 doc
: /* Specify the coding system for write operations.
7976 Programs bind this variable with `let', but you should not set it globally.
7977 If the value is a coding system, it is used for encoding of output,
7978 when writing it to a file and when sending it to a file or subprocess.
7980 If this does not specify a coding system, an appropriate element
7981 is used from one of the coding system alists:
7982 There are three such tables, `file-coding-system-alist',
7983 `process-coding-system-alist', and `network-coding-system-alist'.
7984 For output to files, if the above procedure does not specify a coding system,
7985 the value of `buffer-file-coding-system' is used. */);
7986 Vcoding_system_for_write
= Qnil
;
7988 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
7990 Coding system used in the latest file or process I/O. */);
7991 Vlast_coding_system_used
= Qnil
;
7993 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
7995 *Non-nil means always inhibit code conversion of end-of-line format.
7996 See info node `Coding Systems' and info node `Text and Binary' concerning
7997 such conversion. */);
7998 inhibit_eol_conversion
= 0;
8000 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
8002 Non-nil means process buffer inherits coding system of process output.
8003 Bind it to t if the process output is to be treated as if it were a file
8004 read from some filesystem. */);
8005 inherit_process_coding_system
= 0;
8007 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
8009 Alist to decide a coding system to use for a file I/O operation.
8010 The format is ((PATTERN . VAL) ...),
8011 where PATTERN is a regular expression matching a file name,
8012 VAL is a coding system, a cons of coding systems, or a function symbol.
8013 If VAL is a coding system, it is used for both decoding and encoding
8015 If VAL is a cons of coding systems, the car part is used for decoding,
8016 and the cdr part is used for encoding.
8017 If VAL is a function symbol, the function must return a coding system
8018 or a cons of coding systems which are used as above. The function gets
8019 the arguments with which `find-operation-coding-system' was called.
8021 See also the function `find-operation-coding-system'
8022 and the variable `auto-coding-alist'. */);
8023 Vfile_coding_system_alist
= Qnil
;
8025 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
8027 Alist to decide a coding system to use for a process I/O operation.
8028 The format is ((PATTERN . VAL) ...),
8029 where PATTERN is a regular expression matching a program name,
8030 VAL is a coding system, a cons of coding systems, or a function symbol.
8031 If VAL is a coding system, it is used for both decoding what received
8032 from the program and encoding what sent to the program.
8033 If VAL is a cons of coding systems, the car part is used for decoding,
8034 and the cdr part is used for encoding.
8035 If VAL is a function symbol, the function must return a coding system
8036 or a cons of coding systems which are used as above.
8038 See also the function `find-operation-coding-system'. */);
8039 Vprocess_coding_system_alist
= Qnil
;
8041 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
8043 Alist to decide a coding system to use for a network I/O operation.
8044 The format is ((PATTERN . VAL) ...),
8045 where PATTERN is a regular expression matching a network service name
8046 or is a port number to connect to,
8047 VAL is a coding system, a cons of coding systems, or a function symbol.
8048 If VAL is a coding system, it is used for both decoding what received
8049 from the network stream and encoding what sent to the network stream.
8050 If VAL is a cons of coding systems, the car part is used for decoding,
8051 and the cdr part is used for encoding.
8052 If VAL is a function symbol, the function must return a coding system
8053 or a cons of coding systems which are used as above.
8055 See also the function `find-operation-coding-system'. */);
8056 Vnetwork_coding_system_alist
= Qnil
;
8058 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
8059 doc
: /* Coding system to use with system messages.
8060 Also used for decoding keyboard input on X Window system. */);
8061 Vlocale_coding_system
= Qnil
;
8063 /* The eol mnemonics are reset in startup.el system-dependently. */
8064 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
8066 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8067 eol_mnemonic_unix
= build_string (":");
8069 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
8071 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8072 eol_mnemonic_dos
= build_string ("\\");
8074 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
8076 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8077 eol_mnemonic_mac
= build_string ("/");
8079 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
8081 *String displayed in mode line when end-of-line format is not yet determined. */);
8082 eol_mnemonic_undecided
= build_string (":");
8084 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
8086 *Non-nil enables character translation while encoding and decoding. */);
8087 Venable_character_translation
= Qt
;
8089 DEFVAR_LISP ("standard-translation-table-for-decode",
8090 &Vstandard_translation_table_for_decode
,
8091 doc
: /* Table for translating characters while decoding. */);
8092 Vstandard_translation_table_for_decode
= Qnil
;
8094 DEFVAR_LISP ("standard-translation-table-for-encode",
8095 &Vstandard_translation_table_for_encode
,
8096 doc
: /* Table for translating characters while encoding. */);
8097 Vstandard_translation_table_for_encode
= Qnil
;
8099 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
8100 doc
: /* Alist of charsets vs revision numbers.
8101 While encoding, if a charset (car part of an element) is found,
8102 designate it with the escape sequence identifying revision (cdr part
8103 of the element). */);
8104 Vcharset_revision_table
= Qnil
;
8106 DEFVAR_LISP ("default-process-coding-system",
8107 &Vdefault_process_coding_system
,
8108 doc
: /* Cons of coding systems used for process I/O by default.
8109 The car part is used for decoding a process output,
8110 the cdr part is used for encoding a text to be sent to a process. */);
8111 Vdefault_process_coding_system
= Qnil
;
8113 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
8115 Table of extra Latin codes in the range 128..159 (inclusive).
8116 This is a vector of length 256.
8117 If the Nth element is non-nil, the existence of code N in a file
8118 \(or output of subprocess) doesn't prevent it from being detected as
8119 a coding system of ISO 2022 variant which has a flag
8120 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8121 or reading output of a subprocess.
8122 Only the 128th through 159th elements have a meaning. */);
8123 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
8125 DEFVAR_LISP ("select-safe-coding-system-function",
8126 &Vselect_safe_coding_system_function
,
8128 Function to call to select safe coding system for encoding a text.
8130 If set, this function is called to force a user to select a proper
8131 coding system which can encode the text in the case that a default
8132 coding system used in each operation can't encode the text.
8134 The default value is `select-safe-coding-system' (which see). */);
8135 Vselect_safe_coding_system_function
= Qnil
;
8137 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table
,
8139 Char-table containing safe coding systems of each character.
8140 Each element doesn't include such generic coding systems that can
8141 encode any characters. They are in the first extra slot. */);
8142 Vchar_coding_system_table
= Fmake_char_table (Qchar_coding_system
, Qnil
);
8144 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8145 &inhibit_iso_escape_detection
,
8147 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8149 By default, on reading a file, Emacs tries to detect how the text is
8150 encoded. This code detection is sensitive to escape sequences. If
8151 the sequence is valid as ISO2022, the code is determined as one of
8152 the ISO2022 encodings, and the file is decoded by the corresponding
8153 coding system (e.g. `iso-2022-7bit').
8155 However, there may be a case that you want to read escape sequences in
8156 a file as is. In such a case, you can set this variable to non-nil.
8157 Then, as the code detection ignores any escape sequences, no file is
8158 detected as encoded in some ISO2022 encoding. The result is that all
8159 escape sequences become visible in a buffer.
8161 The default value is nil, and it is strongly recommended not to change
8162 it. That is because many Emacs Lisp source files that contain
8163 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8164 in Emacs's distribution, and they won't be decoded correctly on
8165 reading if you suppress escape sequence detection.
8167 The other way to read escape sequences in a file without decoding is
8168 to explicitly specify some coding system that doesn't use ISO2022's
8169 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8170 inhibit_iso_escape_detection
= 0;
8173 Lisp_Object args
[coding_arg_max
];
8174 Lisp_Object plist
[14];
8177 for (i
= 0; i
< coding_arg_max
; i
++)
8180 plist
[0] = intern (":name");
8181 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
8182 plist
[2] = intern (":mnemonic");
8183 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
8184 plist
[4] = intern (":coding-type");
8185 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
8186 plist
[6] = intern (":ascii-compatible-p");
8187 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
8188 plist
[8] = intern (":default-char");
8189 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
8190 plist
[10] = intern (":docstring");
8191 plist
[11] = build_string ("Do no conversion.\n\
8193 When you visit a file with this coding, the file is read into a\n\
8194 unibyte buffer as is, thus each byte of a file is treated as a\n\
8196 plist
[12] = intern (":eol-type");
8197 plist
[13] = args
[coding_arg_eol_type
] = Qunix
;
8198 args
[coding_arg_plist
] = Flist (14, plist
);
8199 Fdefine_coding_system_internal (coding_arg_max
, args
);
8202 setup_coding_system (Qno_conversion
, &keyboard_coding
);
8203 setup_coding_system (Qno_conversion
, &terminal_coding
);
8204 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
8208 emacs_strerror (error_number
)
8213 synchronize_system_messages_locale ();
8214 str
= strerror (error_number
);
8216 if (! NILP (Vlocale_coding_system
))
8218 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
8219 Vlocale_coding_system
,
8221 str
= (char *) XSTRING (dec
)->data
;