1 /* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequence to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion
61 from a coding system symbol to its attributes vector is done by
62 looking up the symbol in Vcoding_system_hash_table.
64 Coding systems are classified into the following types depending on
65 the mechanism of encoding. Here's a brief description of each type.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by code converter defined for each
77 o Old Emacs' internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode a text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for a text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How end-of-line of a text is encoded depends on a system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text characters encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwise (i.e. the data contains only ASCII
150 characters or an invalid sequence) return 0.
152 It also resets some bits of the integer pointed to by MASK. The
153 macros CATEGORY_MASK_XXX specify each bit of this integer.
155 Below is the template of these functions. */
/* Template of a detector.  It walks the bytes from CODING->source up to
   CODING->source + CODING->src_bytes and narrows the integer pointed to
   by MASK according to what it finds.
   NOTE(review): the two mask updates below use CODING_CATEGORY_XXX,
   whereas the general notes and detect_coding_utf_8 use
   CATEGORY_MASK_XXX -- the template name looks stale; confirm.  */
159 detect_coding_XXX (coding
, mask
)
160 struct coding_system
*coding
;
/* Current read position in the source.  */
163 unsigned char *src
= coding
->source
;
/* One past the last source byte.  */
164 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
165 int multibytep
= coding
->src_multibyte
;
172 /* Get one byte from the source. If the source is exhausted, jump
173 to no_more_source:. */
175 /* Check if it conforms to XXX. If not, break the loop. */
177 /* As the data is invalid for XXX, reset the corresponding bit. */
178 *mask
&= ~CODING_CATEGORY_XXX
;
181 /* The source is exhausted. */
183 /* ASCII characters only. */
185 /* Some data should be decoded into non-ASCII characters. */
/* Keep only the XXX bit: every other category is ruled out.  */
186 *mask
&= CODING_CATEGORY_XXX
;
191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
203 Below is the template of these functions. */
/* Template of a decoder.  It reads bytes starting at
   CODING->source + CODING->consumed, stores decoded characters into
   CODING->charbuf, and finally records the progress in
   CODING->consumed, CODING->consumed_char and CODING->charbuf_used.  */
207 decode_coding_XXXX (coding
)
208 struct coding_system
*coding
;
/* Resume right after the bytes consumed so far.  */
210 unsigned char *src
= coding
->source
+ coding
->consumed
;
211 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base
;
216 /* A buffer to produce decoded characters. */
217 int *charbuf
= coding
->charbuf
;
218 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
219 int multibytep
= coding
->src_multibyte
;
224 if (charbuf
< charbuf_end
)
225 /* No more room to produce a decoded character. */
/* NOTE(review): `charbuf < charbuf_end' is true while room REMAINS;
   decode_coding_utf_8 tests `charbuf >= charbuf_end' for the
   "no more room" case, so this template condition looks inverted.  */
232 if (src_base
< src_end
233 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base
< src_end
&& charbuf
< charbuf_end
)
237 *charbuf
++ = *src_base
++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
241 /* Remember how many characters we produced. */
242 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce encoded text until it
260 reaches the head of the not-yet-encoded source text.
262 Below is a template of these functions. */
#if 0
/* Template of an encoder (kept out of the build: it relies on the
   placeholder _MAX_BYTES_PRODUCED_IN_LOOP_).

   Encode the CODING->charbuf_used characters held in CODING->charbuf
   into bytes written at CODING->destination + CODING->produced, then
   record the totals in CODING->produced_char and CODING->produced.

   NOTE(review): the real encoders are declared `static int'; the
   value returned here (0) is an assumption -- confirm.  */
static int
encode_coding_XXX (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  /* One past the last character to encode.  CHARBUF is a plain
     `int *', so the end is CHARBUF plus the number of characters in
     use; it has no `charbuf' member to go through.  */
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Stop early enough that one more iteration cannot run past
     DST_END.  */
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  int produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* Encode C into DST, and increment DST.  */
    }
 label_no_more_destination:
  /* How many chars and bytes we produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
#endif
289 /*** 1. Preamble ***/
296 #include "character.h"
299 #include "composite.h"
303 Lisp_Object Vcoding_system_hash_table
;
305 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
306 Lisp_Object Qunix
, Qdos
, Qmac
;
307 Lisp_Object Qbuffer_file_coding_system
;
308 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
309 Lisp_Object Qdefault_char
;
310 Lisp_Object Qno_conversion
, Qundecided
;
311 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
312 Lisp_Object Qutf_16_be_nosig
, Qutf_16_be
, Qutf_16_le_nosig
, Qutf_16_le
;
313 Lisp_Object Qsignature
, Qendian
, Qbig
, Qlittle
;
314 Lisp_Object Qcoding_system_history
;
315 Lisp_Object Qvalid_codes
;
317 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
318 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
319 Lisp_Object Qstart_process
, Qopen_network_stream
;
320 Lisp_Object Qtarget_idx
;
322 Lisp_Object Vselect_safe_coding_system_function
;
324 /* Mnemonic string for each format of end-of-line. */
325 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
326 /* Mnemonic string to indicate format of end-of-line is not yet
328 Lisp_Object eol_mnemonic_undecided
;
332 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
334 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
336 /* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338 Lisp_Object Qemacs_mule
, Qraw_text
;
340 /* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342 /* Coding-system for reading files and receiving data from process. */
343 Lisp_Object Vcoding_system_for_read
;
344 /* Coding-system for writing files and sending data to process. */
345 Lisp_Object Vcoding_system_for_write
;
346 /* Coding-system actually used in the latest I/O. */
347 Lisp_Object Vlast_coding_system_used
;
349 /* A vector of length 256 which contains information about special
350 Latin codes (especially for dealing with Microsoft codes). */
351 Lisp_Object Vlatin_extra_code_table
;
353 /* Flag to inhibit code conversion of end-of-line format. */
354 int inhibit_eol_conversion
;
356 /* Flag to inhibit ISO2022 escape sequence detection. */
357 int inhibit_iso_escape_detection
;
359 /* Flag to make buffer-file-coding-system inherit from process-coding. */
360 int inherit_process_coding_system
;
362 /* Coding system to be used to encode text for terminal display. */
363 struct coding_system terminal_coding
;
365 /* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367 struct coding_system safe_terminal_coding
;
369 /* Coding system of what is sent from terminal keyboard. */
370 struct coding_system keyboard_coding
;
372 Lisp_Object Vfile_coding_system_alist
;
373 Lisp_Object Vprocess_coding_system_alist
;
374 Lisp_Object Vnetwork_coding_system_alist
;
376 Lisp_Object Vlocale_coding_system
;
380 /* Flag to tell if we look up translation table on character code
382 Lisp_Object Venable_character_translation
;
383 /* Standard translation table to look up on decoding (reading). */
384 Lisp_Object Vstandard_translation_table_for_decode
;
385 /* Standard translation table to look up on encoding (writing). */
386 Lisp_Object Vstandard_translation_table_for_encode
;
388 Lisp_Object Qtranslation_table
;
389 Lisp_Object Qtranslation_table_id
;
390 Lisp_Object Qtranslation_table_for_decode
;
391 Lisp_Object Qtranslation_table_for_encode
;
393 /* Alist of charsets vs revision number. */
394 static Lisp_Object Vcharset_revision_table
;
396 /* Default coding systems used for process I/O. */
397 Lisp_Object Vdefault_process_coding_system
;
399 /* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403 static int inhibit_pre_post_conversion
;
405 /* Char-table containing safe coding systems of each character. */
406 Lisp_Object Vchar_coding_system_table
;
407 Lisp_Object Qchar_coding_system
;
409 /* Two special coding systems. */
410 Lisp_Object Vsjis_coding_system
;
411 Lisp_Object Vbig5_coding_system
;
414 static int detect_coding_utf_8
P_ ((struct coding_system
*, int *));
415 static void decode_coding_utf_8
P_ ((struct coding_system
*));
416 static int encode_coding_utf_8
P_ ((struct coding_system
*));
418 static int detect_coding_utf_16
P_ ((struct coding_system
*, int *));
419 static void decode_coding_utf_16
P_ ((struct coding_system
*));
420 static int encode_coding_utf_16
P_ ((struct coding_system
*));
422 static int detect_coding_iso_2022
P_ ((struct coding_system
*, int *));
423 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
424 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
426 static int detect_coding_emacs_mule
P_ ((struct coding_system
*, int *));
427 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
428 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
430 static int detect_coding_sjis
P_ ((struct coding_system
*, int *));
431 static void decode_coding_sjis
P_ ((struct coding_system
*));
432 static int encode_coding_sjis
P_ ((struct coding_system
*));
434 static int detect_coding_big5
P_ ((struct coding_system
*, int *));
435 static void decode_coding_big5
P_ ((struct coding_system
*));
436 static int encode_coding_big5
P_ ((struct coding_system
*));
438 static int detect_coding_ccl
P_ ((struct coding_system
*, int *));
439 static void decode_coding_ccl
P_ ((struct coding_system
*));
440 static int encode_coding_ccl
P_ ((struct coding_system
*));
442 static void decode_coding_raw_text
P_ ((struct coding_system
*));
443 static int encode_coding_raw_text
P_ ((struct coding_system
*));
446 /* ISO2022 section */
448 #define CODING_ISO_INITIAL(coding, reg) \
449 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
450 coding_attr_iso_initial), \
454 #define CODING_ISO_REQUEST(coding, charset_id) \
455 ((charset_id <= (coding)->max_charset_id \
456 ? (coding)->safe_charsets[charset_id] \
460 #define CODING_ISO_FLAGS(coding) \
461 ((coding)->spec.iso_2022.flags)
462 #define CODING_ISO_DESIGNATION(coding, reg) \
463 ((coding)->spec.iso_2022.current_designation[reg])
464 #define CODING_ISO_INVOCATION(coding, plane) \
465 ((coding)->spec.iso_2022.current_invocation[plane])
466 #define CODING_ISO_SINGLE_SHIFTING(coding) \
467 ((coding)->spec.iso_2022.single_shifting)
468 #define CODING_ISO_BOL(coding) \
469 ((coding)->spec.iso_2022.bol)
470 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
471 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
473 /* Control characters of ISO2022. */
474 /* code */ /* function */
475 #define ISO_CODE_LF 0x0A /* line-feed */
476 #define ISO_CODE_CR 0x0D /* carriage-return */
477 #define ISO_CODE_SO 0x0E /* shift-out */
478 #define ISO_CODE_SI 0x0F /* shift-in */
479 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
480 #define ISO_CODE_ESC 0x1B /* escape */
481 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
482 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
483 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
485 /* All code (1-byte) of ISO2022 is classified into one of the
487 enum iso_code_class_type
489 ISO_control_0
, /* Control codes in the range
490 0x00..0x1F and 0x7F, except for the
491 following 5 codes. */
492 ISO_carriage_return
, /* ISO_CODE_CR (0x0D) */
493 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
494 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
495 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
496 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
497 ISO_control_1
, /* Control codes in the range
498 0x80..0x9F, except for the
499 following 3 codes. */
500 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
501 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
502 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
503 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
504 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
505 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
506 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
509 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
510 `iso-flags' attribute of an iso2022 coding system. */
512 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
513 instead of the correct short-form sequence (e.g. ESC $ A). */
514 #define CODING_ISO_FLAG_LONG_FORM 0x0001
516 /* If set, reset graphic planes and registers at end-of-line to the
518 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
520 /* If set, reset graphic planes and registers before any control
521 characters to the initial state. */
522 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
524 /* If set, encode by 7-bit environment. */
525 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
527 /* If set, use locking-shift function. */
528 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
530 /* If set, use single-shift function. Overrides
531 CODING_ISO_FLAG_LOCKING_SHIFT. */
532 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
534 /* If set, use designation escape sequence. */
535 #define CODING_ISO_FLAG_DESIGNATION 0x0040
537 /* If set, produce revision number sequence. */
538 #define CODING_ISO_FLAG_REVISION 0x0080
540 /* If set, produce ISO6429's direction specifying sequence. */
541 #define CODING_ISO_FLAG_DIRECTION 0x0100
543 /* If set, assume designation states are reset at beginning of line on
545 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
547 /* If set, designation sequence should be placed at beginning of line
549 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
551 /* If set, do not encode unsafe characters on output. */
552 #define CODING_ISO_FLAG_SAFE 0x0800
554 /* If set, extra latin codes (128..159) are accepted as a valid code
556 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
558 #define CODING_ISO_FLAG_COMPOSITION 0x2000
560 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
562 #define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
564 /* A character to be produced on output if encoding of the original
565 character is prohibited by CODING_ISO_FLAG_SAFE. */
566 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
570 #define CODING_UTF_16_BOM(coding) \
571 ((coding)->spec.utf_16.bom)
573 #define CODING_UTF_16_ENDIAN(coding) \
574 ((coding)->spec.utf_16.endian)
576 #define CODING_UTF_16_SURROGATE(coding) \
577 ((coding)->spec.utf_16.surrogate)
581 #define CODING_CCL_DECODER(coding) \
582 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
583 #define CODING_CCL_ENCODER(coding) \
584 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
585 #define CODING_CCL_VALIDS(coding) \
586 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
589 /* Index for each coding category in `coding_category_table' */
593 coding_category_iso_7
,
594 coding_category_iso_7_tight
,
595 coding_category_iso_8_1
,
596 coding_category_iso_8_2
,
597 coding_category_iso_7_else
,
598 coding_category_iso_8_else
,
599 coding_category_utf_8
,
600 coding_category_utf_16_auto
,
601 coding_category_utf_16_be
,
602 coding_category_utf_16_le
,
603 coding_category_utf_16_be_nosig
,
604 coding_category_utf_16_le_nosig
,
605 coding_category_charset
,
606 coding_category_sjis
,
607 coding_category_big5
,
609 coding_category_emacs_mule
,
610 /* All above are targets of code detection. */
611 coding_category_raw_text
,
612 coding_category_undecided
,
616 /* Definitions of flag bits used in detect_coding_XXXX. */
617 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
618 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
619 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
620 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
621 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
622 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
623 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
624 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
625 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
626 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
627 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
628 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
629 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
630 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
631 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
632 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
634 /* This value is returned if detect_coding_mask () finds nothing other
635 than ASCII characters. */
636 #define CATEGORY_MASK_ANY \
637 (CATEGORY_MASK_ISO_7 \
638 | CATEGORY_MASK_ISO_7_TIGHT \
639 | CATEGORY_MASK_ISO_8_1 \
640 | CATEGORY_MASK_ISO_8_2 \
641 | CATEGORY_MASK_ISO_7_ELSE \
642 | CATEGORY_MASK_ISO_8_ELSE \
643 | CATEGORY_MASK_UTF_8 \
644 | CATEGORY_MASK_UTF_16_BE \
645 | CATEGORY_MASK_UTF_16_LE \
646 | CATEGORY_MASK_UTF_16_BE_NOSIG \
647 | CATEGORY_MASK_UTF_16_LE_NOSIG \
648 | CATEGORY_MASK_CHARSET \
649 | CATEGORY_MASK_SJIS \
650 | CATEGORY_MASK_BIG5 \
651 | CATEGORY_MASK_CCL \
652 | CATEGORY_MASK_EMACS_MULE)
655 #define CATEGORY_MASK_ISO_7BIT \
656 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
658 #define CATEGORY_MASK_ISO_8BIT \
659 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
661 #define CATEGORY_MASK_ISO_ELSE \
662 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
664 #define CATEGORY_MASK_ISO_ESCAPE \
665 (CATEGORY_MASK_ISO_7 \
666 | CATEGORY_MASK_ISO_7_TIGHT \
667 | CATEGORY_MASK_ISO_7_ELSE \
668 | CATEGORY_MASK_ISO_8_ELSE)
670 #define CATEGORY_MASK_ISO \
671 ( CATEGORY_MASK_ISO_7BIT \
672 | CATEGORY_MASK_ISO_8BIT \
673 | CATEGORY_MASK_ISO_ELSE)
675 #define CATEGORY_MASK_UTF_16 \
676 (CATEGORY_MASK_UTF_16_BE \
677 | CATEGORY_MASK_UTF_16_LE \
678 | CATEGORY_MASK_UTF_16_BE_NOSIG \
679 | CATEGORY_MASK_UTF_16_LE_NOSIG)
682 /* List of symbols `coding-category-xxx' ordered by priority. This
683 variable is exposed to Emacs Lisp. */
684 static Lisp_Object Vcoding_category_list
;
686 /* Table of coding categories (Lisp symbols). This variable is for
688 static Lisp_Object Vcoding_category_table
;
690 /* Table of coding-categories ordered by priority. */
691 static enum coding_category coding_priorities
[coding_category_max
];
693 /* Nth element is a coding context for the coding system bound to the
694 Nth coding category. */
695 static struct coding_system coding_categories
[coding_category_max
];
697 static int detected_mask
[coding_category_raw_text
] =
705 CATEGORY_MASK_UTF_16
,
706 CATEGORY_MASK_UTF_16
,
707 CATEGORY_MASK_UTF_16
,
708 CATEGORY_MASK_UTF_16
,
709 CATEGORY_MASK_UTF_16
,
710 CATEGORY_MASK_CHARSET
,
714 CATEGORY_MASK_EMACS_MULE
717 /*** Commonly used macros and functions ***/
720 #define min(a, b) ((a) < (b) ? (a) : (b))
723 #define max(a, b) ((a) > (b) ? (a) : (b))
726 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
728 attrs = CODING_ID_ATTRS (coding->id); \
729 eol_type = CODING_ID_EOL_TYPE (coding->id); \
730 if (VECTORP (eol_type)) \
732 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
736 /* Safely get one byte from the source text pointed by SRC which ends
737 at SRC_END, and set C to that byte. If there are not enough bytes
738 in the source, it jumps to `no_more_source'. The caller
739 should declare and set these variables appropriately in advance:
740 src, src_end, multibytep
743 #define ONE_MORE_BYTE(c) \
745 if (src == src_end) \
747 if (src_base < src) \
748 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
749 goto no_more_source; \
752 if (multibytep && (c & 0x80)) \
754 if ((c & 0xFE) != 0xC0) \
755 error ("Undecodable char found"); \
756 c = ((c & 1) << 6) | *src++; \
762 #define ONE_MORE_BYTE_NO_CHECK(c) \
765 if (multibytep && (c & 0x80)) \
767 if ((c & 0xFE) != 0xC0) \
768 error ("Undecodable char found"); \
769 c = ((c & 1) << 6) | *src++; \
774 /* Store a byte C in the place pointed by DST and increment DST to the
775 next free point, and increment PRODUCED_CHARS. The caller should
776 assure that C is 0..127, and declare and set the variable `dst'
777 appropriately in advance.
781 #define EMIT_ONE_ASCII_BYTE(c) \
788 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
790 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
792 produced_chars += 2; \
793 *dst++ = (c1), *dst++ = (c2); \
797 /* Store a byte C in the place pointed by DST and increment DST to the
798 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
799 nonzero, store in an appropriate multibyte form. The caller should
800 declare and set the variables `dst' and `multibytep' appropriately
803 #define EMIT_ONE_BYTE(c) \
810 ch = BYTE8_TO_CHAR (ch); \
811 CHAR_STRING_ADVANCE (ch, dst); \
818 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
820 #define EMIT_TWO_BYTES(c1, c2) \
822 produced_chars += 2; \
829 ch = BYTE8_TO_CHAR (ch); \
830 CHAR_STRING_ADVANCE (ch, dst); \
833 ch = BYTE8_TO_CHAR (ch); \
834 CHAR_STRING_ADVANCE (ch, dst); \
844 #define EMIT_THREE_BYTES(c1, c2, c3) \
846 EMIT_ONE_BYTE (c1); \
847 EMIT_TWO_BYTES (c2, c3); \
851 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
853 EMIT_TWO_BYTES (c1, c2); \
854 EMIT_TWO_BYTES (c3, c4); \
858 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
860 charset_map_loaded = 0; \
861 c = DECODE_CHAR (charset, code); \
862 if (charset_map_loaded) \
864 unsigned char *orig = coding->source; \
867 coding_set_source (coding); \
868 offset = coding->source - orig; \
870 src_base += offset; \
876 #define ASSURE_DESTINATION(bytes) \
878 if (dst + (bytes) >= dst_end) \
880 int more_bytes = charbuf_end - charbuf + (bytes); \
882 dst = alloc_destination (coding, more_bytes, dst); \
883 dst_end = coding->destination + coding->dst_bytes; \
890 coding_set_source (coding
)
891 struct coding_system
*coding
;
893 if (BUFFERP (coding
->src_object
))
895 if (coding
->src_pos
< 0)
896 coding
->source
= GAP_END_ADDR
+ coding
->src_pos_byte
;
899 struct buffer
*buf
= XBUFFER (coding
->src_object
);
900 EMACS_INT gpt_byte
= BUF_GPT_BYTE (buf
);
901 unsigned char *beg_addr
= BUF_BEG_ADDR (buf
);
903 coding
->source
= beg_addr
+ coding
->src_pos_byte
- 1;
904 if (coding
->src_pos_byte
>= gpt_byte
)
905 coding
->source
+= BUF_GAP_SIZE (buf
);
908 else if (STRINGP (coding
->src_object
))
910 coding
->source
= (XSTRING (coding
->src_object
)->data
911 + coding
->src_pos_byte
);
914 /* Otherwise, the source is C string and is never relocated
915 automatically. Thus we don't have to update anything. */
920 coding_set_destination (coding
)
921 struct coding_system
*coding
;
923 if (BUFFERP (coding
->dst_object
))
925 /* We are sure that coding->dst_pos_byte is before the gap of the
927 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
928 + coding
->dst_pos_byte
- 1);
929 if (coding
->src_pos
< 0)
930 coding
->dst_bytes
= (GAP_END_ADDR
931 - (coding
->src_bytes
- coding
->consumed
)
932 - coding
->destination
);
934 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
935 - coding
->destination
);
938 /* Otherwise, the destination is C string and is never relocated
939 automatically. Thus we don't have to update anything. */
945 coding_alloc_by_realloc (coding
, bytes
)
946 struct coding_system
*coding
;
949 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
950 coding
->dst_bytes
+ bytes
);
951 coding
->dst_bytes
+= bytes
;
/* Make room for BYTES more bytes when the destination of CODING is a
   buffer.  Two cases are handled: source and destination are the same
   buffer, or the destination is some other buffer.
   NOTE(review): in both cases the statement that actually enlarges the
   gap (between the paired adjustments / buffer switches below) is not
   visible in this listing; presumably it grows the gap by BYTES --
   confirm against the full source.  */
955 coding_alloc_by_making_gap (coding
, bytes
)
956 struct coding_system
*coding
;
/* Case 1: converting within a single buffer.  */
959 if (BUFFERP (coding
->dst_object
)
960 && EQ (coding
->src_object
, coding
->dst_object
))
/* ADD is the number of source bytes not yet consumed.  */
962 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
/* Temporarily count those ADD bytes as buffer text rather than gap:
   the gap shrinks by ADD and the buffer end markers grow by ADD.  */
964 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
/* Undo the temporary accounting above.  */
966 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
/* Case 2: remember the current buffer, switch to the destination
   buffer, and switch back afterwards.  */
970 Lisp_Object this_buffer
;
972 this_buffer
= Fcurrent_buffer ();
973 set_buffer_internal (XBUFFER (coding
->dst_object
));
975 set_buffer_internal (XBUFFER (this_buffer
));
980 static unsigned char *
981 alloc_destination (coding
, nbytes
, dst
)
982 struct coding_system
*coding
;
986 EMACS_INT offset
= dst
- coding
->destination
;
988 if (BUFFERP (coding
->dst_object
))
989 coding_alloc_by_making_gap (coding
, nbytes
);
991 coding_alloc_by_realloc (coding
, nbytes
);
992 coding
->result
= CODING_RESULT_SUCCESS
;
993 coding_set_destination (coding
);
994 dst
= coding
->destination
+ offset
;
999 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1006 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1007 Check if a text is encoded in UTF-8. If it is, return
1008 CATEGORY_MASK_UTF_8, else return 0. */
/* Classify one octet C by its high bits.  */
/* Value below 0x80 (bit pattern 0xxxxxxx).  */
1010 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
/* Bit pattern 10xxxxxx: the top two bits are 1 and 0.  */
1011 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
/* Bit pattern 110xxxxx.  */
1012 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* Bit pattern 1110xxxx.  */
1013 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* Bit pattern 11110xxx.  */
1014 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* Bit pattern 111110xx.  */
1015 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
/* Detector for UTF-8.  It examines the source bytes of CODING and
   updates the integer pointed to by MASK: the CATEGORY_MASK_UTF_8 bit
   is cleared on one exit path and every other bit is cleared on
   another.
   NOTE(review): the byte fetches, the loop and the return statements
   are not visible in this listing; the comments below describe only
   the visible tests.  */
1018 detect_coding_utf_8 (coding
, mask
)
1019 struct coding_system
*coding
;
1022 unsigned char *src
= coding
->source
, *src_base
= src
;
1023 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1024 int multibytep
= coding
->src_multibyte
;
1025 int consumed_chars
= 0;
1028 /* A coding system of this category is always ASCII compatible. */
/* So skip the leading run of CODING->head_ascii bytes.  */
1029 src
+= coding
->head_ascii
;
/* C is the octet tested as a leading octet; C1..C4 are the octets
   tested as extra (10xxxxxx) octets.  */
1033 int c
, c1
, c2
, c3
, c4
;
1036 if (UTF_8_1_OCTET_P (c
))
/* Each following octet must be an extra octet; after each one, the
   leading octet is checked for the matching sequence length.  */
1039 if (! UTF_8_EXTRA_OCTET_P (c1
))
1041 if (UTF_8_2_OCTET_LEADING_P (c
))
1047 if (! UTF_8_EXTRA_OCTET_P (c2
))
1049 if (UTF_8_3_OCTET_LEADING_P (c
))
1055 if (! UTF_8_EXTRA_OCTET_P (c3
))
1057 if (UTF_8_4_OCTET_LEADING_P (c
))
1063 if (! UTF_8_EXTRA_OCTET_P (c4
))
1065 if (UTF_8_5_OCTET_LEADING_P (c
))
/* Rule UTF-8 out: clear only its bit.  */
1072 *mask
&= ~CATEGORY_MASK_UTF_8
;
/* Keep only the UTF-8 bit: clear every other bit.  */
1078 *mask
&= CATEGORY_MASK_UTF_8
;
/* Decoder for UTF-8.  It reads octets starting at
   CODING->source + CODING->consumed, assembles character codes from
   sequences of one to five octets, stores them in CODING->charbuf, and
   finally records progress in CODING->consumed_char, CODING->consumed
   and CODING->charbuf_used.
   NOTE(review): the loop, the byte fetches, the labels and several
   branches are not visible in this listing; the comments below
   describe only the visible statements.  */
1084 decode_coding_utf_8 (coding
)
1085 struct coding_system
*coding
;
/* Resume right after the bytes consumed so far.  */
1087 unsigned char *src
= coding
->source
+ coding
->consumed
;
1088 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1089 unsigned char *src_base
;
1090 int *charbuf
= coding
->charbuf
;
1091 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1092 int consumed_chars
= 0, consumed_chars_base
;
1093 int multibytep
= coding
->src_multibyte
;
1094 Lisp_Object attr
, eol_type
, charset_list
;
1096 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
/* C is the assembled character code; C1 is the leading octet and
   C2..C5 the extra octets.  */
1100 int c
, c1
, c2
, c3
, c4
, c5
;
/* Snapshot the count so it can be restored further down.  */
1103 consumed_chars_base
= consumed_chars
;
/* CHARBUF is full.  */
1105 if (charbuf
>= charbuf_end
)
1109 if (UTF_8_1_OCTET_P(c1
))
/* End-of-line handling depends on EOL_TYPE (Qdos or Qmac).  */
1114 if (EQ (eol_type
, Qdos
))
1117 goto no_more_source
;
1121 else if (EQ (eol_type
, Qmac
))
1128 if (! UTF_8_EXTRA_OCTET_P (c2
))
/* Two octets: 5 payload bits from C1, 6 from C2.  */
1130 if (UTF_8_2_OCTET_LEADING_P (c1
))
1131 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1135 if (! UTF_8_EXTRA_OCTET_P (c3
))
/* Three octets: 4 payload bits from C1, 6 from each extra octet.  */
1137 if (UTF_8_3_OCTET_LEADING_P (c1
))
1138 c
= (((c1
& 0xF) << 12)
1139 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1143 if (! UTF_8_EXTRA_OCTET_P (c4
))
/* Four octets: 3 payload bits from C1, 6 from each extra octet.  */
1145 if (UTF_8_4_OCTET_LEADING_P (c1
))
1146 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1147 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1151 if (! UTF_8_EXTRA_OCTET_P (c5
))
/* Five octets: 2 payload bits from C1, 6 from each extra octet.  */
1153 if (UTF_8_5_OCTET_LEADING_P (c1
))
1155 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1156 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
/* Roll the count back to the snapshot, then store a single value:
   C itself when ASCII_BYTE_P holds, otherwise BYTE8_TO_CHAR (C).
   NOTE(review): presumably this is the invalid-sequence path --
   confirm against the full source.  */
1173 consumed_chars
= consumed_chars_base
;
1175 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
/* Record how far we got: characters and bytes consumed, and the
   number of characters stored in CHARBUF.  */
1180 coding
->consumed_char
+= consumed_chars_base
;
1181 coding
->consumed
= src_base
- coding
->source
;
1182 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1187 encode_coding_utf_8 (coding
)
1188 struct coding_system
*coding
;
1190 int multibytep
= coding
->dst_multibyte
;
1191 int *charbuf
= coding
->charbuf
;
1192 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1193 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1194 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1195 int produced_chars
= 0;
1200 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1202 while (charbuf
< charbuf_end
)
1204 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1206 ASSURE_DESTINATION (safe_room
);
1208 CHAR_STRING_ADVANCE (c
, pend
);
1209 for (p
= str
; p
< pend
; p
++)
1215 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1217 while (charbuf
< charbuf_end
)
1219 ASSURE_DESTINATION (safe_room
);
1221 dst
+= CHAR_STRING (c
, dst
);
1225 coding
->result
= CODING_RESULT_SUCCESS
;
1226 coding
->produced_char
+= produced_chars
;
1227 coding
->produced
= dst
- coding
->destination
;
1232 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1233 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1234 Little Endian (otherwise). If it is, return
1235 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
/* Non-zero if the bits of VAL selected by the mask 0xFC00 equal
   0xD800.  For a 16-bit VAL this is the range 0xD800..0xDBFF.  */
1238 #define UTF_16_HIGH_SURROGATE_P(val) \
1239 (((val) & 0xFC00) == 0xD800)
/* Non-zero if the bits of VAL selected by the mask 0xFC00 equal
   0xDC00.  For a 16-bit VAL this is the range 0xDC00..0xDFFF.  */
1241 #define UTF_16_LOW_SURROGATE_P(val) \
1242 (((val) & 0xFC00) == 0xDC00)
/* Non-zero if VAL is 0xFFFE, 0xFFFF, or satisfies
   UTF_16_LOW_SURROGATE_P.  VAL is evaluated more than once, so it
   must not have side effects.  */
1244 #define UTF_16_INVALID_P(val) \
1245 (((val) == 0xFFFE) \
1246 || ((val) == 0xFFFF) \
1247 || UTF_16_LOW_SURROGATE_P (val))
1251 detect_coding_utf_16 (coding
, mask
)
1252 struct coding_system
*coding
;
1255 unsigned char *src
= coding
->source
, *src_base
= src
;
1256 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1257 int multibytep
= coding
->src_multibyte
;
1258 int consumed_chars
= 0;
1264 if ((c1
== 0xFF) && (c2
== 0xFE))
1266 *mask
&= CATEGORY_MASK_UTF_16_LE
;
1269 else if ((c1
== 0xFE) && (c2
== 0xFF))
1271 *mask
&= CATEGORY_MASK_UTF_16_BE
;
1279 decode_coding_utf_16 (coding
)
1280 struct coding_system
*coding
;
1282 unsigned char *src
= coding
->source
+ coding
->consumed
;
1283 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1284 unsigned char *src_base
;
1285 int *charbuf
= coding
->charbuf
;
1286 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1287 int consumed_chars
= 0, consumed_chars_base
;
1288 int multibytep
= coding
->src_multibyte
;
1289 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1290 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1291 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1292 Lisp_Object attr
, eol_type
, charset_list
;
1294 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1296 if (bom
!= utf_16_without_bom
)
1304 if (bom
== utf_16_with_bom
)
1306 if (endian
== utf_16_big_endian
1307 ? c
!= 0xFFFE : c
!= 0xFEFF)
1309 /* We are sure that there's enough room at CHARBUF. */
1318 CODING_UTF_16_ENDIAN (coding
)
1319 = endian
= utf_16_big_endian
;
1320 else if (c
== 0xFEFF)
1321 CODING_UTF_16_ENDIAN (coding
)
1322 = endian
= utf_16_little_endian
;
1325 CODING_UTF_16_ENDIAN (coding
)
1326 = endian
= utf_16_big_endian
;
1330 CODING_UTF_16_BOM (coding
) = utf_16_with_bom
;
1338 consumed_chars_base
= consumed_chars
;
1340 if (charbuf
+ 2 >= charbuf_end
)
1345 c
= (endian
== utf_16_big_endian
1346 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1349 if (! UTF_16_LOW_SURROGATE_P (c
))
1351 if (endian
== utf_16_big_endian
)
1352 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1354 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1358 if (UTF_16_HIGH_SURROGATE_P (c
))
1359 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1365 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1366 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1372 if (UTF_16_HIGH_SURROGATE_P (c
))
1373 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1380 coding
->consumed_char
+= consumed_chars_base
;
1381 coding
->consumed
= src_base
- coding
->source
;
1382 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1386 encode_coding_utf_16 (coding
)
1387 struct coding_system
*coding
;
1389 int multibytep
= coding
->dst_multibyte
;
1390 int *charbuf
= coding
->charbuf
;
1391 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1392 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1393 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1395 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1396 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1397 int produced_chars
= 0;
1398 Lisp_Object attrs
, eol_type
, charset_list
;
1401 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1403 if (bom
== utf_16_with_bom
)
1405 ASSURE_DESTINATION (safe_room
);
1407 EMIT_TWO_BYTES (0xFF, 0xFE);
1409 EMIT_TWO_BYTES (0xFE, 0xFF);
1410 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1413 while (charbuf
< charbuf_end
)
1415 ASSURE_DESTINATION (safe_room
);
1417 if (c
>= MAX_UNICODE_CHAR
)
1418 c
= coding
->default_char
;
1423 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1425 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1432 c1
= (c
>> 10) + 0xD800;
1433 c2
= (c
& 0x3FF) + 0xDC00;
1435 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1437 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1440 coding
->result
= CODING_RESULT_SUCCESS
;
1441 coding
->produced
= dst
- coding
->destination
;
1442 coding
->produced_char
+= produced_chars
;
1447 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1449 /* Emacs' internal format for representation of multiple character
1450 sets is a kind of multi-byte encoding, i.e. characters are
1451 represented by variable-length sequences of one-byte codes.
1453 ASCII characters and control characters (e.g. `tab', `newline') are
1454 represented by one-byte sequences which are their ASCII codes, in
1455 the range 0x00 through 0x7F.
1457 8-bit characters of the range 0x80..0x9F are represented by
1458 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1461 8-bit characters of the range 0xA0..0xFF are represented by
1462 one-byte sequences which are their 8-bit code.
1464 The other characters are represented by a sequence of `base
1465 leading-code', optional `extended leading-code', and one or two
1466 `position-code's. The length of the sequence is determined by the
1467 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1468 whereas extended leading-code and position-code take the range 0xA0
1469 through 0xFF. See `charset.h' for more details about leading-code
1472 --- CODE RANGE of Emacs' internal format ---
1476 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1477 eight-bit-graphic 0xA0..0xBF
1478 ELSE 0x81..0x9D + [0xA0..0xFF]+
1479 ---------------------------------------------
1481 As this is the internal character representation, the format is
1482 usually not used externally (i.e. in a file or in a data sent to a
1483 process). But, it is possible to have a text externally in this
1484 format (i.e. by encoding by the coding system `emacs-mule').
1486 In that case, a sequence of one-byte codes has a slightly different
1489 At first, all characters in eight-bit-control are represented by
1490 one-byte sequences which are their 8-bit code.
1492 Next, character composition data are represented by the byte
1493 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1495 METHOD is 0xF0 plus one of composition method (enum
1496 composition_method),
1498 BYTES is 0xA0 plus a byte length of this composition data,
1500 CHARS is 0xA0 plus the number of characters composed by this
1503 COMPONENTs are characters in multibyte form or composition
1504 rules encoded as two bytes of ASCII codes.
1506 In addition, for backward compatibility, the following formats are
1507 also recognized as composition data on decoding.
1510 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1513 MSEQ is a multibyte form, but in one of these special formats:
1514 ASCII: 0xA0 ASCII_CODE+0x80,
1515 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1516 RULE is a one byte code of the range 0xA0..0xF0 that
1517 represents a composition rule.
/* Table indexed by a byte value (256 entries).  The code in this file
   uses emacs_mule_bytes[B] as the byte length of an emacs-mule
   sequence whose first byte is B: detect_coding_emacs_mule compares
   it with the number of bytes it actually scanned, and
   decode_coding_emacs_mule tests for a value greater than 1 on bytes
   below 0xA0.  NOTE(review): where the table is filled in is not
   visible in this part of the file.  */
1520 char emacs_mule_bytes
[256];
1522 /* Leading-code followed by extended leading-code. */
1523 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1524 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1525 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1526 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1530 emacs_mule_char (coding
, composition
, nbytes
, nchars
)
1531 struct coding_system
*coding
;
1533 int *nbytes
, *nchars
;
1535 unsigned char *src
= coding
->source
+ coding
->consumed
;
1536 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1537 int multibytep
= coding
->src_multibyte
;
1538 unsigned char *src_base
= src
;
1539 struct charset
*charset
;
1542 int consumed_chars
= 0;
1553 *nbytes
= src
- src_base
;
1554 *nchars
= consumed_chars
;
1559 switch (emacs_mule_bytes
[c
])
1562 if (! (charset
= emacs_mule_charset
[c
]))
1569 if (c
== LEADING_CODE_PRIVATE_11
1570 || c
== LEADING_CODE_PRIVATE_12
)
1573 if (! (charset
= emacs_mule_charset
[c
]))
1580 if (! (charset
= emacs_mule_charset
[c
]))
1583 code
= (c
& 0x7F) << 7;
1590 if (! (charset
= emacs_mule_charset
[c
]))
1593 code
= (c
& 0x7F) << 7;
1600 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
) ? charset_ascii
1601 : code
< 0xA0 ? charset_8_bit_control
1602 : charset_8_bit_graphic
);
1608 c
= DECODE_CHAR (charset
, code
);
1611 *nbytes
= src
- src_base
;
1612 *nchars
= consumed_chars
;
1623 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1624 Check if a text is encoded in `emacs-mule'. */
1627 detect_coding_emacs_mule (coding
, mask
)
1628 struct coding_system
*coding
;
1631 unsigned char *src
= coding
->source
, *src_base
= src
;
1632 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1633 int multibytep
= coding
->src_multibyte
;
1634 int consumed_chars
= 0;
1638 /* A coding system of this category is always ASCII compatible. */
1639 src
+= coding
->head_ascii
;
1647 /* Perhaps the start of a composite character. We simply skip
1648 it because analyzing it is too expensive for detection. But,
1649 at least, we check that the composite character
1650 consists of more than 4 bytes. */
1651 unsigned char *src_base
;
1661 if (src
- src_base
<= 4)
1671 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1676 unsigned char *src_base
= src
- 1;
1683 if (src
- src_base
!= emacs_mule_bytes
[*src_base
])
1688 *mask
&= ~CATEGORY_MASK_EMACS_MULE
;
1694 *mask
&= CATEGORY_MASK_EMACS_MULE
;
1699 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1701 /* Decode a character represented as a component of composition
1702 sequence of Emacs 20/21 style at SRC. Set C to that character and
1703 update SRC to the head of next character (or an encoded composition
1704 rule). If SRC doesn't point to a composition component, set C to -1.
1705 If SRC points to an invalid byte sequence, global exit by a return
1708 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1712 int nbytes, nchars; \
1714 if (src == src_end) \
1716 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \
1721 goto invalid_code; \
1725 consumed_chars += nchars; \
1730 /* Decode a composition rule represented as a component of a composition
1731 sequence of Emacs 20 style at SRC, and append the encoded rule,
1732 COMPOSITION_ENCODE_RULE (gref, nref), to BUF, advancing BUF.  The
   macro declares its own local `c'; the rule is split as
   gref = c / 9 and nref = c % 9.  If `c' fails the range check
   0 <= c < 81, control jumps to the label `invalid_code', which the
   expanding function must provide.

   NOTE(review): the guard `if (src < src_end) goto invalid_code'
   looks inverted.  As written it rejects the case where input
   remains and then reads a byte with ONE_MORE_BYTE_NO_CHECK when
   none may be left.  Confirm whether `src >= src_end' was intended.  */
1734 #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \
1736 int c, gref, nref; \
1738 if (src < src_end) \
1739 goto invalid_code; \
1740 ONE_MORE_BYTE_NO_CHECK (c); \
1742 if (c < 0 || c >= 81) \
1743 goto invalid_code; \
1745 gref = c / 9, nref = c % 9; \
1746 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1750 #define ADD_COMPOSITION_DATA(buf, method, nchars) \
1753 *buf++ = coding->produced_char + char_offset; \
1754 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1760 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1762 /* Emacs 21 style format. The first three bytes at SRC are \
1763 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1764 the byte length of this composition information, CHARS is the \
1765 number of characters composed by this composition. */ \
1766 enum composition_method method = c - 0xF0; \
1767 int consumed_chars_limit; \
1768 int nbytes, nchars; \
1770 ONE_MORE_BYTE (c); \
1771 nbytes = c - 0xA0; \
1773 goto invalid_code; \
1774 ONE_MORE_BYTE (c); \
1775 nchars = c - 0xA0; \
1776 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1777 consumed_chars_limit = consumed_chars_base + nbytes; \
1778 if (method != COMPOSITION_RELATIVE) \
1781 while (consumed_chars < consumed_chars_limit) \
1783 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1784 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \
1786 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1788 if (consumed_chars < consumed_chars_limit) \
1789 goto invalid_code; \
1794 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1796 /* Emacs 20 style format for relative composition. */ \
1797 /* Store multibyte form of characters to be composed. */ \
1798 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1799 int *buf = components; \
1803 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1804 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1805 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1807 goto invalid_code; \
1808 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1809 for (j = 0; j < i; j++) \
1810 *charbuf++ = components[j]; \
/* Decode an Emacs 20 style rule-base composition.  Components are
   first collected in the local array `components' through the cursor
   `buf', alternating DECODE_EMACS_MULE_COMPOSITION_CHAR and
   DECODE_EMACS_MULE_COMPOSITION_RULE, and are then copied to CHARBUF.
   Uses the labels `invalid_code' and `no_more_source' of the
   expanding function.  */
1814 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1816 /* Emacs 20 style format for rule-base composition. */ \
1817 /* Store multibyte form of characters to be composed. */ \
1818 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1819 int *buf = components; \
1822 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
/* NOTE(review): one character is stored above, then each pass can */ \
/* store a rule and a character.  If all */ \
/* MAX_COMPOSITION_COMPONENTS passes run, that is more entries */ \
/* than `components' holds.  Confirm the loop exits early. */ \
1823 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1825 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \
1826 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
/* Reject a sequence with no rule/char pair or an even entry count. */ \
1828 if (i < 1 || (buf - components) % 2 == 0) \
1829 goto invalid_code; \
/* NOTE(review): this bails out when the needed space is BELOW */ \
/* charbuf_end, i.e. when there is room.  The comparison looks */ \
/* inverted; confirm. */ \
1830 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1831 goto no_more_source; \
/* NOTE(review): BUF points into `components' here, so the */ \
/* annotation is written there and not to CHARBUF.  The */ \
/* relative-composition variant passes charbuf; confirm. */ \
1832 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1833 for (j = 0; j < i; j++) \
1834 *charbuf++ = components[j]; \
/* NOTE(review): this copies the even-indexed entries a second */ \
/* time after the loop above already copied entries 0..i-1. */ \
/* Presumably chars and rules were meant to be split; confirm. */ \
1835 for (j = 0; j < i; j += 2) \
1836 *charbuf++ = components[j]; \
1841 decode_coding_emacs_mule (coding
)
1842 struct coding_system
*coding
;
1844 unsigned char *src
= coding
->source
+ coding
->consumed
;
1845 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1846 unsigned char *src_base
;
1847 int *charbuf
= coding
->charbuf
;
1848 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1849 int consumed_chars
= 0, consumed_chars_base
;
1850 int char_offset
= 0;
1851 int multibytep
= coding
->src_multibyte
;
1852 Lisp_Object attrs
, eol_type
, charset_list
;
1854 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1861 consumed_chars_base
= consumed_chars
;
1863 if (charbuf
>= charbuf_end
)
1872 if (EQ (eol_type
, Qdos
))
1875 goto no_more_source
;
1879 else if (EQ (eol_type
, Qmac
))
1887 if (charbuf
+ 5 + (MAX_COMPOSITION_COMPONENTS
* 2) - 1 > charbuf_end
)
1890 if (c
- 0xF0 >= COMPOSITION_RELATIVE
1891 && c
- 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS
)
1892 DECODE_EMACS_MULE_21_COMPOSITION (c
);
1894 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
1896 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
1900 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
1904 c
= emacs_mule_char (coding
, 0, &nbytes
, &nchars
);
1918 consumed_chars
= consumed_chars_base
;
1920 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1925 coding
->consumed_char
+= consumed_chars_base
;
1926 coding
->consumed
= src_base
- coding
->source
;
1927 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1931 #define EMACS_MULE_LEADING_CODES(id, codes) \
1934 codes[0] = id, codes[1] = 0; \
1935 else if (id < 0xE0) \
1936 codes[0] = 0x9A, codes[1] = id; \
1937 else if (id < 0xF0) \
1938 codes[0] = 0x9B, codes[1] = id; \
1939 else if (id < 0xF5) \
1940 codes[0] = 0x9C, codes[1] = id; \
1942 codes[0] = 0x9D, codes[1] = id; \
1947 encode_coding_emacs_mule (coding
)
1948 struct coding_system
*coding
;
1950 int multibytep
= coding
->dst_multibyte
;
1951 int *charbuf
= coding
->charbuf
;
1952 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1953 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1954 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1956 int produced_chars
= 0;
1957 Lisp_Object attrs
, eol_type
, charset_list
;
1960 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1962 while (charbuf
< charbuf_end
)
1964 ASSURE_DESTINATION (safe_room
);
1966 if (ASCII_CHAR_P (c
))
1967 EMIT_ONE_ASCII_BYTE (c
);
1970 struct charset
*charset
;
1974 unsigned char leading_codes
[2];
1976 charset
= char_charset (c
, charset_list
, &code
);
1979 c
= coding
->default_char
;
1980 if (ASCII_CHAR_P (c
))
1982 EMIT_ONE_ASCII_BYTE (c
);
1985 charset
= char_charset (c
, charset_list
, &code
);
1987 dimension
= CHARSET_DIMENSION (charset
);
1988 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
1989 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
1990 EMIT_ONE_BYTE (leading_codes
[0]);
1991 if (leading_codes
[1])
1992 EMIT_ONE_BYTE (leading_codes
[1]);
1994 EMIT_ONE_BYTE (code
);
1997 EMIT_ONE_BYTE (code
>> 8);
1998 EMIT_ONE_BYTE (code
& 0xFF);
2002 coding
->result
= CODING_RESULT_SUCCESS
;
2003 coding
->produced_char
+= produced_chars
;
2004 coding
->produced
= dst
- coding
->destination
;
2009 /*** 7. ISO2022 handlers ***/
2011 /* The following note describes the coding system ISO2022 briefly.
2012 Since the intention of this note is to help understand the
2013 functions in this file, some parts are NOT ACCURATE or OVERLY
2014 SIMPLIFIED. For thorough understanding, please refer to the
2015 original document of ISO2022.
2017 ISO2022 provides many mechanisms to encode several character sets
2018 in 7-bit and 8-bit environments. For 7-bit environments, all text
2019 is encoded using bytes less than 128. This may make the encoded
2020 text a little bit longer, but the text passes more easily through
2021 several gateways, some of which strip off MSB (Most Significant Bit).
2023 There are two kinds of character sets: control character set and
2024 graphic character set. The former contains control characters such
2025 as `newline' and `escape' to provide control functions (control
2026 functions are also provided by escape sequences). The latter
2027 contains graphic characters such as 'A' and '-'. Emacs recognizes
2028 two control character sets and many graphic character sets.
2030 Graphic character sets are classified into one of the following
2031 four classes, according to the number of bytes (DIMENSION) and
2032 number of characters in one dimension (CHARS) of the set:
2033 - DIMENSION1_CHARS94
2034 - DIMENSION1_CHARS96
2035 - DIMENSION2_CHARS94
2036 - DIMENSION2_CHARS96
2038 In addition, each character set is assigned an identification tag,
2039 unique for each set, called "final character" (denoted as <F>
2040 hereafter). The <F> of each character set is decided by ECMA(*)
2041 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2042 (0x30..0x3F are for private use only).
2044 Note (*): ECMA = European Computer Manufacturers Association
2046 Here are examples of graphic character set [NAME(<F>)]:
2047 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2048 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2049 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2050 o DIMENSION2_CHARS96 -- none for the moment
2052 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2053 C0 [0x00..0x1F] -- control character plane 0
2054 GL [0x20..0x7F] -- graphic character plane 0
2055 C1 [0x80..0x9F] -- control character plane 1
2056 GR [0xA0..0xFF] -- graphic character plane 1
2058 A control character set is directly designated and invoked to C0 or
2059 C1 by an escape sequence. The most common case is that:
2060 - ISO646's control character set is designated/invoked to C0, and
2061 - ISO6429's control character set is designated/invoked to C1,
2062 and usually these designations/invocations are omitted in encoded
2063 text. In a 7-bit environment, only C0 can be used, and a control
2064 character for C1 is encoded by an appropriate escape sequence to
2065 fit into the environment. All control characters for C1 are
2066 defined to have corresponding escape sequences.
2068 A graphic character set is at first designated to one of four
2069 graphic registers (G0 through G3), then these graphic registers are
2070 invoked to GL or GR. These designations and invocations can be
2071 done independently. The most common case is that G0 is invoked to
2072 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2073 these invocations and designations are omitted in encoded text.
2074 In a 7-bit environment, only GL can be used.
2076 When a graphic character set of CHARS94 is invoked to GL, codes
2077 0x20 and 0x7F of the GL area work as control characters SPACE and
2078 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2081 There are two ways of invocation: locking-shift and single-shift.
2082 With locking-shift, the invocation lasts until the next different
2083 invocation, whereas with single-shift, the invocation affects the
2084 following character only and doesn't affect the locking-shift
2085 state. Invocations are done by the following control characters or
2088 ----------------------------------------------------------------------
2089 abbrev function cntrl escape seq description
2090 ----------------------------------------------------------------------
2091 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2092 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2093 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2094 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2095 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2096 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2097 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2098 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2099 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2100 ----------------------------------------------------------------------
2101 (*) These are not used by any known coding system.
2103 Control characters for these functions are defined by macros
2104 ISO_CODE_XXX in `coding.h'.
2106 Designations are done by the following escape sequences:
2107 ----------------------------------------------------------------------
2108 escape sequence description
2109 ----------------------------------------------------------------------
2110 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2111 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2112 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2113 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2114 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2115 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2116 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2117 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2118 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2119 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2120 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2121 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2122 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2123 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2124 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2125 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2126 ----------------------------------------------------------------------
2128 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2129 of dimension 1, chars 94, and final character <F>, etc...
2131 Note (*): Although these designations are not allowed in ISO2022,
2132 Emacs accepts them on decoding, and produces them on encoding
2133 CHARS96 character sets in a coding system which is characterized as
2134 7-bit environment, non-locking-shift, and non-single-shift.
2136 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2137 '(' must be omitted. We refer to this as "short-form" hereafter.
2139 Now you may notice that there are a lot of ways for encoding the
2140 same multilingual text in ISO2022. Actually, there exist many
2141 coding systems such as Compound Text (used in X11's inter-client
2142 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2143 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
2144 localized platforms), and all of these are variants of ISO2022.
2146 In addition to the above, Emacs handles two more kinds of escape
2147 sequences: ISO6429's direction specification and Emacs' private
2148 sequence for specifying character composition.
2150 ISO6429's direction specification takes the following form:
2151 o CSI ']' -- end of the current direction
2152 o CSI '0' ']' -- end of the current direction
2153 o CSI '1' ']' -- start of left-to-right text
2154 o CSI '2' ']' -- start of right-to-left text
2155 The control character CSI (0x9B: control sequence introducer) is
2156 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2158 Character composition specification takes the following form:
2159 o ESC '0' -- start relative composition
2160 o ESC '1' -- end composition
2161 o ESC '2' -- start rule-base composition (*)
2162 o ESC '3' -- start relative composition with alternate chars (**)
2163 o ESC '4' -- start rule-base composition with alternate chars (**)
2164 Since these are not standard escape sequences of any ISO standard,
2165 their use with these meanings is restricted to Emacs only.
2167 (*) This form is used only in Emacs 20.5 and the older versions,
2168 but the newer versions can safely decode it.
2169 (**) This form is used only in Emacs 21.1 and the newer versions,
2170 and the older versions can't decode it.
2172 Here's a list of example usages of these composition escape
2173 sequences (categorized by `enum composition_method').
2175 COMPOSITION_RELATIVE:
2176 ESC 0 CHAR [ CHAR ] ESC 1
2177 COMPOSITION_WITH_RULE:
2178 ESC 2 CHAR [ RULE CHAR ] ESC 1
2179 COMPOSITION_WITH_ALTCHARS:
2180 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2181 COMPOSITION_WITH_RULE_ALTCHARS:
2182 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table with one `enum iso_code_class_type' entry per byte value
   (256 entries).  NOTE(review): its initialization is not visible in
   this part of the file.  */
2184 enum iso_code_class_type iso_code_class
[256];
/* Non-zero if charset ID is within CODING's safe_charsets table
   (ID <= CODING->max_charset_id) and its entry there is >= 0.  ID is
   evaluated twice.  There is no lower-bound check, so callers must
   pass a non-negative ID.
   NOTE(review): detect_coding_iso_2022 assigns safe_charsets through
   a `(char *)' cast.  If the element type is plain `char' and that is
   unsigned on the target, the `>= 0' test is always true.  Confirm
   the field's declared type.  */
2186 #define SAFE_CHARSET_P(coding, id) \
2187 ((id) <= (coding)->max_charset_id \
2188 && (coding)->safe_charsets[id] >= 0)
/* Non-zero if CODING_ISO_INITIAL (..., 1) of the coding system stored
   in coding_categories[CATEGORY] is >= 0.  Presumably index 1 refers
   to graphic register G1, the one invoked by shift-out; verify
   against the CODING_ISO_INITIAL definition.  */
2191 #define SHIFT_OUT_OK(category) \
2192 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2195 setup_iso_safe_charsets (Lisp_Object attrs
)
2197 Lisp_Object charset_list
, safe_charsets
;
2198 Lisp_Object request
;
2199 Lisp_Object reg_usage
;
2202 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2205 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2206 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2207 && ! EQ (charset_list
, Viso_2022_charset_list
))
2209 CODING_ATTR_CHARSET_LIST (attrs
)
2210 = charset_list
= Viso_2022_charset_list
;
2211 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2214 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2218 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2220 int id
= XINT (XCAR (tail
));
2221 if (max_charset_id
< id
)
2222 max_charset_id
= id
;
2225 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2227 request
= AREF (attrs
, coding_attr_iso_request
);
2228 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2229 reg94
= XINT (XCAR (reg_usage
));
2230 reg96
= XINT (XCDR (reg_usage
));
2232 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2236 struct charset
*charset
;
2239 charset
= CHARSET_FROM_ID (XINT (id
));
2240 reg
= Fcdr (Fassq (request
, id
));
2242 XSTRING (safe_charsets
)->data
[XINT (id
)] = XINT (reg
);
2243 else if (charset
->iso_chars_96
)
2246 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg96
;
2251 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg94
;
2254 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2258 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2259 Check if a text is encoded in ISO2022. If it is, returns an
2260 integer in which appropriate flag bits any of:
2262 CATEGORY_MASK_ISO_7_TIGHT
2263 CATEGORY_MASK_ISO_8_1
2264 CATEGORY_MASK_ISO_8_2
2265 CATEGORY_MASK_ISO_7_ELSE
2266 CATEGORY_MASK_ISO_8_ELSE
2267 are set. If a code which should never appear in ISO2022 is found,
2271 detect_coding_iso_2022 (coding
, mask
)
2272 struct coding_system
*coding
;
2275 unsigned char *src
= coding
->source
, *src_base
= src
;
2276 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2277 int multibytep
= coding
->src_multibyte
;
2278 int mask_iso
= CATEGORY_MASK_ISO
;
2279 int mask_found
= 0, mask_8bit_found
= 0;
2280 int reg
[4], shift_out
= 0, single_shifting
= 0;
2283 int consumed_chars
= 0;
2286 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2288 struct coding_system
*this = &(coding_categories
[i
]);
2289 Lisp_Object attrs
, val
;
2291 attrs
= CODING_ID_ATTRS (this->id
);
2292 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2293 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2294 setup_iso_safe_charsets (attrs
);
2295 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2296 this->max_charset_id
= XSTRING (val
)->size
- 1;
2297 this->safe_charsets
= (char *) XSTRING (val
)->data
;
2300 /* A coding system of this category is always ASCII compatible. */
2301 src
+= coding
->head_ascii
;
2303 reg
[0] = charset_ascii
, reg
[1] = reg
[2] = reg
[3] = -1;
2304 while (mask_iso
&& src
< src_end
)
2310 if (inhibit_iso_escape_detection
)
2312 single_shifting
= 0;
2314 if (c
>= '(' && c
<= '/')
2316 /* Designation sequence for a charset of dimension 1. */
2318 if (c1
< ' ' || c1
>= 0x80
2319 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2320 /* Invalid designation sequence. Just ignore. */
2322 reg
[(c
- '(') % 4] = id
;
2326 /* Designation sequence for a charset of dimension 2. */
2328 if (c
>= '@' && c
<= 'B')
2329 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2330 reg
[0] = id
= iso_charset_table
[1][0][c
];
2331 else if (c
>= '(' && c
<= '/')
2334 if (c1
< ' ' || c1
>= 0x80
2335 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2336 /* Invalid designation sequence. Just ignore. */
2338 reg
[(c
- '(') % 4] = id
;
2341 /* Invalid designation sequence. Just ignore. */
2344 else if (c
== 'N' || c
== 'O')
2346 /* ESC <Fe> for SS2 or SS3. */
2347 mask_iso
&= CATEGORY_MASK_ISO_7_ELSE
;
2350 else if (c
>= '0' && c
<= '4')
2352 /* ESC <Fp> for start/end composition. */
2353 mask_found
|= CATEGORY_MASK_ISO
;
2358 /* Invalid escape sequence. */
2359 mask_iso
&= ~CATEGORY_MASK_ISO_ESCAPE
;
2363 /* We found a valid designation sequence for CHARSET. */
2364 mask_iso
&= ~CATEGORY_MASK_ISO_8BIT
;
2365 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2367 mask_found
|= CATEGORY_MASK_ISO_7
;
2369 mask_iso
&= ~CATEGORY_MASK_ISO_7
;
2370 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2372 mask_found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2374 mask_iso
&= ~CATEGORY_MASK_ISO_7_TIGHT
;
2375 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2377 mask_found
|= CATEGORY_MASK_ISO_7_ELSE
;
2379 mask_iso
&= ~CATEGORY_MASK_ISO_7_ELSE
;
2380 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2382 mask_found
|= CATEGORY_MASK_ISO_8_ELSE
;
2384 mask_iso
&= ~CATEGORY_MASK_ISO_8_ELSE
;
2388 if (inhibit_iso_escape_detection
)
2390 single_shifting
= 0;
2393 || SHIFT_OUT_OK (coding_category_iso_7_else
)
2394 || SHIFT_OUT_OK (coding_category_iso_8_else
)))
2396 /* Locking shift out. */
2397 mask_iso
&= ~CATEGORY_MASK_ISO_7BIT
;
2398 mask_found
|= CATEGORY_MASK_ISO_ELSE
;
2403 if (inhibit_iso_escape_detection
)
2405 single_shifting
= 0;
2408 /* Locking shift in. */
2409 mask_iso
&= ~CATEGORY_MASK_ISO_7BIT
;
2410 mask_found
|= CATEGORY_MASK_ISO_ELSE
;
2415 single_shifting
= 0;
2419 int newmask
= CATEGORY_MASK_ISO_8_ELSE
;
2421 if (inhibit_iso_escape_detection
)
2423 if (c
!= ISO_CODE_CSI
)
2425 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2426 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2427 newmask
|= CATEGORY_MASK_ISO_8_1
;
2428 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2429 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2430 newmask
|= CATEGORY_MASK_ISO_8_2
;
2431 single_shifting
= 1;
2433 if (VECTORP (Vlatin_extra_code_table
)
2434 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2436 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2437 & CODING_ISO_FLAG_LATIN_EXTRA
)
2438 newmask
|= CATEGORY_MASK_ISO_8_1
;
2439 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2440 & CODING_ISO_FLAG_LATIN_EXTRA
)
2441 newmask
|= CATEGORY_MASK_ISO_8_2
;
2443 mask_iso
&= newmask
;
2444 mask_found
|= newmask
;
2451 single_shifting
= 0;
2456 single_shifting
= 0;
2457 mask_8bit_found
= 1;
2458 if (VECTORP (Vlatin_extra_code_table
)
2459 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2463 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2464 & CODING_ISO_FLAG_LATIN_EXTRA
)
2465 newmask
|= CATEGORY_MASK_ISO_8_1
;
2466 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2467 & CODING_ISO_FLAG_LATIN_EXTRA
)
2468 newmask
|= CATEGORY_MASK_ISO_8_2
;
2469 mask_iso
&= newmask
;
2470 mask_found
|= newmask
;
2477 mask_iso
&= ~(CATEGORY_MASK_ISO_7BIT
2478 | CATEGORY_MASK_ISO_7_ELSE
);
2479 mask_found
|= CATEGORY_MASK_ISO_8_1
;
2480 mask_8bit_found
= 1;
/* Check the length of succeeding codes of the range
   0xA0..0xFF.  If the byte length is odd, we exclude
   CATEGORY_MASK_ISO_8_2.  We can check this only
   when we are not single shifting.  */
2485 if (!single_shifting
2486 && mask_iso
& CATEGORY_MASK_ISO_8_2
)
2489 while (src
< src_end
)
2497 if (i
& 1 && src
< src_end
)
2498 mask_iso
&= ~CATEGORY_MASK_ISO_8_2
;
2500 mask_found
|= CATEGORY_MASK_ISO_8_2
;
2509 *mask
&= ~CATEGORY_MASK_ISO
;
2514 *mask
&= mask_iso
& mask_found
;
2515 if (! mask_8bit_found
)
2516 *mask
&= ~(CATEGORY_MASK_ISO_8BIT
| CATEGORY_MASK_ISO_8_ELSE
);
2521 /* Set designation state into CODING. */
2522 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2526 if (final < '0' || final >= 128 \
2527 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2528 || !SAFE_CHARSET_P (coding, id)) \
2530 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2531 goto invalid_code; \
2533 prev = CODING_ISO_DESIGNATION (coding, reg); \
2534 CODING_ISO_DESIGNATION (coding, reg) = id; \
2535 /* If there was an invalid designation to REG previously, and this \
2536 designation is ASCII to REG, we should keep this designation \
2538 if (prev == -2 && id == charset_ascii) \
2539 goto invalid_code; \
2543 #define MAYBE_FINISH_COMPOSITION() \
2546 if (composition_state == COMPOSING_NO) \
2548 /* It is assured that we have enough room for producing \
2549 characters stored in the table `components'. */ \
2550 if (charbuf + component_idx > charbuf_end) \
2551 goto no_more_source; \
2552 composition_state = COMPOSING_NO; \
2553 if (method == COMPOSITION_RELATIVE \
2554 || method == COMPOSITION_WITH_ALTCHARS) \
2556 for (i = 0; i < component_idx; i++) \
2557 *charbuf++ = components[i]; \
2558 char_offset += component_idx; \
2562 for (i = 0; i < component_idx; i += 2) \
2563 *charbuf++ = components[i]; \
2564 char_offset += (component_idx / 2) + 1; \
2569 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2570 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2571 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2572 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2573 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2576 #define DECODE_COMPOSITION_START(c1) \
2579 && composition_state == COMPOSING_COMPONENT_CHAR) \
2581 component_len = component_idx; \
2582 composition_state = COMPOSING_CHAR; \
2588 MAYBE_FINISH_COMPOSITION (); \
2589 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2590 goto no_more_source; \
2591 for (p = src; p < src_end - 1; p++) \
2592 if (*p == ISO_CODE_ESC && p[1] == '1') \
2594 if (p == src_end - 1) \
2596 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2597 goto invalid_code; \
2598 goto no_more_source; \
2601 /* This is surely the start of a composition. */ \
2602 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2603 : c1 == '2' ? COMPOSITION_WITH_RULE \
2604 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2605 : COMPOSITION_WITH_RULE_ALTCHARS); \
2606 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2607 : COMPOSING_COMPONENT_CHAR); \
2608 component_idx = component_len = 0; \
/* Handle composition end sequence ESC 1.  */
2615 #define DECODE_COMPOSITION_END() \
2617 int nchars = (component_len > 0 ? component_idx - component_len \
2618 : method == COMPOSITION_RELATIVE ? component_idx \
2619 : (component_idx + 1) / 2); \
2621 int *saved_charbuf = charbuf; \
2623 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2624 if (method != COMPOSITION_RELATIVE) \
2626 if (component_len == 0) \
2627 for (i = 0; i < component_idx; i++) \
2628 *charbuf++ = components[i]; \
2630 for (i = 0; i < component_len; i++) \
2631 *charbuf++ = components[i]; \
2632 *saved_charbuf = saved_charbuf - charbuf; \
2634 if (method == COMPOSITION_WITH_RULE) \
2635 for (i = 0; i < component_idx; i += 2, char_offset++) \
2636 *charbuf++ = components[i]; \
2638 for (i = component_len; i < component_idx; i++, char_offset++) \
2639 *charbuf++ = components[i]; \
2640 coding->annotated = 1; \
2641 composition_state = COMPOSING_NO; \
2645 /* Decode a composition rule from the byte C1 (and maybe one more byte
2646 from SRC) and store one encoded composition rule in
2647 coding->cmp_data. */
2649 #define DECODE_COMPOSITION_RULE(c1) \
2652 if (c1 < 81) /* old format (before ver.21) */ \
2654 int gref = (c1) / 9; \
2655 int nref = (c1) % 9; \
2656 if (gref == 4) gref = 10; \
2657 if (nref == 4) nref = 10; \
2658 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2660 else if (c1 < 93) /* new format (after ver.21) */ \
2662 ONE_MORE_BYTE (c2); \
2663 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2670 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2673 decode_coding_iso_2022 (coding
)
2674 struct coding_system
*coding
;
2676 unsigned char *src
= coding
->source
+ coding
->consumed
;
2677 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2678 unsigned char *src_base
;
2679 int *charbuf
= coding
->charbuf
;
2680 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- 4;
2681 int consumed_chars
= 0, consumed_chars_base
;
2682 int char_offset
= 0;
2683 int multibytep
= coding
->src_multibyte
;
2684 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2685 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2686 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2687 struct charset
*charset
;
2689 /* For handling composition sequence. */
2690 #define COMPOSING_NO 0
2691 #define COMPOSING_CHAR 1
2692 #define COMPOSING_RULE 2
2693 #define COMPOSING_COMPONENT_CHAR 3
2694 #define COMPOSING_COMPONENT_RULE 4
2696 int composition_state
= COMPOSING_NO
;
2697 enum composition_method method
;
2698 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2701 Lisp_Object attrs
, eol_type
, charset_list
;
2703 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2704 setup_iso_safe_charsets (attrs
);
2711 consumed_chars_base
= consumed_chars
;
2713 if (charbuf
>= charbuf_end
)
2718 /* We produce no character or one character. */
2719 switch (iso_code_class
[c1
])
2721 case ISO_0x20_or_0x7F
:
2722 if (composition_state
!= COMPOSING_NO
)
2724 if (composition_state
== COMPOSING_RULE
2725 || composition_state
== COMPOSING_COMPONENT_RULE
)
2727 DECODE_COMPOSITION_RULE (c1
);
2728 components
[component_idx
++] = c1
;
2729 composition_state
--;
2732 else if (method
== COMPOSITION_WITH_RULE
)
2733 composition_state
= COMPOSING_RULE
;
2734 else if (method
== COMPOSITION_WITH_RULE_ALTCHARS
2735 && composition_state
== COMPOSING_COMPONENT_CHAR
)
2736 composition_state
= COMPOSING_COMPONENT_CHAR
;
2738 if (charset_id_0
< 0
2739 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2741 /* This is SPACE or DEL. */
2742 charset
= CHARSET_FROM_ID (charset_ascii
);
2745 /* This is a graphic character, we fall down ... */
2747 case ISO_graphic_plane_0
:
2748 if (composition_state
== COMPOSING_RULE
)
2750 DECODE_COMPOSITION_RULE (c1
);
2751 components
[component_idx
++] = c1
;
2752 composition_state
= COMPOSING_CHAR
;
2754 charset
= CHARSET_FROM_ID (charset_id_0
);
2757 case ISO_0xA0_or_0xFF
:
2758 if (charset_id_1
< 0
2759 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2760 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2762 /* This is a graphic character, we fall down ... */
2764 case ISO_graphic_plane_1
:
2765 if (charset_id_1
< 0)
2767 charset
= CHARSET_FROM_ID (charset_id_1
);
2770 case ISO_carriage_return
:
2773 if (EQ (eol_type
, Qdos
))
2776 goto no_more_source
;
2780 else if (EQ (eol_type
, Qmac
))
2786 MAYBE_FINISH_COMPOSITION ();
2787 charset
= CHARSET_FROM_ID (charset_ascii
);
2791 MAYBE_FINISH_COMPOSITION ();
2795 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2796 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2798 CODING_ISO_INVOCATION (coding
, 0) = 1;
2799 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2803 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2805 CODING_ISO_INVOCATION (coding
, 0) = 0;
2806 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2809 case ISO_single_shift_2_7
:
2810 case ISO_single_shift_2
:
2811 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2813 /* SS2 is handled as an escape sequence of ESC 'N' */
2815 goto label_escape_sequence
;
2817 case ISO_single_shift_3
:
2818 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
/* SS3 is handled as an escape sequence of ESC 'O' */
2822 goto label_escape_sequence
;
2824 case ISO_control_sequence_introducer
:
2825 /* CSI is handled as an escape sequence of ESC '[' ... */
2827 goto label_escape_sequence
;
2831 label_escape_sequence
:
2832 /* Escape sequences handled here are invocation,
2833 designation, direction specification, and character
2834 composition specification. */
2837 case '&': /* revision of following character set */
2839 if (!(c1
>= '@' && c1
<= '~'))
2842 if (c1
!= ISO_CODE_ESC
)
2845 goto label_escape_sequence
;
2847 case '$': /* designation of 2-byte character set */
2848 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2851 if (c1
>= '@' && c1
<= 'B')
2852 { /* designation of JISX0208.1978, GB2312.1980,
2854 DECODE_DESIGNATION (0, 2, 0, c1
);
2856 else if (c1
>= 0x28 && c1
<= 0x2B)
2857 { /* designation of DIMENSION2_CHARS94 character set */
2859 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
2861 else if (c1
>= 0x2C && c1
<= 0x2F)
2862 { /* designation of DIMENSION2_CHARS96 character set */
2864 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
2868 /* We must update these variables now. */
2869 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2870 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2873 case 'n': /* invocation of locking-shift-2 */
2874 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2875 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
2877 CODING_ISO_INVOCATION (coding
, 0) = 2;
2878 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2881 case 'o': /* invocation of locking-shift-3 */
2882 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2883 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
2885 CODING_ISO_INVOCATION (coding
, 0) = 3;
2886 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2889 case 'N': /* invocation of single-shift-2 */
2890 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
2891 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
2893 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
2895 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
2899 case 'O': /* invocation of single-shift-3 */
2900 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
2901 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
2903 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
2905 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
2909 case '0': case '2': case '3': case '4': /* start composition */
2910 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
2912 DECODE_COMPOSITION_START (c1
);
2915 case '1': /* end composition */
2916 if (composition_state
== COMPOSING_NO
)
2918 DECODE_COMPOSITION_END ();
2921 case '[': /* specification of direction */
2922 if (! CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
)
/* For the moment, nested direction is not supported.
   So, `coding->mode & CODING_MODE_DIRECTION' zero means
   left-to-right, and nonzero means right-to-left.  */
2930 case ']': /* end of the current direction */
2931 coding
->mode
&= ~CODING_MODE_DIRECTION
;
2933 case '0': /* end of the current direction */
2934 case '1': /* start of left-to-right direction */
2937 coding
->mode
&= ~CODING_MODE_DIRECTION
;
2942 case '2': /* start of right-to-left direction */
2945 coding
->mode
|= CODING_MODE_DIRECTION
;
2956 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2958 if (c1
>= 0x28 && c1
<= 0x2B)
2959 { /* designation of DIMENSION1_CHARS94 character set */
2961 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
2963 else if (c1
>= 0x2C && c1
<= 0x2F)
2964 { /* designation of DIMENSION1_CHARS96 character set */
2966 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
2970 /* We must update these variables now. */
2971 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2972 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2977 /* Now we know CHARSET and 1st position code C1 of a character.
2978 Produce a decoded character while getting 2nd position code
2981 if (CHARSET_DIMENSION (charset
) > 1)
2984 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
2985 /* C2 is not in a valid range. */
2987 c1
= (c1
<< 8) | (c2
& 0x7F);
2988 if (CHARSET_DIMENSION (charset
) > 2)
2991 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
2992 /* C2 is not in a valid range. */
2994 c1
= (c1
<< 8) | (c2
& 0x7F);
2998 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3001 MAYBE_FINISH_COMPOSITION ();
3002 for (; src_base
< src
; src_base
++, char_offset
++)
3004 if (ASCII_BYTE_P (*src_base
))
3005 *charbuf
++ = *src_base
;
3007 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3010 else if (composition_state
== COMPOSING_NO
)
3016 components
[component_idx
++] = c
;
3020 MAYBE_FINISH_COMPOSITION ();
3022 consumed_chars
= consumed_chars_base
;
3024 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3029 coding
->consumed_char
+= consumed_chars_base
;
3030 coding
->consumed
= src_base
- coding
->source
;
3031 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3035 /* ISO2022 encoding stuff. */
3038 It is not enough to say just "ISO2022" on encoding, we have to
3039 specify more details. In Emacs, each coding system of ISO2022
3040 variant has the following specifications:
3041 1. Initial designation to G0 thru G3.
3042 2. Allows short-form designation?
3043 3. ASCII should be designated to G0 before control characters?
3044 4. ASCII should be designated to G0 at end of line?
3045 5. 7-bit environment or 8-bit environment?
3046 6. Use locking-shift?
3047 7. Use Single-shift?
3048 And the following two are only for Japanese:
3049 8. Use ASCII in place of JIS0201-1976-Roman?
3050 9. Use JISX0208-1983 in place of JISX0208-1978?
3051 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3052 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3056 /* Produce codes (escape sequence) for designating CHARSET to graphic
3057 register REG at DST, and increment DST. If <final-char> of CHARSET is
3058 '@', 'A', or 'B' and the coding system CODING allows, produce
3059 designation sequence of short-form. */
3061 #define ENCODE_DESIGNATION(charset, reg, coding) \
3063 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3064 char *intermediate_char_94 = "()*+"; \
3065 char *intermediate_char_96 = ",-./"; \
3066 int revision = -1; \
3069 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3070 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3072 if (revision >= 0) \
3074 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3075 EMIT_ONE_BYTE ('@' + revision); \
3077 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3078 if (CHARSET_DIMENSION (charset) == 1) \
3080 if (! CHARSET_ISO_CHARS_96 (charset)) \
3081 c = intermediate_char_94[reg]; \
3083 c = intermediate_char_96[reg]; \
3084 EMIT_ONE_ASCII_BYTE (c); \
3088 EMIT_ONE_ASCII_BYTE ('$'); \
3089 if (! CHARSET_ISO_CHARS_96 (charset)) \
3091 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3093 || final_char < '@' || final_char > 'B') \
3094 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3097 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3099 EMIT_ONE_ASCII_BYTE (final_char); \
3101 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
/* The following two macros produce codes (control character or escape
   sequence) for ISO2022 single-shift functions (single-shift-2 and
   single-shift-3).  */
3109 #define ENCODE_SINGLE_SHIFT_2 \
3111 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3112 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3114 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3115 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3119 #define ENCODE_SINGLE_SHIFT_3 \
3121 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3122 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3124 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3125 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3), and record in
   CODING which graphic register is now invoked to graphic plane 0.

   Shift-in and shift-out are emitted as the single bytes ISO_CODE_SI
   and ISO_CODE_SO.  Locking-shift-2 and locking-shift-3 are emitted
   as ESC 'n' and ESC 'o' respectively, matching the `case 'n'' and
   `case 'o'' branches of decode_coding_iso_2022.

   The macros use the variable `coding' and the EMIT_* macros of the
   expansion site.  Each one is wrapped in do-while (0) so that it is
   a single statement and can be the body of an unbraced if/else.  */

#define ENCODE_SHIFT_IN				\
  do {						\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);		\
    CODING_ISO_INVOCATION (coding, 0) = 0;	\
  } while (0)


#define ENCODE_SHIFT_OUT			\
  do {						\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);		\
    CODING_ISO_INVOCATION (coding, 0) = 1;	\
  } while (0)


#define ENCODE_LOCKING_SHIFT_2			\
  do {						\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');	\
    CODING_ISO_INVOCATION (coding, 0) = 2;	\
  } while (0)


/* The final byte is 'o'; 'n' would be read back as locking-shift-2.  */
#define ENCODE_LOCKING_SHIFT_3			\
  do {						\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');	\
    CODING_ISO_INVOCATION (coding, 0) = 3;	\
  } while (0)
3161 /* Produce codes for a DIMENSION1 character whose character set is
3162 CHARSET and whose position-code is C1. Designation and invocation
3163 sequences are also produced in advance if necessary. */
3165 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3167 int id = CHARSET_ID (charset); \
3168 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3170 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3171 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3173 EMIT_ONE_BYTE (c1 | 0x80); \
3174 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3177 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3179 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3182 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3184 EMIT_ONE_BYTE (c1 | 0x80); \
3188 /* Since CHARSET is not yet invoked to any graphic planes, we \
3189 must invoke it, or, at first, designate it to some graphic \
3190 register. Then repeat the loop to actually produce the \
3192 dst = encode_invocation_designation (charset, coding, dst, \
3197 /* Produce codes for a DIMENSION2 character whose character set is
3198 CHARSET and whose position-codes are C1 and C2. Designation and
3199 invocation codes are also produced in advance if necessary. */
3201 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3203 int id = CHARSET_ID (charset); \
3204 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3206 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3207 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3209 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3210 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3213 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3215 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3218 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3220 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3224 /* Since CHARSET is not yet invoked to any graphic planes, we \
3225 must invoke it, or, at first, designate it to some graphic \
3226 register. Then repeat the loop to actually produce the \
3228 dst = encode_invocation_designation (charset, coding, dst, \
3233 #define ENCODE_ISO_CHARACTER(charset, c) \
3235 int code = ENCODE_CHAR ((charset),(c)); \
3237 if (CHARSET_DIMENSION (charset) == 1) \
3238 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3240 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3244 /* Produce designation and invocation codes at a place pointed by DST
3245 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3249 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3250 struct charset
*charset
;
3251 struct coding_system
*coding
;
3255 int multibytep
= coding
->dst_multibyte
;
3256 int produced_chars
= *p_nchars
;
3257 int reg
; /* graphic register number */
3258 int id
= CHARSET_ID (charset
);
3260 /* At first, check designations. */
3261 for (reg
= 0; reg
< 4; reg
++)
3262 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3267 /* CHARSET is not yet designated to any graphic registers. */
3268 /* At first check the requested designation. */
3269 reg
= CODING_ISO_REQUEST (coding
, id
);
3271 /* Since CHARSET requests no special designation, designate it
3272 to graphic register 0. */
3275 ENCODE_DESIGNATION (charset
, reg
, coding
);
3278 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3279 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3281 /* Since the graphic register REG is not invoked to any graphic
3282 planes, invoke it to graphic plane 0. */
3285 case 0: /* graphic register 0 */
3289 case 1: /* graphic register 1 */
3293 case 2: /* graphic register 2 */
3294 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3295 ENCODE_SINGLE_SHIFT_2
;
3297 ENCODE_LOCKING_SHIFT_2
;
3300 case 3: /* graphic register 3 */
3301 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3302 ENCODE_SINGLE_SHIFT_3
;
3304 ENCODE_LOCKING_SHIFT_3
;
3309 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating the
   direction of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer: the two bytes ESC '[' when CODING_ISO_FLAG_SEVEN_BITS
   is set in the flags of CODING, otherwise the single byte
   ISO_CODE_CSI.  The flag is tested with `&' because the flag word
   carries other bits as well.  This is an object-like macro; it is
   written without an argument list.

   ENCODE_DIRECTION_R2L emits the introducer followed by "2]", and
   ENCODE_DIRECTION_L2R emits the introducer followed by "0]".

   The macros use the variable `coding' and the EMIT_* macros of the
   expansion site.  */

#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)


#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)


#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
3338 /* Produce codes for designation and invocation to reset the graphic
3339 planes and registers to initial state. */
3340 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3343 struct charset *charset; \
3345 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3347 for (reg = 0; reg < 4; reg++) \
3348 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3349 && (CODING_ISO_DESIGNATION (coding, reg) \
3350 != CODING_ISO_INITIAL (coding, reg))) \
3352 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3353 ENCODE_DESIGNATION (charset, reg, coding); \
3358 /* Produce designation sequences of charsets in the line started from
3359 SRC to a place pointed by DST, and return updated DST.
3361 If the current block ends before any end-of-line, we may fail to
3362 find all the necessary designations. */
3364 static unsigned char *
3365 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3366 struct coding_system
*coding
;
3367 int *charbuf
, *charbuf_end
;
3370 struct charset
*charset
;
3371 /* Table of charsets to be designated to each graphic register. */
3373 int c
, found
= 0, reg
;
3374 int produced_chars
= 0;
3375 int multibytep
= coding
->dst_multibyte
;
3377 Lisp_Object charset_list
;
3379 attrs
= CODING_ID_ATTRS (coding
->id
);
3380 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3381 if (EQ (charset_list
, Qiso_2022
))
3382 charset_list
= Viso_2022_charset_list
;
3384 for (reg
= 0; reg
< 4; reg
++)
3394 charset
= char_charset (c
, charset_list
, NULL
);
3395 id
= CHARSET_ID (charset
);
3396 reg
= CODING_ISO_REQUEST (coding
, id
);
3397 if (reg
>= 0 && r
[reg
] < 0)
3406 for (reg
= 0; reg
< 4; reg
++)
3408 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3409 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3415 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3418 encode_coding_iso_2022 (coding
)
3419 struct coding_system
*coding
;
3421 int multibytep
= coding
->dst_multibyte
;
3422 int *charbuf
= coding
->charbuf
;
3423 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3424 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3425 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3428 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3429 && CODING_ISO_BOL (coding
));
3430 int produced_chars
= 0;
3431 Lisp_Object attrs
, eol_type
, charset_list
;
3432 int ascii_compatible
;
3435 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3437 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3439 while (charbuf
< charbuf_end
)
3441 ASSURE_DESTINATION (safe_room
);
3443 if (bol_designation
)
3445 unsigned char *dst_prev
= dst
;
3447 /* We have to produce designation sequences if any now. */
3448 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3449 bol_designation
= 0;
3450 /* We are sure that designation sequences are all ASCII bytes. */
3451 produced_chars
+= dst
- dst_prev
;
3456 /* Now encode the character C. */
3457 if (c
< 0x20 || c
== 0x7F)
3460 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3462 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3463 ENCODE_RESET_PLANE_AND_REGISTER ();
3464 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3468 for (i
= 0; i
< 4; i
++)
3469 CODING_ISO_DESIGNATION (coding
, i
)
3470 = CODING_ISO_INITIAL (coding
, i
);
3473 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3475 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3476 ENCODE_RESET_PLANE_AND_REGISTER ();
3477 EMIT_ONE_ASCII_BYTE (c
);
3479 else if (ASCII_CHAR_P (c
))
3481 if (ascii_compatible
)
3482 EMIT_ONE_ASCII_BYTE (c
);
3484 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii
), c
);
3488 struct charset
*charset
= char_charset (c
, charset_list
, NULL
);
3492 c
= coding
->default_char
;
3493 charset
= char_charset (c
, charset_list
, NULL
);
3495 ENCODE_ISO_CHARACTER (charset
, c
);
3499 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3500 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3502 ASSURE_DESTINATION (safe_room
);
3503 ENCODE_RESET_PLANE_AND_REGISTER ();
3505 coding
->result
= CODING_RESULT_SUCCESS
;
3506 CODING_ISO_BOL (coding
) = bol_designation
;
3507 coding
->produced_char
+= produced_chars
;
3508 coding
->produced
= dst
- coding
->destination
;
3513 /*** 8,9. SJIS and BIG5 handlers ***/
3515 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3516 quite widely. So, for the moment, Emacs supports them in the bare
3517 C code. But, in the future, they may be supported only by CCL. */
3519 /* SJIS is a coding system encoding three character sets: ASCII, right
3520 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3521 as is. A character of charset katakana-jisx0201 is encoded by
3522 "position-code + 0x80". A character of charset japanese-jisx0208
3523 is encoded in 2-byte but two position-codes are divided and shifted
3524 so that it fit in the range below.
3526 --- CODE RANGE of SJIS ---
3527 (character set) (range)
3529 KATAKANA-JISX0201 0xA0 .. 0xDF
3530 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3531 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3532 -------------------------------
3536 /* BIG5 is a coding system encoding two character sets: ASCII and
3537 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3538 character set and is encoded in two-byte.
3540 --- CODE RANGE of BIG5 ---
3541 (character set) (range)
3543 Big5 (1st byte) 0xA1 .. 0xFE
3544 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3545 --------------------------
3549 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3550 Check if a text is encoded in SJIS. If it is, return
3551 CATEGORY_MASK_SJIS, else return 0. */
3554 detect_coding_sjis (coding
, mask
)
3555 struct coding_system
*coding
;
3558 unsigned char *src
= coding
->source
, *src_base
= src
;
3559 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3560 int multibytep
= coding
->src_multibyte
;
3561 int consumed_chars
= 0;
3565 /* A coding system of this category is always ASCII compatible. */
3566 src
+= coding
->head_ascii
;
3573 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3576 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3580 else if (c
>= 0xA0 && c
< 0xE0)
3585 *mask
&= ~CATEGORY_MASK_SJIS
;
3591 *mask
&= CATEGORY_MASK_SJIS
;
3595 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3596 Check if a text is encoded in BIG5. If it is, return
3597 CATEGORY_MASK_BIG5, else return 0. */
3600 detect_coding_big5 (coding
, mask
)
3601 struct coding_system
*coding
;
3604 unsigned char *src
= coding
->source
, *src_base
= src
;
3605 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3606 int multibytep
= coding
->src_multibyte
;
3607 int consumed_chars
= 0;
3611 /* A coding system of this category is always ASCII compatible. */
3612 src
+= coding
->head_ascii
;
3622 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3629 *mask
&= ~CATEGORY_MASK_BIG5
;
3635 *mask
&= CATEGORY_MASK_BIG5
;
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode SJIS text.  BIG5 text is decoded by decode_coding_big5.  */
/* Decode Shift-JIS bytes from CODING->source (starting at
   CODING->consumed) into CODING->charbuf.  On exit the function
   stores the consumed character/byte counts and the number of
   charbuf slots used back into CODING.

   The three charsets are taken from CHARSET_LIST in the order
   roman, kana, kanji -- the same order encode_coding_sjis uses.
   The previous code bound kanji before kana, so each of the two
   pointers referred to the other's charset.  */
3643 decode_coding_sjis (coding
)
3644 struct coding_system
*coding
;
3646 unsigned char *src
= coding
->source
+ coding
->consumed
;
3647 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3648 unsigned char *src_base
;
3649 int *charbuf
= coding
->charbuf
;
3650 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
3651 int consumed_chars
= 0, consumed_chars_base
;
3652 int multibytep
= coding
->src_multibyte
;
3653 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3654 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3656 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* Walk the charset list: 1st = roman, 2nd = kana, 3rd = kanji.  */
3659 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3660 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3661 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
/* Remember the position before each character so that it can be
   restored (see the assignment from consumed_chars_base below).  */
3668 consumed_chars_base
= consumed_chars
;
3670 if (charbuf
>= charbuf_end
)
3677 if (EQ (eol_type
, Qdos
))
3680 goto no_more_source
;
3684 else if (EQ (eol_type
, Qmac
))
3689 struct charset
*charset
;
3692 charset
= charset_roman
;
/* Lead bytes outside 0xA0..0xDF start a two-byte kanji sequence.  */
3697 if (c
< 0xA0 || c
>= 0xE0)
3699 /* SJIS -> JISX0208 */
/* Reject trail bytes outside the valid 0x40..0xFC range (and 0x7F).  */
3701 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
3705 charset
= charset_kanji
;
3708 /* SJIS -> JISX0201-Kana */
3709 charset
= charset_kana
;
3711 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
3718 consumed_chars
= consumed_chars_base
;
/* A byte that could not be decoded is stored as itself when ASCII,
   otherwise as the corresponding raw 8-bit character.  */
3720 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3725 coding
->consumed_char
+= consumed_chars_base
;
3726 coding
->consumed
= src_base
- coding
->source
;
3727 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3731 decode_coding_big5 (coding
)
3732 struct coding_system
*coding
;
3734 unsigned char *src
= coding
->source
+ coding
->consumed
;
3735 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3736 unsigned char *src_base
;
3737 int *charbuf
= coding
->charbuf
;
3738 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
3739 int consumed_chars
= 0, consumed_chars_base
;
3740 int multibytep
= coding
->src_multibyte
;
3741 struct charset
*charset_roman
, *charset_big5
;
3742 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3744 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3746 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3747 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3754 consumed_chars_base
= consumed_chars
;
3756 if (charbuf
>= charbuf_end
)
3763 if (EQ (eol_type
, Qdos
))
3766 goto no_more_source
;
3770 else if (EQ (eol_type
, Qmac
))
3775 struct charset
*charset
;
3777 charset
= charset_roman
;
3781 if (c
< 0xA1 || c
> 0xFE)
3784 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
3787 charset
= charset_big5
;
3789 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
3797 consumed_chars
= consumed_chars_base
;
3799 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3804 coding
->consumed_char
+= consumed_chars_base
;
3805 coding
->consumed
= src_base
- coding
->source
;
3806 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3809 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3810 This function can encode charsets `ascii', `katakana-jisx0201',
3811 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3812 are sure that all these charsets are registered as official charset
3813 (i.e. do not have extended leading-codes). Characters of other
3814 charsets are produced without any encoding. If SJIS_P is 1, encode
3815 SJIS text, else encode BIG5 text. */
3818 encode_coding_sjis (coding
)
3819 struct coding_system
*coding
;
3821 int multibytep
= coding
->dst_multibyte
;
3822 int *charbuf
= coding
->charbuf
;
3823 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3824 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3825 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3827 int produced_chars
= 0;
3828 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3829 int ascii_compatible
;
3830 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3833 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3835 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3836 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3837 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3839 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3841 while (charbuf
< charbuf_end
)
3843 ASSURE_DESTINATION (safe_room
);
3845 /* Now encode the character C. */
3846 if (ASCII_CHAR_P (c
) && ascii_compatible
)
3847 EMIT_ONE_ASCII_BYTE (c
);
3851 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
3855 c
= coding
->default_char
;
3856 charset
= char_charset (c
, charset_list
, &code
);
3858 if (code
== CHARSET_INVALID_CODE (charset
))
3860 if (charset
== charset_kanji
)
3864 c1
= code
>> 8, c2
= code
& 0xFF;
3865 EMIT_TWO_BYTES (c1
, c2
);
3867 else if (charset
== charset_kana
)
3868 EMIT_ONE_BYTE (code
| 0x80);
3870 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
3873 coding
->result
= CODING_RESULT_SUCCESS
;
3874 coding
->produced_char
+= produced_chars
;
3875 coding
->produced
= dst
- coding
->destination
;
3880 encode_coding_big5 (coding
)
3881 struct coding_system
*coding
;
3883 int multibytep
= coding
->dst_multibyte
;
3884 int *charbuf
= coding
->charbuf
;
3885 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3886 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3887 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3889 int produced_chars
= 0;
3890 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3891 int ascii_compatible
;
3892 struct charset
*charset_roman
, *charset_big5
;
3895 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3897 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3898 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3899 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3901 while (charbuf
< charbuf_end
)
3903 ASSURE_DESTINATION (safe_room
);
3905 /* Now encode the character C. */
3906 if (ASCII_CHAR_P (c
) && ascii_compatible
)
3907 EMIT_ONE_ASCII_BYTE (c
);
3911 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
3915 c
= coding
->default_char
;
3916 charset
= char_charset (c
, charset_list
, &code
);
3918 if (code
== CHARSET_INVALID_CODE (charset
))
3920 if (charset
== charset_big5
)
3924 c1
= code
>> 8, c2
= code
& 0xFF;
3925 EMIT_TWO_BYTES (c1
, c2
);
3928 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
3931 coding
->result
= CODING_RESULT_SUCCESS
;
3932 coding
->produced_char
+= produced_chars
;
3933 coding
->produced
= dst
- coding
->destination
;
3938 /*** 9. CCL handlers ***/
3940 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3941 Check if a text is encoded in a coding system of which
3942 encoder/decoder are written in CCL program. If it is, return
3943 CATEGORY_MASK_CCL, else return 0. */
3946 detect_coding_ccl (coding
, mask
)
3947 struct coding_system
*coding
;
3950 unsigned char *src
= coding
->source
, *src_base
= src
;
3951 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3952 int multibytep
= coding
->src_multibyte
;
3953 int consumed_chars
= 0;
3955 unsigned char *valids
= CODING_CCL_VALIDS (coding
);
3956 int head_ascii
= coding
->head_ascii
;
3959 coding
= &coding_categories
[coding_category_ccl
];
3960 attrs
= CODING_ID_ATTRS (coding
->id
);
3961 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
3970 if (!found
&& valids
[c
] > 1)
3973 *mask
&= ~CATEGORY_MASK_CCL
;
3979 *mask
&= CATEGORY_MASK_CCL
;
3984 decode_coding_ccl (coding
)
3985 struct coding_system
*coding
;
3987 unsigned char *src
= coding
->source
+ coding
->consumed
;
3988 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3989 int *charbuf
= coding
->charbuf
;
3990 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
3991 int consumed_chars
= 0;
3992 int multibytep
= coding
->src_multibyte
;
3993 struct ccl_program ccl
;
3994 int source_charbuf
[1024];
3995 int source_byteidx
[1024];
3997 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
3999 while (src
< src_end
)
4001 unsigned char *p
= src
;
4002 int *source
, *source_end
;
4006 while (i
< 1024 && p
< src_end
)
4008 source_byteidx
[i
] = p
- src
;
4009 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4012 while (i
< 1024 && p
< src_end
)
4013 source_charbuf
[i
++] = *p
++;
4015 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4018 source
= source_charbuf
;
4019 source_end
= source
+ i
;
4020 while (source
< source_end
)
4022 ccl_driver (&ccl
, source
, charbuf
,
4023 source_end
- source
, charbuf_end
- charbuf
);
4024 source
+= ccl
.consumed
;
4025 charbuf
+= ccl
.produced
;
4026 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
4029 if (source
< source_end
)
4030 src
+= source_byteidx
[source
- source_charbuf
];
4033 consumed_chars
+= source
- source_charbuf
;
4035 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4036 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4042 case CCL_STAT_SUSPEND_BY_SRC
:
4043 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4045 case CCL_STAT_SUSPEND_BY_DST
:
4048 case CCL_STAT_INVALID_CMD
:
4049 coding
->result
= CODING_RESULT_INTERRUPT
;
4052 coding
->result
= CODING_RESULT_SUCCESS
;
4055 coding
->consumed_char
+= consumed_chars
;
4056 coding
->consumed
= src
- coding
->source
;
4057 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4061 encode_coding_ccl (coding
)
4062 struct coding_system
*coding
;
4064 struct ccl_program ccl
;
4065 int multibytep
= coding
->dst_multibyte
;
4066 int *charbuf
= coding
->charbuf
;
4067 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4068 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4069 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4070 unsigned char *adjusted_dst_end
= dst_end
- 1;
4071 int destination_charbuf
[1024];
4072 int i
, produced_chars
= 0;
4074 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4076 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4077 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4079 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
4081 int dst_bytes
= dst_end
- dst
;
4082 if (dst_bytes
> 1024)
4085 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4086 charbuf_end
- charbuf
, dst_bytes
);
4087 charbuf
+= ccl
.consumed
;
4089 for (i
= 0; i
< ccl
.produced
; i
++)
4090 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4093 for (i
= 0; i
< ccl
.produced
; i
++)
4094 *dst
++ = destination_charbuf
[i
] & 0xFF;
4095 produced_chars
+= ccl
.produced
;
4101 case CCL_STAT_SUSPEND_BY_SRC
:
4102 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4104 case CCL_STAT_SUSPEND_BY_DST
:
4105 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
4108 case CCL_STAT_INVALID_CMD
:
4109 coding
->result
= CODING_RESULT_INTERRUPT
;
4112 coding
->result
= CODING_RESULT_SUCCESS
;
4116 coding
->produced_char
+= produced_chars
;
4117 coding
->produced
= dst
- coding
->destination
;
4123 /*** 10, 11. no-conversion handlers ***/
4125 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4128 decode_coding_raw_text (coding
)
4129 struct coding_system
*coding
;
4131 coding
->chars_at_source
= 1;
4132 coding
->consumed_char
= 0;
4133 coding
->consumed
= 0;
4134 coding
->result
= CODING_RESULT_SUCCESS
;
/* Copy the characters in CODING->charbuf to CODING->destination
   without charset conversion.  Four cases are handled, selected by
   whether the destination (MULTIBYTEP) and the source
   (CODING->src_multibyte) are multibyte.  On exit the result code,
   the produced character count and the produced byte count are
   stored in CODING.

   In the last case the number of produced characters is now
   measured from where writing started, CODING->destination +
   CODING->produced.  It used to be measured from the end of the
   destination buffer, which can never give a positive count.  */
4138 encode_coding_raw_text (coding
)
4139 struct coding_system
*coding
;
4141 int multibytep
= coding
->dst_multibyte
;
4142 int *charbuf
= coding
->charbuf
;
4143 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4144 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4145 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4146 int produced_chars
= 0;
4151 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4153 if (coding
->src_multibyte
)
4154 while (charbuf
< charbuf_end
)
4156 ASSURE_DESTINATION (safe_room
);
4158 if (ASCII_CHAR_P (c
))
4159 EMIT_ONE_ASCII_BYTE (c
);
4160 else if (CHAR_BYTE8_P (c
))
4162 c
= CHAR_TO_BYTE8 (c
);
4167 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4169 CHAR_STRING_ADVANCE (c
, p1
);
4171 EMIT_ONE_BYTE (*p0
);
4175 while (charbuf
< charbuf_end
)
4177 ASSURE_DESTINATION (safe_room
);
4184 if (coding
->src_multibyte
)
4186 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4188 while (charbuf
< charbuf_end
)
4190 ASSURE_DESTINATION (safe_room
);
4192 if (ASCII_CHAR_P (c
))
4194 else if (CHAR_BYTE8_P (c
))
4195 *dst
++ = CHAR_TO_BYTE8 (c
);
4197 CHAR_STRING_ADVANCE (c
, dst
);
4203 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4204 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4205 *dst
++ = *charbuf
++;
/* One byte was written per character, so the distance from the
   starting write position is the character count.  */
4206 produced_chars
= dst
- (coding
->destination
+ coding
->produced
);
4209 coding
->result
= CODING_RESULT_SUCCESS
;
4210 coding
->produced_char
+= produced_chars
;
4211 coding
->produced
= dst
- coding
->destination
;
4216 detect_coding_charset (coding
, mask
)
4217 struct coding_system
*coding
;
4220 unsigned char *src
= coding
->source
, *src_base
= src
;
4221 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4222 int multibytep
= coding
->src_multibyte
;
4223 int consumed_chars
= 0;
4224 Lisp_Object attrs
, valids
;
4226 coding
= &coding_categories
[coding_category_charset
];
4227 attrs
= CODING_ID_ATTRS (coding
->id
);
4228 valids
= AREF (attrs
, coding_attr_charset_valids
);
4230 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4231 src
+= coding
->head_ascii
;
4238 if (NILP (AREF (valids
, c
)))
4241 *mask
&= ~CATEGORY_MASK_CHARSET
;
4245 *mask
&= CATEGORY_MASK_CHARSET
;
4250 decode_coding_charset (coding
)
4251 struct coding_system
*coding
;
4253 unsigned char *src
= coding
->source
+ coding
->consumed
;
4254 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4255 unsigned char *src_base
;
4256 int *charbuf
= coding
->charbuf
;
4257 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4258 int consumed_chars
= 0, consumed_chars_base
;
4259 int multibytep
= coding
->src_multibyte
;
4260 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4262 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4263 valids
= AREF (attrs
, coding_attr_charset_valids
);
4270 consumed_chars_base
= consumed_chars
;
4272 if (charbuf
>= charbuf_end
)
4278 if (EQ (eol_type
, Qdos
))
4284 else if (EQ (eol_type
, Qmac
))
4290 struct charset
*charset
;
4293 val
= AREF (valids
, c
);
4296 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4297 if (CHARSET_DIMENSION (charset
) > 1)
4301 if (CHARSET_DIMENSION (charset
) > 2)
4305 if (CHARSET_DIMENSION (charset
) > 3)
4312 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4321 consumed_chars
= consumed_chars_base
;
4323 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4328 coding
->consumed_char
+= consumed_chars_base
;
4329 coding
->consumed
= src_base
- coding
->source
;
4330 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4334 encode_coding_charset (coding
)
4335 struct coding_system
*coding
;
4337 int multibytep
= coding
->dst_multibyte
;
4338 int *charbuf
= coding
->charbuf
;
4339 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4340 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4341 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4342 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4343 int produced_chars
= 0;
4344 Lisp_Object attrs
, eol_type
, charset_list
;
4345 int ascii_compatible
;
4348 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4349 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4351 while (charbuf
< charbuf_end
)
4353 struct charset
*charset
;
4356 ASSURE_DESTINATION (safe_room
);
4358 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4359 EMIT_ONE_ASCII_BYTE (c
);
4362 charset
= char_charset (c
, charset_list
, &code
);
4365 if (CHARSET_DIMENSION (charset
) == 1)
4366 EMIT_ONE_BYTE (code
);
4367 else if (CHARSET_DIMENSION (charset
) == 2)
4368 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4369 else if (CHARSET_DIMENSION (charset
) == 3)
4370 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4372 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4373 (code
>> 8) & 0xFF, code
& 0xFF);
4376 EMIT_ONE_BYTE (coding
->default_char
);
4380 coding
->result
= CODING_RESULT_SUCCESS
;
4381 coding
->produced_char
+= produced_chars
;
4382 coding
->produced
= dst
- coding
->destination
;
4387 /*** 10. C library functions ***/
4389 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
4390 has a property `coding-system'. The value of this property is a
4391 vector of length 5 (called as coding-vector). Among elements of
4392 this vector, the first (element[0]) and the fifth (element[4])
4393 carry important information for decoding/encoding. Before
4394 decoding/encoding, this information should be set in fields of a
4395 structure of type `coding_system'.
4397 A value of property `coding-system' can be a symbol of another
4398 subsidiary coding-system. In that case, Emacs gets coding-vector
4401 `element[0]' contains information to be set in `coding->type'. The
4402 value and its meaning is as follows:
4404 0 -- coding_type_emacs_mule
4405 1 -- coding_type_sjis
4406 2 -- coding_type_iso_2022
4407 3 -- coding_type_big5
4408 4 -- coding_type_ccl encoder/decoder written in CCL
4409 nil -- coding_type_no_conversion
4410 t -- coding_type_undecided (automatic conversion on decoding,
4411 no-conversion on encoding)
4413 `element[4]' contains information to be set in `coding->flags' and
4414 `coding->spec'. The meaning varies by `coding->type'.
4416 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4417 of length 32 (of which the first 13 sub-elements are used now).
4418 Meanings of these sub-elements are:
4420 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4421 If the value is an integer of valid charset, the charset is
4422 assumed to be designated to graphic register N initially.
4424 If the value is minus, it is a minus value of charset which
4425 reserves graphic register N, which means that the charset is
4426 not designated initially but should be designated to graphic
4427 register N just before encoding a character in that charset.
4429 If the value is nil, graphic register N is never used on
4432 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4433 Each value takes t or nil. See the section ISO2022 of
4434 `coding.h' for more information.
4436 If `coding->type' is `coding_type_big5', element[4] is t to denote
4437 BIG5-ETen or nil to denote BIG5-HKU.
4439 If `coding->type' takes the other value, element[4] is ignored.
4441 Emacs Lisp's coding system also carries information about format of
4442 end-of-line in a value of property `eol-type'. If the value is
4443 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4444 it is not integer, it should be a vector of subsidiary coding
4445 systems of which property `eol-type' has one of above values.
4449 /* Setup coding context CODING from information about CODING_SYSTEM.
4450 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4451 CODING_SYSTEM is invalid, signal an error. */
4454 setup_coding_system (coding_system
, coding
)
4455 Lisp_Object coding_system
;
4456 struct coding_system
*coding
;
4459 Lisp_Object eol_type
;
4460 Lisp_Object coding_type
;
4463 if (NILP (coding_system
))
4464 coding_system
= Qno_conversion
;
4466 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4468 attrs
= CODING_ID_ATTRS (coding
->id
);
4469 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4472 coding
->head_ascii
= -1;
4473 coding
->common_flags
4474 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4476 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4477 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4478 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4479 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4481 coding_type
= CODING_ATTR_TYPE (attrs
);
4482 if (EQ (coding_type
, Qundecided
))
4484 coding
->detector
= NULL
;
4485 coding
->decoder
= decode_coding_raw_text
;
4486 coding
->encoder
= encode_coding_raw_text
;
4487 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4489 else if (EQ (coding_type
, Qiso_2022
))
4492 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4494 /* Invoke graphic register 0 to plane 0. */
4495 CODING_ISO_INVOCATION (coding
, 0) = 0;
4496 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4497 CODING_ISO_INVOCATION (coding
, 1)
4498 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4499 /* Setup the initial status of designation. */
4500 for (i
= 0; i
< 4; i
++)
4501 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4502 /* Not single shifting initially. */
4503 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4504 /* Beginning of buffer should also be regarded as bol. */
4505 CODING_ISO_BOL (coding
) = 1;
4506 coding
->detector
= detect_coding_iso_2022
;
4507 coding
->decoder
= decode_coding_iso_2022
;
4508 coding
->encoder
= encode_coding_iso_2022
;
4509 if (flags
& CODING_ISO_FLAG_SAFE
)
4510 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4511 coding
->common_flags
4512 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4513 | CODING_REQUIRE_FLUSHING_MASK
);
4514 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4515 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4516 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4518 setup_iso_safe_charsets (attrs
);
4519 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4520 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4521 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4523 CODING_ISO_FLAGS (coding
) = flags
;
4525 else if (EQ (coding_type
, Qcharset
))
4527 coding
->detector
= detect_coding_charset
;
4528 coding
->decoder
= decode_coding_charset
;
4529 coding
->encoder
= encode_coding_charset
;
4530 coding
->common_flags
4531 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4533 else if (EQ (coding_type
, Qutf_8
))
4535 coding
->detector
= detect_coding_utf_8
;
4536 coding
->decoder
= decode_coding_utf_8
;
4537 coding
->encoder
= encode_coding_utf_8
;
4538 coding
->common_flags
4539 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4541 else if (EQ (coding_type
, Qutf_16
))
4543 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4544 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4545 : EQ (val
, Qt
) ? utf_16_with_bom
4546 : utf_16_without_bom
);
4547 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4548 CODING_UTF_16_ENDIAN (coding
) = (NILP (val
) ? utf_16_big_endian
4549 : utf_16_little_endian
);
4550 CODING_UTF_16_SURROGATE (coding
) = 0;
4551 coding
->detector
= detect_coding_utf_16
;
4552 coding
->decoder
= decode_coding_utf_16
;
4553 coding
->encoder
= encode_coding_utf_16
;
4554 coding
->common_flags
4555 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4557 else if (EQ (coding_type
, Qccl
))
4559 coding
->detector
= detect_coding_ccl
;
4560 coding
->decoder
= decode_coding_ccl
;
4561 coding
->encoder
= encode_coding_ccl
;
4562 coding
->common_flags
4563 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4564 | CODING_REQUIRE_FLUSHING_MASK
);
4566 else if (EQ (coding_type
, Qemacs_mule
))
4568 coding
->detector
= detect_coding_emacs_mule
;
4569 coding
->decoder
= decode_coding_emacs_mule
;
4570 coding
->encoder
= encode_coding_emacs_mule
;
4571 coding
->common_flags
4572 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4573 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4574 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4576 Lisp_Object tail
, safe_charsets
;
4577 int max_charset_id
= 0;
4579 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4581 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4582 max_charset_id
= XFASTINT (XCAR (tail
));
4583 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
4585 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4587 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
4588 coding
->max_charset_id
= max_charset_id
;
4589 coding
->safe_charsets
= (char *) XSTRING (safe_charsets
)->data
;
4592 else if (EQ (coding_type
, Qshift_jis
))
4594 coding
->detector
= detect_coding_sjis
;
4595 coding
->decoder
= decode_coding_sjis
;
4596 coding
->encoder
= encode_coding_sjis
;
4597 coding
->common_flags
4598 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4600 else if (EQ (coding_type
, Qbig5
))
4602 coding
->detector
= detect_coding_big5
;
4603 coding
->decoder
= decode_coding_big5
;
4604 coding
->encoder
= encode_coding_big5
;
4605 coding
->common_flags
4606 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4608 else /* EQ (coding_type, Qraw_text) */
4610 coding
->detector
= NULL
;
4611 coding
->decoder
= decode_coding_raw_text
;
4612 coding
->encoder
= encode_coding_raw_text
;
4613 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4619 /* Return raw-text or one of its subsidiaries that has the same
4620 eol_type as CODING-SYSTEM. */
/* Map CODING_SYSTEM to raw-text or to one of raw-text's subsidiaries.
   SPEC slot 0 holds the attribute vector and slot 2 the eol type of
   a coding system, as read below.  */
4623 raw_text_coding_system (coding_system
)
4624 Lisp_Object coding_system
;
4626 Lisp_Object spec
, attrs
;
4627 Lisp_Object eol_type
, raw_text_eol_type
;
4629 spec
= CODING_SYSTEM_SPEC (coding_system
);
4630 attrs
= AREF (spec
, 0);
/* Already of type raw-text: return the argument unchanged.  */
4632 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
4633 return coding_system
;
4635 eol_type
= AREF (spec
, 2);
/* NOTE(review): the statement governed by this test is not visible
   here; presumably it handles an undecided (vector) eol type --
   confirm against the full file.  */
4636 if (VECTORP (eol_type
))
/* Pick the element of raw-text's eol slot matching EOL_TYPE:
   index 0 for Qunix, 1 for Qdos, 2 otherwise.  */
4638 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
4639 raw_text_eol_type
= AREF (spec
, 2);
4640 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
4641 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
4642 : AREF (raw_text_eol_type
, 2));
4646 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4647 does, return the subsidiary of CODING_SYSTEM that has the same eol-spec as
4648 PARENT. Otherwise, return CODING_SYSTEM. */
4651 coding_inherit_eol_type (coding_system
, parent
)
4652 Lisp_Object coding_system
, parent
;
4654 Lisp_Object spec
, attrs
, eol_type
;
4656 spec
= CODING_SYSTEM_SPEC (coding_system
);
4657 attrs
= AREF (spec
, 0);
4658 eol_type
= AREF (spec
, 2);
4659 if (VECTORP (eol_type
))
4661 Lisp_Object parent_spec
;
4662 Lisp_Object parent_eol_type
;
4665 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
4666 parent_eol_type
= AREF (parent_spec
, 2);
4667 if (EQ (parent_eol_type
, Qunix
))
4668 coding_system
= AREF (eol_type
, 0);
4669 else if (EQ (parent_eol_type
, Qdos
))
4670 coding_system
= AREF (eol_type
, 1);
4671 else if (EQ (parent_eol_type
, Qmac
))
4672 coding_system
= AREF (eol_type
, 2);
4674 return coding_system
;
4677 /* Emacs has a mechanism to automatically detect a coding system if it
4678 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4679 it's impossible to distinguish some coding systems accurately
4680 because they use the same range of codes. So, at first, coding
4681 systems are grouped into the following categories:
4683 o coding-category-emacs-mule
4685 The category for a coding system which has the same code range
4686 as Emacs' internal format. Assigned the coding-system (Lisp
4687 symbol) `emacs-mule' by default.
4689 o coding-category-sjis
4691 The category for a coding system which has the same code range
4692 as SJIS. Assigned the coding-system (Lisp
4693 symbol) `japanese-shift-jis' by default.
4695 o coding-category-iso-7
4697 The category for a coding system which has the same code range
4698 as ISO2022 of 7-bit environment. This doesn't use any locking
4699 shift and single shift functions. This can encode/decode all
4700 charsets. Assigned the coding-system (Lisp symbol)
4701 `iso-2022-7bit' by default.
4703 o coding-category-iso-7-tight
4705 Same as coding-category-iso-7 except that this can
4706 encode/decode only the specified charsets.
4708 o coding-category-iso-8-1
4710 The category for a coding system which has the same code range
4711 as ISO2022 of 8-bit environment and graphic plane 1 used only
4712 for DIMENSION1 charset. This doesn't use any locking shift
4713 and single shift functions. Assigned the coding-system (Lisp
4714 symbol) `iso-latin-1' by default.
4716 o coding-category-iso-8-2
4718 The category for a coding system which has the same code range
4719 as ISO2022 of 8-bit environment and graphic plane 1 used only
4720 for DIMENSION2 charset. This doesn't use any locking shift
4721 and single shift functions. Assigned the coding-system (Lisp
4722 symbol) `japanese-iso-8bit' by default.
4724 o coding-category-iso-7-else
4726 The category for a coding system which has the same code range
4727 as ISO2022 of 7-bit environment but uses locking shift or
4728 single shift functions. Assigned the coding-system (Lisp
4729 symbol) `iso-2022-7bit-lock' by default.
4731 o coding-category-iso-8-else
4733 The category for a coding system which has the same code range
4734 as ISO2022 of 8-bit environment but uses locking shift or
4735 single shift functions. Assigned the coding-system (Lisp
4736 symbol) `iso-2022-8bit-ss2' by default.
4738 o coding-category-big5
4740 The category for a coding system which has the same code range
4741 as BIG5. Assigned the coding-system (Lisp symbol)
4742 `cn-big5' by default.
4744 o coding-category-utf-8
4746 The category for a coding system which has the same code range
4747 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4748 symbol) `utf-8' by default.
4750 o coding-category-utf-16-be
4752 The category for a coding system in which a text has an
4753 Unicode signature (cf. Unicode Standard) in the order of BIG
4754 endian at the head. Assigned the coding-system (Lisp symbol)
4755 `utf-16-be' by default.
4757 o coding-category-utf-16-le
4759 The category for a coding system in which a text has an
4760 Unicode signature (cf. Unicode Standard) in the order of
4761 LITTLE endian at the head. Assigned the coding-system (Lisp
4762 symbol) `utf-16-le' by default.
4764 o coding-category-ccl
4766 The category for a coding system of which encoder/decoder is
4767 written in CCL programs. The default value is nil, i.e., no
4768 coding system is assigned.
4770 o coding-category-binary
4772 The category for a coding system not categorized in any of the
4773 above. Assigned the coding-system (Lisp symbol)
4774 `no-conversion' by default.
4776 Each of them is a Lisp symbol and the value is an actual
4777 `coding-system' (which is also a Lisp symbol) assigned by a user.
4778 What Emacs does actually is to detect a category of coding system.
4779 Then, it uses a `coding-system' assigned to it. If Emacs can't
4780 decide only one possible category, it selects a category of the
4781 highest priority. Priorities of categories are also specified by a
4782 user in a Lisp variable `coding-category-list'.
4786 #define EOL_SEEN_NONE 0
4787 #define EOL_SEEN_LF 1
4788 #define EOL_SEEN_CR 2
4789 #define EOL_SEEN_CRLF 4
4791 /* Detect how end-of-line of a text of length CODING->src_bytes
4792 pointed by CODING->source is encoded. Return one of
4795 #define MAX_EOL_CHECK_COUNT 3
/* Scan the SRC_BYTES bytes at SOURCE for end-of-line sequences and
   work out which EOL_SEEN_* format the text uses.

   Two scanning loops are visible.  The first examines the text two
   bytes at a time, using the UTF-16 endianness recorded in
   CODING->spec to decide which byte of each pair must be zero; the
   second examines single bytes.  In both loops a `\n' counts as
   EOL_SEEN_LF, a `\r' followed by `\n' as EOL_SEEN_CRLF, and any other
   `\r' as EOL_SEEN_CR.  The first end-of-line found sets EOL_SEEN; a
   later one of a different kind resets EOL_SEEN to EOL_SEEN_LF.  A
   counter is compared against MAX_EOL_CHECK_COUNT after each
   end-of-line (the statement executed when it matches is not visible
   in this listing -- presumably it ends the scan).

   NOTE(review): the two-byte loop is selected by
   EQ (coding_type, Qccl) although it reads coding->spec.utf_16, and
   detect_coding_system calls this function for coding systems whose
   type is Qutf_16.  Presumably the test was meant to be against
   Qutf_16 -- confirm before relying on EOL detection for UTF-16.  */
4798 detect_eol (coding
, source
, src_bytes
)
4799 struct coding_system
*coding
;
4800 unsigned char *source
;
4801 EMACS_INT src_bytes
;
4803 Lisp_Object attrs
, coding_type
;
4804 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
4807 int eol_seen
= EOL_SEEN_NONE
;
4809 attrs
= CODING_ID_ATTRS (coding
->id
);
4810 coding_type
= CODING_ATTR_TYPE (attrs
);
4812 if (EQ (coding_type
, Qccl
))
4816 msb
= coding
->spec
.utf_16
.endian
== utf_16_little_endian
;
4819 while (src
+ 1 < src_end
)
4822 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
4827 this_eol
= EOL_SEEN_LF
;
4828 else if (src
+ 3 >= src_end
4829 || src
[msb
+ 2] != 0
4830 || src
[lsb
+ 2] != '\n')
4831 this_eol
= EOL_SEEN_CR
;
4833 this_eol
= EOL_SEEN_CRLF
;
4835 if (eol_seen
== EOL_SEEN_NONE
)
4836 /* This is the first end-of-line. */
4837 eol_seen
= this_eol
;
4838 else if (eol_seen
!= this_eol
)
4840 /* The found type is different from what found before. */
4841 eol_seen
= EOL_SEEN_LF
;
4844 if (++total
== MAX_EOL_CHECK_COUNT
)
4852 while (src
< src_end
)
4855 if (c
== '\n' || c
== '\r')
4860 this_eol
= EOL_SEEN_LF
;
4861 else if (src
>= src_end
|| *src
!= '\n')
4862 this_eol
= EOL_SEEN_CR
;
4864 this_eol
= EOL_SEEN_CRLF
, src
++;
4866 if (eol_seen
== EOL_SEEN_NONE
)
4867 /* This is the first end-of-line. */
4868 eol_seen
= this_eol
;
4869 else if (eol_seen
!= this_eol
)
4871 /* The found type is different from what found before. */
4872 eol_seen
= EOL_SEEN_LF
;
4875 if (++total
== MAX_EOL_CHECK_COUNT
)
4885 adjust_coding_eol_type (coding
, eol_seen
)
4886 struct coding_system
*coding
;
4889 Lisp_Object eol_type
;
4891 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4892 if (eol_seen
& EOL_SEEN_LF
)
4893 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
4894 else if (eol_type
& EOL_SEEN_CRLF
)
4895 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
4896 else if (eol_type
& EOL_SEEN_CR
)
4897 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
4900 /* Detect how a text specified in CODING is encoded. If a coding
4901 system is detected, update fields of CODING by the detected coding
4905 detect_coding (coding
)
4906 struct coding_system
*coding
;
4908 unsigned char *src
, *src_end
;
4909 Lisp_Object attrs
, coding_type
;
4911 coding
->consumed
= coding
->consumed_char
= 0;
4912 coding
->produced
= coding
->produced_char
= 0;
4913 coding_set_source (coding
);
4915 src_end
= coding
->source
+ coding
->src_bytes
;
4917 /* If we have not yet decided the text encoding type, detect it
4919 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
4921 int mask
= CATEGORY_MASK_ANY
;
4924 for (src
= coding
->source
; src
< src_end
; src
++)
4927 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
4929 || c
== ISO_CODE_SO
)))
4932 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
4934 if (coding
->head_ascii
< coding
->src_bytes
)
4938 for (i
= 0; i
< coding_category_raw_text
; i
++)
4940 enum coding_category category
= coding_priorities
[i
];
4941 struct coding_system
*this = coding_categories
+ category
;
4943 if (category
>= coding_category_raw_text
4944 || detected
& (1 << category
))
4949 /* No coding system of this category is defined. */
4950 mask
&= ~(1 << category
);
4954 detected
|= detected_mask
[category
];
4955 if ((*(this->detector
)) (coding
, &mask
))
4960 setup_coding_system (Qraw_text
, coding
);
4961 else if (mask
!= CATEGORY_MASK_ANY
)
4962 for (i
= 0; i
< coding_category_raw_text
; i
++)
4964 enum coding_category category
= coding_priorities
[i
];
4965 struct coding_system
*this = coding_categories
+ category
;
4967 if (mask
& (1 << category
))
4969 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
4976 attrs
= CODING_ID_ATTRS (coding
->id
);
4977 coding_type
= CODING_ATTR_TYPE (attrs
);
4979 /* If we have not yet decided the EOL type, detect it now. But, the
4980 detection is impossible for a CCL based coding system, in which
4981 case, we detect the EOL type after decoding. */
4982 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
))
4983 && ! EQ (coding_type
, Qccl
))
4985 int eol_seen
= detect_eol (coding
, coding
->source
, coding
->src_bytes
);
4987 if (eol_seen
!= EOL_SEEN_NONE
)
4988 adjust_coding_eol_type (coding
, eol_seen
);
4995 struct coding_system
*coding
;
4997 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
)))
4999 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5000 unsigned char *pend
= p
+ coding
->produced
;
5001 int eol_seen
= EOL_SEEN_NONE
;
5003 for (; p
< pend
; p
++)
5006 eol_seen
|= EOL_SEEN_LF
;
5007 else if (*p
== '\r')
5009 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5011 eol_seen
|= EOL_SEEN_CRLF
;
5015 eol_seen
|= EOL_SEEN_CR
;
5018 if (eol_seen
!= EOL_SEEN_NONE
)
5019 adjust_coding_eol_type (coding
, eol_seen
);
5022 if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qmac
))
5024 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5025 unsigned char *pend
= p
+ coding
->produced
;
5027 for (; p
< pend
; p
++)
5031 else if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qdos
))
5033 unsigned char *p
, *pbeg
, *pend
;
5034 Lisp_Object undo_list
;
5036 move_gap_both (coding
->dst_pos
+ coding
->produced_char
,
5037 coding
->dst_pos_byte
+ coding
->produced
);
5038 undo_list
= current_buffer
->undo_list
;
5039 current_buffer
->undo_list
= Qt
;
5040 del_range_2 (coding
->dst_pos
, coding
->dst_pos_byte
, GPT
, GPT_BYTE
, Qnil
);
5041 current_buffer
->undo_list
= undo_list
;
5043 pend
= pbeg
+ coding
->produced
;
5045 for (p
= pend
- 1; p
>= pbeg
; p
--)
5048 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
- p
- 1);
5051 coding
->produced_char
-= coding
->produced
- (pend
- pbeg
);
5052 coding
->produced
= pend
- pbeg
;
5053 insert_from_gap (coding
->produced_char
, coding
->produced
);
/* Translate the characters held in CODING->charbuf (the first
   CODING->charbuf_used entries) in place, storing
   translate_char (TABLE, c) back into the buffer.  The function first
   tests CODING->chars_at_source; the statement taken when that flag is
   set is not visible in this listing (presumably an early return).

   NOTE(review): the parameters are declared in the order
   (CODING, TABLE), but decode_coding and encode_coding in this file
   call translate_chars with the table first and the coding system
   second.  With unprototyped K&R definitions the compiler cannot catch
   this; the call sites look swapped -- confirm and fix them.  */
5058 translate_chars (coding
, table
)
5059 struct coding_system
*coding
;
5062 int *charbuf
= coding
->charbuf
;
5063 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5066 if (coding
->chars_at_source
)
5069 while (charbuf
< charbuf_end
)
5075 *charbuf
++ = translate_char (table
, c
);
5080 produce_chars (coding
)
5081 struct coding_system
*coding
;
5083 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5084 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5086 int produced_chars
= 0;
5088 if (! coding
->chars_at_source
)
5090 /* Characters are in coding->charbuf. */
5091 int *buf
= coding
->charbuf
;
5092 int *buf_end
= buf
+ coding
->charbuf_used
;
5093 unsigned char *adjusted_dst_end
;
5095 if (BUFFERP (coding
->src_object
)
5096 && EQ (coding
->src_object
, coding
->dst_object
))
5097 dst_end
= coding
->source
+ coding
->consumed
;
5098 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5100 while (buf
< buf_end
)
5104 if (dst
>= adjusted_dst_end
)
5106 dst
= alloc_destination (coding
,
5107 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5109 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5110 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5114 if (coding
->dst_multibyte
5115 || ! CHAR_BYTE8_P (c
))
5116 CHAR_STRING_ADVANCE (c
, dst
);
5118 *dst
++ = CHAR_TO_BYTE8 (c
);
5122 /* This is an annotation data. */
5128 unsigned char *src
= coding
->source
;
5129 unsigned char *src_end
= src
+ coding
->src_bytes
;
5130 Lisp_Object eol_type
;
5132 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5134 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5136 if (coding
->src_multibyte
)
5143 unsigned char *src_base
= src
;
5149 if (EQ (eol_type
, Qdos
))
5155 else if (EQ (eol_type
, Qmac
))
5160 coding
->consumed
= src
- coding
->source
;
5162 if (EQ (coding
->src_object
, coding
->dst_object
))
5166 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5168 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5169 coding_set_source (coding
);
5170 src
= coding
->source
+ coding
->consumed
;
5171 src_end
= coding
->source
+ coding
->src_bytes
;
5181 while (src
< src_end
)
5188 if (EQ (eol_type
, Qdos
))
5194 else if (EQ (eol_type
, Qmac
))
5197 if (dst
>= dst_end
- 1)
5199 coding
->consumed
= src
- coding
->source
;
5201 if (EQ (coding
->src_object
, coding
->dst_object
))
5203 if (dst
>= dst_end
- 1)
5205 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5207 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5208 coding_set_source (coding
);
5209 src
= coding
->source
+ coding
->consumed
;
5210 src_end
= coding
->source
+ coding
->src_bytes
;
5218 if (!EQ (coding
->src_object
, coding
->dst_object
))
5220 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5224 EMACS_INT offset
= src
- coding
->source
;
5226 dst
= alloc_destination (coding
, require
, dst
);
5227 coding_set_source (coding
);
5228 src
= coding
->source
+ offset
;
5229 src_end
= coding
->source
+ coding
->src_bytes
;
5232 produced_chars
= coding
->src_chars
;
5233 while (src
< src_end
)
5239 if (EQ (eol_type
, Qdos
))
5246 else if (EQ (eol_type
, Qmac
))
5252 coding
->consumed
= coding
->src_bytes
;
5253 coding
->consumed_char
= coding
->src_chars
;
5256 produced
= dst
- (coding
->destination
+ coding
->produced
);
5257 if (BUFFERP (coding
->dst_object
))
5258 insert_from_gap (produced_chars
, produced
);
5259 coding
->produced
+= produced
;
5260 coding
->produced_char
+= produced_chars
;
5261 return produced_chars
;
5264 /* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5266 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5270 produce_composition (coding
, charbuf
)
5271 struct coding_system
*coding
;
5277 enum composition_method method
;
5279 Lisp_Object components
;
5281 buffer
= coding
->dst_object
;
5283 pos
= coding
->dst_pos
+ charbuf
[1];
5284 method
= (enum composition_method
) (charbuf
[3]);
5285 cmp_len
= charbuf
[4];
5287 if (method
== COMPOSITION_RELATIVE
)
5291 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5296 for (i
= 0; i
< len
; i
++)
5297 args
[i
] = make_number (charbuf
[i
]);
5298 components
= (method
== COMPOSITION_WITH_ALTCHARS
5299 ? Fstring (len
, args
) : Fvector (len
, args
));
5301 compose_text (pos
, pos
+ cmp_len
, components
, Qnil
, Qnil
);
5305 save_composition_data (buf
, buf_end
, prop
)
5309 enum composition_method method
= COMPOSITION_METHOD (prop
);
5310 int cmp_len
= COMPOSITION_LENGTH (prop
);
5312 if (buf
+ 4 + (MAX_COMPOSITION_COMPONENTS
* 2 - 1) > buf_end
)
5315 buf
[1] = CODING_ANNOTATE_COMPOSITION_MASK
;
5319 if (method
== COMPOSITION_RELATIVE
)
5323 Lisp_Object components
;
5326 components
= COMPOSITION_COMPONENTS (prop
);
5327 if (VECTORP (components
))
5329 len
= XVECTOR (components
)->size
;
5330 for (i
= 0; i
< len
; i
++)
5331 buf
[4 + i
] = XINT (AREF (components
, i
));
5333 else if (STRINGP (components
))
5337 len
= XSTRING (components
)->size
;
5340 FETCH_STRING_CHAR_ADVANCE (buf
[4 + i
], components
, i
, i_byte
);
5342 else if (INTEGERP (components
))
5345 buf
[4] = XINT (components
);
5347 else if (CONSP (components
))
5349 for (len
= 0; CONSP (components
);
5350 len
++, components
= XCDR (components
))
5351 buf
[4 + len
] = XINT (XCAR (components
));
5357 return (buf
+ buf
[0]);
5360 #define CHARBUF_SIZE 0x4000
5362 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5364 int size = CHARBUF_SIZE;; \
5366 coding->charbuf = NULL; \
5367 while (size > 1024) \
5369 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5370 if (coding->charbuf) \
5374 if (! coding->charbuf) \
5376 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5377 return coding->result; \
5379 coding->charbuf_size = size; \
5384 produce_annotation (coding
)
5385 struct coding_system
*coding
;
5387 int *charbuf
= coding
->charbuf
;
5388 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5390 while (charbuf
< charbuf_end
)
5396 int len
= -*charbuf
;
5399 case CODING_ANNOTATE_COMPOSITION_MASK
:
5400 produce_composition (coding
, charbuf
);
5410 /* Decode the data at CODING->src_object into CODING->dst_object.
5411 CODING->src_object is a buffer, a string, or nil.
5412 CODING->dst_object is a buffer.
5414 If CODING->src_object is a buffer, it must be the current buffer.
5415 In this case, if CODING->src_pos is positive, it is a position of
5416 the source text in the buffer, otherwise, the source text is in the
5417 gap area of the buffer, and CODING->src_pos specifies the offset of
5418 the text from GPT (which must be the same as PT). If this is the
5419 same buffer as CODING->dst_object, CODING->src_pos must be
5422 If CODING->src_object is a string, CODING->src_pos is an index to
5425 If CODING->src_object is nil, CODING->source must already point to
5426 the non-relocatable memory area. In this case, CODING->src_pos is
5427 an offset from CODING->source.
5429 The decoded data is inserted at the current point of the buffer
5434 decode_coding (coding
)
5435 struct coding_system
*coding
;
5439 if (BUFFERP (coding
->src_object
)
5440 && coding
->src_pos
> 0
5441 && coding
->src_pos
< GPT
5442 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5443 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
5445 if (BUFFERP (coding
->dst_object
))
5447 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5448 set_buffer_internal (XBUFFER (coding
->dst_object
));
5450 move_gap_both (PT
, PT_BYTE
);
5453 coding
->consumed
= coding
->consumed_char
= 0;
5454 coding
->produced
= coding
->produced_char
= 0;
5455 coding
->chars_at_source
= 0;
5456 coding
->result
= CODING_RESULT_SUCCESS
;
5459 ALLOC_CONVERSION_WORK_AREA (coding
);
5461 attrs
= CODING_ID_ATTRS (coding
->id
);
5465 coding_set_source (coding
);
5466 coding
->annotated
= 0;
5467 (*(coding
->decoder
)) (coding
);
5468 if (!NILP (CODING_ATTR_DECODE_TBL (attrs
)))
5469 translate_chars (CODING_ATTR_DECODE_TBL (attrs
), coding
);
5470 coding_set_destination (coding
);
5471 produce_chars (coding
);
5472 if (coding
->annotated
)
5473 produce_annotation (coding
);
5475 while (coding
->consumed
< coding
->src_bytes
5476 && ! coding
->result
);
5478 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qccl
)
5479 && SYMBOLP (CODING_ID_EOL_TYPE (coding
->id
))
5480 && ! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5481 decode_eol (coding
);
5483 coding
->carryover_bytes
= 0;
5484 if (coding
->consumed
< coding
->src_bytes
)
5486 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5489 coding_set_source (coding
);
5490 coding_set_destination (coding
);
5491 src
= coding
->source
+ coding
->consumed
;
5493 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5495 /* Flush out unprocessed data as binary chars. We are sure
5496 that the number of data is less than the size of
5498 int *charbuf
= coding
->charbuf
;
5500 while (nbytes
-- > 0)
5503 *charbuf
++ = (c
& 0x80 ? - c
: c
);
5505 produce_chars (coding
);
5509 /* Record unprocessed bytes in coding->carryover. We are
5510 sure that the number of data is less than the size of
5511 coding->carryover. */
5512 unsigned char *p
= coding
->carryover
;
5514 coding
->carryover_bytes
= nbytes
;
5515 while (nbytes
-- > 0)
5518 coding
->consumed
= coding
->src_bytes
;
5521 return coding
->result
;
5525 consume_chars (coding
)
5526 struct coding_system
*coding
;
5528 int *buf
= coding
->charbuf
;
5529 /* -1 is to compensate for CRLF. */
5530 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
- 1;
5531 unsigned char *src
= coding
->source
+ coding
->consumed
;
5532 int pos
= coding
->src_pos
+ coding
->consumed_char
;
5533 int end_pos
= coding
->src_pos
+ coding
->src_chars
;
5534 int multibytep
= coding
->src_multibyte
;
5535 Lisp_Object eol_type
;
5537 int start
, end
, stop
;
5538 Lisp_Object object
, prop
;
5540 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5541 if (VECTORP (eol_type
))
5544 object
= coding
->src_object
;
5546 /* Note: composition handling is not yet implemented. */
5547 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
5549 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
5550 && find_composition (pos
, end_pos
, &start
, &end
, &prop
, object
)
5553 || (find_composition (end
, end_pos
, &start
, &end
, &prop
, object
)
5554 && end
<= end_pos
)))
5559 while (buf
< buf_end
)
5567 p
= save_composition_data (buf
, buf_end
, prop
);
5571 if (find_composition (end
, end_pos
, &start
, &end
, &prop
, object
)
5581 c
= STRING_CHAR_ADVANCE (src
);
5582 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
5584 if (! EQ (eol_type
, Qunix
))
5588 if (EQ (eol_type
, Qdos
))
5598 coding
->consumed
= src
- coding
->source
;
5599 coding
->consumed_char
= pos
- coding
->src_pos
;
5600 coding
->charbuf_used
= buf
- coding
->charbuf
;
5601 coding
->chars_at_source
= 0;
5605 /* Encode the text at CODING->src_object into CODING->dst_object.
5606 CODING->src_object is a buffer or a string.
5607 CODING->dst_object is a buffer or nil.
5609 If CODING->src_object is a buffer, it must be the current buffer.
5610 In this case, if CODING->src_pos is positive, it is a position of
5611 the source text in the buffer, otherwise, the source text is in the
5612 gap area of the buffer, and coding->src_pos specifies the offset of
5613 the text from GPT (which must be the same as PT). If this is the
5614 same buffer as CODING->dst_object, CODING->src_pos must be
5615 negative and CODING should not have `pre-write-conversion'.
5617 If CODING->src_object is a string, CODING should not have
5618 `pre-write-conversion'.
5620 If CODING->dst_object is a buffer, the encoded data is inserted at
5621 the current point of that buffer.
5623 If CODING->dst_object is nil, the encoded data is placed at the
5624 memory area specified by CODING->destination. */
5627 encode_coding (coding
)
5628 struct coding_system
*coding
;
5632 attrs
= CODING_ID_ATTRS (coding
->id
);
5634 if (BUFFERP (coding
->dst_object
))
5636 set_buffer_internal (XBUFFER (coding
->dst_object
));
5637 coding
->dst_multibyte
5638 = ! NILP (current_buffer
->enable_multibyte_characters
);
5641 coding
->consumed
= coding
->consumed_char
= 0;
5642 coding
->produced
= coding
->produced_char
= 0;
5643 coding
->result
= CODING_RESULT_SUCCESS
;
5646 ALLOC_CONVERSION_WORK_AREA (coding
);
5649 coding_set_source (coding
);
5650 consume_chars (coding
);
5652 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs
)))
5653 translate_chars (CODING_ATTR_ENCODE_TBL (attrs
), coding
);
5655 coding_set_destination (coding
);
5656 (*(coding
->encoder
)) (coding
);
5657 } while (coding
->consumed_char
< coding
->src_chars
);
5659 if (BUFFERP (coding
->dst_object
))
5660 insert_from_gap (coding
->produced_char
, coding
->produced
);
5662 return (coding
->result
);
5667 /* List of currently used working buffer. */
5668 Lisp_Object Vcode_conversion_work_buf_list
;
5670 /* A working buffer used by the top level conversion. */
5671 Lisp_Object Vcode_conversion_reused_work_buf
;
5674 /* Return a working buffer that can be freely used by the following
5675 code conversion. MULTIBYTEP specifies the multibyteness of the
/* Push a work buffer onto Vcode_conversion_work_buf_list and prepare
   it for use.

   When the list is empty, the buffer named " *code-conversion-work*"
   cached in Vcode_conversion_reused_work_buf is used (it is created
   the first time it is needed).  Otherwise a buffer named
   " *code-conversion-work*<DEPTH>" is obtained, where DEPTH comes from
   the current length of the list.  The chosen buffer is made current
   temporarily, its undo_list is set to Qt, its multibyteness is set
   from MULTIBYTEP, and the previously current buffer is then selected
   again.

   NOTE(review): `depth' is an int initialized directly from Flength,
   which presumably returns a Lisp_Object like other Lisp primitives;
   the value is then formatted with %d.  This probably wants
   XINT (Flength (...)) -- confirm.  */
5679 make_conversion_work_buffer (multibytep
)
5682 struct buffer
*current
= current_buffer
;
5685 if (NILP (Vcode_conversion_work_buf_list
))
5687 if (NILP (Vcode_conversion_reused_work_buf
))
5688 Vcode_conversion_reused_work_buf
5689 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5690 Vcode_conversion_work_buf_list
5691 = Fcons (Vcode_conversion_reused_work_buf
, Qnil
);
5695 int depth
= Flength (Vcode_conversion_work_buf_list
);
5698 sprintf (str
, " *code-conversion-work*<%d>", depth
);
5699 Vcode_conversion_work_buf_list
5700 = Fcons (Fget_buffer_create (build_string (str
)),
5701 Vcode_conversion_work_buf_list
);
5704 buf
= XCAR (Vcode_conversion_work_buf_list
);
5705 set_buffer_internal (XBUFFER (buf
));
5706 current_buffer
->undo_list
= Qt
;
5708 Fset_buffer_multibyte (multibytep
? Qt
: Qnil
);
5709 set_buffer_internal (current
);
/* The coding system most recently handed to one of the conversion
   entry points.  decode_coding_gap, encode_coding_gap,
   decode_coding_object and encode_coding_object each store their
   CODING argument here before registering code_conversion_restore.  */
5713 static struct coding_system
*saved_coding
;
/* Unwind handler registered through record_unwind_protect, with the
   value of save_excursion_save as INFO.

   It removes the first element of Vcode_conversion_work_buf_list,
   frees saved_coding->destination when saved_coding->dst_object is Qt
   and the destination pointer is non-null, and finally returns
   save_excursion_restore (INFO).  What is done to the removed buffer
   when DEPTH > 1 and the buffer is still live is not visible in this
   listing.

   NOTE(review): `depth' is an int initialized directly from Flength,
   which presumably returns a Lisp_Object -- probably wants XINT.
   NOTE(review): dst_object is compared to Qt with `==' whereas the
   rest of this file uses EQ for Lisp_Object comparison -- confirm.  */
5716 code_conversion_restore (info
)
5719 int depth
= Flength (Vcode_conversion_work_buf_list
);
5724 buf
= XCAR (Vcode_conversion_work_buf_list
);
5725 Vcode_conversion_work_buf_list
= XCDR (Vcode_conversion_work_buf_list
);
5726 if (depth
> 1 && !NILP (Fbuffer_live_p (buf
)))
5730 if (saved_coding
->dst_object
== Qt
5731 && saved_coding
->destination
)
5732 xfree (saved_coding
->destination
);
5734 return save_excursion_restore (info
);
5739 decode_coding_gap (coding
, chars
, bytes
)
5740 struct coding_system
*coding
;
5741 EMACS_INT chars
, bytes
;
5743 int count
= specpdl_ptr
- specpdl
;
5745 saved_coding
= coding
;
5746 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5748 coding
->src_object
= Fcurrent_buffer ();
5749 coding
->src_chars
= chars
;
5750 coding
->src_bytes
= bytes
;
5751 coding
->src_pos
= -chars
;
5752 coding
->src_pos_byte
= -bytes
;
5753 coding
->src_multibyte
= chars
< bytes
;
5754 coding
->dst_object
= coding
->src_object
;
5755 coding
->dst_pos
= PT
;
5756 coding
->dst_pos_byte
= PT_BYTE
;
5757 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
5759 if (CODING_REQUIRE_DETECTION (coding
))
5760 detect_coding (coding
);
5762 decode_coding (coding
);
5764 unbind_to (count
, Qnil
);
5765 return coding
->result
;
5769 encode_coding_gap (coding
, chars
, bytes
)
5770 struct coding_system
*coding
;
5771 EMACS_INT chars
, bytes
;
5773 int count
= specpdl_ptr
- specpdl
;
5776 saved_coding
= coding
;
5777 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5779 buffer
= Fcurrent_buffer ();
5780 coding
->src_object
= buffer
;
5781 coding
->src_chars
= chars
;
5782 coding
->src_bytes
= bytes
;
5783 coding
->src_pos
= -chars
;
5784 coding
->src_pos_byte
= -bytes
;
5785 coding
->src_multibyte
= chars
< bytes
;
5786 coding
->dst_object
= coding
->src_object
;
5787 coding
->dst_pos
= PT
;
5788 coding
->dst_pos_byte
= PT_BYTE
;
5790 encode_coding (coding
);
5792 unbind_to (count
, Qnil
);
5793 return coding
->result
;
5797 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5798 SRC_OBJECT into DST_OBJECT by coding context CODING.
5800 SRC_OBJECT is a buffer, a string, or Qnil.
5802 If it is a buffer, the text is at point of the buffer. FROM and TO
5803 are positions in the buffer.
5805 If it is a string, the text is at the beginning of the string.
5806 FROM and TO are indices to the string.
5808 If it is nil, the text is at coding->source. FROM and TO are
5809 indices to coding->source.
5811 DST_OBJECT is a buffer, Qt, or Qnil.
5813 If it is a buffer, the decoded text is inserted at point of the
5814 buffer. If the buffer is the same as SRC_OBJECT, the source text
5817 If it is Qt, a string is made from the decoded text, and
5818 set in CODING->dst_object.
5820 If it is Qnil, the decoded text is stored at CODING->destination.
5821 The caller must allocate CODING->dst_bytes bytes at
5822 CODING->destination by xmalloc. If the decoded text is longer than
5823 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5827 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
5829 struct coding_system
*coding
;
5830 Lisp_Object src_object
;
5831 EMACS_INT from
, from_byte
, to
, to_byte
;
5832 Lisp_Object dst_object
;
5834 int count
= specpdl_ptr
- specpdl
;
5835 unsigned char *destination
;
5836 EMACS_INT dst_bytes
;
5837 EMACS_INT chars
= to
- from
;
5838 EMACS_INT bytes
= to_byte
- from_byte
;
5841 saved_coding
= coding
;
5842 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5844 if (NILP (dst_object
))
5846 destination
= coding
->destination
;
5847 dst_bytes
= coding
->dst_bytes
;
5850 coding
->src_object
= src_object
;
5851 coding
->src_chars
= chars
;
5852 coding
->src_bytes
= bytes
;
5853 coding
->src_multibyte
= chars
< bytes
;
5855 if (STRINGP (src_object
))
5857 coding
->src_pos
= from
;
5858 coding
->src_pos_byte
= from_byte
;
5860 else if (BUFFERP (src_object
))
5862 set_buffer_internal (XBUFFER (src_object
));
5864 move_gap_both (from
, from_byte
);
5865 if (EQ (src_object
, dst_object
))
5867 TEMP_SET_PT_BOTH (from
, from_byte
);
5868 del_range_both (from
, from_byte
, to
, to_byte
, 1);
5869 coding
->src_pos
= -chars
;
5870 coding
->src_pos_byte
= -bytes
;
5874 coding
->src_pos
= from
;
5875 coding
->src_pos_byte
= from_byte
;
5879 if (CODING_REQUIRE_DETECTION (coding
))
5880 detect_coding (coding
);
5881 attrs
= CODING_ID_ATTRS (coding
->id
);
5883 if (! NILP (CODING_ATTR_POST_READ (attrs
))
5884 || EQ (dst_object
, Qt
))
5886 coding
->dst_object
= make_conversion_work_buffer (1);
5887 coding
->dst_pos
= BEG
;
5888 coding
->dst_pos_byte
= BEG_BYTE
;
5889 coding
->dst_multibyte
= 1;
5891 else if (BUFFERP (dst_object
))
5893 coding
->dst_object
= dst_object
;
5894 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
5895 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
5896 coding
->dst_multibyte
5897 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
5901 coding
->dst_object
= Qnil
;
5902 coding
->dst_multibyte
= 1;
5905 decode_coding (coding
);
5907 if (BUFFERP (coding
->dst_object
))
5908 set_buffer_internal (XBUFFER (coding
->dst_object
));
5910 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
5912 struct gcpro gcpro1
, gcpro2
;
5913 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
5916 GCPRO2 (coding
->src_object
, coding
->dst_object
);
5917 val
= call1 (CODING_ATTR_POST_READ (attrs
),
5918 make_number (coding
->produced_char
));
5921 coding
->produced_char
+= Z
- prev_Z
;
5922 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
5925 if (EQ (dst_object
, Qt
))
5927 coding
->dst_object
= Fbuffer_string ();
5929 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
5931 set_buffer_internal (XBUFFER (coding
->dst_object
));
5932 if (dst_bytes
< coding
->produced
)
5935 = (unsigned char *) xrealloc (destination
, coding
->produced
);
5938 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
5939 unbind_to (count
, Qnil
);
5942 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
5943 move_gap_both (BEGV
, BEGV_BYTE
);
5944 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
5945 coding
->destination
= destination
;
5949 unbind_to (count
, Qnil
);
5954 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
5956 struct coding_system
*coding
;
5957 Lisp_Object src_object
;
5958 EMACS_INT from
, from_byte
, to
, to_byte
;
5959 Lisp_Object dst_object
;
5961 int count
= specpdl_ptr
- specpdl
;
5962 EMACS_INT chars
= to
- from
;
5963 EMACS_INT bytes
= to_byte
- from_byte
;
5966 saved_coding
= coding
;
5967 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5969 coding
->src_object
= src_object
;
5970 coding
->src_chars
= chars
;
5971 coding
->src_bytes
= bytes
;
5972 coding
->src_multibyte
= chars
< bytes
;
5974 attrs
= CODING_ID_ATTRS (coding
->id
);
5976 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
5980 coding
->src_object
= make_conversion_work_buffer (coding
->src_multibyte
);
5981 set_buffer_internal (XBUFFER (coding
->src_object
));
5982 if (STRINGP (src_object
))
5983 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
5984 else if (BUFFERP (src_object
))
5985 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
5987 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
5989 if (EQ (src_object
, dst_object
))
5991 set_buffer_internal (XBUFFER (src_object
));
5992 del_range_both (from
, from_byte
, to
, to_byte
, 1);
5993 set_buffer_internal (XBUFFER (coding
->src_object
));
5996 val
= call2 (CODING_ATTR_PRE_WRITE (attrs
),
5997 make_number (1), make_number (chars
));
6000 move_gap_both (BEG
, BEG_BYTE
);
6001 coding
->src_chars
= Z
- BEG
;
6002 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6003 coding
->src_pos
= BEG
;
6004 coding
->src_pos_byte
= BEG_BYTE
;
6005 coding
->src_multibyte
= Z
< Z_BYTE
;
6007 else if (STRINGP (src_object
))
6009 coding
->src_pos
= from
;
6010 coding
->src_pos_byte
= from_byte
;
6012 else if (BUFFERP (src_object
))
6014 set_buffer_internal (XBUFFER (src_object
));
6016 move_gap_both (from
, from_byte
);
6017 if (EQ (src_object
, dst_object
))
6019 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6020 coding
->src_pos
= -chars
;
6021 coding
->src_pos_byte
= -bytes
;
6025 coding
->src_pos
= from
;
6026 coding
->src_pos_byte
= from_byte
;
6030 if (BUFFERP (dst_object
))
6032 coding
->dst_object
= dst_object
;
6033 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6034 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6035 coding
->dst_multibyte
6036 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6038 else if (EQ (dst_object
, Qt
))
6040 coding
->dst_object
= Qnil
;
6041 coding
->destination
= (unsigned char *) xmalloc (coding
->src_chars
);
6042 coding
->dst_bytes
= coding
->src_chars
;
6043 coding
->dst_multibyte
= 0;
6047 coding
->dst_object
= Qnil
;
6048 coding
->dst_multibyte
= 0;
6051 encode_coding (coding
);
6053 if (EQ (dst_object
, Qt
))
6055 if (BUFFERP (coding
->dst_object
))
6056 coding
->dst_object
= Fbuffer_string ();
6060 = make_unibyte_string ((char *) coding
->destination
,
6062 xfree (coding
->destination
);
6066 unbind_to (count
, Qnil
);
6071 preferred_coding_system ()
6073 int id
= coding_categories
[coding_priorities
[0]].id
;
6075 return CODING_ID_NAME (id
);
6080 /*** 8. Emacs Lisp library functions ***/
6082 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6083 doc
: /* Return t if OBJECT is nil or a coding-system.
6084 See the documentation of `define-coding-system' for information
6085 about coding-system objects. */)
6089 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
6092 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6093 Sread_non_nil_coding_system
, 1, 1, 0,
6094 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6101 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6102 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6104 while (XSTRING (val
)->size
== 0);
6105 return (Fintern (val
, Qnil
));
6108 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6109 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6110 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6111 (prompt
, default_coding_system
)
6112 Lisp_Object prompt
, default_coding_system
;
6115 if (SYMBOLP (default_coding_system
))
6116 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
6117 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6118 Qt
, Qnil
, Qcoding_system_history
,
6119 default_coding_system
, Qnil
);
6120 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
6123 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6125 doc
: /* Check validity of CODING-SYSTEM.
6126 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6127 It is valid if it is a symbol with a non-nil `coding-system' property.
6128 The value of property should be a vector of length 5. */)
6130 Lisp_Object coding_system
;
6132 CHECK_SYMBOL (coding_system
);
6133 if (!NILP (Fcoding_system_p (coding_system
)))
6134 return coding_system
;
6136 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6141 detect_coding_system (src
, src_bytes
, highest
, multibytep
, coding_system
)
6143 int src_bytes
, highest
;
6145 Lisp_Object coding_system
;
6147 unsigned char *src_end
= src
+ src_bytes
;
6148 int mask
= CATEGORY_MASK_ANY
;
6151 Lisp_Object attrs
, eol_type
;
6153 struct coding_system coding
;
6155 if (NILP (coding_system
))
6156 coding_system
= Qundecided
;
6157 setup_coding_system (coding_system
, &coding
);
6158 attrs
= CODING_ID_ATTRS (coding
.id
);
6159 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6161 coding
.source
= src
;
6162 coding
.src_bytes
= src_bytes
;
6163 coding
.src_multibyte
= multibytep
;
6164 coding
.consumed
= 0;
6166 if (XINT (CODING_ATTR_CATEGORY (attrs
)) != coding_category_undecided
)
6168 mask
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
6172 coding_system
= Qnil
;
6173 for (; src
< src_end
; src
++)
6176 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
6178 || c
== ISO_CODE_SO
)))
6181 coding
.head_ascii
= src
- coding
.source
;
6184 for (i
= 0; i
< coding_category_raw_text
; i
++)
6186 enum coding_category category
= coding_priorities
[i
];
6187 struct coding_system
*this = coding_categories
+ category
;
6189 if (category
>= coding_category_raw_text
6190 || detected
& (1 << category
))
6195 /* No coding system of this category is defined. */
6196 mask
&= ~(1 << category
);
6200 detected
|= detected_mask
[category
];
6201 if ((*(coding_categories
[category
].detector
)) (&coding
, &mask
)
6204 mask
&= detected_mask
[category
];
6212 val
= Fcons (make_number (coding_category_raw_text
), Qnil
);
6213 else if (mask
== CATEGORY_MASK_ANY
)
6214 val
= Fcons (make_number (coding_category_undecided
), Qnil
);
6217 for (i
= 0; i
< coding_category_raw_text
; i
++)
6218 if (mask
& (1 << coding_priorities
[i
]))
6220 val
= Fcons (make_number (coding_priorities
[i
]), Qnil
);
6227 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6228 if (mask
& (1 << coding_priorities
[i
]))
6229 val
= Fcons (make_number (coding_priorities
[i
]), val
);
6233 int one_byte_eol
= -1, two_byte_eol
= -1;
6236 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
6238 struct coding_system
*this
6239 = (NILP (coding_system
) ? coding_categories
+ XINT (XCAR (tail
))
6243 attrs
= CODING_ID_ATTRS (this->id
);
6244 eol_type
= CODING_ID_EOL_TYPE (this->id
);
6245 XSETCAR (tail
, CODING_ID_NAME (this->id
));
6246 if (VECTORP (eol_type
))
6248 if (EQ (CODING_ATTR_TYPE (attrs
), Qutf_16
))
6250 if (two_byte_eol
< 0)
6251 two_byte_eol
= detect_eol (this, coding
.source
, src_bytes
);
6252 this_eol
= two_byte_eol
;
6256 if (one_byte_eol
< 0)
6257 one_byte_eol
=detect_eol (this, coding
.source
, src_bytes
);
6258 this_eol
= one_byte_eol
;
6260 if (this_eol
== EOL_SEEN_LF
)
6261 XSETCAR (tail
, AREF (eol_type
, 0));
6262 else if (this_eol
== EOL_SEEN_CRLF
)
6263 XSETCAR (tail
, AREF (eol_type
, 1));
6264 else if (this_eol
== EOL_SEEN_CR
)
6265 XSETCAR (tail
, AREF (eol_type
, 2));
6270 return (highest
? XCAR (val
) : val
);
6274 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
6276 doc
: /* Detect coding system of the text in the region between START and END.
6277 Return a list of possible coding systems ordered by priority.
6279 If only ASCII characters are found, it returns a list of single element
6280 `undecided' or its subsidiary coding system according to a detected
6283 If optional argument HIGHEST is non-nil, return the coding system of
6284 highest priority. */)
6285 (start
, end
, highest
)
6286 Lisp_Object start
, end
, highest
;
6289 int from_byte
, to_byte
;
6291 CHECK_NUMBER_COERCE_MARKER (start
);
6292 CHECK_NUMBER_COERCE_MARKER (end
);
6294 validate_region (&start
, &end
);
6295 from
= XINT (start
), to
= XINT (end
);
6296 from_byte
= CHAR_TO_BYTE (from
);
6297 to_byte
= CHAR_TO_BYTE (to
);
6299 if (from
< GPT
&& to
>= GPT
)
6300 move_gap_both (to
, to_byte
);
6302 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
6303 to_byte
- from_byte
,
6305 !NILP (current_buffer
6306 ->enable_multibyte_characters
),
6310 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
6312 doc
: /* Detect coding system of the text in STRING.
6313 Return a list of possible coding systems ordered by priority.
6315 If only ASCII characters are found, it returns a list of single element
6316 `undecided' or its subsidiary coding system according to a detected
6319 If optional argument HIGHEST is non-nil, return the coding system of
6320 highest priority. */)
6322 Lisp_Object string
, highest
;
6324 CHECK_STRING (string
);
6326 return detect_coding_system (XSTRING (string
)->data
,
6327 STRING_BYTES (XSTRING (string
)),
6329 STRING_MULTIBYTE (string
),
6335 char_encodable_p (c
, attrs
)
6340 struct charset
*charset
;
6342 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
6343 CONSP (tail
); tail
= XCDR (tail
))
6345 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
6346 if (CHAR_CHARSET_P (c
, charset
))
6349 return (! NILP (tail
));
/* Return a list of coding systems that safely encode the text between
   START and END.  If EXCLUDE is non-nil, it is a list of coding
   systems not to check.  The returned list doesn't contain any such
   coding systems.  In any case, if the text contains only ASCII or is
   unibyte, return t.  */
6359 DEFUN ("find-coding-systems-region-internal",
6360 Ffind_coding_systems_region_internal
,
6361 Sfind_coding_systems_region_internal
, 2, 3, 0,
6362 doc
: /* Internal use only. */)
6363 (start
, end
, exclude
)
6364 Lisp_Object start
, end
, exclude
;
6366 Lisp_Object coding_attrs_list
, safe_codings
;
6367 EMACS_INT start_byte
, end_byte
;
6368 unsigned char *p
, *pbeg
, *pend
;
6370 Lisp_Object tail
, elt
;
6372 if (STRINGP (start
))
6374 if (!STRING_MULTIBYTE (start
)
6375 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
6378 end_byte
= STRING_BYTES (XSTRING (start
));
6382 CHECK_NUMBER_COERCE_MARKER (start
);
6383 CHECK_NUMBER_COERCE_MARKER (end
);
6384 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6385 args_out_of_range (start
, end
);
6386 if (NILP (current_buffer
->enable_multibyte_characters
))
6388 start_byte
= CHAR_TO_BYTE (XINT (start
));
6389 end_byte
= CHAR_TO_BYTE (XINT (end
));
6390 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
6393 if (start
< GPT
&& end
> GPT
)
6395 if ((GPT
- start
) < (end
- GPT
))
6396 move_gap_both (start
, start_byte
);
6398 move_gap_both (end
, end_byte
);
6402 coding_attrs_list
= Qnil
;
6403 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6405 || NILP (Fmemq (XCAR (tail
), exclude
)))
6409 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
6410 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
6411 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
6412 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
6415 if (STRINGP (start
))
6416 p
= pbeg
= XSTRING (start
)->data
;
6418 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6419 pend
= p
+ (end_byte
- start_byte
);
6421 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
6422 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6426 if (ASCII_BYTE_P (*p
))
6430 c
= STRING_CHAR_ADVANCE (p
);
6432 charset_map_loaded
= 0;
6433 for (tail
= coding_attrs_list
; CONSP (tail
);)
6438 else if (char_encodable_p (c
, elt
))
6440 else if (CONSP (XCDR (tail
)))
6442 XSETCAR (tail
, XCAR (XCDR (tail
)));
6443 XSETCDR (tail
, XCDR (XCDR (tail
)));
6447 XSETCAR (tail
, Qnil
);
6451 if (charset_map_loaded
)
6453 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6455 if (STRINGP (start
))
6456 pbeg
= XSTRING (start
)->data
;
6458 pbeg
= BYTE_POS_ADDR (start_byte
);
6459 p
= pbeg
+ p_offset
;
6460 pend
= pbeg
+ pend_offset
;
6465 safe_codings
= Qnil
;
6466 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
6467 if (! NILP (XCAR (tail
)))
6468 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
6470 return safe_codings
;
6474 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
6475 Scheck_coding_systems_region
, 3, 3, 0,
6476 doc
: /* Check if the region is encodable by coding systems.
6478 START and END are buffer positions specifying the region.
6479 CODING-SYSTEM-LIST is a list of coding systems to check.
6481 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6482 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6483 whole region, POS0, POS1, ... are buffer positions where non-encodable
6484 characters are found.
6486 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6489 START may be a string. In that case, check if the string is
6490 encodable, and the value contains indices to the string instead of
6491 buffer positions. END is ignored. */)
6492 (start
, end
, coding_system_list
)
6493 Lisp_Object start
, end
, coding_system_list
;
6496 EMACS_INT start_byte
, end_byte
;
6498 unsigned char *p
, *pbeg
, *pend
;
6500 Lisp_Object tail
, elt
;
6502 if (STRINGP (start
))
6504 if (!STRING_MULTIBYTE (start
)
6505 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
6508 end_byte
= STRING_BYTES (XSTRING (start
));
6513 CHECK_NUMBER_COERCE_MARKER (start
);
6514 CHECK_NUMBER_COERCE_MARKER (end
);
6515 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6516 args_out_of_range (start
, end
);
6517 if (NILP (current_buffer
->enable_multibyte_characters
))
6519 start_byte
= CHAR_TO_BYTE (XINT (start
));
6520 end_byte
= CHAR_TO_BYTE (XINT (end
));
6521 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
6524 if (start
< GPT
&& end
> GPT
)
6526 if ((GPT
- start
) < (end
- GPT
))
6527 move_gap_both (start
, start_byte
);
6529 move_gap_both (end
, end_byte
);
6535 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6538 list
= Fcons (Fcons (elt
, Fcons (AREF (CODING_SYSTEM_SPEC (elt
), 0),
6543 if (STRINGP (start
))
6544 p
= pbeg
= XSTRING (start
)->data
;
6546 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6547 pend
= p
+ (end_byte
- start_byte
);
6549 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
6550 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6554 if (ASCII_BYTE_P (*p
))
6558 c
= STRING_CHAR_ADVANCE (p
);
6560 charset_map_loaded
= 0;
6561 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
6563 elt
= XCDR (XCAR (tail
));
6564 if (! char_encodable_p (c
, XCAR (elt
)))
6565 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
6567 if (charset_map_loaded
)
6569 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6571 if (STRINGP (start
))
6572 pbeg
= XSTRING (start
)->data
;
6574 pbeg
= BYTE_POS_ADDR (start_byte
);
6575 p
= pbeg
+ p_offset
;
6576 pend
= pbeg
+ pend_offset
;
6584 for (; CONSP (tail
); tail
= XCDR (tail
))
6587 if (CONSP (XCDR (XCDR (elt
))))
6588 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
6598 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
6599 Lisp_Object start
, end
, coding_system
, dst_object
;
6600 int encodep
, norecord
;
6602 struct coding_system coding
;
6603 EMACS_INT from
, from_byte
, to
, to_byte
;
6604 Lisp_Object src_object
;
6606 CHECK_NUMBER_COERCE_MARKER (start
);
6607 CHECK_NUMBER_COERCE_MARKER (end
);
6608 if (NILP (coding_system
))
6609 coding_system
= Qno_conversion
;
6611 CHECK_CODING_SYSTEM (coding_system
);
6612 src_object
= Fcurrent_buffer ();
6613 if (NILP (dst_object
))
6614 dst_object
= src_object
;
6615 else if (! EQ (dst_object
, Qt
))
6616 CHECK_BUFFER (dst_object
);
6618 validate_region (&start
, &end
);
6619 from
= XFASTINT (start
);
6620 from_byte
= CHAR_TO_BYTE (from
);
6621 to
= XFASTINT (end
);
6622 to_byte
= CHAR_TO_BYTE (to
);
6624 setup_coding_system (coding_system
, &coding
);
6625 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6628 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
6631 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
6634 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
6636 if (coding
.result
!= CODING_RESULT_SUCCESS
)
6637 error ("Code conversion error: %d", coding
.result
);
6639 return (BUFFERP (dst_object
)
6640 ? make_number (coding
.produced_char
)
6641 : coding
.dst_object
);
6645 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
6646 3, 4, "r\nzCoding system: ",
6647 doc
: /* Decode the current region from the specified coding system.
6648 When called from a program, takes four arguments:
6649 START, END, CODING-SYSTEM, and DESTINATION.
6650 START and END are buffer positions.
6652 Optional 4th arguments DESTINATION specifies where the decoded text goes.
6653 If nil, the region between START and END is replace by the decoded text.
6654 If buffer, the decoded text is inserted in the buffer.
6655 If t, the decoded text is returned.
6657 This function sets `last-coding-system-used' to the precise coding system
6658 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6659 not fully specified.)
6660 It returns the length of the decoded text. */)
6661 (start
, end
, coding_system
, destination
)
6662 Lisp_Object start
, end
, coding_system
, destination
;
6664 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
6667 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
6668 3, 4, "r\nzCoding system: ",
6669 doc
: /* Encode the current region by specified coding system.
6670 When called from a program, takes three arguments:
6671 START, END, and CODING-SYSTEM. START and END are buffer positions.
6673 Optional 4th arguments DESTINATION specifies where the encoded text goes.
6674 If nil, the region between START and END is replace by the encoded text.
6675 If buffer, the encoded text is inserted in the buffer.
6676 If t, the encoded text is returned.
6678 This function sets `last-coding-system-used' to the precise coding system
6679 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6680 not fully specified.)
6681 It returns the length of the encoded text. */)
6682 (start
, end
, coding_system
, destination
)
6683 Lisp_Object start
, end
, coding_system
, destination
;
6685 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
6689 code_convert_string (string
, coding_system
, dst_object
,
6690 encodep
, nocopy
, norecord
)
6691 Lisp_Object string
, coding_system
, dst_object
;
6692 int encodep
, nocopy
, norecord
;
6694 struct coding_system coding
;
6695 EMACS_INT chars
, bytes
;
6697 CHECK_STRING (string
);
6698 if (NILP (coding_system
))
6701 Vlast_coding_system_used
= Qno_conversion
;
6702 if (NILP (dst_object
))
6703 return (nocopy
? Fcopy_sequence (string
) : string
);
6706 if (NILP (coding_system
))
6707 coding_system
= Qno_conversion
;
6709 CHECK_CODING_SYSTEM (coding_system
);
6710 if (NILP (dst_object
))
6712 else if (! EQ (dst_object
, Qt
))
6713 CHECK_BUFFER (dst_object
);
6715 setup_coding_system (coding_system
, &coding
);
6716 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6717 chars
= XSTRING (string
)->size
;
6718 bytes
= STRING_BYTES (XSTRING (string
));
6720 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
6722 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
6724 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
6726 if (coding
.result
!= CODING_RESULT_SUCCESS
)
6727 error ("Code conversion error: %d", coding
.result
);
6729 return (BUFFERP (dst_object
)
6730 ? make_number (coding
.produced_char
)
6731 : coding
.dst_object
);
6735 /* Encode or decode STRING according to CODING_SYSTEM.
6736 Do not set Vlast_coding_system_used.
6738 This function is called only from macros DECODE_FILE and
6739 ENCODE_FILE, thus we ignore character composition. */
6742 code_convert_string_norecord (string
, coding_system
, encodep
)
6743 Lisp_Object string
, coding_system
;
6746 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
6750 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
6752 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6754 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6755 if the decoding operation is trivial.
6757 Optional fourth arg BUFFER non-nil meant that the decoded text is
6758 inserted in BUFFER instead of returned as a astring. In this case,
6759 the return value is BUFFER.
6761 This function sets `last-coding-system-used' to the precise coding system
6762 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6763 not fully specified. */)
6764 (string
, coding_system
, nocopy
, buffer
)
6765 Lisp_Object string
, coding_system
, nocopy
, buffer
;
6767 return code_convert_string (string
, coding_system
, buffer
,
6768 0, ! NILP (nocopy
), 0);
6771 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
6773 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
6775 Optional third arg NOCOPY non-nil means it is OK to return STRING
6776 itself if the encoding operation is trivial.
6778 Optional fourth arg BUFFER non-nil meant that the encoded text is
6779 inserted in BUFFER instead of returned as a astring. In this case,
6780 the return value is BUFFER.
6782 This function sets `last-coding-system-used' to the precise coding system
6783 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6784 not fully specified.) */)
6785 (string
, coding_system
, nocopy
, buffer
)
6786 Lisp_Object string
, coding_system
, nocopy
, buffer
;
6788 return code_convert_string (string
, coding_system
, buffer
,
6789 nocopy
, ! NILP (nocopy
), 1);
6793 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
6794 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
6795 Return the corresponding character. */)
6799 Lisp_Object spec
, attrs
, val
;
6800 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
6803 CHECK_NATNUM (code
);
6804 c
= XFASTINT (code
);
6805 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
6806 attrs
= AREF (spec
, 0);
6808 if (ASCII_BYTE_P (c
)
6809 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6812 val
= CODING_ATTR_CHARSET_LIST (attrs
);
6813 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6814 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6815 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
)));
6818 charset
= charset_roman
;
6819 else if (c
>= 0xA0 && c
< 0xDF)
6821 charset
= charset_kana
;
6826 int s1
= c
>> 8, s2
= c
& 0x7F;
6828 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
6829 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
6830 error ("Invalid code: %d", code
);
6832 charset
= charset_kanji
;
6834 c
= DECODE_CHAR (charset
, c
);
6836 error ("Invalid code: %d", code
);
6837 return make_number (c
);
6841 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
6842 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
6843 Return the corresponding code in SJIS. */)
6847 Lisp_Object spec
, attrs
, charset_list
;
6849 struct charset
*charset
;
6852 CHECK_CHARACTER (ch
);
6854 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
6855 attrs
= AREF (spec
, 0);
6857 if (ASCII_CHAR_P (c
)
6858 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6861 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
6862 charset
= char_charset (c
, charset_list
, &code
);
6863 if (code
== CHARSET_INVALID_CODE (charset
))
6864 error ("Can't encode by shift_jis encoding: %d", c
);
6867 return make_number (code
);
6870 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
6871 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
6872 Return the corresponding character. */)
6876 Lisp_Object spec
, attrs
, val
;
6877 struct charset
*charset_roman
, *charset_big5
, *charset
;
6880 CHECK_NATNUM (code
);
6881 c
= XFASTINT (code
);
6882 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
6883 attrs
= AREF (spec
, 0);
6885 if (ASCII_BYTE_P (c
)
6886 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6889 val
= CODING_ATTR_CHARSET_LIST (attrs
);
6890 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6891 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
6894 charset
= charset_roman
;
6897 int b1
= c
>> 8, b2
= c
& 0x7F;
6898 if (b1
< 0xA1 || b1
> 0xFE
6899 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
6900 error ("Invalid code: %d", code
);
6901 charset
= charset_big5
;
6903 c
= DECODE_CHAR (charset
, (unsigned )c
);
6905 error ("Invalid code: %d", code
);
6906 return make_number (c
);
6909 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
6910 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
6911 Return the corresponding character code in Big5. */)
6915 Lisp_Object spec
, attrs
, charset_list
;
6916 struct charset
*charset
;
6920 CHECK_CHARACTER (ch
);
6922 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
6923 attrs
= AREF (spec
, 0);
6924 if (ASCII_CHAR_P (c
)
6925 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6928 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
6929 charset
= char_charset (c
, charset_list
, &code
);
6930 if (code
== CHARSET_INVALID_CODE (charset
))
6931 error ("Can't encode by Big5 encoding: %d", c
);
6933 return make_number (code
);
6937 DEFUN ("set-terminal-coding-system-internal",
6938 Fset_terminal_coding_system_internal
,
6939 Sset_terminal_coding_system_internal
, 1, 1, 0,
6940 doc
: /* Internal use only. */)
6942 Lisp_Object coding_system
;
6944 CHECK_SYMBOL (coding_system
);
6945 setup_coding_system (Fcheck_coding_system (coding_system
),
6948 /* We had better not send unsafe characters to terminal. */
6949 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
6950 /* Characer composition should be disabled. */
6951 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6952 terminal_coding
.src_multibyte
= 1;
6953 terminal_coding
.dst_multibyte
= 0;
6957 DEFUN ("set-safe-terminal-coding-system-internal",
6958 Fset_safe_terminal_coding_system_internal
,
6959 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
6960 doc
: /* Internal use only. */)
6962 Lisp_Object coding_system
;
6964 CHECK_SYMBOL (coding_system
);
6965 setup_coding_system (Fcheck_coding_system (coding_system
),
6966 &safe_terminal_coding
);
6967 /* Characer composition should be disabled. */
6968 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6969 safe_terminal_coding
.src_multibyte
= 1;
6970 safe_terminal_coding
.dst_multibyte
= 0;
6974 DEFUN ("terminal-coding-system",
6975 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
6976 doc
: /* Return coding system specified for terminal output. */)
6979 return CODING_ID_NAME (terminal_coding
.id
);
6982 DEFUN ("set-keyboard-coding-system-internal",
6983 Fset_keyboard_coding_system_internal
,
6984 Sset_keyboard_coding_system_internal
, 1, 1, 0,
6985 doc
: /* Internal use only. */)
6987 Lisp_Object coding_system
;
6989 CHECK_SYMBOL (coding_system
);
6990 setup_coding_system (Fcheck_coding_system (coding_system
),
6992 /* Characer composition should be disabled. */
6993 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6997 DEFUN ("keyboard-coding-system",
6998 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
6999 doc
: /* Return coding system specified for decoding keyboard input. */)
7002 return CODING_ID_NAME (keyboard_coding
.id
);
7006 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7007 Sfind_operation_coding_system
, 1, MANY
, 0,
7008 doc
: /* Choose a coding system for an operation based on the target name.
7009 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7010 DECODING-SYSTEM is the coding system to use for decoding
7011 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7012 for encoding (in case OPERATION does encoding).
7014 The first argument OPERATION specifies an I/O primitive:
7015 For file I/O, `insert-file-contents' or `write-region'.
7016 For process I/O, `call-process', `call-process-region', or `start-process'.
7017 For network I/O, `open-network-stream'.
7019 The remaining arguments should be the same arguments that were passed
7020 to the primitive. Depending on which primitive, one of those arguments
7021 is selected as the TARGET. For example, if OPERATION does file I/O,
7022 whichever argument specifies the file name is TARGET.
7024 TARGET has a meaning which depends on OPERATION:
7025 For file I/O, TARGET is a file name.
7026 For process I/O, TARGET is a process name.
7027 For network I/O, TARGET is a service name or a port number
7029 This function looks up what specified for TARGET in,
7030 `file-coding-system-alist', `process-coding-system-alist',
7031 or `network-coding-system-alist' depending on OPERATION.
7032 They may specify a coding system, a cons of coding systems,
7033 or a function symbol to call.
7034 In the last case, we call the function with one argument,
7035 which is a list of all the arguments given to this function.
7037 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7042 Lisp_Object operation
, target_idx
, target
, val
;
7043 register Lisp_Object chain
;
7046 error ("Too few arguments");
7047 operation
= args
[0];
7048 if (!SYMBOLP (operation
)
7049 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7050 error ("Invalid first arguement");
7051 if (nargs
< 1 + XINT (target_idx
))
7052 error ("Too few arguments for operation: %s",
7053 XSYMBOL (operation
)->name
->data
);
7054 target
= args
[XINT (target_idx
) + 1];
7055 if (!(STRINGP (target
)
7056 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7057 error ("Invalid %dth argument", XINT (target_idx
) + 1);
7059 chain
= ((EQ (operation
, Qinsert_file_contents
)
7060 || EQ (operation
, Qwrite_region
))
7061 ? Vfile_coding_system_alist
7062 : (EQ (operation
, Qopen_network_stream
)
7063 ? Vnetwork_coding_system_alist
7064 : Vprocess_coding_system_alist
));
7068 for (; CONSP (chain
); chain
= XCDR (chain
))
7074 && ((STRINGP (target
)
7075 && STRINGP (XCAR (elt
))
7076 && fast_string_match (XCAR (elt
), target
) >= 0)
7077 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7080 /* Here, if VAL is both a valid coding system and a valid
7081 function symbol, we return VAL as a coding system. */
7084 if (! SYMBOLP (val
))
7086 if (! NILP (Fcoding_system_p (val
)))
7087 return Fcons (val
, val
);
7088 if (! NILP (Ffboundp (val
)))
7090 val
= call1 (val
, Flist (nargs
, args
));
7093 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
7094 return Fcons (val
, val
);
7102 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
7103 Sset_coding_system_priority
, 1, MANY
, 0,
7104 doc
: /* Put higher priority to coding systems of the arguments. */)
7110 int changed
[coding_category_max
];
7111 enum coding_category priorities
[coding_category_max
];
7113 bzero (changed
, sizeof changed
);
7115 for (i
= j
= 0; i
< nargs
; i
++)
7117 enum coding_category category
;
7118 Lisp_Object spec
, attrs
;
7120 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
7121 attrs
= AREF (spec
, 0);
7122 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7123 if (changed
[category
])
7124 /* Ignore this coding system because a coding system of the
7125 same category already had a higher priority. */
7127 changed
[category
] = 1;
7128 priorities
[j
++] = category
;
7129 if (coding_categories
[category
].id
>= 0
7130 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
7131 setup_coding_system (args
[i
], &coding_categories
[category
]);
7134 /* Now we have decided top J priorities. Reflect the order of the
7135 original priorities to the remaining priorities. */
7137 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
7139 while (j
< coding_category_max
7140 && changed
[coding_priorities
[j
]])
7142 if (j
== coding_category_max
)
7144 priorities
[i
] = coding_priorities
[j
];
7147 bcopy (priorities
, coding_priorities
, sizeof priorities
);
7151 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
7152 Scoding_system_priority_list
, 0, 1, 0,
7153 doc
: /* Return a list of coding systems ordered by their priorities. */)
7155 Lisp_Object highestp
;
7160 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
7162 enum coding_category category
= coding_priorities
[i
];
7163 int id
= coding_categories
[category
].id
;
7168 attrs
= CODING_ID_ATTRS (id
);
7169 if (! NILP (highestp
))
7170 return CODING_ATTR_BASE_NAME (attrs
);
7171 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
7173 return Fnreverse (val
);
7177 make_subsidiaries (base
)
7180 Lisp_Object subsidiaries
;
7181 char *suffixes
[] = { "-unix", "-dos", "-mac" };
7182 int base_name_len
= STRING_BYTES (XSYMBOL (base
)->name
);
7183 char *buf
= (char *) alloca (base_name_len
+ 6);
7186 bcopy (XSYMBOL (base
)->name
->data
, buf
, base_name_len
);
7187 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
7188 for (i
= 0; i
< 3; i
++)
7190 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
7191 ASET (subsidiaries
, i
, intern (buf
));
7193 return subsidiaries
;
7197 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
7198 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
7199 doc
: /* For internal use only. */)
7205 Lisp_Object spec_vec
; /* [ ATTRS ALIASE EOL_TYPE ] */
7206 Lisp_Object attrs
; /* Vector of attributes. */
7207 Lisp_Object eol_type
;
7208 Lisp_Object aliases
;
7209 Lisp_Object coding_type
, charset_list
, safe_charsets
;
7210 enum coding_category category
;
7211 Lisp_Object tail
, val
;
7212 int max_charset_id
= 0;
7215 if (nargs
< coding_arg_max
)
7218 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
7220 name
= args
[coding_arg_name
];
7221 CHECK_SYMBOL (name
);
7222 CODING_ATTR_BASE_NAME (attrs
) = name
;
7224 val
= args
[coding_arg_mnemonic
];
7225 if (! STRINGP (val
))
7226 CHECK_CHARACTER (val
);
7227 CODING_ATTR_MNEMONIC (attrs
) = val
;
7229 coding_type
= args
[coding_arg_coding_type
];
7230 CHECK_SYMBOL (coding_type
);
7231 CODING_ATTR_TYPE (attrs
) = coding_type
;
7233 charset_list
= args
[coding_arg_charset_list
];
7234 if (SYMBOLP (charset_list
))
7236 if (EQ (charset_list
, Qiso_2022
))
7238 if (! EQ (coding_type
, Qiso_2022
))
7239 error ("Invalid charset-list");
7240 charset_list
= Viso_2022_charset_list
;
7242 else if (EQ (charset_list
, Qemacs_mule
))
7244 if (! EQ (coding_type
, Qemacs_mule
))
7245 error ("Invalid charset-list");
7246 charset_list
= Vemacs_mule_charset_list
;
7248 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7249 if (max_charset_id
< XFASTINT (XCAR (tail
)))
7250 max_charset_id
= XFASTINT (XCAR (tail
));
7254 charset_list
= Fcopy_sequence (charset_list
);
7255 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
7257 struct charset
*charset
;
7260 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7261 if (EQ (coding_type
, Qiso_2022
)
7262 ? CHARSET_ISO_FINAL (charset
) < 0
7263 : EQ (coding_type
, Qemacs_mule
)
7264 ? CHARSET_EMACS_MULE_ID (charset
) < 0
7266 error ("Can't handle charset `%s'",
7267 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7269 XCAR (tail
) = make_number (charset
->id
);
7270 if (max_charset_id
< charset
->id
)
7271 max_charset_id
= charset
->id
;
7274 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
7276 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
7278 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7279 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
7280 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
7282 val
= args
[coding_arg_decode_translation_table
];
7284 CHECK_CHAR_TABLE (val
);
7285 CODING_ATTR_DECODE_TBL (attrs
) = val
;
7287 val
= args
[coding_arg_encode_translation_table
];
7289 CHECK_CHAR_TABLE (val
);
7290 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
7292 val
= args
[coding_arg_post_read_conversion
];
7294 CODING_ATTR_POST_READ (attrs
) = val
;
7296 val
= args
[coding_arg_pre_write_conversion
];
7298 CODING_ATTR_PRE_WRITE (attrs
) = val
;
7300 val
= args
[coding_arg_default_char
];
7302 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
7305 CHECK_CHARACTER (val
);
7306 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
7309 val
= args
[coding_arg_plist
];
7311 CODING_ATTR_PLIST (attrs
) = val
;
7313 if (EQ (coding_type
, Qcharset
))
7315 val
= Fmake_vector (make_number (256), Qnil
);
7317 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7319 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7320 int idx
= (CHARSET_DIMENSION (charset
) - 1) * 4;
7322 for (i
= charset
->code_space
[idx
];
7323 i
<= charset
->code_space
[idx
+ 1]; i
++)
7325 if (NILP (AREF (val
, i
)))
7326 ASET (val
, i
, XCAR (tail
));
7328 error ("Charsets conflicts in the first byte");
7331 ASET (attrs
, coding_attr_charset_valids
, val
);
7332 category
= coding_category_charset
;
7334 else if (EQ (coding_type
, Qccl
))
7338 if (nargs
< coding_arg_ccl_max
)
7341 val
= args
[coding_arg_ccl_decoder
];
7342 CHECK_CCL_PROGRAM (val
);
7344 val
= Fcopy_sequence (val
);
7345 ASET (attrs
, coding_attr_ccl_decoder
, val
);
7347 val
= args
[coding_arg_ccl_encoder
];
7348 CHECK_CCL_PROGRAM (val
);
7350 val
= Fcopy_sequence (val
);
7351 ASET (attrs
, coding_attr_ccl_encoder
, val
);
7353 val
= args
[coding_arg_ccl_valids
];
7354 valids
= Fmake_string (make_number (256), make_number (0));
7355 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
7359 ASET (valids
, XINT (val
), 1);
7365 CHECK_NUMBER (XCAR (val
));
7366 CHECK_NUMBER (XCDR (val
));
7367 from
= XINT (XCAR (val
));
7368 to
= XINT (XCDR (val
));
7369 for (i
= from
; i
<= to
; i
++)
7370 ASET (valids
, i
, 1);
7373 ASET (attrs
, coding_attr_ccl_valids
, valids
);
7375 category
= coding_category_ccl
;
7377 else if (EQ (coding_type
, Qutf_16
))
7379 Lisp_Object bom
, endian
;
7381 if (nargs
< coding_arg_utf16_max
)
7384 bom
= args
[coding_arg_utf16_bom
];
7385 if (! NILP (bom
) && ! EQ (bom
, Qt
))
7388 CHECK_CODING_SYSTEM (XCAR (bom
));
7389 CHECK_CODING_SYSTEM (XCDR (bom
));
7391 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
7393 endian
= args
[coding_arg_utf16_endian
];
7394 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
7396 category
= (CONSP (bom
)
7397 ? coding_category_utf_16_auto
7400 ? coding_category_utf_16_be_nosig
7401 : coding_category_utf_16_le_nosig
)
7403 ? coding_category_utf_16_be
7404 : coding_category_utf_16_le
));
7406 else if (EQ (coding_type
, Qiso_2022
))
7408 Lisp_Object initial
, reg_usage
, request
, flags
;
7411 if (nargs
< coding_arg_iso2022_max
)
7414 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
7415 CHECK_VECTOR (initial
);
7416 for (i
= 0; i
< 4; i
++)
7418 val
= Faref (initial
, make_number (i
));
7421 CHECK_CHARSET_GET_ID (val
, id
);
7422 ASET (initial
, i
, make_number (id
));
7425 ASET (initial
, i
, make_number (-1));
7428 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
7429 CHECK_CONS (reg_usage
);
7430 CHECK_NATNUM (XCAR (reg_usage
));
7431 CHECK_NATNUM (XCDR (reg_usage
));
7433 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
7434 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
7440 CHECK_CHARSET_GET_ID (XCAR (val
), id
);
7441 CHECK_NATNUM (XCDR (val
));
7442 if (XINT (XCDR (val
)) >= 4)
7443 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
7444 XCAR (val
) = make_number (id
);
7447 flags
= args
[coding_arg_iso2022_flags
];
7448 CHECK_NATNUM (flags
);
7450 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
7451 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
7453 ASET (attrs
, coding_attr_iso_initial
, initial
);
7454 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
7455 ASET (attrs
, coding_attr_iso_request
, request
);
7456 ASET (attrs
, coding_attr_iso_flags
, flags
);
7457 setup_iso_safe_charsets (attrs
);
7459 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
7460 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
7461 | CODING_ISO_FLAG_SINGLE_SHIFT
))
7462 ? coding_category_iso_7_else
7463 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
7464 ? coding_category_iso_7
7465 : coding_category_iso_7_tight
);
7468 int id
= XINT (AREF (initial
, 1));
7470 category
= (((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
7471 | CODING_ISO_FLAG_SINGLE_SHIFT
))
7472 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
7474 ? coding_category_iso_8_else
7475 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
7476 ? coding_category_iso_8_1
7477 : coding_category_iso_8_2
);
7480 else if (EQ (coding_type
, Qemacs_mule
))
7482 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
7483 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
7485 category
= coding_category_emacs_mule
;
7487 else if (EQ (coding_type
, Qshift_jis
))
7490 struct charset
*charset
;
7492 if (XINT (Flength (charset_list
)) != 3)
7493 error ("There should be just three charsets");
7495 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7496 if (CHARSET_DIMENSION (charset
) != 1)
7497 error ("Dimension of charset %s is not one",
7498 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7500 charset_list
= XCDR (charset_list
);
7501 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7502 if (CHARSET_DIMENSION (charset
) != 1)
7503 error ("Dimension of charset %s is not one",
7504 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7506 charset_list
= XCDR (charset_list
);
7507 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7508 if (CHARSET_DIMENSION (charset
) != 2)
7509 error ("Dimension of charset %s is not two",
7510 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7512 category
= coding_category_sjis
;
7513 Vsjis_coding_system
= name
;
7515 else if (EQ (coding_type
, Qbig5
))
7517 struct charset
*charset
;
7519 if (XINT (Flength (charset_list
)) != 2)
7520 error ("There should be just two charsets");
7522 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7523 if (CHARSET_DIMENSION (charset
) != 1)
7524 error ("Dimension of charset %s is not one",
7525 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7527 charset_list
= XCDR (charset_list
);
7528 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7529 if (CHARSET_DIMENSION (charset
) != 2)
7530 error ("Dimension of charset %s is not two",
7531 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7533 category
= coding_category_big5
;
7534 Vbig5_coding_system
= name
;
7536 else if (EQ (coding_type
, Qraw_text
))
7537 category
= coding_category_raw_text
;
7538 else if (EQ (coding_type
, Qutf_8
))
7539 category
= coding_category_utf_8
;
7540 else if (EQ (coding_type
, Qundecided
))
7541 category
= coding_category_undecided
;
7543 error ("Invalid coding system type: %s",
7544 XSYMBOL (coding_type
)->name
->data
);
7546 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
7548 eol_type
= args
[coding_arg_eol_type
];
7549 if (! NILP (eol_type
)
7550 && ! EQ (eol_type
, Qunix
)
7551 && ! EQ (eol_type
, Qdos
)
7552 && ! EQ (eol_type
, Qmac
))
7553 error ("Invalid eol-type");
7555 aliases
= Fcons (name
, Qnil
);
7557 if (NILP (eol_type
))
7559 eol_type
= make_subsidiaries (name
);
7560 for (i
= 0; i
< 3; i
++)
7562 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
7564 this_name
= AREF (eol_type
, i
);
7565 this_aliases
= Fcons (this_name
, Qnil
);
7566 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
7567 this_spec
= Fmake_vector (make_number (3), attrs
);
7568 ASET (this_spec
, 1, this_aliases
);
7569 ASET (this_spec
, 2, this_eol_type
);
7570 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
7571 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
7572 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
7573 Vcoding_system_alist
);
7577 spec_vec
= Fmake_vector (make_number (3), attrs
);
7578 ASET (spec_vec
, 1, aliases
);
7579 ASET (spec_vec
, 2, eol_type
);
7581 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
7582 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
7583 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
7584 Vcoding_system_alist
);
7587 int id
= coding_categories
[category
].id
;
7589 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
7590 setup_coding_system (name
, &coding_categories
[category
]);
7596 return Fsignal (Qwrong_number_of_arguments
,
7597 Fcons (intern ("define-coding-system-internal"),
7598 make_number (nargs
)));
7601 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
7602 Sdefine_coding_system_alias
, 2, 2, 0,
7603 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7604 (alias
, coding_system
)
7605 Lisp_Object alias
, coding_system
;
7607 Lisp_Object spec
, aliases
, eol_type
;
7609 CHECK_SYMBOL (alias
);
7610 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7611 aliases
= AREF (spec
, 1);
7612 while (!NILP (XCDR (aliases
)))
7613 aliases
= XCDR (aliases
);
7614 XCDR (aliases
) = Fcons (alias
, Qnil
);
7616 eol_type
= AREF (spec
, 2);
7617 if (VECTORP (eol_type
))
7619 Lisp_Object subsidiaries
;
7622 subsidiaries
= make_subsidiaries (alias
);
7623 for (i
= 0; i
< 3; i
++)
7624 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
7625 AREF (eol_type
, i
));
7627 ASET (spec
, 2, subsidiaries
);
7630 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
7631 Vcoding_system_alist
= Fcons (Fcons (alias
, Qnil
), Vcoding_system_alist
);
7636 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
7638 doc
: /* Return the base of CODING-SYSTEM.
7639 Any alias or subsidiary coding systems are not base coding system. */)
7641 Lisp_Object coding_system
;
7643 Lisp_Object spec
, attrs
;
7645 if (NILP (coding_system
))
7646 return (Qno_conversion
);
7647 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7648 attrs
= AREF (spec
, 0);
7649 return CODING_ATTR_BASE_NAME (attrs
);
7652 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
7654 doc
: "Return the property list of CODING-SYSTEM.")
7656 Lisp_Object coding_system
;
7658 Lisp_Object spec
, attrs
;
7660 if (NILP (coding_system
))
7661 coding_system
= Qno_conversion
;
7662 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7663 attrs
= AREF (spec
, 0);
7664 return CODING_ATTR_PLIST (attrs
);
7668 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
7670 doc
: /* Return the list of aliases of CODING-SYSTEM.
7671 A base coding system is what made by `define-coding-system'.
7672 Any alias nor subsidiary coding systems are not base coding system. */)
7674 Lisp_Object coding_system
;
7678 if (NILP (coding_system
))
7679 coding_system
= Qno_conversion
;
7680 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7681 return AREF (spec
, 2);
7684 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
7685 Scoding_system_eol_type
, 1, 1, 0,
7686 doc
: /* Return eol-type of CODING-SYSTEM.
7687 An eol-type is an integer 0, 1, or 2, or a vector of coding systems.
7689 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7690 and CR respectively.
7692 A vector value indicates that a format of end-of-line should be
7693 detected automatically. Nth element of the vector is the subsidiary
7694 coding system whose eol-type is N. */)
7696 Lisp_Object coding_system
;
7698 Lisp_Object spec
, eol_type
;
7701 if (NILP (coding_system
))
7702 coding_system
= Qno_conversion
;
7703 if (! CODING_SYSTEM_P (coding_system
))
7705 spec
= CODING_SYSTEM_SPEC (coding_system
);
7706 eol_type
= AREF (spec
, 2);
7707 if (VECTORP (eol_type
))
7708 return Fcopy_sequence (eol_type
);
7709 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
7710 return make_number (n
);
7716 /*** 9. Post-amble ***/
7723 for (i
= 0; i
< coding_category_max
; i
++)
7725 coding_categories
[i
].id
= -1;
7726 coding_priorities
[i
] = i
;
7729 /* ISO2022 specific initialize routine. */
7730 for (i
= 0; i
< 0x20; i
++)
7731 iso_code_class
[i
] = ISO_control_0
;
7732 for (i
= 0x21; i
< 0x7F; i
++)
7733 iso_code_class
[i
] = ISO_graphic_plane_0
;
7734 for (i
= 0x80; i
< 0xA0; i
++)
7735 iso_code_class
[i
] = ISO_control_1
;
7736 for (i
= 0xA1; i
< 0xFF; i
++)
7737 iso_code_class
[i
] = ISO_graphic_plane_1
;
7738 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
7739 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
7740 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
7741 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
7742 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
7743 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
7744 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
7745 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
7746 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
7747 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
7749 inhibit_pre_post_conversion
= 0;
7751 for (i
= 0; i
< 256; i
++)
7753 emacs_mule_bytes
[i
] = 1;
/* Register each of the following Lisp_Object variables with staticpro
   and give it its initial value.  */

/* Maps a coding system name (a symbol) to its spec vector; created
   with an `eq' test.  */
staticpro (&Vcoding_system_hash_table);
Vcoding_system_hash_table = Fmakehash (Qeq);

/* Set to the coding system name when a coding system of type
   shift-jis (resp. big5) is defined; nil until then.  */
staticpro (&Vsjis_coding_system);
Vsjis_coding_system = Qnil;

staticpro (&Vbig5_coding_system);
Vbig5_coding_system = Qnil;

/* Bookkeeping for code conversion work buffers -- presumably managed
   by the conversion routines earlier in this file; both start empty.  */
staticpro (&Vcode_conversion_work_buf_list);
Vcode_conversion_work_buf_list = Qnil;

staticpro (&Vcode_conversion_reused_work_buf);
Vcode_conversion_reused_work_buf = Qnil;
7777 DEFSYM (Qcharset
, "charset");
7778 DEFSYM (Qtarget_idx
, "target-idx");
7779 DEFSYM (Qcoding_system_history
, "coding-system-history");
7780 Fset (Qcoding_system_history
, Qnil
);
7782 /* Target FILENAME is the first argument. */
7783 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
7784 /* Target FILENAME is the third argument. */
7785 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
7787 DEFSYM (Qcall_process
, "call-process");
7788 /* Target PROGRAM is the first argument. */
7789 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
7791 DEFSYM (Qcall_process_region
, "call-process-region");
7792 /* Target PROGRAM is the third argument. */
7793 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
7795 DEFSYM (Qstart_process
, "start-process");
7796 /* Target PROGRAM is the third argument. */
7797 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
7799 DEFSYM (Qopen_network_stream
, "open-network-stream");
7800 /* Target SERVICE is the fourth argument. */
7801 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
7803 DEFSYM (Qcoding_system
, "coding-system");
7804 DEFSYM (Qcoding_aliases
, "coding-aliases");
7806 DEFSYM (Qeol_type
, "eol-type");
7807 DEFSYM (Qunix
, "unix");
7808 DEFSYM (Qdos
, "dos");
7809 DEFSYM (Qmac
, "mac");
7811 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
7812 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
7813 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
7814 DEFSYM (Qdefault_char
, "default-char");
7815 DEFSYM (Qundecided
, "undecided");
7816 DEFSYM (Qno_conversion
, "no-conversion");
7817 DEFSYM (Qraw_text
, "raw-text");
7819 DEFSYM (Qiso_2022
, "iso-2022");
7821 DEFSYM (Qutf_8
, "utf-8");
7823 DEFSYM (Qutf_16
, "utf-16");
7824 DEFSYM (Qutf_16_be
, "utf-16-be");
7825 DEFSYM (Qutf_16_be_nosig
, "utf-16-be-nosig");
7826 DEFSYM (Qutf_16_le
, "utf-16-l3");
7827 DEFSYM (Qutf_16_le_nosig
, "utf-16-le-nosig");
7828 DEFSYM (Qsignature
, "signature");
7829 DEFSYM (Qendian
, "endian");
7830 DEFSYM (Qbig
, "big");
7831 DEFSYM (Qlittle
, "little");
7833 DEFSYM (Qshift_jis
, "shift-jis");
7834 DEFSYM (Qbig5
, "big5");
7836 DEFSYM (Qcoding_system_p
, "coding-system-p");
7838 DEFSYM (Qcoding_system_error
, "coding-system-error");
7839 Fput (Qcoding_system_error
, Qerror_conditions
,
7840 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
7841 Fput (Qcoding_system_error
, Qerror_message
,
7842 build_string ("Invalid coding system"));
7844 /* Intern this now in case it isn't already done.
7845 Setting this variable twice is harmless.
7846 But don't staticpro it here--that is done in alloc.c. */
7847 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
7849 DEFSYM (Qtranslation_table
, "translation-table");
7850 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
7851 DEFSYM (Qtranslation_table_id
, "translation-table-id");
7852 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
7853 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
7855 DEFSYM (Qchar_coding_system
, "char-coding-system");
7857 Fput (Qchar_coding_system
, Qchar_table_extra_slots
, make_number (2));
7859 DEFSYM (Qvalid_codes
, "valid-codes");
7861 DEFSYM (Qemacs_mule
, "emacs-mule");
7863 Vcoding_category_table
7864 = Fmake_vector (make_number (coding_category_max
), Qnil
);
7865 staticpro (&Vcoding_category_table
);
7866 /* Followings are target of code detection. */
7867 ASET (Vcoding_category_table
, coding_category_iso_7
,
7868 intern ("coding-category-iso-7"));
7869 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
7870 intern ("coding-category-iso-7-tight"));
7871 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
7872 intern ("coding-category-iso-8-1"));
7873 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
7874 intern ("coding-category-iso-8-2"));
7875 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
7876 intern ("coding-category-iso-7-else"));
7877 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
7878 intern ("coding-category-iso-8-else"));
7879 ASET (Vcoding_category_table
, coding_category_utf_8
,
7880 intern ("coding-category-utf-8"));
7881 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
7882 intern ("coding-category-utf-16-be"));
7883 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
7884 intern ("coding-category-utf-16-le"));
7885 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
7886 intern ("coding-category-utf-16-be-nosig"));
7887 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
7888 intern ("coding-category-utf-16-le-nosig"));
7889 ASET (Vcoding_category_table
, coding_category_charset
,
7890 intern ("coding-category-charset"));
7891 ASET (Vcoding_category_table
, coding_category_sjis
,
7892 intern ("coding-category-sjis"));
7893 ASET (Vcoding_category_table
, coding_category_big5
,
7894 intern ("coding-category-big5"));
7895 ASET (Vcoding_category_table
, coding_category_ccl
,
7896 intern ("coding-category-ccl"));
7897 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
7898 intern ("coding-category-emacs-mule"));
7899 /* Followings are NOT target of code detection. */
7900 ASET (Vcoding_category_table
, coding_category_raw_text
,
7901 intern ("coding-category-raw-text"));
7902 ASET (Vcoding_category_table
, coding_category_undecided
,
7903 intern ("coding-category-undecided"));
/* Register the Lisp primitives with defsubr.  */

/* Predicates, minibuffer readers and validation.  */
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
defsubr (&Sread_non_nil_coding_system);
defsubr (&Scheck_coding_system);

/* Detection and safe-coding-system queries.  */
defsubr (&Sdetect_coding_region);
defsubr (&Sdetect_coding_string);
defsubr (&Sfind_coding_systems_region_internal);
defsubr (&Scheck_coding_systems_region);

/* Conversion of regions, strings and single characters.  */
defsubr (&Sdecode_coding_region);
defsubr (&Sencode_coding_region);
defsubr (&Sdecode_coding_string);
defsubr (&Sencode_coding_string);
defsubr (&Sdecode_sjis_char);
defsubr (&Sencode_sjis_char);
defsubr (&Sdecode_big5_char);
defsubr (&Sencode_big5_char);

/* Terminal and keyboard coding systems.  */
defsubr (&Sset_terminal_coding_system_internal);
defsubr (&Sset_safe_terminal_coding_system_internal);
defsubr (&Sterminal_coding_system);
defsubr (&Sset_keyboard_coding_system_internal);
defsubr (&Skeyboard_coding_system);

defsubr (&Sfind_operation_coding_system);
defsubr (&Sset_coding_system_priority);

/* Definition of coding systems and accessors for their parts.  */
defsubr (&Sdefine_coding_system_internal);
defsubr (&Sdefine_coding_system_alias);
defsubr (&Scoding_system_base);
defsubr (&Scoding_system_plist);
defsubr (&Scoding_system_aliases);
defsubr (&Scoding_system_eol_type);
defsubr (&Scoding_system_priority_list);
7936 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
7937 doc
: /* List of coding systems.
7939 Do not alter the value of this variable manually. This variable should be
7940 updated by the functions `define-coding-system' and
7941 `define-coding-system-alias'. */);
7942 Vcoding_system_list
= Qnil
;
7944 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
7945 doc
: /* Alist of coding system names.
7946 Each element is one element list of coding system name.
7947 This variable is given to `completing-read' as TABLE argument.
7949 Do not alter the value of this variable manually. This variable should be
7950 updated by the functions `define-coding-system' and
7951 `define-coding-system-alias'. */);
7952 Vcoding_system_alist
= Qnil
;
7954 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
7955 doc
: /* List of coding-categories (symbols) ordered by priority.
7957 On detecting a coding system, Emacs tries code detection algorithms
7958 associated with each coding-category one by one in this order. When
7959 one algorithm agrees with a byte sequence of source text, the coding
7960 system bound to the corresponding coding-category is selected. */);
7964 Vcoding_category_list
= Qnil
;
7965 for (i
= coding_category_max
- 1; i
>= 0; i
--)
7966 Vcoding_category_list
7967 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
7968 Vcoding_category_list
);
7971 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
7972 doc
: /* Specify the coding system for read operations.
7973 It is useful to bind this variable with `let', but do not set it globally.
7974 If the value is a coding system, it is used for decoding on read operation.
7975 If not, an appropriate element is used from one of the coding system alists:
7976 There are three such tables, `file-coding-system-alist',
7977 `process-coding-system-alist', and `network-coding-system-alist'. */);
7978 Vcoding_system_for_read
= Qnil
;
7980 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
7981 doc
: /* Specify the coding system for write operations.
7982 Programs bind this variable with `let', but you should not set it globally.
7983 If the value is a coding system, it is used for encoding of output,
7984 when writing it to a file and when sending it to a file or subprocess.
7986 If this does not specify a coding system, an appropriate element
7987 is used from one of the coding system alists:
7988 There are three such tables, `file-coding-system-alist',
7989 `process-coding-system-alist', and `network-coding-system-alist'.
7990 For output to files, if the above procedure does not specify a coding system,
7991 the value of `buffer-file-coding-system' is used. */);
7992 Vcoding_system_for_write
= Qnil
;
7994 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
7996 Coding system used in the latest file or process I/O. */);
7997 Vlast_coding_system_used
= Qnil
;
7999 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
8001 *Non-nil means always inhibit code conversion of end-of-line format.
8002 See info node `Coding Systems' and info node `Text and Binary' concerning
8003 such conversion. */);
8004 inhibit_eol_conversion
= 0;
8006 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
8008 Non-nil means process buffer inherits coding system of process output.
8009 Bind it to t if the process output is to be treated as if it were a file
8010 read from some filesystem. */);
8011 inherit_process_coding_system
= 0;
8013 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
8015 Alist to decide a coding system to use for a file I/O operation.
8016 The format is ((PATTERN . VAL) ...),
8017 where PATTERN is a regular expression matching a file name,
8018 VAL is a coding system, a cons of coding systems, or a function symbol.
8019 If VAL is a coding system, it is used for both decoding and encoding
8021 If VAL is a cons of coding systems, the car part is used for decoding,
8022 and the cdr part is used for encoding.
8023 If VAL is a function symbol, the function must return a coding system
8024 or a cons of coding systems which are used as above. The function gets
8025 the arguments with which `find-operation-coding-system' was called.
8027 See also the function `find-operation-coding-system'
8028 and the variable `auto-coding-alist'. */);
8029 Vfile_coding_system_alist
= Qnil
;
8031 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
8033 Alist to decide a coding system to use for a process I/O operation.
8034 The format is ((PATTERN . VAL) ...),
8035 where PATTERN is a regular expression matching a program name,
8036 VAL is a coding system, a cons of coding systems, or a function symbol.
8037 If VAL is a coding system, it is used for both decoding what received
8038 from the program and encoding what sent to the program.
8039 If VAL is a cons of coding systems, the car part is used for decoding,
8040 and the cdr part is used for encoding.
8041 If VAL is a function symbol, the function must return a coding system
8042 or a cons of coding systems which are used as above.
8044 See also the function `find-operation-coding-system'. */);
8045 Vprocess_coding_system_alist
= Qnil
;
8047 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
8049 Alist to decide a coding system to use for a network I/O operation.
8050 The format is ((PATTERN . VAL) ...),
8051 where PATTERN is a regular expression matching a network service name
8052 or is a port number to connect to,
8053 VAL is a coding system, a cons of coding systems, or a function symbol.
8054 If VAL is a coding system, it is used for both decoding what received
8055 from the network stream and encoding what sent to the network stream.
8056 If VAL is a cons of coding systems, the car part is used for decoding,
8057 and the cdr part is used for encoding.
8058 If VAL is a function symbol, the function must return a coding system
8059 or a cons of coding systems which are used as above.
8061 See also the function `find-operation-coding-system'. */);
8062 Vnetwork_coding_system_alist
= Qnil
;
8064 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
8065 doc
: /* Coding system to use with system messages.
8066 Also used for decoding keyboard input on X Window system. */);
8067 Vlocale_coding_system
= Qnil
;
8069 /* The eol mnemonics are reset in startup.el system-dependently. */
8070 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
8072 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8073 eol_mnemonic_unix
= build_string (":");
8075 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
8077 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8078 eol_mnemonic_dos
= build_string ("\\");
8080 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
8082 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8083 eol_mnemonic_mac
= build_string ("/");
8085 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
8087 *String displayed in mode line when end-of-line format is not yet determined. */);
8088 eol_mnemonic_undecided
= build_string (":");
8090 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
8092 *Non-nil enables character translation while encoding and decoding. */);
8093 Venable_character_translation
= Qt
;
8095 DEFVAR_LISP ("standard-translation-table-for-decode",
8096 &Vstandard_translation_table_for_decode
,
8097 doc
: /* Table for translating characters while decoding. */);
8098 Vstandard_translation_table_for_decode
= Qnil
;
8100 DEFVAR_LISP ("standard-translation-table-for-encode",
8101 &Vstandard_translation_table_for_encode
,
8102 doc
: /* Table for translating characters while encoding. */);
8103 Vstandard_translation_table_for_encode
= Qnil
;
8105 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
8106 doc
: /* Alist of charsets vs revision numbers.
8107 While encoding, if a charset (car part of an element) is found,
8108 designate it with the escape sequence identifying revision (cdr part
8109 of the element). */);
8110 Vcharset_revision_table
= Qnil
;
8112 DEFVAR_LISP ("default-process-coding-system",
8113 &Vdefault_process_coding_system
,
8114 doc
: /* Cons of coding systems used for process I/O by default.
8115 The car part is used for decoding a process output,
8116 the cdr part is used for encoding a text to be sent to a process. */);
8117 Vdefault_process_coding_system
= Qnil
;
8119 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
8121 Table of extra Latin codes in the range 128..159 (inclusive).
8122 This is a vector of length 256.
8123 If Nth element is non-nil, the existence of code N in a file
8124 \(or output of subprocess) doesn't prevent it from being detected as
8125 a coding system of ISO 2022 variant which has a flag
8126 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8127 or reading output of a subprocess.
8128 Only the 128th through 159th elements have a meaning. */);
8129 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
8131 DEFVAR_LISP ("select-safe-coding-system-function",
8132 &Vselect_safe_coding_system_function
,
8134 Function to call to select safe coding system for encoding a text.
8136 If set, this function is called to force a user to select a proper
8137 coding system which can encode the text in the case that a default
8138 coding system used in each operation can't encode the text.
8140 The default value is `select-safe-coding-system' (which see). */);
8141 Vselect_safe_coding_system_function
= Qnil
;
8143 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table
,
8145 Char-table containing safe coding systems of each characters.
8146 Each element doesn't include such generic coding systems that can
8147 encode any characters. They are in the first extra slot. */);
8148 Vchar_coding_system_table
= Fmake_char_table (Qchar_coding_system
, Qnil
);
8150 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8151 &inhibit_iso_escape_detection
,
8153 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8155 By default, on reading a file, Emacs tries to detect how the text is
8156 encoded. This code detection is sensitive to escape sequences. If
8157 the sequence is valid as ISO2022, the code is determined as one of
8158 the ISO2022 encodings, and the file is decoded by the corresponding
8159 coding system (e.g. `iso-2022-7bit').
8161 However, there may be a case that you want to read escape sequences in
8162 a file as is. In such a case, you can set this variable to non-nil.
8163 Then, as the code detection ignores any escape sequences, no file is
8164 detected as encoded in some ISO2022 encoding. The result is that all
8165 escape sequences become visible in a buffer.
8167 The default value is nil, and it is strongly recommended not to change
8168 it. That is because many Emacs Lisp source files that contain
8169 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8170 in Emacs's distribution, and they won't be decoded correctly on
8171 reading if you suppress escape sequence detection.
8173 The other way to read escape sequences in a file without decoding is
8174 to explicitly specify some coding system that doesn't use ISO2022's
8175 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8176 inhibit_iso_escape_detection
= 0;
8179 Lisp_Object args
[coding_arg_max
];
8180 Lisp_Object plist
[14];
8183 for (i
= 0; i
< coding_arg_max
; i
++)
8186 plist
[0] = intern (":name");
8187 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
8188 plist
[2] = intern (":mnemonic");
8189 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
8190 plist
[4] = intern (":coding-type");
8191 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
8192 plist
[6] = intern (":ascii-compatible-p");
8193 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
8194 plist
[8] = intern (":default-char");
8195 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
8196 plist
[10] = intern (":docstring");
8197 plist
[11] = build_string ("Do no conversion.\n\
8199 When you visit a file with this coding, the file is read into a\n\
8200 unibyte buffer as is, thus each byte of a file is treated as a\n\
8202 plist
[12] = intern (":eol-type");
8203 plist
[13] = args
[coding_arg_eol_type
] = Qunix
;
8204 args
[coding_arg_plist
] = Flist (14, plist
);
8205 Fdefine_coding_system_internal (coding_arg_max
, args
);
8208 setup_coding_system (Qno_conversion
, &keyboard_coding
);
8209 setup_coding_system (Qno_conversion
, &terminal_coding
);
8210 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
8214 emacs_strerror (error_number
)
8219 synchronize_system_messages_locale ();
8220 str
= strerror (error_number
);
8222 if (! NILP (Vlocale_coding_system
))
8224 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
8225 Vlocale_coding_system
,
8227 str
= (char *) XSTRING (dec
)->data
;